feat: telemetry data integrity — source tagging, UUID fingerprint, duration guards

- Add source field (live/test/dev) to telemetry pipeline: --source flag in
  gstack-telemetry-log, GSTACK_TELEMETRY_SOURCE env fallback, pass-through
  in telemetry-sync, source=eq.live filter on all dashboard queries
- Replace SHA-256 installation_id with UUID install_fingerprint for all tiers
  (not just community). Expand-contract migration: ADD new column + trigger
  to copy installation_id, preserving backward compat with old clients
- Fix duration bug: persist _TEL_START to file via $PPID (stable across bash
  blocks), drop (null out) durations above 86400s or below zero
- Ungate update-check pings from telemetry=off — sends only version + OS +
  a random, persistent install UUID. update-check now generates .install-id
  itself, so telemetry=off users get one too
- Migration 003: source columns, install_fingerprint, duration CHECK
  constraint, indexes, recreated views with source filter, growth funnel
  (first-seen based), materialized views for daily installs + version adoption
- E2E test isolation: session-runner sets GSTACK_TELEMETRY_SOURCE=test
- 8 new telemetry tests (source field, duration caps, fingerprint persistence)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-23 15:48:25 -07:00
parent b437b531b7
commit 6bd6d5ba0f
8 changed files with 269 additions and 50 deletions
+9 -14
View File
@@ -48,20 +48,15 @@ echo ""
# ─── Weekly active installs ──────────────────────────────────
# UTC timestamp for "7 days ago": tries the BSD date syntax (-v-7d) first, then
# the GNU syntax (-d '7 days ago'); falls back to an empty string, in which case
# the whole section below is skipped.
WEEK_AGO="$(date -u -v-7d +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo "")"
if [ -n "$WEEK_AGO" ]; then
PULSE="$(curl -sf --max-time 10 \
"${SUPABASE_URL}/functions/v1/community-pulse" \
# Direct REST query (replaces unreliable community-pulse edge function)
# Selects install_fingerprint from update_checks rows with checked_at >= WEEK_AGO
# and source = live, then counts distinct fingerprints locally by extracting the
# "install_fingerprint":"…" pairs from the response (grep -o | sort -u | wc -l).
WEEKLY="$(curl -sf --max-time 10 \
"${SUPABASE_URL}/rest/v1/update_checks?select=install_fingerprint&checked_at=gte.${WEEK_AGO}&source=eq.live" \
-H "apikey: ${ANON_KEY}" \
-H "Authorization: Bearer ${ANON_KEY}" \
2>/dev/null || echo '{"weekly_active":0}')"
2>/dev/null | grep -o '"install_fingerprint":"[^"]*"' | sort -u | wc -l | tr -d ' ')"
# Defensive default: wc -l normally prints 0 even when curl fails or nothing
# matches, so this only matters if the pipeline produced no output at all.
WEEKLY="${WEEKLY:-0}"
WEEKLY="$(echo "$PULSE" | grep -o '"weekly_active":[0-9]*' | grep -o '[0-9]*' || echo "0")"
CHANGE="$(echo "$PULSE" | grep -o '"change_pct":[0-9-]*' | grep -o '[0-9-]*' || echo "0")"
echo "Weekly active installs: ${WEEKLY}"
if [ "$CHANGE" -gt 0 ] 2>/dev/null; then
echo " Change: +${CHANGE}%"
elif [ "$CHANGE" -lt 0 ] 2>/dev/null; then
echo " Change: ${CHANGE}%"
fi
echo "Weekly active installs: ${WEEKLY} unique"
echo ""
fi
@@ -70,7 +65,7 @@ echo "Top skills (last 7 days)"
echo "────────────────────────"
# Query telemetry_events, group by skill
EVENTS="$(query "telemetry_events" "select=skill,gstack_version,session_id&event_type=eq.skill_run&event_timestamp=gte.${WEEK_AGO}&limit=1000" 2>/dev/null || echo "[]")"
EVENTS="$(query "telemetry_events" "select=skill,gstack_version,session_id&event_type=eq.skill_run&event_timestamp=gte.${WEEK_AGO}&source=eq.live&limit=1000" 2>/dev/null || echo "[]")"
if [ "$EVENTS" != "[]" ] && [ -n "$EVENTS" ]; then
echo "$EVENTS" | grep -o '"skill":"[^"]*"' | awk -F'"' '{print $4}' | sort | uniq -c | sort -rn | head -10 | while read -r COUNT SKILL; do
@@ -85,7 +80,7 @@ echo ""
echo "Top errors (last 7 days)"
echo "────────────────────────"
ERRORS="$(query "telemetry_events" "select=skill,error_class,error_message,failed_step,duration_s,session_id&outcome=eq.error&event_timestamp=gte.${WEEK_AGO}&order=event_timestamp.desc&limit=200" 2>/dev/null || echo "[]")"
ERRORS="$(query "telemetry_events" "select=skill,error_class,error_message,failed_step,duration_s,session_id&outcome=eq.error&event_timestamp=gte.${WEEK_AGO}&source=eq.live&order=event_timestamp.desc&limit=200" 2>/dev/null || echo "[]")"
if [ "$ERRORS" != "[]" ] && [ -n "$ERRORS" ]; then
# Group by skill + error_class, show count and example message
+28 -15
View File
@@ -35,6 +35,7 @@ ERROR_CLASS=""
ERROR_MESSAGE=""
FAILED_STEP=""
EVENT_TYPE="skill_run"
SOURCE=""
while [ $# -gt 0 ]; do
case "$1" in
@@ -47,10 +48,14 @@ while [ $# -gt 0 ]; do
--error-message) ERROR_MESSAGE="$2"; shift 2 ;;
--failed-step) FAILED_STEP="$2"; shift 2 ;;
--event-type) EVENT_TYPE="$2"; shift 2 ;;
--source) SOURCE="$2"; shift 2 ;;
*) shift ;;
esac
done
# Source: flag > env > default 'live'
# Resolution order: the value parsed from --source wins; otherwise the
# GSTACK_TELEMETRY_SOURCE environment variable; otherwise the literal "live".
# An empty value counts as unset at both levels because of the ":-" expansions.
SOURCE="${SOURCE:-${GSTACK_TELEMETRY_SOURCE:-live}}"
# ─── Read telemetry tier ─────────────────────────────────────
# Ask the config helper for the "telemetry" setting. Its stderr is silenced and
# "|| true" keeps a failing lookup from producing a non-zero status here.
TIER="$("$CONFIG_CMD" get telemetry 2>/dev/null || true)"
# A missing or empty setting is treated as "off".
TIER="${TIER:-off}"
@@ -109,19 +114,19 @@ if [ -d "$STATE_DIR/sessions" ]; then
[ -n "$_SC" ] && [ "$_SC" -gt 0 ] 2>/dev/null && SESSIONS="$_SC"
fi
# Generate installation_id for community tier
INSTALL_ID=""
if [ "$TIER" = "community" ]; then
HOST="$(hostname 2>/dev/null || echo "unknown")"
USER="$(whoami 2>/dev/null || echo "unknown")"
if command -v shasum >/dev/null 2>&1; then
INSTALL_ID="$(printf '%s-%s' "$HOST" "$USER" | shasum -a 256 | awk '{print $1}')"
elif command -v sha256sum >/dev/null 2>&1; then
INSTALL_ID="$(printf '%s-%s' "$HOST" "$USER" | sha256sum | awk '{print $1}')"
elif command -v openssl >/dev/null 2>&1; then
INSTALL_ID="$(printf '%s-%s' "$HOST" "$USER" | openssl dgst -sha256 | awk '{print $NF}')"
# Generate/read persistent UUID fingerprint (all tiers, not just community)
INSTALL_FP=""
FP_FILE="$STATE_DIR/.install-id"
# Reuse a previously stored ID if the file exists; all whitespace (including the
# trailing newline) is stripped, so a blank file yields an empty value.
if [ -f "$FP_FILE" ]; then
INSTALL_FP="$(cat "$FP_FILE" 2>/dev/null | tr -d '[:space:]')"
fi
# No usable stored ID: generate one. Generators are tried in order — uuidgen,
# /proc/sys/kernel/random/uuid, then python3's uuid.uuid4() — and the result is
# an empty string if none of them succeeds.
if [ -z "$INSTALL_FP" ]; then
INSTALL_FP="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())' 2>/dev/null || echo "")"
INSTALL_FP="$(echo "$INSTALL_FP" | tr '[:upper:]' '[:lower:]')" # normalize case
# Persist only a non-empty ID so later runs read the same value back.
if [ -n "$INSTALL_FP" ]; then
mkdir -p "$STATE_DIR"
echo "$INSTALL_FP" > "$FP_FILE"
fi
# If no SHA-256 command available, install_id stays empty
fi
# Local-only fields (never sent remotely)
@@ -145,20 +150,28 @@ ERR_MSG_FIELD="null"
STEP_FIELD="null"
[ -n "$FAILED_STEP" ] && STEP_FIELD="\"$(echo "$FAILED_STEP" | head -c 30)\""
# Discard unreasonable durations: values above 86400s (24h) or below zero are
# cleared rather than clamped, so they are emitted as JSON null below.
# NOTE(review): if DURATION is not an integer, both numeric tests fail (their
# error output is suppressed) and the value passes through unchanged — confirm
# it is validated before this point.
if [ -n "$DURATION" ] && [ "$DURATION" -gt 86400 ] 2>/dev/null; then
DURATION="" # null if > 24h
fi
if [ -n "$DURATION" ] && [ "$DURATION" -lt 0 ] 2>/dev/null; then
DURATION="" # null if negative
fi
# DUR_FIELD is the literal text placed after "duration_s": in the JSON line:
# the bare number when DURATION survived the checks, otherwise null.
DUR_FIELD="null"
[ -n "$DURATION" ] && DUR_FIELD="$DURATION"
INSTALL_FIELD="null"
[ -n "$INSTALL_ID" ] && INSTALL_FIELD="\"$INSTALL_ID\""
[ -n "$INSTALL_FP" ] && INSTALL_FIELD="\"$INSTALL_FP\""
BROWSE_BOOL="false"
[ "$USED_BROWSE" = "true" ] && BROWSE_BOOL="true"
printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"error_message":%s,"failed_step":%s,"used_browse":%s,"sessions":%s,"installation_id":%s,"_repo_slug":"%s","_branch":"%s"}\n' \
printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"error_message":%s,"failed_step":%s,"used_browse":%s,"sessions":%s,"install_fingerprint":%s,"source":"%s","_repo_slug":"%s","_branch":"%s"}\n' \
"$TS" "$EVENT_TYPE" "$SKILL" "$SESSION_ID" "$GSTACK_VERSION" "$OS" "$ARCH" \
"$DUR_FIELD" "$OUTCOME" "$ERR_FIELD" "$ERR_MSG_FIELD" "$STEP_FIELD" \
"$BROWSE_BOOL" "${SESSIONS:-1}" \
"$INSTALL_FIELD" "$REPO_SLUG" "$BRANCH" >> "$JSONL_FILE" 2>/dev/null || true
"$INSTALL_FIELD" "$SOURCE" "$REPO_SLUG" "$BRANCH" >> "$JSONL_FILE" 2>/dev/null || true
# ─── Trigger sync if tier is not off ─────────────────────────
SYNC_CMD="$GSTACK_DIR/bin/gstack-telemetry-sync"
+2 -5
View File
@@ -76,19 +76,16 @@ while IFS= read -r LINE; do
echo "$LINE" | grep -q '^{' || continue
# Strip local-only fields + map JSONL field names to Postgres column names
# Backward compat: map old installation_id → install_fingerprint for unsent entries
# Removed keys (each matched together with its leading comma, and only when the
# value is a quoted string): _repo_slug, _branch, repo.
# Renamed keys: v → schema_version, ts → event_timestamp,
# sessions → concurrent_sessions, installation_id → install_fingerprint.
# All substitutions are textual and applied to the whole line (/g).
CLEAN="$(echo "$LINE" | sed \
-e 's/,"_repo_slug":"[^"]*"//g' \
-e 's/,"_branch":"[^"]*"//g' \
-e 's/"v":/"schema_version":/g' \
-e 's/"ts":/"event_timestamp":/g' \
-e 's/"sessions":/"concurrent_sessions":/g' \
-e 's/"installation_id":/"install_fingerprint":/g' \
-e 's/,"repo":"[^"]*"//g')"
# If anonymous tier, strip installation_id
if [ "$TIER" = "anonymous" ]; then
CLEAN="$(echo "$CLEAN" | sed 's/,"installation_id":"[^"]*"//g; s/,"installation_id":null//g')"
fi
if [ "$FIRST" = "true" ]; then
FIRST=false
else
+19 -4
View File
@@ -167,9 +167,24 @@ if [ -z "${GSTACK_TELEMETRY_ENDPOINT:-}" ] && [ -f "$GSTACK_DIR/supabase/config.
fi
_SUPA_ENDPOINT="${GSTACK_TELEMETRY_ENDPOINT:-}"
_SUPA_KEY="${GSTACK_SUPABASE_ANON_KEY:-}"
# Respect telemetry opt-out — don't ping Supabase if user set telemetry: off
_TEL_TIER="$("$GSTACK_DIR/bin/gstack-config" get telemetry 2>/dev/null || true)"
if [ -n "$_SUPA_ENDPOINT" ] && [ -n "$_SUPA_KEY" ] && [ "${_TEL_TIER:-off}" != "off" ]; then
# Generate/read install fingerprint (runs for ALL tiers including off)
_FP=""
_FP_FILE="$STATE_DIR/.install-id"
# Reuse a previously stored ID if the file exists; all whitespace (including the
# trailing newline) is stripped, so a blank file yields an empty value.
if [ -f "$_FP_FILE" ]; then
_FP="$(cat "$_FP_FILE" 2>/dev/null | tr -d '[:space:]')"
fi
# No usable stored ID: generate one. Generators are tried in order — uuidgen,
# /proc/sys/kernel/random/uuid, then python3's uuid.uuid4() — and the result is
# an empty string if none of them succeeds. The value is lowercased before use.
if [ -z "$_FP" ]; then
_FP="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())' 2>/dev/null || echo "")"
_FP="$(echo "$_FP" | tr '[:upper:]' '[:lower:]')"
# Persist only a non-empty ID so later runs read the same value back.
if [ -n "$_FP" ]; then
mkdir -p "$STATE_DIR"
echo "$_FP" > "$_FP_FILE"
fi
fi
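# Update-check pings always fire (ungated from telemetry tier).
# This sends only: version, OS, and the install UUID (randomly generated once,
# then reused from .install-id). No usage data.
# Equivalent to what GitHub sees in HTTP access logs for VERSION.
# NOTE(review): unlike an access log, the UUID is stable across pings — confirm
# the equivalence claim above is still accurate.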
if [ -n "$_SUPA_ENDPOINT" ] && [ -n "$_SUPA_KEY" ]; then
_OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
curl -sf --max-time 5 \
-X POST "${_SUPA_ENDPOINT}/update_checks" \
@@ -177,7 +192,7 @@ if [ -n "$_SUPA_ENDPOINT" ] && [ -n "$_SUPA_KEY" ] && [ "${_TEL_TIER:-off}" != "
-H "apikey: ${_SUPA_KEY}" \
-H "Authorization: Bearer ${_SUPA_KEY}" \
-H "Prefer: return=minimal" \
-d "{\"gstack_version\":\"$LOCAL\",\"os\":\"$_OS\"}" \
-d "{\"gstack_version\":\"$LOCAL\",\"os\":\"$_OS\",\"install_fingerprint\":\"${_FP}\"}" \
>/dev/null 2>&1 &
fi