feat: telemetry data integrity — source tagging, UUID fingerprint, duration guards

- Add source field (live/test/dev) to telemetry pipeline: --source flag in
  gstack-telemetry-log, GSTACK_TELEMETRY_SOURCE env fallback, pass-through
  in telemetry-sync, source=eq.live filter on all dashboard queries
- Replace SHA-256 installation_id with UUID install_fingerprint for all tiers
  (not just community). Expand-contract migration: ADD new column + trigger
  to copy installation_id, preserving backward compat with old clients
- Fix duration bug: persist _TEL_START to file via $PPID (stable across bash
  blocks), cap durations at 86400s, reject negative values
- Ungate update-check pings from telemetry=off — sends only version + OS +
  random UUID. Generate .install-id in update-check for telemetry=off users
- Migration 003: source columns, install_fingerprint, duration CHECK
  constraint, indexes, recreated views with source filter, growth funnel
  (first-seen based), materialized views for daily installs + version adoption
- E2E test isolation: session-runner sets GSTACK_TELEMETRY_SOURCE=test
- 8 new telemetry tests (source field, duration caps, fingerprint persistence)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-23 15:48:25 -07:00
parent b437b531b7
commit 6bd6d5ba0f
8 changed files with 269 additions and 50 deletions
+9 -14
View File
@@ -48,20 +48,15 @@ echo ""
# ─── Weekly active installs ──────────────────────────────────
WEEK_AGO="$(date -u -v-7d +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo "")"
if [ -n "$WEEK_AGO" ]; then
PULSE="$(curl -sf --max-time 10 \
"${SUPABASE_URL}/functions/v1/community-pulse" \
# Direct REST query (replaces unreliable community-pulse edge function)
WEEKLY="$(curl -sf --max-time 10 \
"${SUPABASE_URL}/rest/v1/update_checks?select=install_fingerprint&checked_at=gte.${WEEK_AGO}&source=eq.live" \
-H "apikey: ${ANON_KEY}" \
-H "Authorization: Bearer ${ANON_KEY}" \
2>/dev/null || echo '{"weekly_active":0}')"
2>/dev/null | grep -o '"install_fingerprint":"[^"]*"' | sort -u | wc -l | tr -d ' ')"
WEEKLY="${WEEKLY:-0}"
WEEKLY="$(echo "$PULSE" | grep -o '"weekly_active":[0-9]*' | grep -o '[0-9]*' || echo "0")"
CHANGE="$(echo "$PULSE" | grep -o '"change_pct":[0-9-]*' | grep -o '[0-9-]*' || echo "0")"
echo "Weekly active installs: ${WEEKLY}"
if [ "$CHANGE" -gt 0 ] 2>/dev/null; then
echo " Change: +${CHANGE}%"
elif [ "$CHANGE" -lt 0 ] 2>/dev/null; then
echo " Change: ${CHANGE}%"
fi
echo "Weekly active installs: ${WEEKLY} unique"
echo ""
fi
@@ -70,7 +65,7 @@ echo "Top skills (last 7 days)"
echo "────────────────────────"
# Query telemetry_events, group by skill
EVENTS="$(query "telemetry_events" "select=skill,gstack_version,session_id&event_type=eq.skill_run&event_timestamp=gte.${WEEK_AGO}&limit=1000" 2>/dev/null || echo "[]")"
EVENTS="$(query "telemetry_events" "select=skill,gstack_version,session_id&event_type=eq.skill_run&event_timestamp=gte.${WEEK_AGO}&source=eq.live&limit=1000" 2>/dev/null || echo "[]")"
if [ "$EVENTS" != "[]" ] && [ -n "$EVENTS" ]; then
echo "$EVENTS" | grep -o '"skill":"[^"]*"' | awk -F'"' '{print $4}' | sort | uniq -c | sort -rn | head -10 | while read -r COUNT SKILL; do
@@ -85,7 +80,7 @@ echo ""
echo "Top errors (last 7 days)"
echo "────────────────────────"
ERRORS="$(query "telemetry_events" "select=skill,error_class,error_message,failed_step,duration_s,session_id&outcome=eq.error&event_timestamp=gte.${WEEK_AGO}&order=event_timestamp.desc&limit=200" 2>/dev/null || echo "[]")"
ERRORS="$(query "telemetry_events" "select=skill,error_class,error_message,failed_step,duration_s,session_id&outcome=eq.error&event_timestamp=gte.${WEEK_AGO}&source=eq.live&order=event_timestamp.desc&limit=200" 2>/dev/null || echo "[]")"
if [ "$ERRORS" != "[]" ] && [ -n "$ERRORS" ]; then
# Group by skill + error_class, show count and example message