Merge remote-tracking branch 'origin/main' into garrytan/usage-telemetry

# Conflicts:
#	SKILL.md
#	TODOS.md
#	browse/SKILL.md
#	design-consultation/SKILL.md
#	design-review/SKILL.md
#	document-release/SKILL.md
#	plan-ceo-review/SKILL.md
#	plan-design-review/SKILL.md
#	plan-eng-review/SKILL.md
#	qa-only/SKILL.md
#	qa/SKILL.md
#	retro/SKILL.md
#	retro/SKILL.md.tmpl
#	review/SKILL.md
#	scripts/gen-skill-docs.ts
#	setup-browser-cookies/SKILL.md
#	ship/SKILL.md
This commit is contained in:
Garry Tan
2026-03-19 00:50:11 -07:00
81 changed files with 8178 additions and 609 deletions
+190
View File
@@ -0,0 +1,190 @@
#!/usr/bin/env bun
/**
* analytics — CLI for viewing gstack skill usage statistics.
*
* Reads ~/.gstack/analytics/skill-usage.jsonl and displays:
* - Top skills by invocation count
* - Per-repo skill breakdown
* - Safety hook fire events
*
* Usage:
* bun run scripts/analytics.ts [--period 7d|30d|all]
*/
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
/** A single telemetry record parsed from skill-usage.jsonl. */
export interface AnalyticsEvent {
  /** Name of the skill that was invoked (rendered as "/<skill>" in the report). */
  skill: string;
  /** Event timestamp — appears to be ISO 8601 UTC per the writing preamble; TODO confirm. */
  ts: string;
  /** Repository the skill ran in (basename of the git toplevel, or "unknown"). */
  repo: string;
  /** Event type; 'hook_fire' marks safety-hook events, absent/other means skill invocation. */
  event?: string;
  /** For hook_fire events: the safety pattern that triggered the hook. */
  pattern?: string;
}
/** JSONL analytics log: one event object per line, appended by skill preambles. */
const ANALYTICS_FILE = path.join(os.homedir(), '.gstack', 'analytics', 'skill-usage.jsonl');
/**
 * Parse JSONL content into AnalyticsEvent[], skipping malformed lines.
 *
 * A line is kept only when it parses as JSON to a non-null object whose
 * `ts` field is a string; blank lines, invalid JSON, and wrong-shaped
 * values are silently dropped.
 */
export function parseJSONL(content: string): AnalyticsEvent[] {
  const result: AnalyticsEvent[] = [];
  for (const rawLine of content.split('\n')) {
    const candidate = rawLine.trim();
    if (candidate.length === 0) continue;
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue; // malformed JSON — skip this line
    }
    const looksValid =
      typeof parsed === 'object' &&
      parsed !== null &&
      typeof (parsed as { ts?: unknown }).ts === 'string';
    if (looksValid) {
      result.push(parsed as AnalyticsEvent);
    }
  }
  return result;
}
/**
 * Filter events by period. Supports "<N>d" windows (e.g. "7d", "30d") and "all".
 *
 * Unrecognized period strings leave the list unfiltered. When a day window is
 * applied, events whose `ts` fails to parse as a Date are dropped.
 */
export function filterByPeriod(events: AnalyticsEvent[], period: string): AnalyticsEvent[] {
  if (period === 'all') return events;
  const dayMatch = /^(\d+)d$/.exec(period);
  if (!dayMatch) return events;
  const windowMs = parseInt(dayMatch[1], 10) * 86_400_000; // days → milliseconds
  const cutoff = new Date(Date.now() - windowMs);
  return events.filter(event => {
    const when = new Date(event.ts);
    return !Number.isNaN(when.getTime()) && when >= cutoff;
  });
}
/**
 * Format a human-readable report from a list of events.
 *
 * Sections (each omitted when it has no data): top skills by invocation
 * count, per-repo skill breakdown, and safety-hook fire counts, followed
 * by a total line. Events with event === 'hook_fire' count as hook fires;
 * all other events count as skill invocations.
 *
 * Fix: removed the unused `maxName`/`maxCount` locals that computed
 * Math.max over the sorted entries but were never read (dead code left
 * over from an earlier alignment scheme).
 *
 * @param events  Events to summarize (already period-filtered by the caller).
 * @param period  'all' or '<N>d'; used only for the "Period:" label.
 * @returns Multi-line report string (no trailing newline).
 */
export function formatReport(events: AnalyticsEvent[], period: string = 'all'): string {
  const skillEvents = events.filter(e => e.event !== 'hook_fire');
  const hookEvents = events.filter(e => e.event === 'hook_fire');
  const lines: string[] = [];
  lines.push('gstack skill usage analytics');
  lines.push('\u2550'.repeat(39)); // "═" double-line rule under the title
  lines.push('');
  const periodLabel = period === 'all' ? 'all time' : `last ${period.replace('d', ' days')}`;
  lines.push(`Period: ${periodLabel}`);
  // Top Skills — invocation counts, descending.
  const skillCounts = new Map<string, number>();
  for (const e of skillEvents) {
    skillCounts.set(e.skill, (skillCounts.get(e.skill) || 0) + 1);
  }
  if (skillCounts.size > 0) {
    lines.push('');
    lines.push('Top Skills');
    const sorted = [...skillCounts.entries()].sort((a, b) => b[1] - a[1]);
    for (const [name, count] of sorted) {
      const label = `/${name}`;
      const suffix = `${count} invocation${count === 1 ? '' : 's'}`;
      // Dot leader padding toward a 25-char target width, minimum 2 dots.
      const dotLen = Math.max(2, 25 - label.length - suffix.length);
      const dots = ' ' + '.'.repeat(dotLen) + ' ';
      lines.push(` ${label}${dots}${suffix}`);
    }
  }
  // By Repo — per-repo skill counts, repos alphabetical, skills by count.
  const repoSkills = new Map<string, Map<string, number>>();
  for (const e of skillEvents) {
    if (!repoSkills.has(e.repo)) repoSkills.set(e.repo, new Map());
    const m = repoSkills.get(e.repo)!;
    m.set(e.skill, (m.get(e.skill) || 0) + 1);
  }
  if (repoSkills.size > 0) {
    lines.push('');
    lines.push('By Repo');
    const sortedRepos = [...repoSkills.entries()].sort((a, b) => a[0].localeCompare(b[0]));
    for (const [repo, skills] of sortedRepos) {
      const parts = [...skills.entries()]
        .sort((a, b) => b[1] - a[1])
        .map(([s, c]) => `${s}(${c})`);
      lines.push(` ${repo}: ${parts.join(' ')}`);
    }
  }
  // Safety Hook Events — fire counts per pattern, descending.
  const hookCounts = new Map<string, number>();
  for (const e of hookEvents) {
    if (e.pattern) {
      hookCounts.set(e.pattern, (hookCounts.get(e.pattern) || 0) + 1);
    }
  }
  if (hookCounts.size > 0) {
    lines.push('');
    lines.push('Safety Hook Events');
    const sortedHooks = [...hookCounts.entries()].sort((a, b) => b[1] - a[1]);
    for (const [pattern, count] of sortedHooks) {
      const suffix = `${count} fire${count === 1 ? '' : 's'}`;
      const dotLen = Math.max(2, 25 - pattern.length - suffix.length);
      const dots = ' ' + '.'.repeat(dotLen) + ' ';
      lines.push(` ${pattern}${dots}${suffix}`);
    }
  }
  // Total — always shown, even for an empty event list.
  const totalSkills = skillEvents.length;
  const totalHooks = hookEvents.length;
  lines.push('');
  lines.push(`Total: ${totalSkills} skill invocation${totalSkills === 1 ? '' : 's'}, ${totalHooks} hook fire${totalHooks === 1 ? '' : 's'}`);
  return lines.join('\n');
}
/**
 * CLI entry point: parse the --period flag, load the JSONL analytics file,
 * and print a formatted report. Exits with status 0 and a friendly message
 * when the file is missing, empty, or contains no parseable events.
 */
function main() {
  // Scan argv for `--period <value>`; a later occurrence overrides an earlier one.
  const args = process.argv.slice(2);
  let period = 'all';
  let i = 0;
  while (i < args.length) {
    if (args[i] === '--period' && i + 1 < args.length) {
      period = args[i + 1];
      i += 2; // skip the flag's value
    } else {
      i += 1;
    }
  }
  // Bail out politely when there is nothing to report.
  if (!fs.existsSync(ANALYTICS_FILE)) {
    console.log('No analytics data found.');
    process.exit(0);
  }
  const raw = fs.readFileSync(ANALYTICS_FILE, 'utf-8').trim();
  if (!raw) {
    console.log('No analytics data found.');
    process.exit(0);
  }
  const events = parseJSONL(raw);
  if (events.length === 0) {
    console.log('No analytics data found.');
    process.exit(0);
  }
  console.log(formatReport(filterByPeriod(events, period), period));
}
// Run only when executed directly (Bun's import.meta.main), not when imported.
if (import.meta.main) {
  main();
}
+82 -33
View File
@@ -17,9 +17,16 @@ import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '..');
const DRY_RUN = process.argv.includes('--dry-run');
// ─── Template Context ───────────────────────────────────────
interface TemplateContext {
skillName: string;
tmplPath: string;
}
// ─── Placeholder Resolvers ──────────────────────────────────
function generateCommandReference(): string {
function generateCommandReference(_ctx: TemplateContext): string {
// Group commands by category
const groups = new Map<string, Array<{ command: string; description: string; usage?: string }>>();
for (const [cmd, meta] of Object.entries(COMMAND_DESCRIPTIONS)) {
@@ -55,7 +62,7 @@ function generateCommandReference(): string {
return sections.join('\n').trimEnd();
}
function generateSnapshotFlags(): string {
function generateSnapshotFlags(_ctx: TemplateContext): string {
const lines: string[] = [
'The snapshot is your primary tool for understanding and interacting with pages.',
'',
@@ -94,7 +101,7 @@ function generateSnapshotFlags(): string {
return lines.join('\n');
}
function generatePreamble(): string {
function generatePreamble(ctx: TemplateContext): string {
return `## Preamble (run first)
\`\`\`bash
@@ -118,7 +125,8 @@ _SESSION_ID="$$-$(date +%s)"
echo "TELEMETRY: \${_TEL:-off}"
echo "TEL_PROMPTED: $_TEL_PROMPTED"
mkdir -p ~/.gstack/analytics
for _PF in ~/.gstack/analytics/.pending-* 2>/dev/null; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done
echo '{"skill":"${ctx.skillName}","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done
\`\`\`
If \`PROACTIVE\` is \`"false"\`, do not proactively suggest gstack skills — only invoke
@@ -233,13 +241,37 @@ Hey gstack team — ran into this while using /{skill-name}:
Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-js-no-await\`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
## Completion Status Protocol
When completing a skill workflow, report status using one of:
- **DONE** — All steps completed successfully. Evidence provided for each claim.
- **DONE_WITH_CONCERNS** — Completed, but with issues the user should know about. List each concern.
- **BLOCKED** — Cannot proceed. State what is blocking and what was tried.
- **NEEDS_CONTEXT** — Missing information required to continue. State exactly what you need.
### Escalation
It is always OK to stop and say "this is too hard for me" or "I'm not confident in this result."
Bad work is worse than no work. You will not be penalized for escalating.
- If you have attempted a task 3 times without success, STOP and escalate.
- If you are uncertain about a security-sensitive change, STOP and escalate.
- If the scope of work exceeds what you can verify, STOP and escalate.
Escalation format:
\`\`\`
STATUS: BLOCKED | NEEDS_CONTEXT
REASON: [1-2 sentences]
ATTEMPTED: [what you tried]
RECOMMENDATION: [what the user should do next]
\`\`\`
## Telemetry (run last)
After the skill workflow completes (success, error, or abort), write the .pending marker
with the actual skill name, then log the telemetry event. Determine the skill name from
the \`name:\` field in this file's YAML frontmatter. Determine the outcome from the
workflow result (success if completed normally, error if it failed, abort if the user
interrupted). Run this bash:
After the skill workflow completes (success, error, or abort), log the telemetry event.
Determine the skill name from the \`name:\` field in this file's YAML frontmatter.
Determine the outcome from the workflow result (success if completed normally, error
if it failed, abort if the user interrupted). Run this bash:
\`\`\`bash
_TEL_END=$(date +%s)
@@ -256,7 +288,7 @@ If you cannot determine the outcome, use "unknown". This runs in the background
never blocks the user.`;
}
function generateBrowseSetup(): string {
function generateBrowseSetup(_ctx: TemplateContext): string {
return `## SETUP (run this check BEFORE any browse command)
\`\`\`bash
@@ -277,7 +309,7 @@ If \`NEEDS_SETUP\`:
3. If \`bun\` is not installed: \`curl -fsSL https://bun.sh/install | bash\``;
}
function generateBaseBranchDetect(): string {
function generateBaseBranchDetect(_ctx: TemplateContext): string {
return `## Step 0: Detect base branch
Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
@@ -298,7 +330,7 @@ branch name wherever the instructions say "the base branch."
---`;
}
function generateQAMethodology(): string {
function generateQAMethodology(_ctx: TemplateContext): string {
return `## Modes
### Diff-aware (automatic when on a feature branch with no URL)
@@ -319,6 +351,8 @@ This is the **primary mode** for developers verifying their work. When the user
- API endpoints → test them directly with \`$B js "await fetch('/api/...')"\`
- Static pages (markdown, HTML) → navigate to them directly
**If no obvious pages/routes are identified from the diff:** Do not skip browser testing. The user invoked /qa because they want browser-based verification. Fall back to Quick mode — navigate to the homepage, follow the top 5 navigation targets, check console for errors, and test any interactive elements found. Backend, config, and infrastructure changes affect app behavior — always verify the app still works.
3. **Detect the running app** — check common local dev ports:
\`\`\`bash
$B goto http://localhost:3000 2>/dev/null && echo "Found app on :3000" || \\
@@ -572,16 +606,17 @@ Minimum 0 per category.
8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
10. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.`;
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.
12. **Never refuse to use the browser.** When the user invokes /qa or /qa-only, they are requesting browser-based testing. Never suggest evals, unit tests, or other alternatives as a substitute. Even if the diff appears to have no UI changes, backend changes affect app behavior — always open the browser and test.`;
}
function generateDesignReviewLite(): string {
function generateDesignReviewLite(_ctx: TemplateContext): string {
return `## Design Review (conditional, diff-scoped)
Check if the diff touches frontend files using \`gstack-diff-scope\`:
\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)
source <(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)
\`\`\`
**If \`SCOPE_FRONTEND=false\`:** Skip design review silently. No output.
@@ -604,17 +639,15 @@ eval $(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)
6. **Log the result** for the Review Readiness Dashboard:
\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)
mkdir -p ~/.gstack/projects/$SLUG
echo '{"skill":"design-review-lite","timestamp":"TIMESTAMP","status":"STATUS","findings":N,"auto_fixed":M}' >> ~/.gstack/projects/$SLUG/$BRANCH-reviews.jsonl
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"design-review-lite","timestamp":"TIMESTAMP","status":"STATUS","findings":N,"auto_fixed":M,"commit":"COMMIT"}'
\`\`\`
Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count.`;
Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of \`git rev-parse --short HEAD\`.`;
}
// NOTE: design-checklist.md is a subset of this methodology for code-level detection.
// When adding items here, also update review/design-checklist.md, and vice versa.
function generateDesignMethodology(): string {
function generateDesignMethodology(_ctx: TemplateContext): string {
return `## Modes
### Full (default)
@@ -864,8 +897,7 @@ Compare screenshots and observations across pages for:
**Project-scoped:**
\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)
mkdir -p ~/.gstack/projects/$SLUG
source <(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null) && mkdir -p ~/.gstack/projects/$SLUG
\`\`\`
Write to: \`~/.gstack/projects/{slug}/{user}-{branch}-design-audit-{datetime}.md\`
@@ -948,19 +980,16 @@ Tie everything to user goals and product objectives. Always suggest specific imp
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.`;
}
function generateReviewDashboard(): string {
function generateReviewDashboard(_ctx: TemplateContext): string {
return `## Review Readiness Dashboard
After completing the review, read the review log and config to display the dashboard.
\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)
cat ~/.gstack/projects/$SLUG/$BRANCH-reviews.jsonl 2>/dev/null || echo "NO_REVIEWS"
echo "---CONFIG---"
~/.claude/skills/gstack/bin/gstack-config get skip_eng_review 2>/dev/null || echo "false"
~/.claude/skills/gstack/bin/gstack-review-read
\`\`\`
Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between \`plan-design-review\` (full visual audit) and \`design-review-lite\` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display:
Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between \`plan-design-review\` (full visual audit) and \`design-review-lite\` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display:
\`\`\`
+====================================================================+
@@ -971,6 +1000,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl
| Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES |
| CEO Review | 0 | — | — | no |
| Design Review | 0 | — | — | no |
| Codex Review | 0 | — | — | no |
+--------------------------------------------------------------------+
| VERDICT: CLEARED — Eng Review passed |
+====================================================================+
@@ -980,15 +1010,22 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl
- **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \\\`gstack-config set skip_eng_review true\\\` (the "don't bother me" setting).
- **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup.
- **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes.
- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed.
**Verdict logic:**
- **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \\\`skip_eng_review\\\` is \\\`true\\\`)
- **NOT CLEARED**: Eng Review missing, stale (>7 days), or has open issues
- CEO and Design reviews are shown for context but never block shipping
- If \\\`skip_eng_review\\\` config is \\\`true\\\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED`;
- CEO, Design, and Codex reviews are shown for context but never block shipping
- If \\\`skip_eng_review\\\` config is \\\`true\\\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED
**Staleness detection:** After displaying the dashboard, check if any existing reviews may be stale:
- Parse the \\\`---HEAD---\\\` section from the bash output to get the current HEAD commit hash
- For each review entry that has a \\\`commit\\\` field: compare it against the current HEAD. If different, count elapsed commits: \\\`git rev-list --count STORED_COMMIT..HEAD\\\`. Display: "Note: {skill} review from {date} may be stale — {N} commits since review"
- For entries without a \\\`commit\\\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection"
- If all reviews match the current HEAD, do not display any staleness notes`;
}
function generateTestBootstrap(): string {
function generateTestBootstrap(_ctx: TemplateContext): string {
return `## Test Framework Bootstrap
**Detect existing test framework and project runtime:**
@@ -1143,7 +1180,7 @@ Only commit if there are changes. Stage all bootstrap files (config, test direct
---`;
}
const RESOLVERS: Record<string, () => string> = {
const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = {
COMMAND_REFERENCE: generateCommandReference,
SNAPSHOT_FLAGS: generateSnapshotFlags,
PREAMBLE: generatePreamble,
@@ -1165,11 +1202,16 @@ function processTemplate(tmplPath: string): { outputPath: string; content: strin
const relTmplPath = path.relative(ROOT, tmplPath);
const outputPath = tmplPath.replace(/\.tmpl$/, '');
// Extract skill name from frontmatter for TemplateContext
const nameMatch = tmplContent.match(/^name:\s*(.+)$/m);
const skillName = nameMatch ? nameMatch[1].trim() : path.basename(path.dirname(tmplPath));
const ctx: TemplateContext = { skillName, tmplPath };
// Replace placeholders
let content = tmplContent.replace(/\{\{(\w+)\}\}/g, (match, name) => {
const resolver = RESOLVERS[name];
if (!resolver) throw new Error(`Unknown placeholder {{${name}}} in ${relTmplPath}`);
return resolver();
return resolver(ctx);
});
// Check for any remaining unresolved placeholders
@@ -1206,11 +1248,18 @@ function findTemplates(): string[] {
path.join(ROOT, 'plan-ceo-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'plan-eng-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'retro', 'SKILL.md.tmpl'),
path.join(ROOT, 'office-hours', 'SKILL.md.tmpl'),
path.join(ROOT, 'investigate', 'SKILL.md.tmpl'),
path.join(ROOT, 'gstack-upgrade', 'SKILL.md.tmpl'),
path.join(ROOT, 'plan-design-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'design-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'design-consultation', 'SKILL.md.tmpl'),
path.join(ROOT, 'document-release', 'SKILL.md.tmpl'),
path.join(ROOT, 'codex', 'SKILL.md.tmpl'),
path.join(ROOT, 'careful', 'SKILL.md.tmpl'),
path.join(ROOT, 'freeze', 'SKILL.md.tmpl'),
path.join(ROOT, 'guard', 'SKILL.md.tmpl'),
path.join(ROOT, 'unfreeze', 'SKILL.md.tmpl'),
];
for (const p of candidates) {
if (fs.existsSync(p)) templates.push(p);