Merge branch 'main' into garrytan/team-supabase-store

Resolved conflicts across 23 files. Key merge decisions:
- Adopted main's TemplateContext type in gen-skill-docs.ts
- Adopted main's new features (trigger phrases, codex integration,
  analytics, proactive config, review chaining)
- Replaced gstack-review-log/gstack-review-read helpers with inline
  approach using $PROJECTS_DIR/$SLUG/reviews/$BRANCH.jsonl paths
- Added "commit":"COMMIT" field to all review log entries (from main)
- Kept our $PROJECTS_DIR/$SLUG path reorganization throughout
- Added Codex E2E test from main + our E2E isolation cleanup

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-19 17:16:08 -07:00
84 changed files with 8122 additions and 1234 deletions
+190
View File
@@ -0,0 +1,190 @@
#!/usr/bin/env bun
/**
* analytics — CLI for viewing gstack skill usage statistics.
*
* Reads ~/.gstack/analytics/skill-usage.jsonl and displays:
* - Top skills by invocation count
* - Per-repo skill breakdown
* - Safety hook fire events
*
* Usage:
* bun run scripts/analytics.ts [--period 7d|30d|all]
*/
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
/**
 * One record from the skill-usage JSONL log.
 *
 * parseJSONL() only verifies that `ts` is a string before casting a parsed
 * line to this type, so the other fields are not guaranteed at runtime.
 */
export interface AnalyticsEvent {
/** Skill name; the report prints it as `/<skill>` and counts invocations per skill. */
skill: string;
/** Timestamp string; filterByPeriod() parses it with `new Date(ts)` and drops unparseable values. */
ts: string;
/** Repository label used as the grouping key of the "By Repo" section. */
repo: string;
/** `'hook_fire'` marks a safety hook event; any other value (or none) is counted as a skill invocation. */
event?: string;
/** For hook events, the key counted under "Safety Hook Events"; hook events without it are only included in the total. */
pattern?: string;
}
/** Location of the usage log read by main(): `.gstack/analytics/skill-usage.jsonl` under the current user's home directory. */
const ANALYTICS_FILE = path.join(os.homedir(), '.gstack', 'analytics', 'skill-usage.jsonl');
/**
 * Turn raw JSONL text into a list of analytics events.
 *
 * Each line is trimmed and parsed independently. Blank lines, lines that are
 * not valid JSON, and JSON values that are not objects carrying a string `ts`
 * are dropped without raising. No other field is validated.
 *
 * @param content - Full text of the JSONL file (may be empty).
 * @returns The accepted events, in file order.
 */
export function parseJSONL(content: string): AnalyticsEvent[] {
  // Returns undefined for unparseable text; JSON.parse itself never yields undefined.
  const tryParse = (text: string): unknown => {
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };

  return content
    .split('\n')
    .map(rawLine => rawLine.trim())
    .filter(rawLine => rawLine.length > 0)
    .flatMap((rawLine): AnalyticsEvent[] => {
      const candidate = tryParse(rawLine);
      const hasTimestamp =
        candidate !== null &&
        typeof candidate === 'object' &&
        typeof (candidate as { ts?: unknown }).ts === 'string';
      return hasTimestamp ? [candidate as AnalyticsEvent] : [];
    });
}
/**
 * Filter events by period. Supports "<N>d" (e.g. "7d", "30d") and "all".
 *
 * - "all", or any string that is not of the form `<digits>d`, returns the
 *   input array itself, unfiltered.
 * - "<N>d" keeps events whose `ts` parses to a time at or after
 *   `now - N days`. Events whose `ts` cannot be parsed are dropped.
 *
 * The comparison is done on epoch milliseconds rather than on a `Date`
 * cutoff: for very large N the cutoff lies outside the range a `Date` can
 * represent, which would make it an Invalid Date and reject every event.
 * As a plain number the cutoff simply becomes very negative, so a huge
 * window correctly includes everything.
 *
 * @param events - Events to filter; not mutated.
 * @param period - "all" or "<N>d".
 * @returns The events that fall inside the period.
 */
export function filterByPeriod(events: AnalyticsEvent[], period: string): AnalyticsEvent[] {
  if (period === 'all') return events;

  const match = /^(\d+)d$/.exec(period);
  if (!match) return events;

  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const days = parseInt(match[1], 10);
  const cutoffMs = Date.now() - days * MS_PER_DAY;

  return events.filter(e => {
    const eventMs = new Date(e.ts).getTime();
    return !Number.isNaN(eventMs) && eventMs >= cutoffMs;
  });
}
/**
 * Format a report string from a list of events.
 *
 * Sections, in order: header, period label, "Top Skills" (by invocation
 * count, descending), "By Repo" (repos alphabetical, skills by count),
 * "Safety Hook Events" (by fire count, descending) and a total line.
 * A section is omitted when it has no rows.
 *
 * Events with `event === 'hook_fire'` are hook events; everything else is a
 * skill invocation. Events whose `skill` or `repo` is missing, empty or not
 * a string are grouped under "unknown" — parseJSONL() only validates `ts`,
 * and an undefined repo key would otherwise crash the alphabetical sort.
 *
 * This function does not filter; `period` is used only for the label.
 *
 * @param events - Events to summarise (already filtered by the caller).
 * @param period - "all" or "<N>d"; anything else is labelled "all time",
 *                 mirroring filterByPeriod(), which does not filter on
 *                 unrecognised periods.
 * @returns The multi-line report, joined with "\n" and without a trailing newline.
 */
export function formatReport(events: AnalyticsEvent[], period: string = 'all'): string {
  const skillEvents = events.filter(e => e.event !== 'hook_fire');
  const hookEvents = events.filter(e => e.event === 'hook_fire');

  const lines: string[] = [
    'gstack skill usage analytics',
    '\u2550'.repeat(39),
    '',
    `Period: ${describePeriod(period)}`,
  ];

  // Top Skills
  const skillCounts = new Map<string, number>();
  for (const e of skillEvents) {
    increment(skillCounts, nameOrUnknown(e.skill));
  }
  if (skillCounts.size > 0) {
    lines.push('', 'Top Skills');
    for (const [name, count] of byCountDescending(skillCounts)) {
      lines.push(dotLeaderRow(`/${name}`, `${count} invocation${count === 1 ? '' : 's'}`));
    }
  }

  // By Repo
  const repoSkills = new Map<string, Map<string, number>>();
  for (const e of skillEvents) {
    const repo = nameOrUnknown(e.repo);
    let perSkill = repoSkills.get(repo);
    if (!perSkill) {
      perSkill = new Map<string, number>();
      repoSkills.set(repo, perSkill);
    }
    increment(perSkill, nameOrUnknown(e.skill));
  }
  if (repoSkills.size > 0) {
    lines.push('', 'By Repo');
    const sortedRepos = [...repoSkills.entries()].sort((a, b) => a[0].localeCompare(b[0]));
    for (const [repo, skills] of sortedRepos) {
      const parts = byCountDescending(skills).map(([s, c]) => `${s}(${c})`);
      lines.push(`  ${repo}: ${parts.join(' ')}`);
    }
  }

  // Safety Hook Events — hook events without a pattern only show up in the total.
  const hookCounts = new Map<string, number>();
  for (const e of hookEvents) {
    if (e.pattern) increment(hookCounts, e.pattern);
  }
  if (hookCounts.size > 0) {
    lines.push('', 'Safety Hook Events');
    for (const [pattern, count] of byCountDescending(hookCounts)) {
      lines.push(dotLeaderRow(pattern, `${count} fire${count === 1 ? '' : 's'}`));
    }
  }

  // Total
  const totalSkills = skillEvents.length;
  const totalHooks = hookEvents.length;
  lines.push('');
  lines.push(
    `Total: ${totalSkills} skill invocation${totalSkills === 1 ? '' : 's'}, ` +
      `${totalHooks} hook fire${totalHooks === 1 ? '' : 's'}`,
  );

  return lines.join('\n');
}

/** Human-readable label for a period: "last N day(s)" for "<N>d", otherwise "all time". */
function describePeriod(period: string): string {
  const match = /^(\d+)d$/.exec(period);
  if (!match) return 'all time';
  const days = match[1];
  return `last ${days} day${Number(days) === 1 ? '' : 's'}`;
}

/** Return `value` when it is a non-empty string, otherwise the placeholder "unknown". */
function nameOrUnknown(value: unknown): string {
  return typeof value === 'string' && value !== '' ? value : 'unknown';
}

/** Add one to the counter stored under `key`, starting from zero. */
function increment(counts: Map<string, number>, key: string): void {
  counts.set(key, (counts.get(key) ?? 0) + 1);
}

/** Entries of `counts` ordered by count, highest first; ties keep insertion order. */
function byCountDescending(counts: Map<string, number>): Array<[string, number]> {
  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
}

/** Build one indented "label ..... suffix" row, padded with at least two dots toward a 25-character body. */
function dotLeaderRow(label: string, suffix: string): string {
  const dotLen = Math.max(2, 25 - label.length - suffix.length);
  return `  ${label} ${'.'.repeat(dotLen)} ${suffix}`;
}
/**
 * CLI entry point: read the usage log, apply the optional `--period` filter
 * and print the formatted report to stdout.
 *
 * `--period <value>` may appear anywhere in the arguments; when repeated, the
 * last occurrence that has a following value wins. A trailing `--period` with
 * no value is ignored. If the log file is missing, empty, or contains no
 * usable events, a notice is printed and the process exits with status 0.
 */
function main() {
  // Print the "nothing to show" notice and stop the process successfully.
  function exitNoData(): never {
    console.log('No analytics data found.');
    process.exit(0);
  }

  // Parse --period flag
  const argv = process.argv.slice(2);
  let period = 'all';
  let idx = 0;
  while (idx < argv.length) {
    const hasValue = idx + 1 < argv.length;
    if (argv[idx] === '--period' && hasValue) {
      period = argv[idx + 1];
      idx += 2; // skip the consumed value
    } else {
      idx += 1;
    }
  }

  // Read file
  if (!fs.existsSync(ANALYTICS_FILE)) exitNoData();

  const raw = fs.readFileSync(ANALYTICS_FILE, 'utf-8').trim();
  if (!raw) exitNoData();

  const parsed = parseJSONL(raw);
  if (parsed.length === 0) exitNoData();

  const report = formatReport(filterByPeriod(parsed, period), period);
  console.log(report);
}

if (import.meta.main) {
  main();
}
+83 -22
View File
@@ -17,9 +17,16 @@ import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '..');
const DRY_RUN = process.argv.includes('--dry-run');
// ─── Template Context ───────────────────────────────────────
/** Per-template context passed to every placeholder resolver in RESOLVERS. */
interface TemplateContext {
/** Skill name: taken from the template's `name:` frontmatter line, else the name of the template's parent directory. */
skillName: string;
/** Path of the template file being processed. */
tmplPath: string;
}
// ─── Placeholder Resolvers ──────────────────────────────────
function generateCommandReference(): string {
function generateCommandReference(_ctx: TemplateContext): string {
// Group commands by category
const groups = new Map<string, Array<{ command: string; description: string; usage?: string }>>();
for (const [cmd, meta] of Object.entries(COMMAND_DESCRIPTIONS)) {
@@ -55,7 +62,7 @@ function generateCommandReference(): string {
return sections.join('\n').trimEnd();
}
function generateSnapshotFlags(): string {
function generateSnapshotFlags(_ctx: TemplateContext): string {
const lines: string[] = [
'The snapshot is your primary tool for understanding and interacting with pages.',
'',
@@ -94,7 +101,7 @@ function generateSnapshotFlags(): string {
return lines.join('\n');
}
function generatePreamble(): string {
function generatePreamble(ctx: TemplateContext): string {
return `## Preamble (run first)
\`\`\`bash
@@ -105,12 +112,19 @@ touch ~/.gstack/sessions/"$PPID"
_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
_CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
_PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true")
_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
echo "BRANCH: $_BRANCH"
echo "PROACTIVE: $_PROACTIVE"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
mkdir -p ~/.gstack/analytics
echo '{"skill":"${ctx.skillName}","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
\`\`\`
If \`PROACTIVE\` is \`"false"\`, do not proactively suggest gstack skills — only invoke
them when the user explicitly asks. The user opted out of proactive suggestions.
If output shows \`UPGRADE_AVAILABLE <old> <new>\`: read \`~/.claude/skills/gstack/gstack-upgrade/SKILL.md\` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If \`JUST_UPGRADED <from> <to>\`: tell user "Running gstack v{to} (just updated!)" and continue.
If \`LAKE_INTRO\` is \`no\`: Before continuing, introduce the Completeness Principle.
@@ -197,10 +211,35 @@ Hey gstack team — ran into this while using /{skill-name}:
**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill}
\`\`\`
Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-js-no-await\`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"`;
Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-js-no-await\`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
## Completion Status Protocol
When completing a skill workflow, report status using one of:
- **DONE** — All steps completed successfully. Evidence provided for each claim.
- **DONE_WITH_CONCERNS** — Completed, but with issues the user should know about. List each concern.
- **BLOCKED** — Cannot proceed. State what is blocking and what was tried.
- **NEEDS_CONTEXT** — Missing information required to continue. State exactly what you need.
### Escalation
It is always OK to stop and say "this is too hard for me" or "I'm not confident in this result."
Bad work is worse than no work. You will not be penalized for escalating.
- If you have attempted a task 3 times without success, STOP and escalate.
- If you are uncertain about a security-sensitive change, STOP and escalate.
- If the scope of work exceeds what you can verify, STOP and escalate.
Escalation format:
\`\`\`
STATUS: BLOCKED | NEEDS_CONTEXT
REASON: [1-2 sentences]
ATTEMPTED: [what you tried]
RECOMMENDATION: [what the user should do next]
\`\`\``;
}
function generateBrowseSetup(): string {
function generateBrowseSetup(_ctx: TemplateContext): string {
return `## SETUP (run this check BEFORE any browse command)
\`\`\`bash
@@ -221,7 +260,7 @@ If \`NEEDS_SETUP\`:
3. If \`bun\` is not installed: \`curl -fsSL https://bun.sh/install | bash\``;
}
function generateBaseBranchDetect(): string {
function generateBaseBranchDetect(_ctx: TemplateContext): string {
return `## Step 0: Detect base branch
Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
@@ -242,7 +281,7 @@ branch name wherever the instructions say "the base branch."
---`;
}
function generateQAMethodology(): string {
function generateQAMethodology(_ctx: TemplateContext): string {
return `## Modes
### Diff-aware (automatic when on a feature branch with no URL)
@@ -263,6 +302,8 @@ This is the **primary mode** for developers verifying their work. When the user
- API endpoints → test them directly with \`$B js "await fetch('/api/...')"\`
- Static pages (markdown, HTML) → navigate to them directly
**If no obvious pages/routes are identified from the diff:** Do not skip browser testing. The user invoked /qa because they want browser-based verification. Fall back to Quick mode — navigate to the homepage, follow the top 5 navigation targets, check console for errors, and test any interactive elements found. Backend, config, and infrastructure changes affect app behavior — always verify the app still works.
3. **Detect the running app** — check common local dev ports:
\`\`\`bash
$B goto http://localhost:3000 2>/dev/null && echo "Found app on :3000" || \\
@@ -516,16 +557,17 @@ Minimum 0 per category.
8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
10. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.`;
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.
12. **Never refuse to use the browser.** When the user invokes /qa or /qa-only, they are requesting browser-based testing. Never suggest evals, unit tests, or other alternatives as a substitute. Even if the diff appears to have no UI changes, backend changes affect app behavior — always open the browser and test.`;
}
function generateDesignReviewLite(): string {
function generateDesignReviewLite(_ctx: TemplateContext): string {
return `## Design Review (conditional, diff-scoped)
Check if the diff touches frontend files using \`gstack-diff-scope\`:
\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)
source <(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)
\`\`\`
**If \`SCOPE_FRONTEND=false\`:** Skip design review silently. No output.
@@ -550,15 +592,15 @@ eval $(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)
\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)
mkdir -p $PROJECTS_DIR/$SLUG/reviews
echo '{"skill":"design-review-lite","timestamp":"TIMESTAMP","status":"STATUS","findings":N,"auto_fixed":M}' >> $PROJECTS_DIR/$SLUG/reviews/$BRANCH.jsonl
echo '{"skill":"design-review-lite","timestamp":"TIMESTAMP","status":"STATUS","findings":N,"auto_fixed":M,"commit":"COMMIT"}' >> $PROJECTS_DIR/$SLUG/reviews/$BRANCH.jsonl
\`\`\`
Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count.`;
Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of \`git rev-parse --short HEAD\`.`;
}
// NOTE: design-checklist.md is a subset of this methodology for code-level detection.
// When adding items here, also update review/design-checklist.md, and vice versa.
function generateDesignMethodology(): string {
function generateDesignMethodology(_ctx: TemplateContext): string {
return `## Modes
### Full (default)
@@ -893,7 +935,7 @@ Tie everything to user goals and product objectives. Always suggest specific imp
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.`;
}
function generateReviewDashboard(): string {
function generateReviewDashboard(_ctx: TemplateContext): string {
return `## Review Readiness Dashboard
After completing the review, read the review log and config to display the dashboard.
@@ -905,7 +947,7 @@ echo "---CONFIG---"
~/.claude/skills/gstack/bin/gstack-config get skip_eng_review 2>/dev/null || echo "false"
\`\`\`
Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between \`plan-design-review\` (full visual audit) and \`design-review-lite\` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display:
Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between \`plan-design-review\` (full visual audit) and \`design-review-lite\` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display:
\`\`\`
+====================================================================+
@@ -916,6 +958,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl
| Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES |
| CEO Review | 0 | — | — | no |
| Design Review | 0 | — | — | no |
| Codex Review | 0 | — | — | no |
+--------------------------------------------------------------------+
| VERDICT: CLEARED — Eng Review passed |
+====================================================================+
@@ -925,15 +968,22 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl
- **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \\\`gstack-config set skip_eng_review true\\\` (the "don't bother me" setting).
- **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup.
- **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes.
- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed.
**Verdict logic:**
- **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \\\`skip_eng_review\\\` is \\\`true\\\`)
- **NOT CLEARED**: Eng Review missing, stale (>7 days), or has open issues
- CEO and Design reviews are shown for context but never block shipping
- If \\\`skip_eng_review\\\` config is \\\`true\\\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED`;
- CEO, Design, and Codex reviews are shown for context but never block shipping
- If \\\`skip_eng_review\\\` config is \\\`true\\\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED
**Staleness detection:** After displaying the dashboard, check if any existing reviews may be stale:
- Parse the \\\`---HEAD---\\\` section from the bash output to get the current HEAD commit hash
- For each review entry that has a \\\`commit\\\` field: compare it against the current HEAD. If different, count elapsed commits: \\\`git rev-list --count STORED_COMMIT..HEAD\\\`. Display: "Note: {skill} review from {date} may be stale — {N} commits since review"
- For entries without a \\\`commit\\\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection"
- If all reviews match the current HEAD, do not display any staleness notes`;
}
function generateTestBootstrap(): string {
function generateTestBootstrap(_ctx: TemplateContext): string {
return `## Test Framework Bootstrap
**Detect existing test framework and project runtime:**
@@ -1088,14 +1138,14 @@ Only commit if there are changes. Stage all bootstrap files (config, test direct
---`;
}
function generateArtifactSetup(): string {
function generateArtifactSetup(_ctx: TemplateContext): string {
return `\`\`\`bash
eval $(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)
DATE=$(date +%Y-%m-%d)
\`\`\``;
}
const RESOLVERS: Record<string, () => string> = {
const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = {
COMMAND_REFERENCE: generateCommandReference,
SNAPSHOT_FLAGS: generateSnapshotFlags,
PREAMBLE: generatePreamble,
@@ -1118,11 +1168,16 @@ function processTemplate(tmplPath: string): { outputPath: string; content: strin
const relTmplPath = path.relative(ROOT, tmplPath);
const outputPath = tmplPath.replace(/\.tmpl$/, '');
// Extract skill name from frontmatter for TemplateContext
const nameMatch = tmplContent.match(/^name:\s*(.+)$/m);
const skillName = nameMatch ? nameMatch[1].trim() : path.basename(path.dirname(tmplPath));
const ctx: TemplateContext = { skillName, tmplPath };
// Replace placeholders
let content = tmplContent.replace(/\{\{(\w+)\}\}/g, (match, name) => {
const resolver = RESOLVERS[name];
if (!resolver) throw new Error(`Unknown placeholder {{${name}}} in ${relTmplPath}`);
return resolver();
return resolver(ctx);
});
// Check for any remaining unresolved placeholders
@@ -1159,12 +1214,18 @@ function findTemplates(): string[] {
path.join(ROOT, 'plan-ceo-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'plan-eng-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'retro', 'SKILL.md.tmpl'),
path.join(ROOT, 'office-hours', 'SKILL.md.tmpl'),
path.join(ROOT, 'investigate', 'SKILL.md.tmpl'),
path.join(ROOT, 'gstack-upgrade', 'SKILL.md.tmpl'),
path.join(ROOT, 'setup-team-sync', 'SKILL.md.tmpl'),
path.join(ROOT, 'plan-design-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'design-review', 'SKILL.md.tmpl'),
path.join(ROOT, 'design-consultation', 'SKILL.md.tmpl'),
path.join(ROOT, 'document-release', 'SKILL.md.tmpl'),
path.join(ROOT, 'codex', 'SKILL.md.tmpl'),
path.join(ROOT, 'careful', 'SKILL.md.tmpl'),
path.join(ROOT, 'freeze', 'SKILL.md.tmpl'),
path.join(ROOT, 'guard', 'SKILL.md.tmpl'),
path.join(ROOT, 'unfreeze', 'SKILL.md.tmpl'),
];
for (const p of candidates) {
if (fs.existsSync(p)) templates.push(p);