mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
feat: QA restructure, browser ref staleness, eval efficiency metrics (v0.4.0) (#83)
* feat: browser ref staleness detection via async count() validation

  resolveRef() now checks element count to detect stale refs after page mutations (e.g. SPA navigation). RefEntry stores role+name metadata for better diagnostics. 3 new snapshot tests for staleness detection.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: qa-only skill, qa fix loop, plan-to-QA artifact flow

  Add /qa-only (report-only, Edit tool blocked), restructure /qa with find-fix-verify cycle, add {{QA_METHODOLOGY}} DRY placeholder for shared methodology. /plan-eng-review now writes test-plan artifacts to ~/.gstack/projects/<slug>/ for QA consumption.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: eval efficiency metrics — turns, duration, commentary across all surfaces

  Add generateCommentary() for natural-language delta interpretation, per-test turns/duration in comparison and summary output, judgePassed unit tests, 3 new E2E tests (qa-only, qa fix loop, plan artifact).

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: bump version and changelog (v0.4.0)

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* docs: update ARCHITECTURE, BROWSER, CONTRIBUTING, README for v0.4.0

  - ARCHITECTURE: add ref staleness detection section, update RefEntry type
  - BROWSER: add ref staleness paragraph to snapshot system docs
  - CONTRIBUTING: update eval tool descriptions with commentary feature
  - README: fix missing qa-only in project-local uninstall command

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* docs: add user-facing benefit descriptions to v0.4.0 changelog

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+18
-7
@@ -47,6 +47,8 @@ interface RunSummary {
|
||||
passed: number;
|
||||
total: number;
|
||||
cost: number;
|
||||
duration: number;
|
||||
turns: number;
|
||||
}
|
||||
|
||||
const runs: RunSummary[] = [];
|
||||
@@ -55,6 +57,7 @@ for (const file of files) {
|
||||
const data = JSON.parse(fs.readFileSync(path.join(EVAL_DIR, file), 'utf-8'));
|
||||
if (filterBranch && data.branch !== filterBranch) continue;
|
||||
if (filterTier && data.tier !== filterTier) continue;
|
||||
const totalTurns = (data.tests || []).reduce((s: number, t: any) => s + (t.turns_used || 0), 0);
|
||||
runs.push({
|
||||
file,
|
||||
timestamp: data.timestamp || '',
|
||||
@@ -64,6 +67,8 @@ for (const file of files) {
|
||||
passed: data.passed || 0,
|
||||
total: data.total_tests || 0,
|
||||
cost: data.total_cost_usd || 0,
|
||||
duration: data.total_duration_ms || 0,
|
||||
turns: totalTurns,
|
||||
});
|
||||
} catch { continue; }
|
||||
}
|
||||
@@ -77,29 +82,35 @@ const displayed = runs.slice(0, limit);
|
||||
// Print table
|
||||
console.log('');
|
||||
console.log(`Eval History (${runs.length} total runs)`);
|
||||
console.log('═'.repeat(90));
|
||||
console.log('═'.repeat(105));
|
||||
console.log(
|
||||
' ' +
|
||||
'Date'.padEnd(17) +
|
||||
'Branch'.padEnd(28) +
|
||||
'Branch'.padEnd(25) +
|
||||
'Tier'.padEnd(12) +
|
||||
'Pass'.padEnd(8) +
|
||||
'Cost'.padEnd(8) +
|
||||
'Turns'.padEnd(7) +
|
||||
'Duration'.padEnd(10) +
|
||||
'Version'
|
||||
);
|
||||
console.log('─'.repeat(90));
|
||||
console.log('─'.repeat(105));
|
||||
|
||||
// One table row per displayed run. Each cell is padded to the width of its
// header cell (Date 17, Branch 25, Tier 12, Pass 8, Cost 8, Turns 7,
// Duration 10) so the columns line up under the header.
for (const run of displayed) {
  const date = run.timestamp.replace('T', ' ').slice(0, 16).padEnd(17);
  // Branch names longer than 23 chars are cut to 20 chars + '...'. The
  // shortened form is padded to the full 25-char column as well; leaving it
  // at 23 chars would shift every following cell two places to the left.
  const shownBranch = run.branch.length > 23 ? run.branch.slice(0, 20) + '...' : run.branch;
  const branch = shownBranch.padEnd(25);
  const pass = `${run.passed}/${run.total}`.padEnd(8);
  const cost = `$${run.cost.toFixed(2)}`.padEnd(8);
  // Turns and duration are left blank (but still padded) when nothing was recorded.
  const turns = (run.turns > 0 ? `${run.turns}t` : '').padEnd(7);
  const dur = (run.duration > 0 ? `${Math.round(run.duration / 1000)}s` : '').padEnd(10);
  console.log(` ${date}${branch}${run.tier.padEnd(12)}${pass}${cost}${turns}${dur}v${run.version}`);
}
|
||||
|
||||
console.log('─'.repeat(90));
|
||||
console.log('─'.repeat(105));
|
||||
|
||||
const totalCost = runs.reduce((s, r) => s + r.cost, 0);
|
||||
console.log(` ${runs.length} runs | Total spend: $${totalCost.toFixed(2)} | Showing: ${displayed.length}`);
|
||||
const totalDur = runs.reduce((s, r) => s + r.duration, 0);
|
||||
const totalTurns = runs.reduce((s, r) => s + r.turns, 0);
|
||||
console.log(` ${runs.length} runs | $${totalCost.toFixed(2)} total | ${totalTurns} turns | ${Math.round(totalDur / 1000)}s | Showing: ${displayed.length}`);
|
||||
console.log(` Dir: ${EVAL_DIR}`);
|
||||
console.log('');
|
||||
|
||||
+57
-4
@@ -40,6 +40,33 @@ const totalCost = results.reduce((s, r) => s + (r.total_cost_usd || 0), 0);
|
||||
const avgE2ECost = e2eRuns.length > 0 ? e2eRuns.reduce((s, r) => s + r.total_cost_usd, 0) / e2eRuns.length : 0;
|
||||
const avgJudgeCost = judgeRuns.length > 0 ? judgeRuns.reduce((s, r) => s + r.total_cost_usd, 0) / judgeRuns.length : 0;
|
||||
|
||||
// E2E efficiency aggregates.
// - avgE2EDuration: mean total_duration_ms per E2E run (missing durations count as 0).
// - e2eTurns: summed turns_used per run, keeping only runs that recorded at least one turn.
// - avgE2ETurns: mean of e2eTurns.
// Both averages fall back to 0 when there is nothing to average.
const avgE2EDuration = e2eRuns.length === 0
  ? 0
  : e2eRuns.reduce((totalMs, run) => totalMs + (run.total_duration_ms || 0), 0) / e2eRuns.length;

const e2eTurns: number[] = e2eRuns
  .map((run) => run.tests.reduce((turnSum, test) => turnSum + (test.turns_used || 0), 0))
  .filter((turnsInRun) => turnsInRun > 0);

const avgE2ETurns = e2eTurns.length === 0
  ? 0
  : e2eTurns.reduce((total, turnsInRun) => total + turnsInRun, 0) / e2eTurns.length;
|
||||
|
||||
// Per-test efficiency samples, keyed by test name and collected across all E2E runs.
// A turn count is recorded whenever turns_used is defined; durations and costs
// are recorded only when they are greater than zero, so the three sample lists
// of one test can have different lengths.
const testEfficiency = new Map<string, { turns: number[]; durations: number[]; costs: number[] }>();
for (const run of e2eRuns) {
  for (const test of run.tests) {
    let samples = testEfficiency.get(test.name);
    if (samples === undefined) {
      // First time this test name is seen: start with empty sample lists.
      samples = { turns: [], durations: [], costs: [] };
      testEfficiency.set(test.name, samples);
    }
    if (test.turns_used !== undefined) samples.turns.push(test.turns_used);
    if (test.duration_ms > 0) samples.durations.push(test.duration_ms);
    if (test.cost_usd > 0) samples.costs.push(test.cost_usd);
  }
}
|
||||
|
||||
// Detection rates from outcome evals
|
||||
const detectionRates: number[] = [];
|
||||
for (const r of e2eRuns) {
|
||||
@@ -94,22 +121,48 @@ for (const stats of branchStats.values()) {
|
||||
// Print summary
|
||||
console.log('');
|
||||
console.log('Eval Summary');
|
||||
console.log('═'.repeat(60));
|
||||
console.log('═'.repeat(70));
|
||||
console.log(` Total runs: ${results.length} (${e2eRuns.length} e2e, ${judgeRuns.length} llm-judge)`);
|
||||
console.log(` Total spend: $${totalCost.toFixed(2)}`);
|
||||
console.log(` Avg cost/e2e: $${avgE2ECost.toFixed(2)}`);
|
||||
console.log(` Avg cost/judge: $${avgJudgeCost.toFixed(2)}`);
|
||||
if (avgE2EDuration > 0) {
|
||||
console.log(` Avg duration/e2e: ${Math.round(avgE2EDuration / 1000)}s`);
|
||||
}
|
||||
if (avgE2ETurns > 0) {
|
||||
console.log(` Avg turns/e2e: ${Math.round(avgE2ETurns)}`);
|
||||
}
|
||||
if (avgDetection !== null) {
|
||||
console.log(` Avg detection: ${avgDetection.toFixed(1)} bugs`);
|
||||
}
|
||||
console.log('─'.repeat(60));
|
||||
console.log('─'.repeat(70));
|
||||
|
||||
// Per-test efficiency averages (only if we have enough data).
// Lists every test with at least two turn samples, most expensive first, as
// "<name> $<avg cost> <avg turns>t <avg duration>s (<n> runs)".
if (testEfficiency.size > 0 && e2eRuns.length >= 2) {
  // Arithmetic mean, or null for an empty sample. The filter below only
  // guarantees turn samples; cost and duration samples may be missing, and a
  // bare sum / length would then be 0 / 0 = NaN.
  const mean = (values: number[]): number | null =>
    values.length > 0 ? values.reduce((sum, v) => sum + v, 0) / values.length : null;

  console.log(' Per-test efficiency (averages across runs):');
  const sorted = [...testEfficiency.entries()]
    .filter(([, s]) => s.turns.length >= 2)
    .sort((a, b) => {
      const avgA = mean(a[1].costs);
      const avgB = mean(b[1].costs);
      // Tests without cost data go last; the comparator never returns NaN.
      if (avgA === null || avgB === null) {
        return (avgA === null ? 1 : 0) - (avgB === null ? 1 : 0);
      }
      return avgB - avgA;
    });
  for (const [name, stats] of sorted) {
    const avgTurns = mean(stats.turns);
    const avgDurationMs = mean(stats.durations);
    const avgCost = mean(stats.costs);
    // Missing metrics are shown as 'n/a' instead of '$NaN' / 'NaNs'.
    const turnsCell = avgTurns === null ? 'n/a' : `${Math.round(avgTurns)}t`;
    const durationCell = avgDurationMs === null ? 'n/a' : `${Math.round(avgDurationMs / 1000)}s`;
    const costCell = avgCost === null ? 'n/a' : `$${avgCost.toFixed(2)}`;
    const label = name.length > 30 ? name.slice(0, 27) + '...' : name.padEnd(30);
    console.log(` ${label} ${costCell} ${turnsCell} ${durationCell} (${stats.turns.length} runs)`);
  }
  console.log('─'.repeat(70));
}
|
||||
|
||||
if (flakyTests.length > 0) {
|
||||
console.log(` Flaky tests (${flakyTests.length}):`);
|
||||
for (const name of flakyTests) {
|
||||
console.log(` - ${name}`);
|
||||
}
|
||||
console.log('─'.repeat(60));
|
||||
console.log('─'.repeat(70));
|
||||
}
|
||||
|
||||
if (branchStats.size > 0) {
|
||||
@@ -119,7 +172,7 @@ if (branchStats.size > 0) {
|
||||
const det = stats.detections.length > 0 ? ` avg det: ${stats.avgDetection.toFixed(1)}` : '';
|
||||
console.log(` ${branch.padEnd(30)} ${stats.runs} runs${det}`);
|
||||
}
|
||||
console.log('─'.repeat(60));
|
||||
console.log('─'.repeat(70));
|
||||
}
|
||||
|
||||
// Date range
|
||||
|
||||
@@ -126,11 +126,288 @@ If \`NEEDS_SETUP\`:
|
||||
3. If \`bun\` is not installed: \`curl -fsSL https://bun.sh/install | bash\``;
|
||||
}
|
||||
|
||||
/**
 * Builds the QA methodology markdown that `RESOLVERS` registers under the
 * `QA_METHODOLOGY` key.
 *
 * The text covers, in order: the QA modes (diff-aware, full, quick,
 * regression), the six workflow phases, the health score rubric,
 * framework-specific guidance, and the list of important rules.
 *
 * Notes on the embedded snippets:
 * - The local-port probe is an explicit loop with `break`. In sh, `&&` and
 *   `||` have equal precedence and associate left, so a flat
 *   `a && echo A || b && echo B` chain would also run the later `echo`s after
 *   the first probe succeeds.
 * - The console rubric buckets are disjoint (0, 1-3, 4-10, 11+), so every
 *   error count maps to exactly one score.
 *
 * @returns The methodology section as a single markdown string.
 */
function generateQAMethodology(): string {
  return `## Modes

### Diff-aware (automatic when on a feature branch with no URL)

This is the **primary mode** for developers verifying their work. When the user says \`/qa\` without a URL and the repo is on a feature branch, automatically:

1. **Analyze the branch diff** to understand what changed:
\`\`\`bash
git diff main...HEAD --name-only
git log main..HEAD --oneline
\`\`\`

2. **Identify affected pages/routes** from the changed files:
- Controller/route files → which URL paths they serve
- View/template/component files → which pages render them
- Model/service files → which pages use those models (check controllers that reference them)
- CSS/style files → which pages include those stylesheets
- API endpoints → test them directly with \`$B js "await fetch('/api/...')"\`
- Static pages (markdown, HTML) → navigate to them directly

3. **Detect the running app** — check common local dev ports:
\`\`\`bash
for port in 3000 4000 8080; do
if $B goto "http://localhost:$port" 2>/dev/null; then
echo "Found app on :$port"
break
fi
done
\`\`\`
If no local app is found, check for a staging/preview URL in the PR or environment. If nothing works, ask the user for the URL.

4. **Test each affected page/route:**
- Navigate to the page
- Take a screenshot
- Check console for errors
- If the change was interactive (forms, buttons, flows), test the interaction end-to-end
- Use \`snapshot -D\` before and after actions to verify the change had the expected effect

5. **Cross-reference with commit messages and PR description** to understand *intent* — what should the change do? Verify it actually does that.

6. **Check TODOS.md** (if it exists) for known bugs or issues related to the changed files. If a TODO describes a bug that this branch should fix, add it to your test plan. If you find a new bug during QA that isn't in TODOS.md, note it in the report.

7. **Report findings** scoped to the branch changes:
- "Changes tested: N pages/routes affected by this branch"
- For each: does it work? Screenshot evidence.
- Any regressions on adjacent pages?

**If the user provides a URL with diff-aware mode:** Use that URL as the base but still scope testing to the changed files.

### Full (default when URL is provided)
Systematic exploration. Visit every reachable page. Document 5-10 well-evidenced issues. Produce health score. Takes 5-15 minutes depending on app size.

### Quick (\`--quick\`)
30-second smoke test. Visit homepage + top 5 navigation targets. Check: page loads? Console errors? Broken links? Produce health score. No detailed issue documentation.

### Regression (\`--regression <baseline>\`)
Run full mode, then load \`baseline.json\` from a previous run. Diff: which issues are fixed? Which are new? What's the score delta? Append regression section to report.

---

## Workflow

### Phase 1: Initialize

1. Find browse binary (see Setup above)
2. Create output directories
3. Copy report template from \`qa/templates/qa-report-template.md\` to output dir
4. Start timer for duration tracking

### Phase 2: Authenticate (if needed)

**If the user specified auth credentials:**

\`\`\`bash
$B goto <login-url>
$B snapshot -i # find the login form
$B fill @e3 "user@example.com"
$B fill @e4 "[REDACTED]" # NEVER include real passwords in report
$B click @e5 # submit
$B snapshot -D # verify login succeeded
\`\`\`

**If the user provided a cookie file:**

\`\`\`bash
$B cookie-import cookies.json
$B goto <target-url>
\`\`\`

**If 2FA/OTP is required:** Ask the user for the code and wait.

**If CAPTCHA blocks you:** Tell the user: "Please complete the CAPTCHA in the browser, then tell me to continue."

### Phase 3: Orient

Get a map of the application:

\`\`\`bash
$B goto <target-url>
$B snapshot -i -a -o "$REPORT_DIR/screenshots/initial.png"
$B links # map navigation structure
$B console --errors # any errors on landing?
\`\`\`

**Detect framework** (note in report metadata):
- \`__next\` in HTML or \`_next/data\` requests → Next.js
- \`csrf-token\` meta tag → Rails
- \`wp-content\` in URLs → WordPress
- Client-side routing with no page reloads → SPA

**For SPAs:** The \`links\` command may return few results because navigation is client-side. Use \`snapshot -i\` to find nav elements (buttons, menu items) instead.

### Phase 4: Explore

Visit pages systematically. At each page:

\`\`\`bash
$B goto <page-url>
$B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
$B console --errors
\`\`\`

Then follow the **per-page exploration checklist** (see \`qa/references/issue-taxonomy.md\`):

1. **Visual scan** — Look at the annotated screenshot for layout issues
2. **Interactive elements** — Click buttons, links, controls. Do they work?
3. **Forms** — Fill and submit. Test empty, invalid, edge cases
4. **Navigation** — Check all paths in and out
5. **States** — Empty state, loading, error, overflow
6. **Console** — Any new JS errors after interactions?
7. **Responsiveness** — Check mobile viewport if relevant:
\`\`\`bash
$B viewport 375x812
$B screenshot "$REPORT_DIR/screenshots/page-mobile.png"
$B viewport 1280x720
\`\`\`

**Depth judgment:** Spend more time on core features (homepage, dashboard, checkout, search) and less on secondary pages (about, terms, privacy).

**Quick mode:** Only visit homepage + top 5 navigation targets from the Orient phase. Skip the per-page checklist — just check: loads? Console errors? Broken links visible?

### Phase 5: Document

Document each issue **immediately when found** — don't batch them.

**Two evidence tiers:**

**Interactive bugs** (broken flows, dead buttons, form failures):
1. Take a screenshot before the action
2. Perform the action
3. Take a screenshot showing the result
4. Use \`snapshot -D\` to show what changed
5. Write repro steps referencing screenshots

\`\`\`bash
$B screenshot "$REPORT_DIR/screenshots/issue-001-step-1.png"
$B click @e5
$B screenshot "$REPORT_DIR/screenshots/issue-001-result.png"
$B snapshot -D
\`\`\`

**Static bugs** (typos, layout issues, missing images):
1. Take a single annotated screenshot showing the problem
2. Describe what's wrong

\`\`\`bash
$B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
\`\`\`

**Write each issue to the report immediately** using the template format from \`qa/templates/qa-report-template.md\`.

### Phase 6: Wrap Up

1. **Compute health score** using the rubric below
2. **Write "Top 3 Things to Fix"** — the 3 highest-severity issues
3. **Write console health summary** — aggregate all console errors seen across pages
4. **Update severity counts** in the summary table
5. **Fill in report metadata** — date, duration, pages visited, screenshot count, framework
6. **Save baseline** — write \`baseline.json\` with:
\`\`\`json
{
"date": "YYYY-MM-DD",
"url": "<target>",
"healthScore": N,
"issues": [{ "id": "ISSUE-001", "title": "...", "severity": "...", "category": "..." }],
"categoryScores": { "console": N, "links": N, ... }
}
\`\`\`

**Regression mode:** After writing the report, load the baseline file. Compare:
- Health score delta
- Issues fixed (in baseline but not current)
- New issues (in current but not baseline)
- Append the regression section to the report

---

## Health Score Rubric

Compute each category score (0-100), then take the weighted average.

### Console (weight: 15%)
- 0 errors → 100
- 1-3 errors → 70
- 4-10 errors → 40
- 11+ errors → 10

### Links (weight: 10%)
- 0 broken → 100
- Each broken link → -15 (minimum 0)

### Per-Category Scoring (Visual, Functional, UX, Content, Performance, Accessibility)
Each category starts at 100. Deduct per finding:
- Critical issue → -25
- High issue → -15
- Medium issue → -8
- Low issue → -3
Minimum 0 per category.

### Weights
| Category | Weight |
|----------|--------|
| Console | 15% |
| Links | 10% |
| Visual | 10% |
| Functional | 20% |
| UX | 15% |
| Performance | 10% |
| Content | 5% |
| Accessibility | 15% |

### Final Score
\`score = Σ (category_score × weight)\`

---

## Framework-Specific Guidance

### Next.js
- Check console for hydration errors (\`Hydration failed\`, \`Text content did not match\`)
- Monitor \`_next/data\` requests in network — 404s indicate broken data fetching
- Test client-side navigation (click links, don't just \`goto\`) — catches routing issues
- Check for CLS (Cumulative Layout Shift) on pages with dynamic content

### Rails
- Check for N+1 query warnings in console (if development mode)
- Verify CSRF token presence in forms
- Test Turbo/Stimulus integration — do page transitions work smoothly?
- Check for flash messages appearing and dismissing correctly

### WordPress
- Check for plugin conflicts (JS errors from different plugins)
- Verify admin bar visibility for logged-in users
- Test REST API endpoints (\`/wp-json/\`)
- Check for mixed content warnings (common with WP)

### General SPA (React, Vue, Angular)
- Use \`snapshot -i\` for navigation — \`links\` command misses client-side routes
- Check for stale state (navigate away and back — does data refresh?)
- Test browser back/forward — does the app handle history correctly?
- Check for memory leaks (monitor console after extended use)

---

## Important Rules

1. **Repro is everything.** Every issue needs at least one screenshot. No exceptions.
2. **Verify before documenting.** Retry the issue once to confirm it's reproducible, not a fluke.
3. **Never include credentials.** Write \`[REDACTED]\` for passwords in repro steps.
4. **Write incrementally.** Append each issue to the report as you find it. Don't batch.
5. **Never read source code.** Test as a user, not a developer.
6. **Check console after every interaction.** JS errors that don't surface visually are still bugs.
7. **Test like a user.** Use realistic data. Walk through complete workflows end-to-end.
8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
10. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.`;
}
|
||||
|
||||
/**
 * Lookup table from an upper-case key to the zero-argument generator that
 * produces the text for that key.
 */
const RESOLVERS: Record<string, () => string> = {
  COMMAND_REFERENCE: generateCommandReference,
  SNAPSHOT_FLAGS: generateSnapshotFlags,
  UPDATE_CHECK: generateUpdateCheck,
  BROWSE_SETUP: generateBrowseSetup,
  QA_METHODOLOGY: generateQAMethodology,
};
|
||||
|
||||
// ─── Template Processing ────────────────────────────────────
|
||||
@@ -176,6 +453,7 @@ function findTemplates(): string[] {
|
||||
path.join(ROOT, 'SKILL.md.tmpl'),
|
||||
path.join(ROOT, 'browse', 'SKILL.md.tmpl'),
|
||||
path.join(ROOT, 'qa', 'SKILL.md.tmpl'),
|
||||
path.join(ROOT, 'qa-only', 'SKILL.md.tmpl'),
|
||||
path.join(ROOT, 'setup-browser-cookies', 'SKILL.md.tmpl'),
|
||||
path.join(ROOT, 'ship', 'SKILL.md.tmpl'),
|
||||
path.join(ROOT, 'review', 'SKILL.md.tmpl'),
|
||||
|
||||
@@ -20,6 +20,7 @@ const SKILL_FILES = [
|
||||
'SKILL.md',
|
||||
'browse/SKILL.md',
|
||||
'qa/SKILL.md',
|
||||
'qa-only/SKILL.md',
|
||||
'ship/SKILL.md',
|
||||
'review/SKILL.md',
|
||||
'retro/SKILL.md',
|
||||
|
||||
Reference in New Issue
Block a user