mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-06 05:35:46 +02:00
1052711ca6
Add generateDesignMethodology() to gen-skill-docs.ts with 10-category, 80-item design audit checklist. Register plan-design-review and qa-design-review templates in findTemplates(). Add both skills to skill-check.ts SKILL_FILES. Add command and snapshot flag validation tests for both skills in skill-validation.test.ts.
879 lines
37 KiB
TypeScript
879 lines
37 KiB
TypeScript
#!/usr/bin/env bun
/**
 * Generate SKILL.md files from .tmpl templates.
 *
 * Pipeline:
 * read .tmpl → find {{PLACEHOLDERS}} → resolve from source → format → write .md
 *
 * Supports --dry-run: generate to memory, exit 1 if different from committed file.
 * Used by skill:check and CI freshness checks.
 */

import { COMMAND_DESCRIPTIONS } from '../browse/src/commands';
import { SNAPSHOT_FLAGS } from '../browse/src/snapshot';
import * as fs from 'fs';
import * as path from 'path';

// Repo root — this script lives one directory below it (Bun-specific import.meta.dir).
const ROOT = path.resolve(import.meta.dir, '..');
// --dry-run: generate in memory and compare against committed output instead of writing.
const DRY_RUN = process.argv.includes('--dry-run');

// ─── Placeholder Resolvers ──────────────────────────────────
function generateCommandReference(): string {
|
||
// Group commands by category
|
||
const groups = new Map<string, Array<{ command: string; description: string; usage?: string }>>();
|
||
for (const [cmd, meta] of Object.entries(COMMAND_DESCRIPTIONS)) {
|
||
const list = groups.get(meta.category) || [];
|
||
list.push({ command: cmd, description: meta.description, usage: meta.usage });
|
||
groups.set(meta.category, list);
|
||
}
|
||
|
||
// Category display order
|
||
const categoryOrder = [
|
||
'Navigation', 'Reading', 'Interaction', 'Inspection',
|
||
'Visual', 'Snapshot', 'Meta', 'Tabs', 'Server',
|
||
];
|
||
|
||
const sections: string[] = [];
|
||
for (const category of categoryOrder) {
|
||
const commands = groups.get(category);
|
||
if (!commands || commands.length === 0) continue;
|
||
|
||
// Sort alphabetically within category
|
||
commands.sort((a, b) => a.command.localeCompare(b.command));
|
||
|
||
sections.push(`### ${category}`);
|
||
sections.push('| Command | Description |');
|
||
sections.push('|---------|-------------|');
|
||
for (const cmd of commands) {
|
||
const display = cmd.usage ? `\`${cmd.usage}\`` : `\`${cmd.command}\``;
|
||
sections.push(`| ${display} | ${cmd.description} |`);
|
||
}
|
||
sections.push('');
|
||
}
|
||
|
||
return sections.join('\n').trimEnd();
|
||
}
|
||
|
||
function generateSnapshotFlags(): string {
|
||
const lines: string[] = [
|
||
'The snapshot is your primary tool for understanding and interacting with pages.',
|
||
'',
|
||
'```',
|
||
];
|
||
|
||
for (const flag of SNAPSHOT_FLAGS) {
|
||
const label = flag.valueHint ? `${flag.short} ${flag.valueHint}` : flag.short;
|
||
lines.push(`${label.padEnd(10)}${flag.long.padEnd(24)}${flag.description}`);
|
||
}
|
||
|
||
lines.push('```');
|
||
lines.push('');
|
||
lines.push('All flags can be combined freely. `-o` only applies when `-a` is also used.');
|
||
lines.push('Example: `$B snapshot -i -a -C -o /tmp/annotated.png`');
|
||
lines.push('');
|
||
lines.push('**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.');
|
||
lines.push('@c refs from `-C` are numbered separately (@c1, @c2, ...).');
|
||
lines.push('');
|
||
lines.push('After snapshot, use @refs as selectors in any command:');
|
||
lines.push('```bash');
|
||
lines.push('$B click @e3 $B fill @e4 "value" $B hover @e1');
|
||
lines.push('$B html @e2 $B css @e5 "color" $B attrs @e6');
|
||
lines.push('$B click @c1 # cursor-interactive ref (from -C)');
|
||
lines.push('```');
|
||
lines.push('');
|
||
lines.push('**Output format:** indented accessibility tree with @ref IDs, one element per line.');
|
||
lines.push('```');
|
||
lines.push(' @e1 [heading] "Welcome" [level=1]');
|
||
lines.push(' @e2 [textbox] "Email"');
|
||
lines.push(' @e3 [button] "Submit"');
|
||
lines.push('```');
|
||
lines.push('');
|
||
lines.push('Refs are invalidated on navigation — run `snapshot` again after `goto`.');
|
||
|
||
return lines.join('\n');
|
||
}
|
||
|
||
/**
 * Shared "Preamble" markdown injected into generated SKILL.md files.
 *
 * Covers: the update-check/session-tracking shell preamble, the baseline
 * AskUserQuestion formatting rules (including ELI16 mode when 3+ concurrent
 * sessions are detected), and contributor-mode field-report instructions.
 *
 * Body is a single template literal — shell `$(...)` substitutions inside it
 * are literal text for the emitted doc, not TS interpolation (`${` is never
 * used). Backticks in the output are escaped as \`.
 *
 * @returns Markdown string (no trailing newline).
 */
function generatePreamble(): string {
  return `## Preamble (run first)

\`\`\`bash
_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
[ -n "$_UPD" ] && echo "$_UPD" || true
mkdir -p ~/.gstack/sessions
touch ~/.gstack/sessions/"$PPID"
_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
_CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
\`\`\`

If output shows \`UPGRADE_AVAILABLE <old> <new>\`: read \`~/.claude/skills/gstack/gstack-upgrade/SKILL.md\` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If \`JUST_UPGRADED <from> <to>\`: tell user "Running gstack v{to} (just updated!)" and continue.

## AskUserQuestion Format

**ALWAYS follow this structure for every AskUserQuestion call:**
1. Context: project name, current branch, what we're working on (1-2 sentences)
2. The specific question or decision point
3. \`RECOMMENDATION: Choose [X] because [one-line reason]\`
4. Lettered options: \`A) ... B) ... C) ...\`

If \`_SESSIONS\` is 3 or more: the user is juggling multiple gstack sessions and context-switching heavily. **ELI16 mode** — they may not remember what this conversation is about. Every AskUserQuestion MUST re-ground them: state the project, the branch, the current plan/task, then the specific problem, THEN the recommendation and options. Be extra clear and self-contained — assume they haven't looked at this window in 20 minutes.

Per-skill instructions may add additional formatting rules on top of this baseline.

## Contributor Mode

If \`_CONTRIB\` is \`true\`: you are in **contributor mode**. When you hit friction with **gstack itself** (not the user's app), file a field report. Think: "hey, I was trying to do X with gstack and it didn't work / was confusing / was annoying. Here's what happened."

**gstack issues:** browse command fails/wrong output, snapshot missing elements, skill instructions unclear or misleading, binary crash/hang, unhelpful error message, any rough edge or annoyance — even minor stuff.
**NOT gstack issues:** user's app bugs, network errors to user's URL, auth failures on user's site.

**To file:** write \`~/.gstack/contributor-logs/{slug}.md\` with this structure:

\`\`\`
# {Title}

Hey gstack team — ran into this while using /{skill-name}:

**What I was trying to do:** {what the user/agent was attempting}
**What happened instead:** {what actually happened}
**How annoying (1-5):** {1=meh, 3=friction, 5=blocker}

## Steps to reproduce
1. {step}

## Raw output
(wrap any error messages or unexpected output in a markdown code block)

**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill}
\`\`\`

Then run: \`mkdir -p ~/.gstack/contributor-logs && open ~/.gstack/contributor-logs/{slug}.md\`

Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-snapshot-ref-gap\`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"`;
}
|
||
|
||
/**
 * Shared "SETUP" markdown injected into generated SKILL.md files.
 *
 * Emits the shell probe that locates the browse binary (repo-local install
 * preferred over the home-directory install) and the fallback instructions
 * when the binary is missing (one-time build, bun install hint).
 *
 * Body is a single template literal — `$(...)` and `$B` inside it are literal
 * shell text for the emitted doc, not TS interpolation.
 *
 * @returns Markdown string (no trailing newline).
 */
function generateBrowseSetup(): string {
  return `## SETUP (run this check BEFORE any browse command)

\`\`\`bash
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
B=""
[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse"
[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse
if [ -x "$B" ]; then
echo "READY: $B"
else
echo "NEEDS_SETUP"
fi
\`\`\`

If \`NEEDS_SETUP\`:
1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
2. Run: \`cd <SKILL_DIR> && ./setup\`
3. If \`bun\` is not installed: \`curl -fsSL https://bun.sh/install | bash\``;
}
|
||
|
||
/**
 * QA methodology markdown injected into the qa SKILL.md.
 *
 * One large static template literal covering: the four QA modes (diff-aware,
 * full, quick, regression), the six-phase workflow (initialize, authenticate,
 * orient, explore, document, wrap up), the weighted health-score rubric,
 * framework-specific guidance (Next.js, Rails, WordPress, SPA), and the
 * ten "Important Rules".
 *
 * Shell `$(...)` / `$B` text inside the literal is literal doc content, not
 * TS interpolation. Backticks in the output are escaped as \`.
 *
 * @returns Markdown string (no trailing newline).
 */
function generateQAMethodology(): string {
  return `## Modes

### Diff-aware (automatic when on a feature branch with no URL)

This is the **primary mode** for developers verifying their work. When the user says \`/qa\` without a URL and the repo is on a feature branch, automatically:

1. **Analyze the branch diff** to understand what changed:
\`\`\`bash
git diff main...HEAD --name-only
git log main..HEAD --oneline
\`\`\`

2. **Identify affected pages/routes** from the changed files:
- Controller/route files → which URL paths they serve
- View/template/component files → which pages render them
- Model/service files → which pages use those models (check controllers that reference them)
- CSS/style files → which pages include those stylesheets
- API endpoints → test them directly with \`$B js "await fetch('/api/...')"\`
- Static pages (markdown, HTML) → navigate to them directly

3. **Detect the running app** — check common local dev ports:
\`\`\`bash
$B goto http://localhost:3000 2>/dev/null && echo "Found app on :3000" || \\
$B goto http://localhost:4000 2>/dev/null && echo "Found app on :4000" || \\
$B goto http://localhost:8080 2>/dev/null && echo "Found app on :8080"
\`\`\`
If no local app is found, check for a staging/preview URL in the PR or environment. If nothing works, ask the user for the URL.

4. **Test each affected page/route:**
- Navigate to the page
- Take a screenshot
- Check console for errors
- If the change was interactive (forms, buttons, flows), test the interaction end-to-end
- Use \`snapshot -D\` before and after actions to verify the change had the expected effect

5. **Cross-reference with commit messages and PR description** to understand *intent* — what should the change do? Verify it actually does that.

6. **Check TODOS.md** (if it exists) for known bugs or issues related to the changed files. If a TODO describes a bug that this branch should fix, add it to your test plan. If you find a new bug during QA that isn't in TODOS.md, note it in the report.

7. **Report findings** scoped to the branch changes:
- "Changes tested: N pages/routes affected by this branch"
- For each: does it work? Screenshot evidence.
- Any regressions on adjacent pages?

**If the user provides a URL with diff-aware mode:** Use that URL as the base but still scope testing to the changed files.

### Full (default when URL is provided)
Systematic exploration. Visit every reachable page. Document 5-10 well-evidenced issues. Produce health score. Takes 5-15 minutes depending on app size.

### Quick (\`--quick\`)
30-second smoke test. Visit homepage + top 5 navigation targets. Check: page loads? Console errors? Broken links? Produce health score. No detailed issue documentation.

### Regression (\`--regression <baseline>\`)
Run full mode, then load \`baseline.json\` from a previous run. Diff: which issues are fixed? Which are new? What's the score delta? Append regression section to report.

---

## Workflow

### Phase 1: Initialize

1. Find browse binary (see Setup above)
2. Create output directories
3. Copy report template from \`qa/templates/qa-report-template.md\` to output dir
4. Start timer for duration tracking

### Phase 2: Authenticate (if needed)

**If the user specified auth credentials:**

\`\`\`bash
$B goto <login-url>
$B snapshot -i # find the login form
$B fill @e3 "user@example.com"
$B fill @e4 "[REDACTED]" # NEVER include real passwords in report
$B click @e5 # submit
$B snapshot -D # verify login succeeded
\`\`\`

**If the user provided a cookie file:**

\`\`\`bash
$B cookie-import cookies.json
$B goto <target-url>
\`\`\`

**If 2FA/OTP is required:** Ask the user for the code and wait.

**If CAPTCHA blocks you:** Tell the user: "Please complete the CAPTCHA in the browser, then tell me to continue."

### Phase 3: Orient

Get a map of the application:

\`\`\`bash
$B goto <target-url>
$B snapshot -i -a -o "$REPORT_DIR/screenshots/initial.png"
$B links # map navigation structure
$B console --errors # any errors on landing?
\`\`\`

**Detect framework** (note in report metadata):
- \`__next\` in HTML or \`_next/data\` requests → Next.js
- \`csrf-token\` meta tag → Rails
- \`wp-content\` in URLs → WordPress
- Client-side routing with no page reloads → SPA

**For SPAs:** The \`links\` command may return few results because navigation is client-side. Use \`snapshot -i\` to find nav elements (buttons, menu items) instead.

### Phase 4: Explore

Visit pages systematically. At each page:

\`\`\`bash
$B goto <page-url>
$B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
$B console --errors
\`\`\`

Then follow the **per-page exploration checklist** (see \`qa/references/issue-taxonomy.md\`):

1. **Visual scan** — Look at the annotated screenshot for layout issues
2. **Interactive elements** — Click buttons, links, controls. Do they work?
3. **Forms** — Fill and submit. Test empty, invalid, edge cases
4. **Navigation** — Check all paths in and out
5. **States** — Empty state, loading, error, overflow
6. **Console** — Any new JS errors after interactions?
7. **Responsiveness** — Check mobile viewport if relevant:
\`\`\`bash
$B viewport 375x812
$B screenshot "$REPORT_DIR/screenshots/page-mobile.png"
$B viewport 1280x720
\`\`\`

**Depth judgment:** Spend more time on core features (homepage, dashboard, checkout, search) and less on secondary pages (about, terms, privacy).

**Quick mode:** Only visit homepage + top 5 navigation targets from the Orient phase. Skip the per-page checklist — just check: loads? Console errors? Broken links visible?

### Phase 5: Document

Document each issue **immediately when found** — don't batch them.

**Two evidence tiers:**

**Interactive bugs** (broken flows, dead buttons, form failures):
1. Take a screenshot before the action
2. Perform the action
3. Take a screenshot showing the result
4. Use \`snapshot -D\` to show what changed
5. Write repro steps referencing screenshots

\`\`\`bash
$B screenshot "$REPORT_DIR/screenshots/issue-001-step-1.png"
$B click @e5
$B screenshot "$REPORT_DIR/screenshots/issue-001-result.png"
$B snapshot -D
\`\`\`

**Static bugs** (typos, layout issues, missing images):
1. Take a single annotated screenshot showing the problem
2. Describe what's wrong

\`\`\`bash
$B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
\`\`\`

**Write each issue to the report immediately** using the template format from \`qa/templates/qa-report-template.md\`.

### Phase 6: Wrap Up

1. **Compute health score** using the rubric below
2. **Write "Top 3 Things to Fix"** — the 3 highest-severity issues
3. **Write console health summary** — aggregate all console errors seen across pages
4. **Update severity counts** in the summary table
5. **Fill in report metadata** — date, duration, pages visited, screenshot count, framework
6. **Save baseline** — write \`baseline.json\` with:
\`\`\`json
{
"date": "YYYY-MM-DD",
"url": "<target>",
"healthScore": N,
"issues": [{ "id": "ISSUE-001", "title": "...", "severity": "...", "category": "..." }],
"categoryScores": { "console": N, "links": N, ... }
}
\`\`\`

**Regression mode:** After writing the report, load the baseline file. Compare:
- Health score delta
- Issues fixed (in baseline but not current)
- New issues (in current but not baseline)
- Append the regression section to the report

---

## Health Score Rubric

Compute each category score (0-100), then take the weighted average.

### Console (weight: 15%)
- 0 errors → 100
- 1-3 errors → 70
- 4-10 errors → 40
- 10+ errors → 10

### Links (weight: 10%)
- 0 broken → 100
- Each broken link → -15 (minimum 0)

### Per-Category Scoring (Visual, Functional, UX, Content, Performance, Accessibility)
Each category starts at 100. Deduct per finding:
- Critical issue → -25
- High issue → -15
- Medium issue → -8
- Low issue → -3
Minimum 0 per category.

### Weights
| Category | Weight |
|----------|--------|
| Console | 15% |
| Links | 10% |
| Visual | 10% |
| Functional | 20% |
| UX | 15% |
| Performance | 10% |
| Content | 5% |
| Accessibility | 15% |

### Final Score
\`score = Σ (category_score × weight)\`

---

## Framework-Specific Guidance

### Next.js
- Check console for hydration errors (\`Hydration failed\`, \`Text content did not match\`)
- Monitor \`_next/data\` requests in network — 404s indicate broken data fetching
- Test client-side navigation (click links, don't just \`goto\`) — catches routing issues
- Check for CLS (Cumulative Layout Shift) on pages with dynamic content

### Rails
- Check for N+1 query warnings in console (if development mode)
- Verify CSRF token presence in forms
- Test Turbo/Stimulus integration — do page transitions work smoothly?
- Check for flash messages appearing and dismissing correctly

### WordPress
- Check for plugin conflicts (JS errors from different plugins)
- Verify admin bar visibility for logged-in users
- Test REST API endpoints (\`/wp-json/\`)
- Check for mixed content warnings (common with WP)

### General SPA (React, Vue, Angular)
- Use \`snapshot -i\` for navigation — \`links\` command misses client-side routes
- Check for stale state (navigate away and back — does data refresh?)
- Test browser back/forward — does the app handle history correctly?
- Check for memory leaks (monitor console after extended use)

---

## Important Rules

1. **Repro is everything.** Every issue needs at least one screenshot. No exceptions.
2. **Verify before documenting.** Retry the issue once to confirm it's reproducible, not a fluke.
3. **Never include credentials.** Write \`[REDACTED]\` for passwords in repro steps.
4. **Write incrementally.** Append each issue to the report as you find it. Don't batch.
5. **Never read source code.** Test as a user, not a developer.
6. **Check console after every interaction.** JS errors that don't surface visually are still bugs.
7. **Test like a user.** Use realistic data. Walk through complete workflows end-to-end.
8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
10. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.`;
}
|
||
|
||
function generateDesignMethodology(): string {
|
||
return `## Modes
|
||
|
||
### Full (default)
|
||
Systematic review of all pages reachable from homepage. Visit 5-8 pages. Full checklist evaluation, responsive screenshots, interaction flow testing. Produces complete design audit report with letter grades.
|
||
|
||
### Quick (\`--quick\`)
|
||
Homepage + 2 key pages only. First Impression + Design System Extraction + abbreviated checklist. Fastest path to a design score.
|
||
|
||
### Deep (\`--deep\`)
|
||
Comprehensive review: 10-15 pages, every interaction flow, exhaustive checklist. For pre-launch audits or major redesigns.
|
||
|
||
### Diff-aware (automatic when on a feature branch with no URL)
|
||
When on a feature branch, scope to pages affected by the branch changes:
|
||
1. Analyze the branch diff: \`git diff main...HEAD --name-only\`
|
||
2. Map changed files to affected pages/routes
|
||
3. Detect running app on common local ports (3000, 4000, 8080)
|
||
4. Audit only affected pages, compare design quality before/after
|
||
|
||
### Regression (\`--regression\` or previous \`design-baseline.json\` found)
|
||
Run full audit, then load previous \`design-baseline.json\`. Compare: per-category grade deltas, new findings, resolved findings. Output regression table in report.
|
||
|
||
---
|
||
|
||
## Phase 1: First Impression
|
||
|
||
The most uniquely designer-like output. Form a gut reaction before analyzing anything.
|
||
|
||
1. Navigate to the target URL
|
||
2. Take a full-page desktop screenshot: \`$B screenshot "$REPORT_DIR/screenshots/first-impression.png"\`
|
||
3. Write the **First Impression** using this structured critique format:
|
||
- "The site communicates **[what]**." (what it says at a glance — competence? playfulness? confusion?)
|
||
- "I notice **[observation]**." (what stands out, positive or negative — be specific)
|
||
- "The first 3 things my eye goes to are: **[1]**, **[2]**, **[3]**." (hierarchy check — are these intentional?)
|
||
- "If I had to describe this in one word: **[word]**." (gut verdict)
|
||
|
||
This is the section users read first. Be opinionated. A designer doesn't hedge — they react.
|
||
|
||
---
|
||
|
||
## Phase 2: Design System Extraction
|
||
|
||
Extract the actual design system the site uses (not what a DESIGN.md says, but what's rendered):
|
||
|
||
\`\`\`bash
|
||
# Fonts in use (capped at 500 elements to avoid timeout)
|
||
$B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).map(e => getComputedStyle(e).fontFamily))])"
|
||
|
||
# Color palette in use
|
||
$B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).flatMap(e => [getComputedStyle(e).color, getComputedStyle(e).backgroundColor]).filter(c => c !== 'rgba(0, 0, 0, 0)'))])"
|
||
|
||
# Heading hierarchy
|
||
$B js "JSON.stringify([...document.querySelectorAll('h1,h2,h3,h4,h5,h6')].map(h => ({tag:h.tagName, text:h.textContent.trim().slice(0,50), size:getComputedStyle(h).fontSize, weight:getComputedStyle(h).fontWeight})))"
|
||
|
||
# Touch target audit (find undersized interactive elements)
|
||
$B js "JSON.stringify([...document.querySelectorAll('a,button,input,[role=button]')].filter(e => {const r=e.getBoundingClientRect(); return r.width>0 && (r.width<44||r.height<44)}).map(e => ({tag:e.tagName, text:(e.textContent||'').trim().slice(0,30), w:Math.round(e.getBoundingClientRect().width), h:Math.round(e.getBoundingClientRect().height)})).slice(0,20))"
|
||
|
||
# Performance baseline
|
||
$B perf
|
||
\`\`\`
|
||
|
||
Structure findings as an **Inferred Design System**:
|
||
- **Fonts:** list with usage counts. Flag if >3 distinct font families.
|
||
- **Colors:** palette extracted. Flag if >12 unique non-gray colors. Note warm/cool/mixed.
|
||
- **Heading Scale:** h1-h6 sizes. Flag skipped levels, non-systematic size jumps.
|
||
- **Spacing Patterns:** sample padding/margin values. Flag non-scale values.
|
||
|
||
After extraction, offer: *"Want me to save this as your DESIGN.md? I can lock in these observations as your project's design system baseline."*
|
||
|
||
---
|
||
|
||
## Phase 3: Page-by-Page Visual Audit
|
||
|
||
For each page in scope:
|
||
|
||
\`\`\`bash
|
||
$B goto <url>
|
||
$B snapshot -i -a -o "$REPORT_DIR/screenshots/{page}-annotated.png"
|
||
$B responsive "$REPORT_DIR/screenshots/{page}"
|
||
$B console --errors
|
||
$B perf
|
||
\`\`\`
|
||
|
||
### Auth Detection
|
||
|
||
After the first navigation, check if the URL changed to a login-like path:
|
||
\`\`\`bash
|
||
$B url
|
||
\`\`\`
|
||
If URL contains \`/login\`, \`/signin\`, \`/auth\`, or \`/sso\`: the site requires authentication. AskUserQuestion: "This site requires authentication. Want to import cookies from your browser? Run \`/setup-browser-cookies\` first if needed."
|
||
|
||
### Design Audit Checklist (10 categories, ~80 items)
|
||
|
||
Apply these at each page. Each finding gets an impact rating (high/medium/polish) and category.
|
||
|
||
**1. Visual Hierarchy & Composition** (8 items)
|
||
- Clear focal point? One primary CTA per view?
|
||
- Eye flows naturally top-left to bottom-right?
|
||
- Visual noise — competing elements fighting for attention?
|
||
- Information density appropriate for content type?
|
||
- Z-index clarity — nothing unexpectedly overlapping?
|
||
- Above-the-fold content communicates purpose in 3 seconds?
|
||
- Squint test: hierarchy still visible when blurred?
|
||
- White space is intentional, not leftover?
|
||
|
||
**2. Typography** (15 items)
|
||
- Font count <=3 (flag if more)
|
||
- Scale follows ratio (1.25 major third or 1.333 perfect fourth)
|
||
- Line-height: 1.5x body, 1.15-1.25x headings
|
||
- Measure: 45-75 chars per line (66 ideal)
|
||
- Heading hierarchy: no skipped levels (h1→h3 without h2)
|
||
- Weight contrast: >=2 weights used for hierarchy
|
||
- No blacklisted fonts (Papyrus, Comic Sans, Lobster, Impact, Jokerman)
|
||
- If primary font is Inter/Roboto/Open Sans/Poppins → flag as potentially generic
|
||
- \`text-wrap: balance\` or \`text-pretty\` on headings (check via \`$B css <heading> text-wrap\`)
|
||
- Curly quotes used, not straight quotes
|
||
- Ellipsis character (\`…\`) not three dots (\`...\`)
|
||
- \`font-variant-numeric: tabular-nums\` on number columns
|
||
- Body text >= 16px
|
||
- Caption/label >= 12px
|
||
- No letterspacing on lowercase text
|
||
|
||
**3. Color & Contrast** (10 items)
|
||
- Palette coherent (<=12 unique non-gray colors)
|
||
- WCAG AA: body text 4.5:1, large text (18px+) 3:1, UI components 3:1
|
||
- Semantic colors consistent (success=green, error=red, warning=yellow/amber)
|
||
- No color-only encoding (always add labels, icons, or patterns)
|
||
- Dark mode: surfaces use elevation, not just lightness inversion
|
||
- Dark mode: text off-white (~#E0E0E0), not pure white
|
||
- Primary accent desaturated 10-20% in dark mode
|
||
- \`color-scheme: dark\` on html element (if dark mode present)
|
||
- No red/green only combinations (8% of men have red-green deficiency)
|
||
- Neutral palette is warm or cool consistently — not mixed
|
||
|
||
**4. Spacing & Layout** (12 items)
|
||
- Grid consistent at all breakpoints
|
||
- Spacing uses a scale (4px or 8px base), not arbitrary values
|
||
- Alignment is consistent — nothing floats outside the grid
|
||
- Rhythm: related items closer together, distinct sections further apart
|
||
- Border-radius hierarchy (not uniform bubbly radius on everything)
|
||
- Inner radius = outer radius - gap (nested elements)
|
||
- No horizontal scroll on mobile
|
||
- Max content width set (no full-bleed body text)
|
||
- \`env(safe-area-inset-*)\` for notch devices
|
||
- URL reflects state (filters, tabs, pagination in query params)
|
||
- Flex/grid used for layout (not JS measurement)
|
||
- Breakpoints: mobile (375), tablet (768), desktop (1024), wide (1440)
|
||
|
||
**5. Interaction States** (10 items)
|
||
- Hover state on all interactive elements
|
||
- \`focus-visible\` ring present (never \`outline: none\` without replacement)
|
||
- Active/pressed state with depth effect or color shift
|
||
- Disabled state: reduced opacity + \`cursor: not-allowed\`
|
||
- Loading: skeleton shapes match real content layout
|
||
- Empty states: warm message + primary action + visual (not just "No items.")
|
||
- Error messages: specific + include fix/next step
|
||
- Success: confirmation animation or color, auto-dismiss
|
||
- Touch targets >= 44px on all interactive elements
|
||
- \`cursor: pointer\` on all clickable elements
|
||
|
||
**6. Responsive Design** (8 items)
|
||
- Mobile layout makes *design* sense (not just stacked desktop columns)
|
||
- Touch targets sufficient on mobile (>= 44px)
|
||
- No horizontal scroll on any viewport
|
||
- Images handle responsive (srcset, sizes, or CSS containment)
|
||
- Text readable without zooming on mobile (>= 16px body)
|
||
- Navigation collapses appropriately (hamburger, bottom nav, etc.)
|
||
- Forms usable on mobile (correct input types, no autoFocus on mobile)
|
||
- No \`user-scalable=no\` or \`maximum-scale=1\` in viewport meta
|
||
|
||
**7. Motion & Animation** (6 items)
|
||
- Easing: ease-out for entering, ease-in for exiting, ease-in-out for moving
|
||
- Duration: 50-700ms range (nothing slower unless page transition)
|
||
- Purpose: every animation communicates something (state change, attention, spatial relationship)
|
||
- \`prefers-reduced-motion\` respected (check: \`$B js "matchMedia('(prefers-reduced-motion: reduce)').matches"\`)
|
||
- No \`transition: all\` — properties listed explicitly
|
||
- Only \`transform\` and \`opacity\` animated (not layout properties like width, height, top, left)
|
||
|
||
**8. Content & Microcopy** (8 items)
|
||
- Empty states designed with warmth (message + action + illustration/icon)
|
||
- Error messages specific: what happened + why + what to do next
|
||
- Button labels specific ("Save API Key" not "Continue" or "Submit")
|
||
- No placeholder/lorem ipsum text visible in production
|
||
- Truncation handled (\`text-overflow: ellipsis\`, \`line-clamp\`, or \`break-words\`)
|
||
- Active voice ("Install the CLI" not "The CLI will be installed")
|
||
- Loading states end with \`…\` ("Saving…" not "Saving...")
|
||
- Destructive actions have confirmation modal or undo window
|
||
|
||
**9. AI Slop Detection** (10 anti-patterns — the blacklist)
|
||
|
||
The test: would a human designer at a respected studio ever ship this?
|
||
|
||
- Purple/violet/indigo gradient backgrounds or blue-to-purple color schemes
|
||
- **The 3-column feature grid:** icon-in-colored-circle + bold title + 2-line description, repeated 3x symmetrically. THE most recognizable AI layout.
|
||
- Icons in colored circles as section decoration (SaaS starter template look)
|
||
- Centered everything (\`text-align: center\` on all headings, descriptions, cards)
|
||
- Uniform bubbly border-radius on every element (same large radius on everything)
|
||
- Decorative blobs, floating circles, wavy SVG dividers (if a section feels empty, it needs better content, not decoration)
|
||
- Emoji as design elements (rockets in headings, emoji as bullet points)
|
||
- Colored left-border on cards (\`border-left: 3px solid <accent>\`)
|
||
- Generic hero copy ("Welcome to [X]", "Unlock the power of...", "Your all-in-one solution for...")
|
||
- Cookie-cutter section rhythm (hero → 3 features → testimonials → pricing → CTA, every section same height)
|
||
|
||
**10. Performance as Design** (6 items)
|
||
- LCP < 2.0s (web apps), < 1.5s (informational sites)
|
||
- CLS < 0.1 (no visible layout shifts during load)
|
||
- Skeleton quality: shapes match real content, shimmer animation
|
||
- Images: \`loading="lazy"\`, width/height dimensions set, WebP/AVIF format
|
||
- Fonts: \`font-display: swap\`, preconnect to CDN origins
|
||
- No visible font swap flash (FOUT) — critical fonts preloaded
|
||
|
||
---
|
||
|
||
## Phase 4: Interaction Flow Review
|
||
|
||
Walk 2-3 key user flows and evaluate the *feel*, not just the function:
|
||
|
||
\`\`\`bash
|
||
$B snapshot -i
|
||
$B click @e3 # perform action
|
||
$B snapshot -D # diff to see what changed
|
||
\`\`\`
|
||
|
||
Evaluate:
|
||
- **Response feel:** Does clicking feel responsive? Any delays or missing loading states?
|
||
- **Transition quality:** Are transitions intentional or generic/absent?
|
||
- **Feedback clarity:** Did the action clearly succeed or fail? Is the feedback immediate?
|
||
- **Form polish:** Focus states visible? Validation timing correct? Errors near the source?
|
||
|
||
---
|
||
|
||
## Phase 5: Cross-Page Consistency
|
||
|
||
Compare screenshots and observations across pages for:
|
||
- Navigation bar consistent across all pages?
|
||
- Footer consistent?
|
||
- Component reuse vs one-off designs (same button styled differently on different pages?)
|
||
- Tone consistency (one page playful while another is corporate?)
|
||
- Spacing rhythm carries across pages?
|
||
|
||
---
|
||
|
||
## Phase 6: Compile Report
|
||
|
||
### Output Locations
|
||
|
||
**Local:** \`.gstack/design-reports/design-audit-{domain}-{YYYY-MM-DD}.md\`
|
||
|
||
**Project-scoped:**
|
||
\`\`\`bash
|
||
SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\\([^/]*/[^/]*\\)\\.git$|\\1|;s|.*[:/]\\([^/]*/[^/]*\\)$|\\1|' | tr '/' '-')
|
||
mkdir -p ~/.gstack/projects/$SLUG
|
||
\`\`\`
|
||
Write to: \`~/.gstack/projects/{slug}/{user}-{branch}-design-audit-{datetime}.md\`
|
||
|
||
**Baseline:** Write \`design-baseline.json\` for regression mode:
|
||
\`\`\`json
|
||
{
|
||
"date": "YYYY-MM-DD",
|
||
"url": "<target>",
|
||
"designScore": "B",
|
||
"aiSlopScore": "C",
|
||
"categoryGrades": { "hierarchy": "A", "typography": "B", ... },
|
||
"findings": [{ "id": "FINDING-001", "title": "...", "impact": "high", "category": "typography" }]
|
||
}
|
||
\`\`\`
|
||
|
||
### Scoring System
|
||
|
||
**Dual headline scores:**
|
||
- **Design Score: {A-F}** — weighted average of all 10 categories
|
||
- **AI Slop Score: {A-F}** — standalone grade with pithy verdict
|
||
|
||
**Per-category grades:**
|
||
- **A:** Intentional, polished, delightful. Shows design thinking.
|
||
- **B:** Solid fundamentals, minor inconsistencies. Looks professional.
|
||
- **C:** Functional but generic. No major problems, no design point of view.
|
||
- **D:** Noticeable problems. Feels unfinished or careless.
|
||
- **F:** Actively hurting user experience. Needs significant rework.
|
||
|
||
**Grade computation:** Each category starts at A. Each High-impact finding drops one letter grade. Each Medium-impact finding drops half a letter grade. Polish findings are noted but do not affect grade. Minimum is F.
|
||
|
||
**Category weights for Design Score:**
|
||
| Category | Weight |
|
||
|----------|--------|
|
||
| Visual Hierarchy | 15% |
|
||
| Typography | 15% |
|
||
| Spacing & Layout | 15% |
|
||
| Color & Contrast | 10% |
|
||
| Interaction States | 10% |
|
||
| Responsive | 10% |
|
||
| Content Quality | 10% |
|
||
| AI Slop | 5% |
|
||
| Motion | 5% |
|
||
| Performance Feel | 5% |
|
||
|
||
AI Slop is 5% of Design Score but also graded independently as a headline metric.
|
||
|
||
### Regression Output
|
||
|
||
When previous \`design-baseline.json\` exists or \`--regression\` flag is used:
|
||
- Load baseline grades
|
||
- Compare: per-category deltas, new findings, resolved findings
|
||
- Append regression table to report
|
||
|
||
---
|
||
|
||
## Design Critique Format
|
||
|
||
Use structured feedback, not opinions:
|
||
- "I notice..." — observation (e.g., "I notice the primary CTA competes with the secondary action")
|
||
- "I wonder..." — question (e.g., "I wonder if users will understand what 'Process' means here")
|
||
- "What if..." — suggestion (e.g., "What if we moved search to a more prominent position?")
|
||
- "I think... because..." — reasoned opinion (e.g., "I think the spacing between sections is too uniform because it doesn't create hierarchy")
|
||
|
||
Tie everything to user goals and product objectives. Always suggest specific improvements alongside problems.
|
||
|
||
---
|
||
|
||
## Important Rules
|
||
|
||
1. **Think like a designer, not a QA engineer.** You care whether things feel right, look intentional, and respect the user. You do NOT just care whether things "work."
|
||
2. **Screenshots are evidence.** Every finding needs at least one screenshot. Use annotated screenshots (\`snapshot -a\`) to highlight elements.
|
||
3. **Be specific and actionable.** "Change X to Y because Z" — not "the spacing feels off."
|
||
4. **Never read source code.** Evaluate the rendered site, not the implementation. (Exception: offer to write DESIGN.md from extracted observations.)
|
||
5. **AI Slop detection is your superpower.** Most developers can't evaluate whether their site looks AI-generated. You can. Be direct about it.
|
||
6. **Quick wins matter.** Always include a "Quick Wins" section — the 3-5 highest-impact fixes that take <30 minutes each.
|
||
7. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
|
||
8. **Responsive is design, not just "not broken."** A stacked desktop layout on mobile is not responsive design — it's lazy. Evaluate whether the mobile layout makes *design* sense.
|
||
9. **Document incrementally.** Write each finding to the report as you find it. Don't batch.
|
||
10. **Depth over breadth.** 5-10 well-documented findings with screenshots and specific suggestions > 20 vague observations.`;
|
||
}
|
||
|
||
// Maps {{PLACEHOLDER}} names (as written in .tmpl files) to the generator
// function that produces the replacement text. processTemplate() throws on
// any placeholder name not registered here.
const RESOLVERS: Record<string, () => string> = {
  COMMAND_REFERENCE: generateCommandReference,
  SNAPSHOT_FLAGS: generateSnapshotFlags,
  PREAMBLE: generatePreamble,
  BROWSE_SETUP: generateBrowseSetup,
  QA_METHODOLOGY: generateQAMethodology,
  DESIGN_METHODOLOGY: generateDesignMethodology,
};
|
||
|
||
// ─── Template Processing ────────────────────────────────────
|
||
|
||
// HTML-comment banner injected into every generated file; the {{SOURCE}}
// token is replaced with the template's basename in processTemplate().
const GENERATED_HEADER = `<!-- AUTO-GENERATED from {{SOURCE}} — do not edit directly -->\n<!-- Regenerate: bun run gen:skill-docs -->\n`;
|
||
|
||
function processTemplate(tmplPath: string): { outputPath: string; content: string } {
|
||
const tmplContent = fs.readFileSync(tmplPath, 'utf-8');
|
||
const relTmplPath = path.relative(ROOT, tmplPath);
|
||
const outputPath = tmplPath.replace(/\.tmpl$/, '');
|
||
|
||
// Replace placeholders
|
||
let content = tmplContent.replace(/\{\{(\w+)\}\}/g, (match, name) => {
|
||
const resolver = RESOLVERS[name];
|
||
if (!resolver) throw new Error(`Unknown placeholder {{${name}}} in ${relTmplPath}`);
|
||
return resolver();
|
||
});
|
||
|
||
// Check for any remaining unresolved placeholders
|
||
const remaining = content.match(/\{\{(\w+)\}\}/g);
|
||
if (remaining) {
|
||
throw new Error(`Unresolved placeholders in ${relTmplPath}: ${remaining.join(', ')}`);
|
||
}
|
||
|
||
// Prepend generated header (after frontmatter)
|
||
const header = GENERATED_HEADER.replace('{{SOURCE}}', path.basename(tmplPath));
|
||
const fmEnd = content.indexOf('---', content.indexOf('---') + 3);
|
||
if (fmEnd !== -1) {
|
||
const insertAt = content.indexOf('\n', fmEnd) + 1;
|
||
content = content.slice(0, insertAt) + header + content.slice(insertAt);
|
||
} else {
|
||
content = header + content;
|
||
}
|
||
|
||
return { outputPath, content };
|
||
}
|
||
|
||
// ─── Main ───────────────────────────────────────────────────
|
||
|
||
function findTemplates(): string[] {
|
||
const templates: string[] = [];
|
||
const candidates = [
|
||
path.join(ROOT, 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'browse', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'qa', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'qa-only', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'setup-browser-cookies', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'ship', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'review', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'plan-ceo-review', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'plan-eng-review', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'retro', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'gstack-upgrade', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'plan-design-review', 'SKILL.md.tmpl'),
|
||
path.join(ROOT, 'qa-design-review', 'SKILL.md.tmpl'),
|
||
];
|
||
for (const p of candidates) {
|
||
if (fs.existsSync(p)) templates.push(p);
|
||
}
|
||
return templates;
|
||
}
|
||
|
||
let hasChanges = false;
|
||
|
||
for (const tmplPath of findTemplates()) {
|
||
const { outputPath, content } = processTemplate(tmplPath);
|
||
const relOutput = path.relative(ROOT, outputPath);
|
||
|
||
if (DRY_RUN) {
|
||
const existing = fs.existsSync(outputPath) ? fs.readFileSync(outputPath, 'utf-8') : '';
|
||
if (existing !== content) {
|
||
console.log(`STALE: ${relOutput}`);
|
||
hasChanges = true;
|
||
} else {
|
||
console.log(`FRESH: ${relOutput}`);
|
||
}
|
||
} else {
|
||
fs.writeFileSync(outputPath, content);
|
||
console.log(`GENERATED: ${relOutput}`);
|
||
}
|
||
}
|
||
|
||
if (DRY_RUN && hasChanges) {
|
||
console.error('\nGenerated SKILL.md files are stale. Run: bun run gen:skill-docs');
|
||
process.exit(1);
|
||
}
|