feat: interactive /plan-design-review + CEO invokes designer

Rewrite /plan-design-review from report-only grading to an interactive
plan-fixer that rates each design dimension 0-10, explains what a 10
looks like, and edits the plan to get there. Parallel structure with
/plan-ceo-review and /plan-eng-review — one issue = one AskUserQuestion.

CEO review now detects UI scope and invokes the designer perspective
when the plan has frontend/UX work, so you get design review
automatically when it matters.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-17 20:23:09 -07:00
parent 56fa6d9cc2
commit 23c0b525f8
6 changed files with 669 additions and 714 deletions
+90 -75
View File
@@ -2178,15 +2178,13 @@ Skip research. Skip any AskUserQuestion calls — this is non-interactive. Gener
}, 420_000);
});
// --- Plan Design Review E2E ---
// --- Plan Design Review E2E (plan-mode) ---
describeIfSelected('Plan Design Review E2E', ['plan-design-review-audit', 'plan-design-review-export'], () => {
describeIfSelected('Plan Design Review E2E', ['plan-design-review-plan-mode', 'plan-design-review-no-ui-scope'], () => {
let reviewDir: string;
beforeAll(() => {
testServer = testServer || startTestServer();
reviewDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-design-'));
setupBrowseShims(reviewDir);
const { spawnSync } = require('child_process');
const run = (cmd: string, args: string[]) =>
@@ -2195,9 +2193,6 @@ describeIfSelected('Plan Design Review E2E', ['plan-design-review-audit', 'plan-
run('git', ['init']);
run('git', ['config', 'user.email', 'test@test.com']);
run('git', ['config', 'user.name', 'Test']);
fs.writeFileSync(path.join(reviewDir, 'index.html'), '<h1>Test</h1>\n');
run('git', ['add', '.']);
run('git', ['commit', '-m', 'initial']);
// Copy plan-design-review skill
fs.mkdirSync(path.join(reviewDir, 'plan-design-review'), { recursive: true });
@@ -2205,102 +2200,122 @@ describeIfSelected('Plan Design Review E2E', ['plan-design-review-audit', 'plan-
path.join(ROOT, 'plan-design-review', 'SKILL.md'),
path.join(reviewDir, 'plan-design-review', 'SKILL.md'),
);
// Create a plan file with intentional design gaps
fs.writeFileSync(path.join(reviewDir, 'plan.md'), `# Plan: User Dashboard
## Context
Build a user dashboard that shows account stats, recent activity, and settings.
## Implementation
1. Create a dashboard page at /dashboard
2. Show user stats (posts, followers, engagement rate)
3. Add a recent activity feed
4. Add a settings panel
5. Use a clean, modern UI with cards and icons
6. Add a hero section at the top with a gradient background
## Technical Details
- React components with Tailwind CSS
- API endpoint: GET /api/dashboard
- WebSocket for real-time activity updates
`);
run('git', ['add', '.']);
run('git', ['commit', '-m', 'initial plan']);
});
afterAll(() => {
try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {}
});
testIfSelected('plan-design-review-audit', async () => {
testIfSelected('plan-design-review-plan-mode', async () => {
const result = await runSkillTest({
prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly.
prompt: `Read plan-design-review/SKILL.md for the design review workflow.
B="${browseBin}"
Review the plan in ./plan.md. This plan has several design gaps — it uses vague language like "clean, modern UI" and "cards and icons", mentions a "hero section with gradient" (AI slop), and doesn't specify empty states, error states, loading states, responsive behavior, or accessibility.
Read plan-design-review/SKILL.md for the design review workflow.
Skip the preamble bash block. Skip any AskUserQuestion calls — this is non-interactive. Rate each design dimension 0-10 and explain what would make it a 10. Write your findings and ratings directly to stdout.
Review the site at ${testServer.url}. Use --quick mode (homepage + 2 pages). Skip any AskUserQuestion calls — this is non-interactive. Write your audit report to ./design-audit.md. Do not offer to create DESIGN.md.
EFFICIENCY: Skip the preamble bash block. Combine multiple browse commands into single bash blocks (e.g. run all Phase 2 JS extractions in one block). Write the report as soon as you have enough data — do not over-explore.`,
IMPORTANT: Do NOT try to browse any URLs or use a browse binary. This is a plan review, not a live site audit. Just read the plan file and review it.`,
workingDirectory: reviewDir,
maxTurns: 30,
timeout: 360_000,
testName: 'plan-design-review-audit',
maxTurns: 15,
timeout: 300_000,
testName: 'plan-design-review-plan-mode',
runId,
});
logCost('/plan-design-review audit', result);
logCost('/plan-design-review plan-mode', result);
const reportPath = path.join(reviewDir, 'design-audit.md');
const reportExists = fs.existsSync(reportPath);
let reportContent = '';
if (reportExists) {
reportContent = fs.readFileSync(reportPath, 'utf-8');
}
// Check that the agent produced design ratings (0-10 scale)
const output = result.output || '';
const hasRatings = /\d+\/10/.test(output);
const hasDesignContent = output.toLowerCase().includes('information architecture') ||
output.toLowerCase().includes('interaction state') ||
output.toLowerCase().includes('ai slop') ||
output.toLowerCase().includes('hierarchy');
const hasFirstImpression = reportContent.toLowerCase().includes('first impression') ||
reportContent.toLowerCase().includes('impression');
recordE2E('/plan-design-review audit', 'Plan Design Review E2E', result, {
passed: reportExists && ['success', 'error_max_turns'].includes(result.exitReason),
recordE2E('/plan-design-review plan-mode', 'Plan Design Review E2E', result, {
passed: hasDesignContent && ['success', 'error_max_turns'].includes(result.exitReason),
});
expect(['success', 'error_max_turns']).toContain(result.exitReason);
expect(reportExists).toBe(true);
if (reportExists) {
expect(reportContent.length).toBeGreaterThan(200);
}
}, 420_000);
// Agent should produce design-relevant output about the plan
expect(hasDesignContent).toBe(true);
}, 360_000);
testIfSelected('plan-design-review-export', async () => {
// Clean up previous test artifacts
try { fs.unlinkSync(path.join(reviewDir, 'design-audit.md')); } catch {}
testIfSelected('plan-design-review-no-ui-scope', async () => {
// Write a backend-only plan
fs.writeFileSync(path.join(reviewDir, 'backend-plan.md'), `# Plan: Database Migration
## Context
Migrate user records from PostgreSQL to a new schema with better indexing.
## Implementation
1. Create migration to add new columns to users table
2. Backfill data from legacy columns
3. Add database indexes for common query patterns
4. Update ActiveRecord models
5. Run migration in staging first, then production
`);
const result = await runSkillTest({
prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly.
prompt: `Read plan-design-review/SKILL.md for the design review workflow.
B="${browseBin}"
Review the plan in ./backend-plan.md. This is a pure backend database migration plan with no UI changes.
Read plan-design-review/SKILL.md for the design review workflow.
Skip the preamble bash block. Skip any AskUserQuestion calls — this is non-interactive. Write your findings directly to stdout.
Review ${testServer.url} with --quick mode. Skip any AskUserQuestion calls — this is non-interactive. After Phase 2 (Design System Extraction), write a DESIGN.md to the working directory. Also write the audit report to ./design-audit.md.`,
IMPORTANT: Do NOT try to browse any URLs or use a browse binary. This is a plan review, not a live site audit.`,
workingDirectory: reviewDir,
maxTurns: 25,
timeout: 360_000,
testName: 'plan-design-review-export',
maxTurns: 10,
timeout: 180_000,
testName: 'plan-design-review-no-ui-scope',
runId,
});
logCost('/plan-design-review export', result);
logCost('/plan-design-review no-ui-scope', result);
const designPath = path.join(reviewDir, 'DESIGN.md');
const reportPath = path.join(reviewDir, 'design-audit.md');
const designExists = fs.existsSync(designPath);
const reportExists = fs.existsSync(reportPath);
// Agent should detect no UI scope and exit early
const output = result.output || '';
const detectsNoUI = output.toLowerCase().includes('no ui') ||
output.toLowerCase().includes('no frontend') ||
output.toLowerCase().includes('no design') ||
output.toLowerCase().includes('not applicable') ||
output.toLowerCase().includes('backend');
let designContent = '';
if (designExists) {
designContent = fs.readFileSync(designPath, 'utf-8');
}
const hasTypography = designContent.toLowerCase().includes('typography') || designContent.toLowerCase().includes('font');
const hasColor = designContent.toLowerCase().includes('color');
recordE2E('/plan-design-review export', 'Plan Design Review E2E', result, {
passed: designExists && ['success', 'error_max_turns'].includes(result.exitReason),
recordE2E('/plan-design-review no-ui-scope', 'Plan Design Review E2E', result, {
passed: detectsNoUI && ['success', 'error_max_turns'].includes(result.exitReason),
});
expect(['success', 'error_max_turns']).toContain(result.exitReason);
// DESIGN.md export is best-effort — agent may not always produce it
if (designExists) {
expect(hasTypography || hasColor).toBe(true);
}
}, 420_000);
expect(detectsNoUI).toBe(true);
}, 240_000);
});
// --- QA Design Review E2E ---
// --- Design Review E2E (live-site audit + fix) ---
describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
describeIfSelected('Design Review E2E', ['design-review-fix'], () => {
let qaDesignDir: string;
let qaDesignServer: ReturnType<typeof Bun.serve> | null = null;
@@ -2376,11 +2391,11 @@ describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
},
});
// Copy qa-design-review skill
fs.mkdirSync(path.join(qaDesignDir, 'qa-design-review'), { recursive: true });
// Copy design-review skill
fs.mkdirSync(path.join(qaDesignDir, 'design-review'), { recursive: true });
fs.copyFileSync(
path.join(ROOT, 'qa-design-review', 'SKILL.md'),
path.join(qaDesignDir, 'qa-design-review', 'SKILL.md'),
path.join(ROOT, 'design-review', 'SKILL.md'),
path.join(qaDesignDir, 'design-review', 'SKILL.md'),
);
});
@@ -2389,7 +2404,7 @@ describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
try { fs.rmSync(qaDesignDir, { recursive: true, force: true }); } catch {}
});
test('Test 7: /qa-design-review audits and fixes design issues', async () => {
test('Test 7: /design-review audits and fixes design issues', async () => {
const serverUrl = `http://localhost:${(qaDesignServer as any)?.port}`;
const result = await runSkillTest({
@@ -2397,17 +2412,17 @@ describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
B="${browseBin}"
Read qa-design-review/SKILL.md for the design review + fix workflow.
Read design-review/SKILL.md for the design review + fix workflow.
Review the site at ${serverUrl}. Use --quick mode. Skip any AskUserQuestion calls — this is non-interactive. Fix up to 3 issues max. Write your report to ./design-audit.md.`,
workingDirectory: qaDesignDir,
maxTurns: 30,
timeout: 360_000,
testName: 'qa-design-review-fix',
testName: 'design-review-fix',
runId,
});
logCost('/qa-design-review fix', result);
logCost('/design-review fix', result);
const reportPath = path.join(qaDesignDir, 'design-audit.md');
const reportExists = fs.existsSync(reportPath);
@@ -2419,7 +2434,7 @@ Review the site at ${serverUrl}. Use --quick mode. Skip any AskUserQuestion call
const commits = gitLog.stdout.toString().trim().split('\n');
const designFixCommits = commits.filter((c: string) => c.includes('style(design)'));
recordE2E('/qa-design-review fix', 'QA Design Review E2E', result, {
recordE2E('/design-review fix', 'Design Review E2E', result, {
passed: ['success', 'error_max_turns'].includes(result.exitReason),
});