mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 05:56:41 +02:00
feat: interactive /plan-design-review + CEO invokes designer
Rewrite /plan-design-review from report-only grading to an interactive plan-fixer that rates each design dimension 0-10, explains what a 10 looks like, and edits the plan to get there. Parallel structure with /plan-ceo-review and /plan-eng-review — one issue = one AskUserQuestion. CEO review now detects UI scope and invokes the designer perspective when the plan has frontend/UX work, so you get design review automatically when it matters. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+90
-75
@@ -2178,15 +2178,13 @@ Skip research. Skip any AskUserQuestion calls — this is non-interactive. Gener
|
||||
}, 420_000);
|
||||
});
|
||||
|
||||
// --- Plan Design Review E2E ---
|
||||
// --- Plan Design Review E2E (plan-mode) ---
|
||||
|
||||
describeIfSelected('Plan Design Review E2E', ['plan-design-review-audit', 'plan-design-review-export'], () => {
|
||||
describeIfSelected('Plan Design Review E2E', ['plan-design-review-plan-mode', 'plan-design-review-no-ui-scope'], () => {
|
||||
let reviewDir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
testServer = testServer || startTestServer();
|
||||
reviewDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-design-'));
|
||||
setupBrowseShims(reviewDir);
|
||||
|
||||
const { spawnSync } = require('child_process');
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
@@ -2195,9 +2193,6 @@ describeIfSelected('Plan Design Review E2E', ['plan-design-review-audit', 'plan-
|
||||
run('git', ['init']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
fs.writeFileSync(path.join(reviewDir, 'index.html'), '<h1>Test</h1>\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Copy plan-design-review skill
|
||||
fs.mkdirSync(path.join(reviewDir, 'plan-design-review'), { recursive: true });
|
||||
@@ -2205,102 +2200,122 @@ describeIfSelected('Plan Design Review E2E', ['plan-design-review-audit', 'plan-
|
||||
path.join(ROOT, 'plan-design-review', 'SKILL.md'),
|
||||
path.join(reviewDir, 'plan-design-review', 'SKILL.md'),
|
||||
);
|
||||
|
||||
// Create a plan file with intentional design gaps
|
||||
fs.writeFileSync(path.join(reviewDir, 'plan.md'), `# Plan: User Dashboard
|
||||
|
||||
## Context
|
||||
Build a user dashboard that shows account stats, recent activity, and settings.
|
||||
|
||||
## Implementation
|
||||
1. Create a dashboard page at /dashboard
|
||||
2. Show user stats (posts, followers, engagement rate)
|
||||
3. Add a recent activity feed
|
||||
4. Add a settings panel
|
||||
5. Use a clean, modern UI with cards and icons
|
||||
6. Add a hero section at the top with a gradient background
|
||||
|
||||
## Technical Details
|
||||
- React components with Tailwind CSS
|
||||
- API endpoint: GET /api/dashboard
|
||||
- WebSocket for real-time activity updates
|
||||
`);
|
||||
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial plan']);
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
testIfSelected('plan-design-review-audit', async () => {
|
||||
testIfSelected('plan-design-review-plan-mode', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly.
|
||||
prompt: `Read plan-design-review/SKILL.md for the design review workflow.
|
||||
|
||||
B="${browseBin}"
|
||||
Review the plan in ./plan.md. This plan has several design gaps — it uses vague language like "clean, modern UI" and "cards and icons", mentions a "hero section with gradient" (AI slop), and doesn't specify empty states, error states, loading states, responsive behavior, or accessibility.
|
||||
|
||||
Read plan-design-review/SKILL.md for the design review workflow.
|
||||
Skip the preamble bash block. Skip any AskUserQuestion calls — this is non-interactive. Rate each design dimension 0-10 and explain what would make it a 10. Write your findings and ratings directly to stdout.
|
||||
|
||||
Review the site at ${testServer.url}. Use --quick mode (homepage + 2 pages). Skip any AskUserQuestion calls — this is non-interactive. Write your audit report to ./design-audit.md. Do not offer to create DESIGN.md.
|
||||
|
||||
EFFICIENCY: Skip the preamble bash block. Combine multiple browse commands into single bash blocks (e.g. run all Phase 2 JS extractions in one block). Write the report as soon as you have enough data — do not over-explore.`,
|
||||
IMPORTANT: Do NOT try to browse any URLs or use a browse binary. This is a plan review, not a live site audit. Just read the plan file and review it.`,
|
||||
workingDirectory: reviewDir,
|
||||
maxTurns: 30,
|
||||
timeout: 360_000,
|
||||
testName: 'plan-design-review-audit',
|
||||
maxTurns: 15,
|
||||
timeout: 300_000,
|
||||
testName: 'plan-design-review-plan-mode',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/plan-design-review audit', result);
|
||||
logCost('/plan-design-review plan-mode', result);
|
||||
|
||||
const reportPath = path.join(reviewDir, 'design-audit.md');
|
||||
const reportExists = fs.existsSync(reportPath);
|
||||
let reportContent = '';
|
||||
if (reportExists) {
|
||||
reportContent = fs.readFileSync(reportPath, 'utf-8');
|
||||
}
|
||||
// Check that the agent produced design ratings (0-10 scale)
|
||||
const output = result.output || '';
|
||||
const hasRatings = /\d+\/10/.test(output);
|
||||
const hasDesignContent = output.toLowerCase().includes('information architecture') ||
|
||||
output.toLowerCase().includes('interaction state') ||
|
||||
output.toLowerCase().includes('ai slop') ||
|
||||
output.toLowerCase().includes('hierarchy');
|
||||
|
||||
const hasFirstImpression = reportContent.toLowerCase().includes('first impression') ||
|
||||
reportContent.toLowerCase().includes('impression');
|
||||
|
||||
recordE2E('/plan-design-review audit', 'Plan Design Review E2E', result, {
|
||||
passed: reportExists && ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
recordE2E('/plan-design-review plan-mode', 'Plan Design Review E2E', result, {
|
||||
passed: hasDesignContent && ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
});
|
||||
|
||||
expect(['success', 'error_max_turns']).toContain(result.exitReason);
|
||||
expect(reportExists).toBe(true);
|
||||
if (reportExists) {
|
||||
expect(reportContent.length).toBeGreaterThan(200);
|
||||
}
|
||||
}, 420_000);
|
||||
// Agent should produce design-relevant output about the plan
|
||||
expect(hasDesignContent).toBe(true);
|
||||
}, 360_000);
|
||||
|
||||
testIfSelected('plan-design-review-export', async () => {
|
||||
// Clean up previous test artifacts
|
||||
try { fs.unlinkSync(path.join(reviewDir, 'design-audit.md')); } catch {}
|
||||
testIfSelected('plan-design-review-no-ui-scope', async () => {
|
||||
// Write a backend-only plan
|
||||
fs.writeFileSync(path.join(reviewDir, 'backend-plan.md'), `# Plan: Database Migration
|
||||
|
||||
## Context
|
||||
Migrate user records from PostgreSQL to a new schema with better indexing.
|
||||
|
||||
## Implementation
|
||||
1. Create migration to add new columns to users table
|
||||
2. Backfill data from legacy columns
|
||||
3. Add database indexes for common query patterns
|
||||
4. Update ActiveRecord models
|
||||
5. Run migration in staging first, then production
|
||||
`);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly.
|
||||
prompt: `Read plan-design-review/SKILL.md for the design review workflow.
|
||||
|
||||
B="${browseBin}"
|
||||
Review the plan in ./backend-plan.md. This is a pure backend database migration plan with no UI changes.
|
||||
|
||||
Read plan-design-review/SKILL.md for the design review workflow.
|
||||
Skip the preamble bash block. Skip any AskUserQuestion calls — this is non-interactive. Write your findings directly to stdout.
|
||||
|
||||
Review ${testServer.url} with --quick mode. Skip any AskUserQuestion calls — this is non-interactive. After Phase 2 (Design System Extraction), write a DESIGN.md to the working directory. Also write the audit report to ./design-audit.md.`,
|
||||
IMPORTANT: Do NOT try to browse any URLs or use a browse binary. This is a plan review, not a live site audit.`,
|
||||
workingDirectory: reviewDir,
|
||||
maxTurns: 25,
|
||||
timeout: 360_000,
|
||||
testName: 'plan-design-review-export',
|
||||
maxTurns: 10,
|
||||
timeout: 180_000,
|
||||
testName: 'plan-design-review-no-ui-scope',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/plan-design-review export', result);
|
||||
logCost('/plan-design-review no-ui-scope', result);
|
||||
|
||||
const designPath = path.join(reviewDir, 'DESIGN.md');
|
||||
const reportPath = path.join(reviewDir, 'design-audit.md');
|
||||
const designExists = fs.existsSync(designPath);
|
||||
const reportExists = fs.existsSync(reportPath);
|
||||
// Agent should detect no UI scope and exit early
|
||||
const output = result.output || '';
|
||||
const detectsNoUI = output.toLowerCase().includes('no ui') ||
|
||||
output.toLowerCase().includes('no frontend') ||
|
||||
output.toLowerCase().includes('no design') ||
|
||||
output.toLowerCase().includes('not applicable') ||
|
||||
output.toLowerCase().includes('backend');
|
||||
|
||||
let designContent = '';
|
||||
if (designExists) {
|
||||
designContent = fs.readFileSync(designPath, 'utf-8');
|
||||
}
|
||||
|
||||
const hasTypography = designContent.toLowerCase().includes('typography') || designContent.toLowerCase().includes('font');
|
||||
const hasColor = designContent.toLowerCase().includes('color');
|
||||
|
||||
recordE2E('/plan-design-review export', 'Plan Design Review E2E', result, {
|
||||
passed: designExists && ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
recordE2E('/plan-design-review no-ui-scope', 'Plan Design Review E2E', result, {
|
||||
passed: detectsNoUI && ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
});
|
||||
|
||||
expect(['success', 'error_max_turns']).toContain(result.exitReason);
|
||||
// DESIGN.md export is best-effort — agent may not always produce it
|
||||
if (designExists) {
|
||||
expect(hasTypography || hasColor).toBe(true);
|
||||
}
|
||||
}, 420_000);
|
||||
expect(detectsNoUI).toBe(true);
|
||||
}, 240_000);
|
||||
});
|
||||
|
||||
// --- QA Design Review E2E ---
|
||||
// --- Design Review E2E (live-site audit + fix) ---
|
||||
|
||||
describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
|
||||
describeIfSelected('Design Review E2E', ['design-review-fix'], () => {
|
||||
let qaDesignDir: string;
|
||||
let qaDesignServer: ReturnType<typeof Bun.serve> | null = null;
|
||||
|
||||
@@ -2376,11 +2391,11 @@ describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
|
||||
},
|
||||
});
|
||||
|
||||
// Copy qa-design-review skill
|
||||
fs.mkdirSync(path.join(qaDesignDir, 'qa-design-review'), { recursive: true });
|
||||
// Copy design-review skill
|
||||
fs.mkdirSync(path.join(qaDesignDir, 'design-review'), { recursive: true });
|
||||
fs.copyFileSync(
|
||||
path.join(ROOT, 'qa-design-review', 'SKILL.md'),
|
||||
path.join(qaDesignDir, 'qa-design-review', 'SKILL.md'),
|
||||
path.join(ROOT, 'design-review', 'SKILL.md'),
|
||||
path.join(qaDesignDir, 'design-review', 'SKILL.md'),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -2389,7 +2404,7 @@ describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
|
||||
try { fs.rmSync(qaDesignDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('Test 7: /qa-design-review audits and fixes design issues', async () => {
|
||||
test('Test 7: /design-review audits and fixes design issues', async () => {
|
||||
const serverUrl = `http://localhost:${(qaDesignServer as any)?.port}`;
|
||||
|
||||
const result = await runSkillTest({
|
||||
@@ -2397,17 +2412,17 @@ describeIfSelected('QA Design Review E2E', ['qa-design-review-fix'], () => {
|
||||
|
||||
B="${browseBin}"
|
||||
|
||||
Read qa-design-review/SKILL.md for the design review + fix workflow.
|
||||
Read design-review/SKILL.md for the design review + fix workflow.
|
||||
|
||||
Review the site at ${serverUrl}. Use --quick mode. Skip any AskUserQuestion calls — this is non-interactive. Fix up to 3 issues max. Write your report to ./design-audit.md.`,
|
||||
workingDirectory: qaDesignDir,
|
||||
maxTurns: 30,
|
||||
timeout: 360_000,
|
||||
testName: 'qa-design-review-fix',
|
||||
testName: 'design-review-fix',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/qa-design-review fix', result);
|
||||
logCost('/design-review fix', result);
|
||||
|
||||
const reportPath = path.join(qaDesignDir, 'design-audit.md');
|
||||
const reportExists = fs.existsSync(reportPath);
|
||||
@@ -2419,7 +2434,7 @@ Review the site at ${serverUrl}. Use --quick mode. Skip any AskUserQuestion call
|
||||
const commits = gitLog.stdout.toString().trim().split('\n');
|
||||
const designFixCommits = commits.filter((c: string) => c.includes('style(design)'));
|
||||
|
||||
recordE2E('/qa-design-review fix', 'QA Design Review E2E', result, {
|
||||
recordE2E('/design-review fix', 'Design Review E2E', result, {
|
||||
passed: ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user