mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 05:56:41 +02:00
Merge branch 'main' into garrytan/team-supabase-store
Resolved 4 conflicts:
- scripts/gen-skill-docs.ts: kept ARTIFACT_SETUP + added main's new
resolvers (SPEC_REVIEW_LOOP, DESIGN_SKETCH, BENEFITS_FROM,
CODEX_REVIEW_STEP). Updated codex review-log to use new paths.
- ship/SKILL.md.tmpl: adopted {{CODEX_REVIEW_STEP}} macro from main
- test/skill-e2e.test.ts: added main's new E2E tests (office-hours
spec review, plan-ceo benefits-from) + kept our E2E isolation cleanup
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2910,6 +2910,128 @@ Write the full output (including the GATE verdict) to ${codexDir}/codex-output.m
|
||||
}, 360_000);
|
||||
});
|
||||
|
||||
// --- Office Hours Spec Review E2E ---
|
||||
|
||||
/**
 * E2E suite: hands an agent a scratch git repo containing a copy of
 * office-hours/SKILL.md and asks it to summarize the "Spec Review Loop"
 * section into a file, then spot-checks that summary for key terms.
 */
describeIfSelected('Office Hours Spec Review E2E', ['office-hours-spec-review'], () => {
  let ohDir: string;

  beforeAll(() => {
    ohDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oh-spec-'));

    // Every git invocation runs inside the scratch directory with a 5s cap.
    // Results are not inspected, so a failing git command does not abort setup.
    const git = (gitArgs: string[]) =>
      spawnSync('git', gitArgs, { cwd: ohDir, stdio: 'pipe', timeout: 5000 });

    const initSteps: string[][] = [
      ['init', '-b', 'main'],
      ['config', 'user.email', 'test@test.com'],
      ['config', 'user.name', 'Test'],
    ];
    for (const step of initSteps) {
      git(step);
    }

    // Seed one file so the initial commit has content.
    fs.writeFileSync(path.join(ohDir, 'README.md'), '# Test Project\n');

    const commitSteps: string[][] = [
      ['add', '.'],
      ['commit', '-m', 'init'],
    ];
    for (const step of commitSteps) {
      git(step);
    }

    // Place the office-hours skill file into the scratch repo.
    const skillDir = path.join(ohDir, 'office-hours');
    fs.mkdirSync(skillDir, { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'office-hours', 'SKILL.md'),
      path.join(skillDir, 'SKILL.md'),
    );
  });

  afterAll(() => {
    try {
      fs.rmSync(ohDir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover temp directory must not fail the suite.
    }
  });

  test('/office-hours SKILL.md contains spec review loop', async () => {
    const prompt = `Read office-hours/SKILL.md. I want to understand the spec review loop.

Summarize what the "Spec Review Loop" section does — specifically:
1. How many dimensions does the reviewer check?
2. What tool is used to dispatch the reviewer?
3. What's the maximum number of iterations?
4. What metrics are tracked?

Write your summary to ${ohDir}/spec-review-summary.md`;

    const result = await runSkillTest({
      prompt,
      workingDirectory: ohDir,
      maxTurns: 8,
      timeout: 120_000,
      testName: 'office-hours-spec-review',
      runId,
    });

    logCost('/office-hours spec review', result);
    recordE2E('/office-hours-spec-review', 'Office Hours Spec Review E2E', result);
    expect(result.exitReason).toBe('success');

    // NOTE(review): when the agent never writes the summary file, the content
    // checks below are skipped and the test passes on exitReason alone —
    // confirm this leniency is intended.
    const summaryPath = path.join(ohDir, 'spec-review-summary.md');
    if (!fs.existsSync(summaryPath)) {
      return;
    }

    // Lower-case once so the patterns below match regardless of the agent's casing.
    const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();

    // Verify the agent picked up the key concepts of the review loop.
    expect(summary).toMatch(/5.*dimension|dimension.*5|completeness|consistency|clarity|scope|feasibility/);
    expect(summary).toMatch(/agent|subagent/);
    expect(summary).toMatch(/3.*iteration|iteration.*3|maximum.*3/);
  }, 180_000);
});
|
||||
|
||||
// --- Plan CEO Review Benefits-From E2E ---
|
||||
|
||||
/**
 * E2E suite: hands an agent a scratch git repo containing a copy of
 * plan-ceo-review/SKILL.md and asks it to summarize what the skill does when
 * no design doc is found, then spot-checks that summary for key terms.
 */
describeIfSelected('Plan CEO Review Benefits-From E2E', ['plan-ceo-review-benefits'], () => {
  let benefitsDir: string;

  beforeAll(() => {
    benefitsDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-benefits-'));

    // Every git invocation runs inside the scratch directory with a 5s cap.
    // Results are not inspected, so a failing git command does not abort setup.
    const git = (gitArgs: string[]) =>
      spawnSync('git', gitArgs, { cwd: benefitsDir, stdio: 'pipe', timeout: 5000 });

    const initSteps: string[][] = [
      ['init', '-b', 'main'],
      ['config', 'user.email', 'test@test.com'],
      ['config', 'user.name', 'Test'],
    ];
    for (const step of initSteps) {
      git(step);
    }

    // Seed one file so the initial commit has content.
    fs.writeFileSync(path.join(benefitsDir, 'README.md'), '# Test Project\n');

    const commitSteps: string[][] = [
      ['add', '.'],
      ['commit', '-m', 'init'],
    ];
    for (const step of commitSteps) {
      git(step);
    }

    // Place the plan-ceo-review skill file into the scratch repo.
    const skillDir = path.join(benefitsDir, 'plan-ceo-review');
    fs.mkdirSync(skillDir, { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
      path.join(skillDir, 'SKILL.md'),
    );
  });

  afterAll(() => {
    try {
      fs.rmSync(benefitsDir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover temp directory must not fail the suite.
    }
  });

  test('/plan-ceo-review SKILL.md contains prerequisite skill offer', async () => {
    const prompt = `Read plan-ceo-review/SKILL.md. Search for sections about "Prerequisite" or "office-hours" or "design doc found".

Summarize what happens when no design doc is found — specifically:
1. Is /office-hours offered as a prerequisite?
2. What options does the user get?
3. Is there a mid-session detection for when the user seems lost?

Write your summary to ${benefitsDir}/benefits-summary.md`;

    const result = await runSkillTest({
      prompt,
      workingDirectory: benefitsDir,
      maxTurns: 8,
      timeout: 120_000,
      testName: 'plan-ceo-review-benefits',
      runId,
    });

    logCost('/plan-ceo-review benefits-from', result);
    recordE2E('/plan-ceo-review-benefits', 'Plan CEO Review Benefits-From E2E', result);
    expect(result.exitReason).toBe('success');

    // NOTE(review): when the agent never writes the summary file, the content
    // checks below are skipped and the test passes on exitReason alone —
    // confirm this leniency is intended.
    const summaryPath = path.join(benefitsDir, 'benefits-summary.md');
    if (!fs.existsSync(summaryPath)) {
      return;
    }

    // Lower-case once so the patterns below match regardless of the agent's casing.
    const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();

    // Verify the agent picked up the skill-chaining behaviour.
    expect(summary).toMatch(/office.hours/);
    expect(summary).toMatch(/design doc|no design/i);
  }, 180_000);
});
|
||||
|
||||
// Module-level afterAll — finalize eval collector and clean up E2E isolation
|
||||
afterAll(async () => {
|
||||
if (evalCollector) {
|
||||
|
||||
Reference in New Issue
Block a user