mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
feat: /land-and-deploy first-run dry run + staging-first + trust ladder (v0.12.2.0) (#518)
* feat: /land-and-deploy first-run dry-run, staging-first, trust ladder

  First run shows a dry run — detect deploy infrastructure, validate commands, show what will happen — then confirm before proceeding. Staging-first option when staging detected. Config decay: re-triggers dry run if deploy config changes. Full wordsmithed copy for every user-facing message.

  Key changes:
  - Step 1.5: first-run dry-run with infrastructure validation table
  - Step 3.5a-bis: inline review gate before deploy
  - Step 4a/4b: merge queue + CI auto-deploy detection and messaging
  - Step 5a: staging-first option with verify-then-promote flow
  - Voice & Tone section: narrate-the-journey, teacher mode vs efficient mode
  - Config fingerprinting: trust decays when deploy config changes

* chore: bump version and changelog (v0.12.2.0)

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -134,10 +134,12 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'gstack-upgrade-happy-path': ['gstack-upgrade/**'],
|
||||
|
||||
// Deploy skills
|
||||
'land-and-deploy-workflow': ['land-and-deploy/**', 'scripts/gen-skill-docs.ts'],
|
||||
'canary-workflow': ['canary/**', 'browse/src/**'],
|
||||
'benchmark-workflow': ['benchmark/**', 'browse/src/**'],
|
||||
'setup-deploy-workflow': ['setup-deploy/**', 'scripts/gen-skill-docs.ts'],
|
||||
'land-and-deploy-workflow': ['land-and-deploy/**', 'scripts/gen-skill-docs.ts'],
|
||||
'land-and-deploy-first-run': ['land-and-deploy/**', 'scripts/gen-skill-docs.ts', 'bin/gstack-slug'],
|
||||
'land-and-deploy-review-gate': ['land-and-deploy/**', 'bin/gstack-review-read'],
|
||||
'canary-workflow': ['canary/**', 'browse/src/**'],
|
||||
'benchmark-workflow': ['benchmark/**', 'browse/src/**'],
|
||||
'setup-deploy-workflow': ['setup-deploy/**', 'scripts/gen-skill-docs.ts'],
|
||||
|
||||
// Autoplan
|
||||
'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'],
|
||||
@@ -254,6 +256,8 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
|
||||
// Deploy skills
|
||||
'land-and-deploy-workflow': 'gate',
|
||||
'land-and-deploy-first-run': 'gate',
|
||||
'land-and-deploy-review-gate': 'gate',
|
||||
'canary-workflow': 'gate',
|
||||
'benchmark-workflow': 'gate',
|
||||
'setup-deploy-workflow': 'gate',
|
||||
|
||||
@@ -85,6 +85,161 @@ Do NOT use AskUserQuestion. Do NOT run gh or fly commands.`,
|
||||
}, 180_000);
|
||||
});
|
||||
|
||||
// --- Land-and-Deploy First-Run E2E ---
|
||||
|
||||
/**
 * E2E: first-run dry-run path of /land-and-deploy.
 *
 * Builds a throwaway git repo containing a fly.toml, checks out a feature
 * branch with one extra commit, copies the land-and-deploy skill directory
 * into it, then asks the agent to simulate the Step 1.5 dry-run validation
 * and verifies that a report mentioning the detected platform was written
 * under .gstack/deploy-reports.
 */
describeIfSelected('Land-and-Deploy first-run E2E', ['land-and-deploy-first-run'], () => {
  let firstRunDir: string;

  beforeAll(() => {
    firstRunDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-land-first-run-'));

    // Fail fast when a fixture command breaks (non-zero exit, spawn error, or
    // timeout) instead of silently running the skill against a half-built repo.
    const run = (cmd: string, args: string[]) => {
      const outcome = spawnSync(cmd, args, { cwd: firstRunDir, stdio: 'pipe', timeout: 5000 });
      if (outcome.error || outcome.status !== 0) {
        const detail = outcome.error?.message ?? outcome.stderr?.toString() ?? '';
        throw new Error(`fixture setup failed: ${cmd} ${args.join(' ')}\n${detail}`);
      }
      return outcome;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    fs.writeFileSync(path.join(firstRunDir, 'app.ts'), 'export function hello() { return "world"; }\n');
    fs.writeFileSync(path.join(firstRunDir, 'fly.toml'), 'app = "first-run-app"\n\n[http_service]\n internal_port = 3000\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    run('git', ['checkout', '-b', 'feat/first-deploy']);
    fs.writeFileSync(path.join(firstRunDir, 'app.ts'), 'export function hello() { return "first deploy"; }\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'feat: first deploy']);

    copyDirSync(path.join(ROOT, 'land-and-deploy'), path.join(firstRunDir, 'land-and-deploy'));
  });

  afterAll(() => {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
    try { fs.rmSync(firstRunDir, { recursive: true, force: true }); } catch {}
  });

  testConcurrentIfSelected('land-and-deploy-first-run', async () => {
    const result = await runSkillTest({
      prompt: `Read land-and-deploy/SKILL.md for the /land-and-deploy skill instructions.

You are on branch feat/first-deploy. This is the FIRST TIME running /land-and-deploy
for this project — there is NO land-deploy-confirmed file.

This repo has a fly.toml with app = "first-run-app", indicating a Fly.io deployment.

IMPORTANT: There is NO remote and NO GitHub PR — you cannot run gh commands.
Instead, simulate the Step 1.5 first-run dry-run validation:
1. Detect that this is a FIRST_RUN (no land-deploy-confirmed file)
2. Detect the deploy platform from fly.toml (Fly.io, app = first-run-app)
3. Infer the production URL (https://first-run-app.fly.dev)
4. Build the DEPLOY INFRASTRUCTURE VALIDATION table showing:
- Platform detected
- Command validation results (simulated as all passing)
- Staging detection results (none expected)
- What will happen steps
5. Write the dry-run report to .gstack/deploy-reports/dry-run-validation.md

Do NOT use AskUserQuestion. Do NOT run gh or fly commands.
Just demonstrate the first-run dry-run output.`,
      workingDirectory: firstRunDir,
      maxTurns: 20,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'land-and-deploy-first-run',
      runId,
    });

    logCost('/land-and-deploy first-run', result);
    recordE2E(evalCollector, '/land-and-deploy first-run', 'Land-and-Deploy first-run E2E', result);
    expect(result.exitReason).toBe('success');

    // Verify dry-run report was created
    const reportDir = path.join(firstRunDir, '.gstack', 'deploy-reports');
    expect(fs.existsSync(reportDir)).toBe(true);

    // Inspect every regular file in the report directory. readdir order is not
    // guaranteed and the agent may create extra files or sub-directories, so
    // picking the first entry could read the wrong file (or throw EISDIR).
    const reportTexts = fs
      .readdirSync(reportDir, { withFileTypes: true })
      .filter((entry) => entry.isFile())
      .map((entry) => fs.readFileSync(path.join(reportDir, entry.name), 'utf-8').toLowerCase());
    expect(reportTexts.length).toBeGreaterThan(0);

    // Check report content mentions platform detection
    const hasPlatform = reportTexts.some(
      (text) => text.includes('fly') || text.includes('first-run-app'),
    );
    expect(hasPlatform).toBe(true);
  }, 180_000);
});
|
||||
|
||||
// --- Land-and-Deploy Review Gate E2E ---
|
||||
|
||||
/**
 * E2E: review readiness gate of /land-and-deploy (Step 3.5a / 3.5a-bis).
 *
 * Builds a throwaway git repo with an initial commit plus six follow-up
 * commits, copies the land-and-deploy skill directory into it, then asks the
 * agent to simulate the readiness gate with no review logs and verifies that
 * a report mentioning the review status was written under
 * .gstack/deploy-reports.
 */
describeIfSelected('Land-and-Deploy review gate E2E', ['land-and-deploy-review-gate'], () => {
  let reviewDir: string;

  beforeAll(() => {
    reviewDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-land-review-'));

    // Fail fast when a fixture command breaks (non-zero exit, spawn error, or
    // timeout) instead of silently running the skill against a half-built repo.
    const run = (cmd: string, args: string[]) => {
      const outcome = spawnSync(cmd, args, { cwd: reviewDir, stdio: 'pipe', timeout: 5000 });
      if (outcome.error || outcome.status !== 0) {
        const detail = outcome.error?.message ?? outcome.stderr?.toString() ?? '';
        throw new Error(`fixture setup failed: ${cmd} ${args.join(' ')}\n${detail}`);
      }
      return outcome;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    fs.writeFileSync(path.join(reviewDir, 'app.ts'), 'export function hello() { return "world"; }\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Create 6 more commits to make any review stale
    for (let i = 1; i <= 6; i++) {
      fs.writeFileSync(path.join(reviewDir, `file${i}.ts`), `export const x${i} = ${i};\n`);
      run('git', ['add', '.']);
      run('git', ['commit', '-m', `feat: add file${i}`]);
    }

    copyDirSync(path.join(ROOT, 'land-and-deploy'), path.join(reviewDir, 'land-and-deploy'));
  });

  afterAll(() => {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
    try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {}
  });

  testConcurrentIfSelected('land-and-deploy-review-gate', async () => {
    const result = await runSkillTest({
      prompt: `Read land-and-deploy/SKILL.md for the /land-and-deploy skill instructions.

Focus on Step 3.5a and Step 3.5a-bis (the review staleness check and inline review offer).

This repo has 6 commits since the initial commit. There are NO review logs
(gstack-review-read would return NO_REVIEWS).

Simulate what the readiness gate would show:
1. Run gstack-review-read equivalent (simulate NO_REVIEWS output)
2. Determine review staleness: Eng Review should be "NOT RUN"
3. Note that Step 3.5a-bis would offer an inline review
4. Write a simulated readiness report to .gstack/deploy-reports/readiness-report.md
showing the review status as NOT RUN with the inline review offer text

Do NOT use AskUserQuestion. Do NOT run gh commands.
Show what the readiness gate output would look like.`,
      workingDirectory: reviewDir,
      maxTurns: 15,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'land-and-deploy-review-gate',
      runId,
    });

    logCost('/land-and-deploy review-gate', result);
    recordE2E(evalCollector, '/land-and-deploy review-gate', 'Land-and-Deploy review gate E2E', result);
    expect(result.exitReason).toBe('success');

    // Verify readiness report was created
    const reportDir = path.join(reviewDir, '.gstack', 'deploy-reports');
    expect(fs.existsSync(reportDir)).toBe(true);

    // Inspect every regular file in the report directory. readdir order is not
    // guaranteed and the agent may create extra files or sub-directories, so
    // picking the first entry could read the wrong file (or throw EISDIR).
    const reportTexts = fs
      .readdirSync(reportDir, { withFileTypes: true })
      .filter((entry) => entry.isFile())
      .map((entry) => fs.readFileSync(path.join(reportDir, entry.name), 'utf-8').toLowerCase());
    expect(reportTexts.length).toBeGreaterThan(0);

    // Should mention review status
    const hasReviewMention = reportTexts.some(
      (text) => text.includes('review') || text.includes('not run'),
    );
    expect(hasReviewMention).toBe(true);
  }, 180_000);
});
|
||||
|
||||
// --- Canary skill E2E ---
|
||||
|
||||
describeIfSelected('Canary skill E2E', ['canary-workflow'], () => {
|
||||
|
||||
Reference in New Issue
Block a user