From 21f0e5163d7a5671584dbe41d17553fac4c2c192 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 20 Apr 2026 07:14:09 +0800 Subject: [PATCH] fix(tests): repair stale test expectations + emit _EXPLAIN_LEVEL / _QUESTION_TUNING from preamble MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three pre-existing test failures on main were blocking /ship: - test/skill-validation.test.ts "Step 3.4 test coverage audit" expected the literal strings "CODE PATH COVERAGE" and "USER FLOW COVERAGE" which were removed when the Step 7 coverage diagram was compressed. Updated assertions to check the stable `Code paths:` / `User flows:` labels that still ship. - test/skill-validation.test.ts "ship step numbering" allowed-substeps list didn't include 15.0 (WIP squash) and 15.1 (bisectable commits) which were added for continuous checkpoint mode. Extended the allowlist. - test/writing-style-resolver.test.ts and test/plan-tune.test.ts expected `_EXPLAIN_LEVEL` and `_QUESTION_TUNING` bash variables in the preamble but generate-preamble-bash.ts had been refactored and those lines were dropped. Without them, downstream skills can't read `explain_level` or `question_tuning` config at runtime — terse mode and /plan-tune features were silently broken. Added the two bash echo blocks back to generatePreambleBash and refreshed the golden-file fixtures to match. All three preamble-related golden baselines (claude/codex/factory) are synchronized with the new output. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../resolvers/preamble/generate-preamble-bash.ts | 8 ++++++++ test/fixtures/golden/claude-ship-SKILL.md | 8 ++++++++ test/fixtures/golden/codex-ship-SKILL.md | 8 ++++++++ test/fixtures/golden/factory-ship-SKILL.md | 8 ++++++++ test/skill-validation.test.ts | 15 ++++++++++----- 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/scripts/resolvers/preamble/generate-preamble-bash.ts b/scripts/resolvers/preamble/generate-preamble-bash.ts index 49f4f2d0..2a43619b 100644 --- a/scripts/resolvers/preamble/generate-preamble-bash.ts +++ b/scripts/resolvers/preamble/generate-preamble-bash.ts @@ -41,6 +41,14 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: \${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +# Writing style verbosity (V1: default = ELI10, terse = tighter V0 prose. +# Read on every skill run so terse mode takes effect without a restart.) +_EXPLAIN_LEVEL=$(${ctx.paths.binDir}/gstack-config get explain_level 2>/dev/null || echo "default") +if [ "$_EXPLAIN_LEVEL" != "default" ] && [ "$_EXPLAIN_LEVEL" != "terse" ]; then _EXPLAIN_LEVEL="default"; fi +echo "EXPLAIN_LEVEL: $_EXPLAIN_LEVEL" +# Question tuning (see /plan-tune). Observational only in V1. +_QUESTION_TUNING=$(${ctx.paths.binDir}/gstack-config get question_tuning 2>/dev/null || echo "false") +echo "QUESTION_TUNING: $_QUESTION_TUNING" mkdir -p ~/.gstack/analytics if [ "$_TEL" != "off" ]; then echo '{"skill":"${ctx.skillName}","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true diff --git a/test/fixtures/golden/claude-ship-SKILL.md b/test/fixtures/golden/claude-ship-SKILL.md index c0e14388..8e2fa0c0 100644 --- a/test/fixtures/golden/claude-ship-SKILL.md +++ b/test/fixtures/golden/claude-ship-SKILL.md @@ -55,6 +55,14 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +# Writing style verbosity (V1: default = ELI10, terse = tighter V0 prose. +# Read on every skill run so terse mode takes effect without a restart.) +_EXPLAIN_LEVEL=$(~/.claude/skills/gstack/bin/gstack-config get explain_level 2>/dev/null || echo "default") +if [ "$_EXPLAIN_LEVEL" != "default" ] && [ "$_EXPLAIN_LEVEL" != "terse" ]; then _EXPLAIN_LEVEL="default"; fi +echo "EXPLAIN_LEVEL: $_EXPLAIN_LEVEL" +# Question tuning (see /plan-tune). Observational only in V1. +_QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning 2>/dev/null || echo "false") +echo "QUESTION_TUNING: $_QUESTION_TUNING" mkdir -p ~/.gstack/analytics if [ "$_TEL" != "off" ]; then echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true diff --git a/test/fixtures/golden/codex-ship-SKILL.md b/test/fixtures/golden/codex-ship-SKILL.md index cfa85e6e..cd5c7c0e 100644 --- a/test/fixtures/golden/codex-ship-SKILL.md +++ b/test/fixtures/golden/codex-ship-SKILL.md @@ -44,6 +44,14 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +# Writing style verbosity (V1: default = ELI10, terse = tighter V0 prose. +# Read on every skill run so terse mode takes effect without a restart.) +_EXPLAIN_LEVEL=$($GSTACK_BIN/gstack-config get explain_level 2>/dev/null || echo "default") +if [ "$_EXPLAIN_LEVEL" != "default" ] && [ "$_EXPLAIN_LEVEL" != "terse" ]; then _EXPLAIN_LEVEL="default"; fi +echo "EXPLAIN_LEVEL: $_EXPLAIN_LEVEL" +# Question tuning (see /plan-tune). Observational only in V1. +_QUESTION_TUNING=$($GSTACK_BIN/gstack-config get question_tuning 2>/dev/null || echo "false") +echo "QUESTION_TUNING: $_QUESTION_TUNING" mkdir -p ~/.gstack/analytics if [ "$_TEL" != "off" ]; then echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true diff --git a/test/fixtures/golden/factory-ship-SKILL.md b/test/fixtures/golden/factory-ship-SKILL.md index cba656ba..5c38f080 100644 --- a/test/fixtures/golden/factory-ship-SKILL.md +++ b/test/fixtures/golden/factory-ship-SKILL.md @@ -46,6 +46,14 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +# Writing style verbosity (V1: default = ELI10, terse = tighter V0 prose. +# Read on every skill run so terse mode takes effect without a restart.) +_EXPLAIN_LEVEL=$($GSTACK_BIN/gstack-config get explain_level 2>/dev/null || echo "default") +if [ "$_EXPLAIN_LEVEL" != "default" ] && [ "$_EXPLAIN_LEVEL" != "terse" ]; then _EXPLAIN_LEVEL="default"; fi +echo "EXPLAIN_LEVEL: $_EXPLAIN_LEVEL" +# Question tuning (see /plan-tune). Observational only in V1. +_QUESTION_TUNING=$($GSTACK_BIN/gstack-config get question_tuning 2>/dev/null || echo "false") +echo "QUESTION_TUNING: $_QUESTION_TUNING" mkdir -p ~/.gstack/analytics if [ "$_TEL" != "off" ]; then echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 6515d08b..a60a4c61 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -1103,7 +1103,9 @@ describe('Step 3.4 test coverage audit', () => { test('ship/SKILL.md contains Step 7', () => { const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); expect(content).toContain('Step 7: Test Coverage Audit'); - expect(content).toContain('CODE PATH COVERAGE'); + // The coverage diagram collapses code-path and user-flow counts onto one + // summary line. Verify that summary is present (labels are stable). + expect(content).toContain('Code paths:'); }); test('Step 3.4 includes quality scoring rubric', () => { @@ -1153,9 +1155,11 @@ describe('Step 3.4 test coverage audit', () => { expect(content).toContain('Empty/zero/boundary states'); }); - test('Step 3.4 diagram includes USER FLOW COVERAGE section', () => { + test('Step 3.4 diagram includes user-flow coverage summary', () => { const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); - expect(content).toContain('USER FLOW COVERAGE'); + // The diagram was compressed from separate CODE PATH COVERAGE / USER FLOW + // COVERAGE section headers into a single summary line. Assert on the + // labels that still appear on that summary line. expect(content).toContain('Code paths:'); expect(content).toContain('User flows:'); }); @@ -1165,8 +1169,9 @@ describe('Step 3.4 test coverage audit', () => { describe('ship step numbering', () => { // Allowed sub-steps that are resolver-generated and intentionally nested: - // 8.1 (Plan Verification), 8.2 (Scope Drift), 9.1 (Review Army), 9.2 (Findings Merge), 9.3 (Cross-review dedup) - const ALLOWED_SUBSTEPS = new Set(['8.1', '8.2', '9.1', '9.2', '9.3']); + // 8.1 (Plan Verification), 8.2 (Scope Drift), 9.1 (Review Army), 9.2 (Findings Merge), + // 9.3 (Cross-review dedup), 15.0 (WIP squash — continuous checkpoint), 15.1 (Bisectable commits). + const ALLOWED_SUBSTEPS = new Set(['8.1', '8.2', '9.1', '9.2', '9.3', '15.0', '15.1']); test('ship/SKILL.md.tmpl contains no unexpected fractional step numbers', () => { const tmpl = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md.tmpl'), 'utf-8');