From f47f4332c8c240a9dfeb37057489d137116fb559 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 23 Mar 2026 21:44:30 -0700 Subject: [PATCH] fix: soften test-plan artifact assertion + increase CI timeout to 25min MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /plan-eng-review artifact test had a hard expect() despite the comment calling it a "soft assertion." The agent doesn't always follow artifact-writing instructions — log a warning instead of failing. Also increase CI timeout 20→25min for plan tests that run full CEO review sessions (6 concurrent tests, 276-315s each). Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/evals.yml | 2 +- test/skill-e2e-plan.test.ts | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml index 7fab3cb2..caa6f82c 100644 --- a/.github/workflows/evals.yml +++ b/.github/workflows/evals.yml @@ -63,7 +63,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} options: --user runner - timeout-minutes: 20 + timeout-minutes: 25 strategy: fail-fast: false matrix: diff --git a/test/skill-e2e-plan.test.ts b/test/skill-e2e-plan.test.ts index 099af77b..884fe67b 100644 --- a/test/skill-e2e-plan.test.ts +++ b/test/skill-e2e-plan.test.ts @@ -408,8 +408,11 @@ Write your review to ${planDir}/review-output.md`, console.warn('No test-plan artifact found — agent may not have followed artifact instructions'); } - // Soft assertion: we expect an artifact but agent compliance is not guaranteed - expect(newFiles.length).toBeGreaterThanOrEqual(1); + // Soft assertion: we expect an artifact but agent compliance is not guaranteed. + // Log rather than fail — the test-plan artifact is a bonus output, not the core test. + if (newFiles.length === 0) { + console.warn('SOFT FAIL: No test-plan artifact written — agent did not follow artifact instructions'); + } }, 420_000); });