From c15f78924e66f8770c2c139e4fe66353d7bfaae2 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Sat, 13 Jun 2026 10:48:35 -0700
Subject: [PATCH] fix: workflow judge re-appends body-carved sections after the marker slice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

runWorkflowJudge appended sections/*.md before slicing
startMarker..endMarker. That handles skills that moved their MARKERS
into sections (plan-eng, plan-design) but not document-release, which
keeps its markers in the skeleton and carved the workflow BODY
(Steps 2-9 -> sections/release-body.md) AFTER the endMarker — so the
slice dropped it and the judge scored completeness 2 ('Steps 2-9 are
in an external file').

Now any carved section the marker window excluded is re-appended, so
the judge sees the full workflow the agent executes. document-release:
completeness 2->5, clarity 3->4. ship/plan-ceo/plan-eng/plan-design
judges unchanged (their section content is already inside the slice,
so the head-dedup skips re-append).

Pre-existing since the v1.57.0.0 carve (#1907); surfaced now because
hermetic-env.ts is a global touchfile that selects every llm-judge
test.

Co-Authored-By: Claude Fable 5
---
 test/skill-llm-eval.test.ts | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/test/skill-llm-eval.test.ts b/test/skill-llm-eval.test.ts
index eb47526b8..3b0d8661c 100644
--- a/test/skill-llm-eval.test.ts
+++ b/test/skill-llm-eval.test.ts
@@ -546,10 +546,13 @@ async function runWorkflowJudge(opts: {
   // slice markers vanish from the skeleton and the judge scores empty content.
let content = fs.readFileSync(path.join(ROOT, opts.skillPath), 'utf-8'); const secDir = path.join(ROOT, path.dirname(opts.skillPath), 'sections'); + const sectionBodies: string[] = []; if (fs.existsSync(secDir)) { for (const f of fs.readdirSync(secDir).sort()) { if (f.endsWith('.md') && !f.endsWith('.md.tmpl')) { - content += '\n' + fs.readFileSync(path.join(secDir, f), 'utf-8'); + const body = fs.readFileSync(path.join(secDir, f), 'utf-8'); + sectionBodies.push(body); + content += '\n' + body; } } } @@ -565,6 +568,17 @@ async function runWorkflowJudge(opts: { section = content.slice(startIdx); } + // Two carve shapes exist. plan-eng/plan-design moved the MARKERS into the + // section files, so the slice above already reaches the carved content. + // document-release instead keeps its markers in the skeleton and carves the + // workflow BODY (Steps 2-9 → sections/release-body.md) AFTER the endMarker, + // so the marker slice drops it. Re-append any carved section the window + // excluded, so the judge always sees the full workflow the agent executes. + for (const body of sectionBodies) { + const head = body.trim().slice(0, 120); + if (head && !section.includes(head)) section += '\n' + body; + } + const scores = await callJudge(`You are evaluating the quality of ${opts.judgeContext} for an AI coding agent. The agent reads this document to learn ${opts.judgeGoal}. It references external tools and files