From c90feac2cad12c9f4e64357ef4abea29b7098a3f Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 16 Mar 2026 10:12:56 -0500 Subject: [PATCH] fix: improve contributor mode + qa-quick E2E reliability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contributor mode: - Add "do not truncate" directive to template — agent was stopping after "My rating" without completing the "Steps to reproduce", "Raw output", and "What would make this a 10" sections - Restore assertions for Steps to reproduce and Date footer QA quick: - Make test server URL prominent: top of prompt, explicit "already running" and "do NOT discover ports" instructions - Bump session timeout 180s→240s and test timeout 240s→300s - Set B="<browse binary path>" on the first line of the prompt (was buried in prose) Co-Authored-By: Claude Opus 4.6 --- SKILL.md | 6 ++++-- browse/SKILL.md | 6 ++++-- plan-ceo-review/SKILL.md | 6 ++++-- plan-eng-review/SKILL.md | 6 ++++-- qa-only/SKILL.md | 6 ++++-- qa/SKILL.md | 6 ++++-- retro/SKILL.md | 6 ++++-- review/SKILL.md | 6 ++++-- scripts/gen-skill-docs.ts | 6 ++++-- setup-browser-cookies/SKILL.md | 6 ++++-- ship/SKILL.md | 6 ++++-- test/skill-e2e.test.ts | 14 +++++++++----- 12 files changed, 53 insertions(+), 27 deletions(-) diff --git a/SKILL.md b/SKILL.md index 85bedfba..2239a91b 100644 --- a/SKILL.md +++ b/SKILL.md @@ -52,7 +52,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -67,7 +67,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. 
{step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/browse/SKILL.md b/browse/SKILL.md index 08f2174b..c0d7a4eb 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -52,7 +52,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -67,7 +67,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. {step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index e75e1e4d..07830998 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -52,7 +52,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -67,7 +67,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. 
{step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index 7b3a7e81..ad2baca6 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -52,7 +52,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -67,7 +67,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. {step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index 6c31501f..27d939be 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -51,7 +51,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -66,7 +66,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. 
{step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/qa/SKILL.md b/qa/SKILL.md index 1473866e..938bf10b 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -56,7 +56,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -71,7 +71,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. {step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/retro/SKILL.md b/retro/SKILL.md index 5067124d..39b7ee13 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -51,7 +51,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -66,7 +66,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. 
{step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/review/SKILL.md b/review/SKILL.md index a5378aca..b94f8a30 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -52,7 +52,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -67,7 +67,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. {step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 065f87b6..f3d93dbb 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -131,7 +131,7 @@ If \`_CONTRIB\` is \`true\`: you are in **contributor mode**. You're a gstack us **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write \`~/.gstack/contributor-logs/{slug}.md\`: +**To file:** write \`~/.gstack/contributor-logs/{slug}.md\` with **all sections below** (do not truncate — include every section through the Date/Version footer): \`\`\` # {Title} @@ -146,7 +146,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. 
{step} ## Raw output -(error messages or unexpected output in a code block) +\`\`\` +{paste the actual error or unexpected output here} +\`\`\` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index d1b4de49..d522b270 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -49,7 +49,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -64,7 +64,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. {step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/ship/SKILL.md b/ship/SKILL.md index 1c143461..7791f4bc 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -51,7 +51,7 @@ If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user w **NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. -**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): ``` # {Title} @@ -66,7 +66,9 @@ Hey gstack team — ran into this while using /{skill-name}: 1. 
{step} ## Raw output -(error messages or unexpected output in a code block) +``` +{paste the actual error or unexpected output here} +``` ## What would make this a 10 {one sentence: what gstack should have done differently} diff --git a/test/skill-e2e.test.ts b/test/skill-e2e.test.ts index 2b0406ac..78cd65ac 100644 --- a/test/skill-e2e.test.ts +++ b/test/skill-e2e.test.ts @@ -328,8 +328,8 @@ File a contributor report about this issue. Then tell me what you filed.`, expect(logContent).toContain('What I was trying to do'); expect(logContent).toContain('What happened instead'); expect(logContent).toMatch(/rating/i); - // "What would make this a 10" is nice-to-have — agent may truncate the report - // The key signal is using "My rating:" (new format) vs "How annoying" (old format) + expect(logContent).toContain('## Steps to reproduce'); + expect(logContent).toContain('**Date:'); // Clean up try { fs.rmSync(contribDir, { recursive: true, force: true }); } catch {} @@ -428,16 +428,20 @@ describeE2E('QA skill E2E', () => { test('/qa quick completes without browse errors', async () => { const result = await runSkillTest({ - prompt: `You have a browse binary at ${browseBin}. Assign it to B variable like: B="${browseBin}" + prompt: `B="${browseBin}" + +The test server is already running at: ${testServer.url} +Target page: ${testServer.url}/basic.html Read the file qa/SKILL.md for the QA workflow instructions. Run a Quick-depth QA test on ${testServer.url}/basic.html Do NOT use AskUserQuestion — run Quick tier directly. +Do NOT try to start a server or discover ports — the URL above is ready. 
Write your report to ${qaDir}/qa-reports/qa-report.md`, workingDirectory: qaDir, maxTurns: 35, - timeout: 180_000, + timeout: 240_000, testName: 'qa-quick', runId, }); @@ -452,7 +456,7 @@ Write your report to ${qaDir}/qa-reports/qa-report.md`, } // Accept error_max_turns — the agent doing thorough QA work is not a failure expect(['success', 'error_max_turns']).toContain(result.exitReason); - }, 240_000); + }, 300_000); }); // --- B5: Review skill E2E ---