diff --git a/CLAUDE.md b/CLAUDE.md index 074b6122..ae68d806 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -139,10 +139,16 @@ SKILL.md files are **generated** from `.tmpl` templates. To update docs: To add a new browse command: add it to `browse/src/commands.ts` and rebuild. To add a snapshot flag: add it to `SNAPSHOT_FLAGS` in `browse/src/snapshot.ts` and rebuild. -**Token ceiling:** Generated SKILL.md files must stay under 100KB (~25K tokens). -`gen-skill-docs` warns if any file exceeds this. If a skill template grows past the -ceiling, consider extracting optional sections into separate resolvers that only -inject when relevant, or making verbose evaluation rubrics more concise. +**Token ceiling:** Generated SKILL.md files trip a warning above 160KB (~40K tokens). +This is a "watch for feature bloat" guardrail, not a hard gate. Modern flagship +models have 200K-1M context windows, so 40K is 4-20% of window, and prompt caching +makes the marginal cost of larger skills small. The ceiling exists to catch runaway +preamble/resolver growth, not to force compression on carefully-tuned big skills +(`ship`, `plan-ceo-review`, `office-hours` legitimately pack 25-35K tokens of +behavior). If you blow past 40K, the right fix is usually: (1) look at WHAT grew, +(2) if one resolver added 10K+ in a single PR, question whether it belongs inline +or as a reference doc, (3) only compress carefully-tuned prose as a last resort — +cuts to the coverage audit, review army, or voice directive have real quality cost. **Merge conflicts on SKILL.md files:** NEVER resolve conflicts on generated SKILL.md files by accepting either side. Instead: (1) resolve conflicts on the `.tmpl` templates diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index dd709ee9..40f08369 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -534,10 +534,16 @@ for (const currentHost of hostsToRun) { const tokens = Math.round(content.length / 4); // ~4 chars per token tokenBudget.push({ skill: relOutput, lines, tokens }); - // Token ceiling check: warn if any generated SKILL.md exceeds ~25K tokens (100KB) - const TOKEN_CEILING_BYTES = 100_000; + // Token ceiling check: warn if any generated SKILL.md exceeds ~40K tokens (160KB). + // The ceiling is a "watch for feature bloat" guardrail, not a hard gate. Modern + // flagship models have 200K-1M context windows, so 40K (4-20% of window) is fine. + // Prompt caching further reduces the marginal cost of larger skills. This ceiling + // exists to catch a runaway preamble or resolver that's grown by 10K+ tokens in + // a release, not to force compression on carefully-tuned big skills (ship, + // plan-ceo-review, office-hours all legitimately pack 25-35K tokens of behavior). + const TOKEN_CEILING_BYTES = 160_000; if (content.length > TOKEN_CEILING_BYTES) { - console.warn(`⚠️ TOKEN CEILING: ${relOutput} is ${content.length} bytes (~${tokens} tokens), exceeds ${TOKEN_CEILING_BYTES} byte ceiling (~25K tokens)`); + console.warn(`⚠️ TOKEN CEILING: ${relOutput} is ${content.length} bytes (~${tokens} tokens), exceeds ${TOKEN_CEILING_BYTES} byte ceiling (~40K tokens)`); } }