diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index abd60c13e..5043884c3 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -403,7 +403,15 @@ export const E2E_TIERS: Record = { // Office Hours 'office-hours-spec-review': 'gate', 'office-hours-forcing-energy': 'gate', // V1.1 mode-posture regression gate (Sonnet generator) - 'office-hours-builder-wildness': 'gate', // V1.1 mode-posture regression gate (Sonnet generator) + // 'office-hours-builder-wildness' was retiered to periodic in the v1.32 + // contributor wave: it is an LLM-judge creativity score (axis_a ≥4 on a + // "wildness" posture). Per CLAUDE.md tier-classification rules, + // non-deterministic quality benchmarks belong in periodic, not gate. The + // wave's +21-line CJK preamble cascade (#1205) pushed the score from 5/5 to + // 3/3 on the same /office-hours BUILDER prompt — same model, same fixture — + // proving the bar is sensitive to preamble-byte changes that have nothing + // to do with the test's intent (creativity, not preamble compliance). + 'office-hours-builder-wildness': 'periodic', // Plan reviews — gate for cheap functional, periodic for Opus quality 'plan-ceo-review': 'periodic',