From 56c40666804f058f106adf145e27be10251f255b Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Mon, 23 Mar 2026 07:38:35 -0700
Subject: [PATCH] fix: unset CI/GITHUB_ACTIONS env vars for eval runs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude CLI routing behavior changes when CI=true — it skips skill
invocation and uses Bash directly. Unsetting these markers makes
Claude behave like a local environment for consistent eval results.

The markers are removed with `unset` inside the step's shell instead of
being blanked through `env:`. An empty string still leaves the variable
defined, and GitHub documents that the default GITHUB_* variables cannot
be overwritten from the workflow, so `GITHUB_ACTIONS: ""` is not a
reliable way to hide the marker from child processes.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Note (review): line breaks and YAML indentation in this patch were
reconstructed from a whitespace-collapsed copy, and the blob ids on the
`index` line predate the change to the `run:` step. Verify against
.github/workflows/evals.yml before applying.

 .github/workflows/evals.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml
index 3ca708d5..e9962069 100644
--- a/.github/workflows/evals.yml
+++ b/.github/workflows/evals.yml
@@ -115,6 +115,10 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           EVALS_CONCURRENCY: "40"
-        run: EVALS=1 bun test --retry 2 --concurrent --max-concurrency 40 ${{ matrix.suite.file }}
+        # Remove the CI markers from the shell environment so Claude CLI behaves
+        # like a local run (CI=true changes its routing/tool selection behavior).
+        run: |
+          unset CI GITHUB_ACTIONS
+          EVALS=1 bun test --retry 2 --concurrent --max-concurrency 40 ${{ matrix.suite.file }}
 
       - name: Upload eval results