diff --git a/CLAUDE.md b/CLAUDE.md index ddf6410..62480d0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,7 +147,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig ### Supporting Systems - **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are enforced via the `@gotgenes/pi-permission-system` extension: `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` writes a global `path` deny config once per workflow (`apps/worker/src/ai/settings-writer.ts:writeCodePathPermissionConfig`), and the executor loads the extension when that config is present (`apps/worker/src/ai/pi-executor.ts`), so denies fire across every tool and child `task` session. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`) - **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop -- **Agent Harness (pi)** — Uses the **pi harness** (`@earendil-works/pi-coding-agent`, requires Node ≥ 22.19) via `apps/worker/src/ai/pi-executor.ts` (`runPiPrompt` → `createAgentSession`, retry disabled so Temporal owns retry). Models resolve through pi-ai in `apps/worker/src/ai/models.ts` (Anthropic / Bedrock / custom base URL via `ModelRegistry`+`AuthStorage`). pi ships no JSON-schema output or `Task`/`TodoWrite` built-ins, so structured queues are captured via a `submit_exploitation_queue` custom tool (`apps/worker/src/ai/queue-schemas.ts`), and `task` (read-only child sessions) + `todo_write` are provided as custom tools (`apps/worker/src/ai/tools.ts`); the per-phase MCP collectors are pi custom tools (TypeBox `defineTool` in `apps/worker/src/mcp-server/`). Thinking level defaults to `medium`; disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (→ `off`) or set `CLAUDE_THINKING_LEVEL` (env) / `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login +- **Agent Harness (pi)** — Uses the **pi harness** (`@earendil-works/pi-coding-agent`, requires Node ≥ 22.19) via `apps/worker/src/ai/pi-executor.ts` (`runPiPrompt` → `createAgentSession`, retry disabled so Temporal owns retry). Models resolve through pi-ai in `apps/worker/src/ai/models.ts` (Anthropic / Bedrock / custom base URL via `ModelRegistry`+`AuthStorage`). pi ships no JSON-schema output or `Task`/`TodoWrite` built-ins, so structured queues are captured via a `submit_exploitation_queue` custom tool (`apps/worker/src/ai/queue-schemas.ts`), and `task` (read-only child sessions) + `todo_write` are provided as custom tools (`apps/worker/src/ai/tools.ts`); the per-phase MCP collectors are pi custom tools (TypeBox `defineTool` in `apps/worker/src/mcp-server/`). Adaptive thinking (pi's `medium` level) is enabled only on Opus 4.6/4.7/4.8 (`supportsAdaptiveThinking`); every other model runs with thinking `off`. Disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (→ `off`) / `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login - **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive - **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`) - **Workspaces & Resume** — Named workspaces via `-w ` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts` diff --git a/apps/worker/src/ai/models.ts b/apps/worker/src/ai/models.ts index b27db8d..c0305c2 100644 --- a/apps/worker/src/ai/models.ts +++ b/apps/worker/src/ai/models.ts @@ -103,19 +103,21 @@ export function resolveModelId(tier: ModelTier = 'medium', providerConfig?: Prov } } +/** Whether a model supports adaptive thinking. Opus 4.6, 4.7, and 4.8 only. */ +export function supportsAdaptiveThinking(model: string): boolean { + return /opus-4-[678]/.test(model); +} + /** * Resolve the thinking level for a run. * - * The previous harness enabled "adaptive" thinking only on capable models; pi uses - * explicit levels and clamps to model capability internally. We default to 'medium' - * and honour the existing CLAUDE_ADAPTIVE_THINKING=false kill switch (→ 'off'). An - * explicit CLAUDE_THINKING_LEVEL wins when set. + * Adaptive thinking is enabled only on capable models (Opus 4.6/4.7/4.8), mapped to + * pi's 'medium' level; every other model runs with thinking 'off'. The + * CLAUDE_ADAPTIVE_THINKING=false kill switch forces 'off' regardless of model. */ -export function resolveThinkingLevel(): ThinkingLevel { +export function resolveThinkingLevel(modelId: string): ThinkingLevel { if (process.env.CLAUDE_ADAPTIVE_THINKING === 'false') return 'off'; - const explicit = process.env.CLAUDE_THINKING_LEVEL as ThinkingLevel | undefined; - if (explicit) return explicit; - return 'medium'; + return supportsAdaptiveThinking(modelId) ? 'medium' : 'off'; } export interface ModelSelection { @@ -162,7 +164,7 @@ export function resolveModelSelection( return { model, - thinkingLevel: resolveThinkingLevel(), + thinkingLevel: resolveThinkingLevel(modelId), authStorage, modelId, providerId: eff.providerId,