From 7813baf16a9ca6ff76a8fcbd42cafdd84c0726dd Mon Sep 17 00:00:00 2001 From: ezl-keygraph Date: Thu, 28 May 2026 03:23:09 +0530 Subject: [PATCH] feat: share preflight authenticated session across agents (#345) * feat(auth): reuse preflight's authenticated session across agents * fix(preflight): verify saved auth state parses and has cookies or origins * fix(prompts): strip shared-session block when no auth is configured * fix(shannon): store shared auth state in the per-session audit dir * fix(prompts): write stub auth-state in pipeline-testing preflight * fix(preflight): clear stale auth-state.json before validate-authentication * fix(preflight): drop auth-state.json on workflow completion * docs(claude): refresh auth-state.json description for new layout and cleanup * refactor(prompts): drop unused PLAYWRIGHT_SESSION resolve in login instructions * style(prompts): collapse verifySavedAuthState signature per biome * refactor(prompts): require AUTH_STATE_FILE on authenticated runs * style(prompts): trim numbered-step comments back to step headers --- CLAUDE.md | 2 +- apps/worker/prompts/exploit-authz.txt | 2 + apps/worker/prompts/exploit-injection.txt | 2 + apps/worker/prompts/exploit-ssrf.txt | 2 + apps/worker/prompts/exploit-xss.txt | 2 + .../validate-authentication.txt | 5 +- apps/worker/prompts/recon.txt | 2 + .../worker/prompts/shared/_shared-session.txt | 19 +++++ .../prompts/validate-authentication.txt | 8 +++ apps/worker/prompts/vuln-authz.txt | 2 + apps/worker/prompts/vuln-injection.txt | 2 + apps/worker/prompts/vuln-ssrf.txt | 2 + apps/worker/prompts/vuln-xss.txt | 2 + apps/worker/src/audit/audit-session.ts | 2 +- apps/worker/src/audit/utils.ts | 8 +++ apps/worker/src/services/agent-execution.ts | 3 +- apps/worker/src/services/prompt-manager.ts | 7 ++ .../src/services/validate-authentication.ts | 72 ++++++++++++++++++- apps/worker/src/temporal/activities.ts | 12 +++- 19 files changed, 148 insertions(+), 8 deletions(-) create mode 100644 
apps/worker/prompts/shared/_shared-session.txt diff --git a/CLAUDE.md b/CLAUDE.md index 65ff5b8..2fbf65d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -146,7 +146,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig ### Supporting Systems - **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are written into `~/.claude/settings.json` `permissions.deny` (`Read`/`Edit`) once per workflow by `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` so the SDK enforces them at the tool layer even in `bypassPermissions` mode. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`) - **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop -- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. 
Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth +- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. 
`vuln-auth`/`exploit-auth` omit the include and own their own login - **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive - **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`) - **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts` diff --git a/apps/worker/prompts/exploit-authz.txt b/apps/worker/prompts/exploit-authz.txt index e131cd5..a957879 100644 --- a/apps/worker/prompts/exploit-authz.txt +++ b/apps/worker/prompts/exploit-authz.txt @@ -88,6 +88,8 @@ After exhaustive bypass attempts, determine: @include(shared/_rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/exploit-injection.txt b/apps/worker/prompts/exploit-injection.txt index 87d7ee3..e68c518 100644 --- a/apps/worker/prompts/exploit-injection.txt +++ b/apps/worker/prompts/exploit-injection.txt @@ -65,6 +65,8 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes @include(shared/_rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/exploit-ssrf.txt b/apps/worker/prompts/exploit-ssrf.txt index 2230a80..1e7b9ab 100644 --- a/apps/worker/prompts/exploit-ssrf.txt +++ b/apps/worker/prompts/exploit-ssrf.txt @@ -88,6 +88,8 @@ After exhaustive bypass attempts, determine: 
@include(shared/_rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/exploit-xss.txt b/apps/worker/prompts/exploit-xss.txt index 85e9e5d..441da80 100644 --- a/apps/worker/prompts/exploit-xss.txt +++ b/apps/worker/prompts/exploit-xss.txt @@ -86,6 +86,8 @@ After exhaustive bypass attempts, determine: @include(shared/_rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/pipeline-testing/validate-authentication.txt b/apps/worker/prompts/pipeline-testing/validate-authentication.txt index 550d720..1aaa808 100644 --- a/apps/worker/prompts/pipeline-testing/validate-authentication.txt +++ b/apps/worker/prompts/pipeline-testing/validate-authentication.txt @@ -1 +1,4 @@ -Return the structured verdict `{ "login_success": true }` and stop. +Write a stub authenticated session via Bash so the preflight's saved-state check passes: + echo '{"cookies":[{"name":"stub","value":"x","domain":"example.com","path":"/"}],"origins":[]}' > {{AUTH_STATE_FILE}} + +Then return the structured verdict `{ "login_success": true }` and stop. 
diff --git a/apps/worker/prompts/recon.txt b/apps/worker/prompts/recon.txt index f01868f..6a1e544 100644 --- a/apps/worker/prompts/recon.txt +++ b/apps/worker/prompts/recon.txt @@ -34,6 +34,8 @@ Areas to Focus On: @include(shared/_code-path-rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/shared/_shared-session.txt b/apps/worker/prompts/shared/_shared-session.txt new file mode 100644 index 0000000..8cafcd7 --- /dev/null +++ b/apps/worker/prompts/shared/_shared-session.txt @@ -0,0 +1,19 @@ +<shared_authenticated_session> +The preflight already logged in and saved the authenticated browser +session to: + + {{AUTH_STATE_FILE}} + +Restore it before doing anything else: + + playwright-cli -s={{PLAYWRIGHT_SESSION}} state-load {{AUTH_STATE_FILE}} + +Then run verification (per the success_condition in your authentication +config) to confirm the restored session is still valid: + +- If verification passes → SKIP the login flow below entirely and + proceed with your primary task. You are authenticated. +- If verification fails → the saved session is stale. Fall through to + the full login flow below and perform it on your own browser session. + Do NOT overwrite {{AUTH_STATE_FILE}}. +</shared_authenticated_session> diff --git a/apps/worker/prompts/validate-authentication.txt b/apps/worker/prompts/validate-authentication.txt index 3f0e9bc..5fc5d1d 100644 --- a/apps/worker/prompts/validate-authentication.txt +++ b/apps/worker/prompts/validate-authentication.txt @@ -19,6 +19,14 @@ This runs as a preflight check for our AI pentester. The user supplies credentia {{LOGIN_INSTRUCTIONS}} + +After verification confirms login_success, save the authenticated browser session so the rest of the pipeline can reuse it instead of logging in again: + + playwright-cli -s={{PLAYWRIGHT_SESSION}} state-save {{AUTH_STATE_FILE}} + +Run this only when login_success is true. Skip it on failure. + + - Submit each field (username, password, captcha, TOTP) exactly once. 
- Any rejection = auth error: return `login_success: false` and stop. Do not retry. diff --git a/apps/worker/prompts/vuln-authz.txt b/apps/worker/prompts/vuln-authz.txt index ecadf04..8975226 100644 --- a/apps/worker/prompts/vuln-authz.txt +++ b/apps/worker/prompts/vuln-authz.txt @@ -21,6 +21,8 @@ Success criterion: A complete, code-backed analysis of every potential authoriza @include(shared/_code-path-rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/vuln-injection.txt b/apps/worker/prompts/vuln-injection.txt index c70219f..b138d87 100644 --- a/apps/worker/prompts/vuln-injection.txt +++ b/apps/worker/prompts/vuln-injection.txt @@ -22,6 +22,8 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si @include(shared/_code-path-rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/vuln-ssrf.txt b/apps/worker/prompts/vuln-ssrf.txt index d3dbe64..376fb03 100644 --- a/apps/worker/prompts/vuln-ssrf.txt +++ b/apps/worker/prompts/vuln-ssrf.txt @@ -21,6 +21,8 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul @include(shared/_code-path-rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/prompts/vuln-xss.txt b/apps/worker/prompts/vuln-xss.txt index bfbef11..42b5b9b 100644 --- a/apps/worker/prompts/vuln-xss.txt +++ b/apps/worker/prompts/vuln-xss.txt @@ -21,6 +21,8 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th @include(shared/_code-path-rules.txt) +@include(shared/_shared-session.txt) + {{LOGIN_INSTRUCTIONS}} diff --git a/apps/worker/src/audit/audit-session.ts b/apps/worker/src/audit/audit-session.ts index fea8825..73163df 100644 --- a/apps/worker/src/audit/audit-session.ts +++ b/apps/worker/src/audit/audit-session.ts @@ -28,7 +28,7 @@ const sessionMutex = new SessionMutex(); * AuditSession - Main audit system facade */ export 
class AuditSession { - private sessionMetadata: SessionMetadata; + readonly sessionMetadata: SessionMetadata; private sessionId: string; private metricsTracker: MetricsTracker; private workflowLogger: WorkflowLogger; diff --git a/apps/worker/src/audit/utils.ts b/apps/worker/src/audit/utils.ts index 2b64371..1b80e64 100644 --- a/apps/worker/src/audit/utils.ts +++ b/apps/worker/src/audit/utils.ts @@ -74,6 +74,14 @@ export function generateSessionJsonPath(sessionMetadata: SessionMetadata): strin return path.join(auditPath, 'session.json'); } +/** + * Path to the shared authenticated browser session saved by the preflight + * validator and consumed by downstream agents via `_shared-session.txt`. + */ +export function authStateFile(sessionMetadata: SessionMetadata): string { + return path.join(generateAuditPath(sessionMetadata), 'auth-state.json'); +} + /** * Generate path to workflow.log file */ diff --git a/apps/worker/src/services/agent-execution.ts b/apps/worker/src/services/agent-execution.ts index ba5fdd0..52877b9 100644 --- a/apps/worker/src/services/agent-execution.ts +++ b/apps/worker/src/services/agent-execution.ts @@ -25,6 +25,7 @@ import { fs, path } from 'zx'; import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js'; import { getOutputFormat, getQueueFilename } from '../ai/queue-schemas.js'; import type { AuditSession } from '../audit/index.js'; +import { authStateFile } from '../audit/utils.js'; import { AGENTS } from '../session-manager.js'; import type { ActivityLogger } from '../types/activity-logger.js'; import type { AgentName } from '../types/agents.js'; @@ -122,7 +123,7 @@ export class AgentExecutionService { try { prompt = await loadPrompt( promptTemplate, - { webUrl, repoPath }, + { webUrl, repoPath, AUTH_STATE_FILE: authStateFile(auditSession.sessionMetadata) }, distributedConfig, pipelineTestingMode, logger, diff --git a/apps/worker/src/services/prompt-manager.ts 
b/apps/worker/src/services/prompt-manager.ts index 5bda4ef..68d1b5a 100644 --- a/apps/worker/src/services/prompt-manager.ts +++ b/apps/worker/src/services/prompt-manager.ts @@ -118,6 +118,7 @@ function renderReportFilterRules(report: ReportConfig | undefined): string { interface PromptVariables { webUrl: string; repoPath: string; + AUTH_STATE_FILE: string; PLAYWRIGHT_SESSION?: string; } @@ -321,6 +322,12 @@ async function interpolateVariables( result = result.replace(/<rules_of_engagement>[\s\S]*?<\/rules_of_engagement>\s*/g, ''); } + if (!config?.authentication) { + result = result.replace(/<shared_authenticated_session>[\s\S]*?<\/shared_authenticated_session>\s*/g, ''); + } else { + result = result.replace(/{{AUTH_STATE_FILE}}/g, variables.AUTH_STATE_FILE); + } + if (config?.authentication?.login_flow) { const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir); result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions); diff --git a/apps/worker/src/services/validate-authentication.ts b/apps/worker/src/services/validate-authentication.ts index 492c280..4ef22fc 100644 --- a/apps/worker/src/services/validate-authentication.ts +++ b/apps/worker/src/services/validate-authentication.ts @@ -12,10 +12,12 @@ * pipeline burns hours on broken auth. 
 */ +import { readFile, rm } from 'node:fs/promises'; import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk'; import { z } from 'zod'; import { runClaudePrompt } from '../ai/claude-executor.js'; import type { AuditSession } from '../audit/index.js'; +import { authStateFile } from '../audit/utils.js'; import type { ActivityLogger } from '../types/activity-logger.js'; import type { AgentEndResult } from '../types/audit.js'; import type { DistributedConfig, ProviderConfig } from '../types/config.js'; @@ -93,9 +95,12 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise< loginType: authentication.login_type, }); + const stateFile = authStateFile(auditSession.sessionMetadata); + await rm(stateFile, { force: true }); + const prompt = await loadPrompt( AGENT_NAME, - { webUrl, repoPath }, + { webUrl, repoPath, AUTH_STATE_FILE: stateFile }, distributedConfig, pipelineTestingMode ?? false, logger, @@ -120,7 +125,14 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise< providerConfig, ); - const classification = classifyResult(result, authentication); + let classification = classifyResult(result, authentication); + + if (classification.ok) { + const sessionCheck = await verifySavedAuthState(stateFile, logger); + if (!sessionCheck.ok) { + classification = sessionCheck; + } + } const endResult: AgentEndResult = { attemptNumber, @@ -135,6 +147,62 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise< return classification; } +async function verifySavedAuthState(stateFile: string, logger: ActivityLogger): Promise<Result<void, PentestError>> { + let contents: string; + try { + contents = await readFile(stateFile, 'utf8'); + } catch { + return err( + new PentestError( + `Preflight reported login success but did not save the authenticated session to ${stateFile}.`, + 'validation', + true, + { stateFile }, + ErrorCode.AGENT_EXECUTION_FAILED, + ), + ); + } + + let parsed: unknown; + try { + parsed = 
JSON.parse(contents); + } catch (parseErr) { + const detail = parseErr instanceof Error ? parseErr.message : String(parseErr); + return err( + new PentestError( + `Preflight saved an authenticated session to ${stateFile}, but the file is not valid JSON: ${detail}`, + 'validation', + true, + { stateFile, parseError: detail }, + ErrorCode.AGENT_EXECUTION_FAILED, + ), + ); + } + + const cookieCount = countStorageEntries(parsed, 'cookies'); + const originCount = countStorageEntries(parsed, 'origins'); + if (cookieCount === 0 && originCount === 0) { + return err( + new PentestError( + `Preflight saved an authenticated session to ${stateFile}, but it contains no cookies or origins — the browser was not actually logged in.`, + 'validation', + true, + { stateFile, cookieCount, originCount }, + ErrorCode.AGENT_EXECUTION_FAILED, + ), + ); + } + + logger.info('Preflight authenticated session saved', { stateFile, cookieCount, originCount }); + return ok(undefined); +} + +function countStorageEntries(parsed: unknown, key: 'cookies' | 'origins'): number { + if (typeof parsed !== 'object' || parsed === null) return 0; + const value = (parsed as Record<string, unknown>)[key]; + return Array.isArray(value) ? 
value.length : 0; +} + function classifyResult( result: import('../ai/claude-executor.js').ClaudePromptResult, authentication: NonNullable<DistributedConfig['authentication']>, diff --git a/apps/worker/src/temporal/activities.ts b/apps/worker/src/temporal/activities.ts index 304df40..62526c5 100644 --- a/apps/worker/src/temporal/activities.ts +++ b/apps/worker/src/temporal/activities.ts @@ -22,7 +22,7 @@ import { writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js' import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js'; import { AuditSession } from '../audit/index.js'; import type { ResumeAttempt } from '../audit/metrics-tracker.js'; -import { generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js'; +import { authStateFile, generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js'; import type { WorkflowSummary } from '../audit/workflow-logger.js'; import type { CheckpointContext } from '../interfaces/checkpoint-provider.js'; import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js'; @@ -926,7 +926,15 @@ export async function logWorkflowComplete(input: ActivityInput, summary: Workflo // 5. Write completion entry to workflow.log await auditSession.logWorkflowComplete(cumulativeSummary); - // 6. Clean up container + // 6. Drop the authenticated browser session + try { + await fs.rm(authStateFile(sessionMetadata), { force: true }); + } catch (error) { + const detail = error instanceof Error ? error.message : String(error); + console.warn(`Failed to clean up auth-state.json: ${detail}`); + } + + // 7. Clean up container removeContainer(workflowId); }