feat: share preflight authenticated session across agents (#345)

* feat(auth): reuse preflight's authenticated session across agents

* fix(preflight): verify saved auth state parses and has cookies or origins

* fix(prompts): strip shared-session block when no auth is configured

* fix(shannon): store shared auth state in the per-session audit dir

* fix(prompts): write stub auth-state in pipeline-testing preflight

* fix(preflight): clear stale auth-state.json before validate-authentication

* fix(preflight): drop auth-state.json on workflow completion

* docs(claude): refresh auth-state.json description for new layout and cleanup

* refactor(prompts): drop unused PLAYWRIGHT_SESSION resolve in login instructions

* style(prompts): collapse verifySavedAuthState signature per biome

* refactor(prompts): require AUTH_STATE_FILE on authenticated runs

* style(prompts): trim numbered-step comments back to step headers
This commit is contained in:
ezl-keygraph
2026-05-28 03:23:09 +05:30
committed by GitHub
parent 8f5d639f0d
commit 7813baf16a
19 changed files with 148 additions and 8 deletions
+1 -1
View File
@@ -146,7 +146,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
### Supporting Systems
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are written into `~/.claude/settings.json` `permissions.deny` (`Read`/`Edit`) once per workflow by `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` so the SDK enforces them at the tool layer even in `bypassPermissions` mode. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses as JSON and contains at least one `cookies` or `origins` entry before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`)
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts`
+2
View File
@@ -88,6 +88,8 @@ After exhaustive bypass attempts, determine:
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@@ -65,6 +65,8 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
+2
View File
@@ -88,6 +88,8 @@ After exhaustive bypass attempts, determine:
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
+2
View File
@@ -86,6 +86,8 @@ After exhaustive bypass attempts, determine:
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@@ -1 +1,4 @@
Return the structured verdict `{ "login_success": true }` and stop.
Write a stub authenticated session via Bash so the preflight's saved-state check passes:
echo '{"cookies":[{"name":"stub","value":"x","domain":"example.com","path":"/"}],"origins":[]}' > {{AUTH_STATE_FILE}}
Then return the structured verdict `{ "login_success": true }` and stop.
+2
View File
@@ -34,6 +34,8 @@ Areas to Focus On:
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@@ -0,0 +1,19 @@
<shared_authenticated_session>
The preflight already logged in and saved the authenticated browser
session to:
{{AUTH_STATE_FILE}}
Restore it before doing anything else:
playwright-cli -s={{PLAYWRIGHT_SESSION}} state-load {{AUTH_STATE_FILE}}
Then run verification (per the success_condition in your authentication
config) to confirm the restored session is still valid:
- If verification passes → SKIP the login flow below entirely and
proceed with your primary task. You are authenticated.
- If verification fails → the saved session is stale. Fall through to
the full login flow below and perform it on your own browser session.
Do NOT overwrite {{AUTH_STATE_FILE}}.
</shared_authenticated_session>
@@ -19,6 +19,14 @@ This runs as a preflight check for our AI pentester. The user supplies credentia
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
<publish_session>
After verification confirms login_success, save the authenticated browser session so the rest of the pipeline can reuse it instead of logging in again:
playwright-cli -s={{PLAYWRIGHT_SESSION}} state-save {{AUTH_STATE_FILE}}
Run this only when login_success is true. Skip it on failure.
</publish_session>
<critical>
- Submit each field (username, password, captcha, TOTP) exactly once.
- Any rejection = auth error: return `login_success: false` and stop. Do not retry.
+2
View File
@@ -21,6 +21,8 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
+2
View File
@@ -22,6 +22,8 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
+2
View File
@@ -21,6 +21,8 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
+2
View File
@@ -21,6 +21,8 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
+1 -1
View File
@@ -28,7 +28,7 @@ const sessionMutex = new SessionMutex();
* AuditSession - Main audit system facade
*/
export class AuditSession {
private sessionMetadata: SessionMetadata;
readonly sessionMetadata: SessionMetadata;
private sessionId: string;
private metricsTracker: MetricsTracker;
private workflowLogger: WorkflowLogger;
+8
View File
@@ -74,6 +74,14 @@ export function generateSessionJsonPath(sessionMetadata: SessionMetadata): strin
return path.join(auditPath, 'session.json');
}
/**
 * Resolve the on-disk location of the shared authenticated browser session.
 *
 * The preflight validator writes this file and downstream agents read it
 * back through the `_shared-session.txt` prompt partial. It sits in the
 * audit directory returned by `generateAuditPath()` under the fixed name
 * `auth-state.json`.
 */
export function authStateFile(sessionMetadata: SessionMetadata): string {
  const auditDir = generateAuditPath(sessionMetadata);
  return path.join(auditDir, 'auth-state.json');
}
/**
* Generate path to workflow.log file
*/
+2 -1
View File
@@ -25,6 +25,7 @@ import { fs, path } from 'zx';
import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js';
import { getOutputFormat, getQueueFilename } from '../ai/queue-schemas.js';
import type { AuditSession } from '../audit/index.js';
import { authStateFile } from '../audit/utils.js';
import { AGENTS } from '../session-manager.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AgentName } from '../types/agents.js';
@@ -122,7 +123,7 @@ export class AgentExecutionService {
try {
prompt = await loadPrompt(
promptTemplate,
{ webUrl, repoPath },
{ webUrl, repoPath, AUTH_STATE_FILE: authStateFile(auditSession.sessionMetadata) },
distributedConfig,
pipelineTestingMode,
logger,
@@ -118,6 +118,7 @@ function renderReportFilterRules(report: ReportConfig | undefined): string {
/**
 * Caller-supplied values made available to prompt templates while they are
 * being interpolated.
 */
interface PromptVariables {
// NOTE(review): presumably the target application's URL — confirm against the interpolation code.
webUrl: string;
// NOTE(review): presumably the path of the repository under test — confirm against the interpolation code.
repoPath: string;
// Replaces every `{{AUTH_STATE_FILE}}` placeholder when the config has an `authentication` section.
AUTH_STATE_FILE: string;
// Optional. NOTE(review): looks like the value behind the `{{PLAYWRIGHT_SESSION}}` placeholder — confirm.
PLAYWRIGHT_SESSION?: string;
}
@@ -321,6 +322,12 @@ async function interpolateVariables(
result = result.replace(/<rules_of_engagement>[\s\S]*?<\/rules_of_engagement>\s*/g, '');
}
if (!config?.authentication) {
result = result.replace(/<shared_authenticated_session>[\s\S]*?<\/shared_authenticated_session>\s*/g, '');
} else {
result = result.replace(/{{AUTH_STATE_FILE}}/g, variables.AUTH_STATE_FILE);
}
if (config?.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
@@ -12,10 +12,12 @@
* pipeline burns hours on broken auth.
*/
import { readFile, rm } from 'node:fs/promises';
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
import { z } from 'zod';
import { runClaudePrompt } from '../ai/claude-executor.js';
import type { AuditSession } from '../audit/index.js';
import { authStateFile } from '../audit/utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AgentEndResult } from '../types/audit.js';
import type { DistributedConfig, ProviderConfig } from '../types/config.js';
@@ -93,9 +95,12 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise<
loginType: authentication.login_type,
});
const stateFile = authStateFile(auditSession.sessionMetadata);
await rm(stateFile, { force: true });
const prompt = await loadPrompt(
AGENT_NAME,
{ webUrl, repoPath },
{ webUrl, repoPath, AUTH_STATE_FILE: stateFile },
distributedConfig,
pipelineTestingMode ?? false,
logger,
@@ -120,7 +125,14 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise<
providerConfig,
);
const classification = classifyResult(result, authentication);
let classification = classifyResult(result, authentication);
if (classification.ok) {
const sessionCheck = await verifySavedAuthState(stateFile, logger);
if (!sessionCheck.ok) {
classification = sessionCheck;
}
}
const endResult: AgentEndResult = {
attemptNumber,
@@ -135,6 +147,62 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise<
return classification;
}
/**
 * Confirm that a saved authenticated-session file exists and is usable.
 *
 * Three checks run in order, and the first failure is returned:
 *   1. the file can be read as UTF-8 text,
 *   2. its contents parse as JSON,
 *   3. the parsed value has at least one entry under `cookies` or `origins`.
 *
 * @param stateFile - Path of the saved session file to inspect.
 * @param logger - Receives a single info entry when every check passes.
 * @returns `ok(undefined)` when the file is usable; otherwise an `err`
 *   wrapping a `PentestError` whose message and context identify the check
 *   that failed.
 */
async function verifySavedAuthState(stateFile: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
  let contents: string;
  try {
    contents = await readFile(stateFile, 'utf8');
  } catch (readErr) {
    // Keep the underlying cause in the context so a permissions or I/O
    // failure can be told apart from a file that was never written. This
    // mirrors how the JSON-parse branch below records its own error detail.
    const detail = readErr instanceof Error ? readErr.message : String(readErr);
    return err(
      new PentestError(
        `Preflight reported login success but did not save the authenticated session to ${stateFile}.`,
        'validation',
        true,
        { stateFile, readError: detail },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(contents);
  } catch (parseErr) {
    const detail = parseErr instanceof Error ? parseErr.message : String(parseErr);
    return err(
      new PentestError(
        `Preflight saved an authenticated session to ${stateFile}, but the file is not valid JSON: ${detail}`,
        'validation',
        true,
        { stateFile, parseError: detail },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  // A file with neither cookies nor origins carries no session at all, so
  // treat it as a failed login even though the file itself is well-formed.
  const cookieCount = countStorageEntries(parsed, 'cookies');
  const originCount = countStorageEntries(parsed, 'origins');
  if (cookieCount === 0 && originCount === 0) {
    return err(
      new PentestError(
        `Preflight saved an authenticated session to ${stateFile}, but it contains no cookies or origins — the browser was not actually logged in.`,
        'validation',
        true,
        { stateFile, cookieCount, originCount },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  logger.info('Preflight authenticated session saved', { stateFile, cookieCount, originCount });
  return ok(undefined);
}
/**
 * Count the entries stored under one array-valued key of a parsed JSON value.
 *
 * @param parsed - Arbitrary value produced by `JSON.parse`.
 * @param key - Which property to inspect.
 * @returns The length of the array at `key`, or 0 when `parsed` is not a
 *   non-null object or the property is not an array.
 */
function countStorageEntries(parsed: unknown, key: 'cookies' | 'origins'): number {
  if (parsed !== null && typeof parsed === 'object') {
    const entries = (parsed as Record<string, unknown>)[key];
    if (Array.isArray(entries)) {
      return entries.length;
    }
  }
  return 0;
}
function classifyResult(
result: import('../ai/claude-executor.js').ClaudePromptResult,
authentication: NonNullable<DistributedConfig['authentication']>,
+10 -2
View File
@@ -22,7 +22,7 @@ import { writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js'
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
import { AuditSession } from '../audit/index.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
import { authStateFile, generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { CheckpointContext } from '../interfaces/checkpoint-provider.js';
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
@@ -926,7 +926,15 @@ export async function logWorkflowComplete(input: ActivityInput, summary: Workflo
// 5. Write completion entry to workflow.log
await auditSession.logWorkflowComplete(cumulativeSummary);
// 6. Clean up container
// 6. Drop the authenticated browser session
try {
await fs.rm(authStateFile(sessionMetadata), { force: true });
} catch (error) {
const detail = error instanceof Error ? error.message : String(error);
console.warn(`Failed to clean up auth-state.json: ${detail}`);
}
// 7. Clean up container
removeContainer(workflowId);
}