mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-06-30 18:45:34 +02:00
feat(worker): migrate agent runtime from Claude Agent SDK to pi harness
This commit is contained in:
@@ -145,9 +145,9 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
|
||||
5. **Reporting** (`report`) — Executive-level security report
|
||||
|
||||
### Supporting Systems
|
||||
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are written into `~/.claude/settings.json` `permissions.deny` (`Read`/`Edit`) once per workflow by `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` so the SDK enforces them at the tool layer even in `bypassPermissions` mode. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
|
||||
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are enforced via the `@gotgenes/pi-permission-system` extension: `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` writes a global `path` deny config once per workflow (`apps/worker/src/ai/settings-writer.ts:writeCodePathPermissionConfig`), and the executor loads the extension when that config is present (`apps/worker/src/ai/claude-executor.ts`), so denies fire across every tool and child `task` session. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
|
||||
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop
|
||||
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7/4.8 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login
|
||||
- **Agent Harness (pi)** — Uses the **pi harness** (`@earendil-works/pi-coding-agent`, requires Node ≥ 22.19) via `apps/worker/src/ai/claude-executor.ts` (`runClaudePrompt` → `createAgentSession`, retry disabled so Temporal owns retry). Models resolve through pi-ai in `apps/worker/src/ai/models.ts` (Anthropic / Bedrock / Vertex / custom base URL via `ModelRegistry`+`AuthStorage`). pi ships no JSON-schema output or `Task`/`TodoWrite` built-ins, so structured queues are captured via a `submit_exploitation_queue` custom tool (`apps/worker/src/ai/queue-schemas.ts`), and `task` (read-only child sessions) + `todo_write` are provided as custom tools (`apps/worker/src/ai/tools.ts`); the per-phase MCP collectors are pi custom tools (TypeBox `defineTool` in `apps/worker/src/mcp-server/`). Thinking level defaults to `medium`; disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (→ `off`) or set `CLAUDE_THINKING_LEVEL` (env) / `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login
|
||||
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive
|
||||
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`)
|
||||
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts`
|
||||
@@ -168,7 +168,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
|
||||
### Key Design Patterns
|
||||
- **Configuration-Driven** — YAML configs with JSON Schema validation
|
||||
- **Progressive Analysis** — Each phase builds on previous results
|
||||
- **SDK-First** — Claude Agent SDK handles autonomous analysis
|
||||
- **Harness-First** — the pi harness (`@earendil-works/pi-coding-agent`) handles autonomous analysis
|
||||
- **Modular Error Handling** — `ErrorCode` enum, `Result<T,E>` for explicit error propagation, automatic retry (3 attempts per agent)
|
||||
- **Services Boundary** — Activities are thin Temporal wrappers; `apps/worker/src/services/` owns business logic, accepts `ActivityLogger`, returns `Result<T,E>`. No Temporal imports in services
|
||||
- **DI Container** — Per-workflow in `apps/worker/src/services/container.ts`. `AuditSession` excluded (parallel safety)
|
||||
@@ -228,7 +228,7 @@ Comments must be **timeless** — no references to this conversation, refactorin
|
||||
|
||||
**Entry Points:** `apps/worker/src/temporal/workflows.ts`, `apps/worker/src/temporal/activities.ts`, `apps/worker/src/temporal/worker.ts`
|
||||
|
||||
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to `~/.claude/settings.json`), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/`
|
||||
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to the `@gotgenes/pi-permission-system` global config), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/`
|
||||
|
||||
**Config:** `docker-compose.yml`, `apps/cli/infra/compose.yml`, `apps/worker/configs/`, `apps/worker/prompts/`, `tsconfig.base.json` (shared compiler options), `turbo.json`, `biome.json`
|
||||
|
||||
|
||||
@@ -19,7 +19,10 @@
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "catalog:",
|
||||
"@earendil-works/pi-agent-core": "^0.79.1",
|
||||
"@earendil-works/pi-ai": "^0.79.1",
|
||||
"@earendil-works/pi-coding-agent": "^0.79.1",
|
||||
"@gotgenes/pi-permission-system": "^10.9.0",
|
||||
"@temporalio/activity": "^1.11.0",
|
||||
"@temporalio/client": "^1.11.0",
|
||||
"@temporalio/worker": "^1.11.0",
|
||||
@@ -28,6 +31,7 @@
|
||||
"ajv-formats": "^2.1.1",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-yaml": "^4.1.0",
|
||||
"typebox": "1.1.38",
|
||||
"zod": "^4.3.6",
|
||||
"zx": "^8.0.0"
|
||||
},
|
||||
|
||||
@@ -4,28 +4,80 @@
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
// Production Claude agent execution with retry, git checkpoints, and audit logging
|
||||
// Production agent execution on the pi harness, with git checkpoints and audit logging.
|
||||
|
||||
import { type JsonSchemaOutputFormat, query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { createRequire } from 'node:module';
|
||||
import type { AgentMessage } from '@earendil-works/pi-agent-core';
|
||||
import {
|
||||
type AgentSessionEvent,
|
||||
createAgentSession,
|
||||
DefaultResourceLoader,
|
||||
getAgentDir,
|
||||
ModelRegistry,
|
||||
type ResourceLoader,
|
||||
SessionManager,
|
||||
SettingsManager,
|
||||
type ToolDefinition,
|
||||
} from '@earendil-works/pi-coding-agent';
|
||||
import { fs, path } from 'zx';
|
||||
import type { AuditSession } from '../audit/index.js';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import { isRetryableError, PentestError } from '../services/error-handling.js';
|
||||
import { AGENT_VALIDATORS } from '../session-manager.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { isSpendingCapBehavior, matchesBillingTextPattern } from '../utils/billing-detection.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { Timer } from '../utils/metrics.js';
|
||||
import { createAuditLogger } from './audit-logger.js';
|
||||
import { dispatchMessage } from './message-handlers.js';
|
||||
import { type ModelTier, resolveModel, supportsAdaptiveThinking } from './models.js';
|
||||
import { type ModelTier, resolveModelSelection } from './models.js';
|
||||
import { detectExecutionContext, formatCompletionMessage, formatErrorOutput } from './output-formatters.js';
|
||||
import { createProgressManager } from './progress-manager.js';
|
||||
import { permissionConfigPath } from './settings-writer.js';
|
||||
import { createTaskTool, createTodoWriteTool } from './tools.js';
|
||||
|
||||
declare global {
|
||||
var SHANNON_DISABLE_LOADER: boolean | undefined;
|
||||
}
|
||||
|
||||
/** Built-in pi tools enabled for every agent (custom tool names are appended). */
|
||||
const BUILTIN_TOOLS = ['read', 'bash', 'edit', 'write', 'grep', 'find', 'ls'];
|
||||
|
||||
const requireFromHere = createRequire(import.meta.url);
|
||||
let cachedExtensionDir: string | null | undefined;
|
||||
|
||||
/** Resolve the installed @gotgenes/pi-permission-system package dir, or null. */
|
||||
function permissionExtensionDir(): string | null {
|
||||
if (cachedExtensionDir !== undefined) return cachedExtensionDir;
|
||||
try {
|
||||
const entry = requireFromHere.resolve('@gotgenes/pi-permission-system');
|
||||
cachedExtensionDir = path.dirname(path.dirname(entry));
|
||||
} catch {
|
||||
cachedExtensionDir = null;
|
||||
}
|
||||
return cachedExtensionDir;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a resource loader that loads the pi-permission-system extension — but only
|
||||
* when a code_path deny config exists (written by settings-writer). Returns
|
||||
* undefined otherwise, preserving default behavior (and zero overhead) for runs
|
||||
* with no code_path avoids.
|
||||
*/
|
||||
async function buildPermissionResourceLoader(cwd: string, logger: ActivityLogger): Promise<ResourceLoader | undefined> {
|
||||
if (!fs.existsSync(permissionConfigPath())) return undefined;
|
||||
const extDir = permissionExtensionDir();
|
||||
if (!extDir) {
|
||||
logger.warn(
|
||||
'code_path deny config present but @gotgenes/pi-permission-system not resolvable — skipping enforcement',
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir: getAgentDir(), additionalExtensionPaths: [extDir] });
|
||||
await loader.reload();
|
||||
return loader;
|
||||
}
|
||||
|
||||
export interface ClaudePromptResult {
|
||||
result?: string | null | undefined;
|
||||
success: boolean;
|
||||
@@ -58,18 +110,8 @@ async function writeErrorLog(
|
||||
const errorLog = {
|
||||
timestamp: formatTimestamp(),
|
||||
agent: 'claude-executor',
|
||||
error: {
|
||||
name: err.constructor.name,
|
||||
message: err.message,
|
||||
code: err.code,
|
||||
status: err.status,
|
||||
stack: err.stack,
|
||||
},
|
||||
context: {
|
||||
sourceDir,
|
||||
prompt: `${fullPrompt.slice(0, 200)}...`,
|
||||
retryable: isRetryableError(err),
|
||||
},
|
||||
error: { name: err.constructor.name, message: err.message, code: err.code, status: err.status, stack: err.stack },
|
||||
context: { sourceDir, prompt: `${fullPrompt.slice(0, 200)}...`, retryable: isRetryableError(err) },
|
||||
duration,
|
||||
};
|
||||
const logPath = path.join(deliverablesDir(sourceDir), 'error.log');
|
||||
@@ -86,34 +128,23 @@ export async function validateAgentOutput(
|
||||
logger: ActivityLogger,
|
||||
): Promise<boolean> {
|
||||
logger.info(`Validating ${agentName} agent output`);
|
||||
|
||||
try {
|
||||
// Check if agent completed successfully (text result OR structured output)
|
||||
if (!result.success || (!result.result && result.structuredOutput === undefined)) {
|
||||
logger.error('Validation failed: Agent execution was unsuccessful');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get validator function for this agent
|
||||
const validator = agentName ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS] : undefined;
|
||||
|
||||
if (!validator) {
|
||||
logger.warn(`No validator found for agent "${agentName}" - assuming success`);
|
||||
logger.info('Validation passed: Unknown agent with successful result');
|
||||
return true;
|
||||
}
|
||||
|
||||
logger.info(`Using validator for agent: ${agentName}`, { sourceDir });
|
||||
|
||||
// Apply validation function
|
||||
const validationResult = await validator(sourceDir, logger);
|
||||
|
||||
if (validationResult) {
|
||||
logger.info('Validation passed: Required files/structure present');
|
||||
} else {
|
||||
logger.error('Validation failed: Missing required deliverable files');
|
||||
}
|
||||
|
||||
return validationResult;
|
||||
} catch (error) {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
@@ -122,8 +153,40 @@ export async function validateAgentOutput(
|
||||
}
|
||||
}
|
||||
|
||||
// Low-level SDK execution. Handles message streaming, progress, and audit logging.
|
||||
// Exported for Temporal activities to call single-attempt execution.
|
||||
/** Concatenate the text blocks of an assistant message (skips thinking + tool calls). */
|
||||
function extractAssistantText(message: AgentMessage): string {
|
||||
if (message.role !== 'assistant') return '';
|
||||
const blocks = message.content as Array<{ type: string; text?: string }>;
|
||||
return blocks
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => c.text ?? '')
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* Classify error-bearing text into a PentestError, mirroring the prior SDK error
|
||||
* handling. Spending-cap / billing text is retryable (Temporal backs off and
|
||||
* recovers when the cap resets); session limit is permanent.
|
||||
*/
|
||||
function classifyErrorText(content: string): PentestError | null {
|
||||
if (!content) return null;
|
||||
if (matchesBillingTextPattern(content)) {
|
||||
return new PentestError(
|
||||
`Billing limit reached: ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true,
|
||||
{},
|
||||
ErrorCode.SPENDING_CAP_REACHED,
|
||||
);
|
||||
}
|
||||
if (content.toLowerCase().includes('session limit reached')) {
|
||||
return new PentestError('Session limit reached', 'billing', false);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Low-level pi execution. Drives one agent session to completion with progress and
|
||||
// audit logging. Exported for Temporal activities to call single-attempt execution.
|
||||
export async function runClaudePrompt(
|
||||
prompt: string,
|
||||
sourceDir: string,
|
||||
@@ -133,11 +196,10 @@ export async function runClaudePrompt(
|
||||
auditSession: AuditSession | null = null,
|
||||
logger: ActivityLogger,
|
||||
modelTier: ModelTier = 'medium',
|
||||
outputFormat?: JsonSchemaOutputFormat,
|
||||
callerTools?: ToolDefinition[],
|
||||
apiKey?: string,
|
||||
deliverablesSubdir?: string,
|
||||
providerConfig?: import('../types/config.js').ProviderConfig,
|
||||
mcpServers?: Record<string, import('@anthropic-ai/claude-agent-sdk').McpServerConfig>,
|
||||
): Promise<ClaudePromptResult> {
|
||||
// 1. Initialize timing and prompt
|
||||
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
|
||||
@@ -151,132 +213,112 @@ export async function runClaudePrompt(
|
||||
);
|
||||
const auditLogger = createAuditLogger(auditSession);
|
||||
|
||||
logger.info(`Running Claude Code: ${description}...`);
|
||||
logger.info(`Running pi agent: ${description}...`);
|
||||
|
||||
// 3. Build env vars to pass to SDK subprocesses
|
||||
const sdkEnv: Record<string, string> = {
|
||||
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
|
||||
PLAYWRIGHT_MCP_OUTPUT_DIR: deliverablesSubdir
|
||||
? path.join(sourceDir, path.dirname(deliverablesSubdir), '.playwright-cli')
|
||||
: path.join(sourceDir, '.shannon', '.playwright-cli'),
|
||||
// apiKey from ContainerConfig takes precedence over process.env
|
||||
...(apiKey && { ANTHROPIC_API_KEY: apiKey }),
|
||||
// Deliverables subdir for save-deliverable CLI tool
|
||||
...(deliverablesSubdir && { SHANNON_DELIVERABLES_SUBDIR: deliverablesSubdir }),
|
||||
};
|
||||
// 3. Expose bash-invoked CLI tooling (playwright-cli, save-deliverable) to the
|
||||
// environment pi's bash tool inherits. These are constant per container, so
|
||||
// setting them on process.env is parallel-safe across this workflow's agents.
|
||||
process.env.PLAYWRIGHT_MCP_OUTPUT_DIR = deliverablesSubdir
|
||||
? path.join(sourceDir, path.dirname(deliverablesSubdir), '.playwright-cli')
|
||||
: path.join(sourceDir, '.shannon', '.playwright-cli');
|
||||
if (deliverablesSubdir) process.env.SHANNON_DELIVERABLES_SUBDIR = deliverablesSubdir;
|
||||
if (apiKey) process.env.ANTHROPIC_API_KEY = apiKey;
|
||||
|
||||
// 3a. Apply structured provider config directly to sdkEnv (no process.env mutation)
|
||||
if (providerConfig) {
|
||||
switch (providerConfig.providerType) {
|
||||
case 'bedrock':
|
||||
sdkEnv.CLAUDE_CODE_USE_BEDROCK = '1';
|
||||
if (providerConfig.awsRegion) sdkEnv.AWS_REGION = providerConfig.awsRegion;
|
||||
if (providerConfig.awsAccessKeyId) sdkEnv.AWS_ACCESS_KEY_ID = providerConfig.awsAccessKeyId;
|
||||
if (providerConfig.awsSecretAccessKey) sdkEnv.AWS_SECRET_ACCESS_KEY = providerConfig.awsSecretAccessKey;
|
||||
break;
|
||||
case 'vertex':
|
||||
sdkEnv.CLAUDE_CODE_USE_VERTEX = '1';
|
||||
if (providerConfig.gcpRegion) sdkEnv.CLOUD_ML_REGION = providerConfig.gcpRegion;
|
||||
if (providerConfig.gcpProjectId) sdkEnv.ANTHROPIC_VERTEX_PROJECT_ID = providerConfig.gcpProjectId;
|
||||
if (providerConfig.gcpCredentialsPath)
|
||||
sdkEnv.GOOGLE_APPLICATION_CREDENTIALS = providerConfig.gcpCredentialsPath;
|
||||
break;
|
||||
case 'litellm_router':
|
||||
if (providerConfig.baseUrl) sdkEnv.ANTHROPIC_BASE_URL = providerConfig.baseUrl;
|
||||
if (providerConfig.authToken) sdkEnv.ANTHROPIC_AUTH_TOKEN = providerConfig.authToken;
|
||||
break;
|
||||
default:
|
||||
// 'anthropic_api' or unset — apiKey already handled above
|
||||
if (providerConfig.apiKey && !apiKey) sdkEnv.ANTHROPIC_API_KEY = providerConfig.apiKey;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 3b. Passthrough env vars not already set by providerConfig or apiKey
|
||||
const passthroughVars = [
|
||||
...(!sdkEnv.ANTHROPIC_API_KEY ? ['ANTHROPIC_API_KEY'] : []),
|
||||
'CLAUDE_CODE_OAUTH_TOKEN',
|
||||
...(!sdkEnv.ANTHROPIC_BASE_URL ? ['ANTHROPIC_BASE_URL'] : []),
|
||||
...(!sdkEnv.ANTHROPIC_AUTH_TOKEN ? ['ANTHROPIC_AUTH_TOKEN'] : []),
|
||||
...(!sdkEnv.CLAUDE_CODE_USE_BEDROCK ? ['CLAUDE_CODE_USE_BEDROCK'] : []),
|
||||
...(!sdkEnv.AWS_REGION ? ['AWS_REGION'] : []),
|
||||
'AWS_BEARER_TOKEN_BEDROCK',
|
||||
...(!sdkEnv.CLAUDE_CODE_USE_VERTEX ? ['CLAUDE_CODE_USE_VERTEX'] : []),
|
||||
...(!sdkEnv.CLOUD_ML_REGION ? ['CLOUD_ML_REGION'] : []),
|
||||
...(!sdkEnv.ANTHROPIC_VERTEX_PROJECT_ID ? ['ANTHROPIC_VERTEX_PROJECT_ID'] : []),
|
||||
...(!sdkEnv.GOOGLE_APPLICATION_CREDENTIALS ? ['GOOGLE_APPLICATION_CREDENTIALS'] : []),
|
||||
'HOME',
|
||||
'PATH',
|
||||
'PLAYWRIGHT_MCP_EXECUTABLE_PATH',
|
||||
// 4. Resolve model + auth, then assemble the tool set (universal task/todo tools
|
||||
// plus any caller-supplied collector/submit tools).
|
||||
const selection = resolveModelSelection((auth) => ModelRegistry.create(auth), modelTier, apiKey, providerConfig);
|
||||
// Load the code_path deny extension only when a deny config was written; the same
|
||||
// loader is reused by child task sessions so they inherit the policy.
|
||||
const resourceLoader = await buildPermissionResourceLoader(sourceDir, logger);
|
||||
const customTools: ToolDefinition[] = [
|
||||
createTaskTool({
|
||||
model: selection.model,
|
||||
thinkingLevel: selection.thinkingLevel,
|
||||
authStorage: selection.authStorage,
|
||||
cwd: sourceDir,
|
||||
...(resourceLoader && { resourceLoader }),
|
||||
}),
|
||||
createTodoWriteTool(auditLogger),
|
||||
...(callerTools ?? []),
|
||||
];
|
||||
for (const name of passthroughVars) {
|
||||
const val = process.env[name];
|
||||
if (val) {
|
||||
sdkEnv[name] = val;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Configure SDK options
|
||||
// Model override from providerConfig takes precedence over env-based resolveModel
|
||||
const model = providerConfig?.modelOverrides?.[modelTier] ?? resolveModel(modelTier);
|
||||
const adaptiveThinking = supportsAdaptiveThinking(model) && process.env.CLAUDE_ADAPTIVE_THINKING !== 'false';
|
||||
const options = {
|
||||
model,
|
||||
maxTurns: 10_000,
|
||||
cwd: sourceDir,
|
||||
permissionMode: 'bypassPermissions' as const,
|
||||
allowDangerouslySkipPermissions: true,
|
||||
settingSources: ['user'] as ('user' | 'project' | 'local')[],
|
||||
env: sdkEnv,
|
||||
...(adaptiveThinking && { thinking: { type: 'adaptive' as const } }),
|
||||
...(outputFormat && { outputFormat }),
|
||||
...(mcpServers && Object.keys(mcpServers).length > 0 && { mcpServers }),
|
||||
};
|
||||
|
||||
if (!execContext.useCleanOutput) {
|
||||
logger.info(`SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`);
|
||||
}
|
||||
// pi's `tools` allowlist gates custom tools too — list every custom name.
|
||||
const tools = [...BUILTIN_TOOLS, ...customTools.map((t) => t.name)];
|
||||
|
||||
let turnCount = 0;
|
||||
let result: string | null = null;
|
||||
let pendingError: PentestError | null = null;
|
||||
let apiErrorDetected = false;
|
||||
let totalCost = 0;
|
||||
|
||||
progress.start();
|
||||
|
||||
try {
|
||||
// 6. Process the message stream
|
||||
const messageLoopResult = await processMessageStream(
|
||||
fullPrompt,
|
||||
options,
|
||||
{ execContext, description, progress, auditLogger, logger },
|
||||
timer,
|
||||
);
|
||||
const { session } = await createAgentSession({
|
||||
cwd: sourceDir,
|
||||
model: selection.model,
|
||||
thinkingLevel: selection.thinkingLevel,
|
||||
tools,
|
||||
customTools,
|
||||
authStorage: selection.authStorage,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
// Temporal owns retry; pi compaction stays on (no analog previously, guards
|
||||
// against context overflow on long agent runs).
|
||||
settingsManager: SettingsManager.inMemory({ retry: { enabled: false }, compaction: { enabled: true } }),
|
||||
...(resourceLoader && { resourceLoader }),
|
||||
});
|
||||
|
||||
turnCount = messageLoopResult.turnCount;
|
||||
result = messageLoopResult.result;
|
||||
apiErrorDetected = messageLoopResult.apiErrorDetected;
|
||||
totalCost = messageLoopResult.cost;
|
||||
const model = messageLoopResult.model;
|
||||
// 5. Map pi events to audit logging + progress + error capture.
|
||||
session.subscribe((event: AgentSessionEvent) => {
|
||||
switch (event.type) {
|
||||
case 'turn_end': {
|
||||
turnCount += 1;
|
||||
const msg = event.message;
|
||||
const text = extractAssistantText(msg);
|
||||
if (text.trim()) {
|
||||
void auditLogger.logLlmResponse(turnCount, text);
|
||||
const billing = classifyErrorText(text);
|
||||
if (billing) pendingError = billing;
|
||||
}
|
||||
if (msg.role === 'assistant' && msg.stopReason === 'error') {
|
||||
apiErrorDetected = true;
|
||||
pendingError =
|
||||
pendingError ??
|
||||
classifyErrorText(msg.errorMessage ?? '') ??
|
||||
new PentestError(`Agent error: ${(msg.errorMessage ?? 'unknown').slice(0, 200)}`, 'unknown', true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'tool_execution_start':
|
||||
void auditLogger.logToolStart(event.toolName, event.args);
|
||||
break;
|
||||
case 'tool_execution_end':
|
||||
void auditLogger.logToolEnd(event.result);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
// === SPENDING CAP SAFEGUARD ===
|
||||
// 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||
// Uses consolidated billing detection from utils/billing-detection.ts
|
||||
// 6. Run the agent to completion (resolves at agent_end).
|
||||
await session.prompt(fullPrompt);
|
||||
session.dispose();
|
||||
|
||||
// 7. Surface any error captured during the run.
|
||||
if (pendingError) throw pendingError;
|
||||
|
||||
// 8. Read usage/cost and final text.
|
||||
const stats = session.getSessionStats();
|
||||
const totalCost = stats.cost;
|
||||
const result = session.getLastAssistantText() ?? null;
|
||||
|
||||
// 9. Defense-in-depth: detect a spending cap that produced an empty/cheap run.
|
||||
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
|
||||
throw new PentestError(
|
||||
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
|
||||
'billing',
|
||||
true, // Retryable - Temporal will use 5-30 min backoff
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
// 8. Finalize successful result
|
||||
const duration = timer.stop();
|
||||
|
||||
if (apiErrorDetected) {
|
||||
logger.warn(`API Error detected in ${description} - will validate deliverables before failing`);
|
||||
}
|
||||
|
||||
progress.finish(formatCompletionMessage(execContext, description, turnCount, duration));
|
||||
|
||||
return {
|
||||
@@ -285,19 +327,14 @@ export async function runClaudePrompt(
|
||||
duration,
|
||||
turns: turnCount,
|
||||
cost: totalCost,
|
||||
model,
|
||||
model: selection.model.id,
|
||||
partialCost: totalCost,
|
||||
apiErrorDetected,
|
||||
...(messageLoopResult.structuredOutput !== undefined && {
|
||||
structuredOutput: messageLoopResult.structuredOutput,
|
||||
}),
|
||||
};
|
||||
} catch (error) {
|
||||
// 9. Handle errors — log, write error file, return failure
|
||||
// 10. Handle errors — log, write error file, return failure
|
||||
const duration = timer.stop();
|
||||
|
||||
const err = error as Error & { code?: string; status?: number };
|
||||
|
||||
await auditLogger.logError(err, duration, turnCount);
|
||||
progress.stop();
|
||||
outputLines(formatErrorOutput(err, execContext, description, duration, sourceDir, isRetryableError(err)));
|
||||
@@ -309,96 +346,8 @@ export async function runClaudePrompt(
|
||||
prompt: `${fullPrompt.slice(0, 100)}...`,
|
||||
success: false,
|
||||
duration,
|
||||
cost: totalCost,
|
||||
cost: 0,
|
||||
retryable: isRetryableError(err),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
interface MessageLoopResult {
|
||||
turnCount: number;
|
||||
result: string | null;
|
||||
apiErrorDetected: boolean;
|
||||
cost: number;
|
||||
model?: string | undefined;
|
||||
structuredOutput?: unknown;
|
||||
}
|
||||
|
||||
interface MessageLoopDeps {
|
||||
execContext: ReturnType<typeof detectExecutionContext>;
|
||||
description: string;
|
||||
progress: ReturnType<typeof createProgressManager>;
|
||||
auditLogger: ReturnType<typeof createAuditLogger>;
|
||||
logger: ActivityLogger;
|
||||
}
|
||||
|
||||
async function processMessageStream(
|
||||
fullPrompt: string,
|
||||
options: NonNullable<Parameters<typeof query>[0]['options']>,
|
||||
deps: MessageLoopDeps,
|
||||
timer: Timer,
|
||||
): Promise<MessageLoopResult> {
|
||||
const { execContext, description, progress, auditLogger, logger } = deps;
|
||||
const HEARTBEAT_INTERVAL = 30000;
|
||||
|
||||
let turnCount = 0;
|
||||
let result: string | null = null;
|
||||
let apiErrorDetected = false;
|
||||
let cost = 0;
|
||||
let model: string | undefined;
|
||||
let structuredOutput: unknown | undefined;
|
||||
let lastHeartbeat = Date.now();
|
||||
|
||||
for await (const message of query({ prompt: fullPrompt, options })) {
|
||||
// Heartbeat logging when loader is disabled
|
||||
const now = Date.now();
|
||||
if (global.SHANNON_DISABLE_LOADER && now - lastHeartbeat > HEARTBEAT_INTERVAL) {
|
||||
logger.info(`[${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`);
|
||||
lastHeartbeat = now;
|
||||
}
|
||||
|
||||
// Increment turn count for assistant messages
|
||||
if (message.type === 'assistant') {
|
||||
turnCount++;
|
||||
}
|
||||
|
||||
const dispatchResult = await dispatchMessage(message as { type: string; subtype?: string }, turnCount, {
|
||||
execContext,
|
||||
description,
|
||||
progress,
|
||||
auditLogger,
|
||||
logger,
|
||||
});
|
||||
|
||||
if (dispatchResult.type === 'throw') {
|
||||
throw dispatchResult.error;
|
||||
}
|
||||
|
||||
if (dispatchResult.type === 'complete') {
|
||||
result = dispatchResult.result;
|
||||
cost = dispatchResult.cost;
|
||||
if (dispatchResult.structuredOutput !== undefined) {
|
||||
structuredOutput = dispatchResult.structuredOutput;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (dispatchResult.type === 'continue') {
|
||||
if (dispatchResult.apiErrorDetected) {
|
||||
apiErrorDetected = true;
|
||||
}
|
||||
if (dispatchResult.model) {
|
||||
model = dispatchResult.model;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
turnCount,
|
||||
result,
|
||||
apiErrorDetected,
|
||||
cost,
|
||||
model,
|
||||
...(structuredOutput !== undefined && { structuredOutput }),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,408 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { PentestError } from '../services/error-handling.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import type { AuditLogger } from './audit-logger.js';
|
||||
import {
|
||||
filterJsonToolCalls,
|
||||
formatAssistantOutput,
|
||||
formatResultOutput,
|
||||
formatToolResultOutput,
|
||||
formatToolUseOutput,
|
||||
} from './output-formatters.js';
|
||||
import type { ProgressManager } from './progress-manager.js';
|
||||
import type {
|
||||
ApiErrorDetection,
|
||||
AssistantMessage,
|
||||
AssistantResult,
|
||||
ContentBlock,
|
||||
ExecutionContext,
|
||||
ModelRefusalFallbackMessage,
|
||||
ResultData,
|
||||
ResultMessage,
|
||||
SystemInitMessage,
|
||||
ToolResultData,
|
||||
ToolResultMessage,
|
||||
ToolUseData,
|
||||
ToolUseMessage,
|
||||
} from './types.js';
|
||||
|
||||
// Handles both array and string content formats from SDK
|
||||
function extractMessageContent(message: AssistantMessage): string {
|
||||
const messageContent = message.message;
|
||||
|
||||
if (Array.isArray(messageContent.content)) {
|
||||
return messageContent.content
|
||||
.filter((c: ContentBlock) => c.type !== 'thinking' && c.type !== 'redacted_thinking')
|
||||
.map((c: ContentBlock) => c.text || JSON.stringify(c))
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
return String(messageContent.content);
|
||||
}
|
||||
|
||||
// Extracts only text content (no tool_use JSON) to avoid false positives in error detection
|
||||
function extractTextOnlyContent(message: AssistantMessage): string {
|
||||
const messageContent = message.message;
|
||||
|
||||
if (Array.isArray(messageContent.content)) {
|
||||
return messageContent.content
|
||||
.filter((c: ContentBlock) => c.type === 'text' || c.text)
|
||||
.map((c: ContentBlock) => c.text || '')
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
return String(messageContent.content);
|
||||
}
|
||||
|
||||
function detectApiError(content: string): ApiErrorDetection {
|
||||
if (!content || typeof content !== 'string') {
|
||||
return { detected: false };
|
||||
}
|
||||
|
||||
const lowerContent = content.toLowerCase();
|
||||
|
||||
// === BILLING/SPENDING CAP ERRORS (Retryable with long backoff) ===
|
||||
// When Claude Code hits its spending cap, it returns a short message like
|
||||
// "Spending cap reached resets 8am" instead of throwing an error.
|
||||
// These should retry with 5-30 min backoff so workflows can recover when cap resets.
|
||||
if (matchesBillingTextPattern(content)) {
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Billing limit reached: ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true, // RETRYABLE - Temporal will use 5-30 min backoff
|
||||
{},
|
||||
ErrorCode.SPENDING_CAP_REACHED,
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
// === SESSION LIMIT (Non-retryable) ===
|
||||
// Different from spending cap - usually means something is fundamentally wrong
|
||||
if (lowerContent.includes('session limit reached')) {
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError('Session limit reached', 'billing', false),
|
||||
};
|
||||
}
|
||||
|
||||
// Non-fatal API errors - detected but continue
|
||||
if (lowerContent.includes('api error') || lowerContent.includes('terminated')) {
|
||||
return { detected: true };
|
||||
}
|
||||
|
||||
return { detected: false };
|
||||
}
|
||||
|
||||
// Maps SDK structured error types to our error handling.
|
||||
function handleStructuredError(errorType: SDKAssistantMessageError, content: string): ApiErrorDetection {
|
||||
switch (errorType) {
|
||||
case 'billing_error':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Billing error (structured): ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true, // Retryable with backoff
|
||||
{},
|
||||
ErrorCode.INSUFFICIENT_CREDITS,
|
||||
),
|
||||
};
|
||||
case 'rate_limit':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Rate limit hit (structured): ${content.slice(0, 100)}`,
|
||||
'network',
|
||||
true, // Retryable with backoff
|
||||
{},
|
||||
ErrorCode.API_RATE_LIMITED,
|
||||
),
|
||||
};
|
||||
case 'authentication_failed':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Authentication failed: ${content.slice(0, 100)}`,
|
||||
'config',
|
||||
false, // Not retryable - needs API key fix
|
||||
),
|
||||
};
|
||||
case 'server_error':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Server error (structured): ${content.slice(0, 100)}`,
|
||||
'network',
|
||||
true, // Retryable
|
||||
),
|
||||
};
|
||||
case 'invalid_request':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Invalid request: ${content.slice(0, 100)}`,
|
||||
'config',
|
||||
false, // Not retryable - needs code fix
|
||||
),
|
||||
};
|
||||
case 'max_output_tokens':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Max output tokens reached: ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true, // Retryable - may succeed with different content
|
||||
),
|
||||
};
|
||||
case 'overloaded':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Anthropic API overloaded (structured): ${content.slice(0, 100)}`,
|
||||
'network',
|
||||
true, // Retryable with backoff
|
||||
),
|
||||
};
|
||||
case 'model_not_found':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Model not found: ${content.slice(0, 100)}`,
|
||||
'config',
|
||||
false, // Not retryable - model ID is misconfigured
|
||||
),
|
||||
};
|
||||
case 'oauth_org_not_allowed':
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Organization not allowed for this credential: ${content.slice(0, 100)}`,
|
||||
'config',
|
||||
false, // Not retryable - needs credential/org fix
|
||||
),
|
||||
};
|
||||
default:
|
||||
return { detected: true };
|
||||
}
|
||||
}
|
||||
|
||||
function handleAssistantMessage(message: AssistantMessage, turnCount: number): AssistantResult {
|
||||
const content = extractMessageContent(message);
|
||||
const cleanedContent = filterJsonToolCalls(content);
|
||||
|
||||
// Prefer structured error field from SDK, fall back to text-sniffing
|
||||
// Use text-only content for error detection to avoid false positives
|
||||
// from tool_use JSON (e.g. security reports containing "usage limit")
|
||||
let errorDetection: ApiErrorDetection;
|
||||
if (message.error) {
|
||||
errorDetection = handleStructuredError(message.error, content);
|
||||
} else {
|
||||
const textOnlyContent = extractTextOnlyContent(message);
|
||||
errorDetection = detectApiError(textOnlyContent);
|
||||
}
|
||||
|
||||
const result: AssistantResult = {
|
||||
content,
|
||||
cleanedContent,
|
||||
apiErrorDetected: errorDetection.detected,
|
||||
logData: {
|
||||
turn: turnCount,
|
||||
content,
|
||||
timestamp: formatTimestamp(),
|
||||
},
|
||||
};
|
||||
|
||||
// Only add shouldThrow if it exists (exactOptionalPropertyTypes compliance)
|
||||
if (errorDetection.shouldThrow) {
|
||||
result.shouldThrow = errorDetection.shouldThrow;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Final message of a query with cost/duration info
|
||||
function handleResultMessage(message: ResultMessage): ResultData {
|
||||
const result: ResultData = {
|
||||
result: message.result || null,
|
||||
cost: message.total_cost_usd || 0,
|
||||
duration_ms: message.duration_ms || 0,
|
||||
permissionDenials: message.permission_denials?.length || 0,
|
||||
};
|
||||
|
||||
// Only add subtype if it exists (exactOptionalPropertyTypes compliance)
|
||||
if (message.subtype) {
|
||||
result.subtype = message.subtype;
|
||||
}
|
||||
|
||||
// Capture stop_reason for diagnostics (helps debug early stops, budget exceeded, etc.)
|
||||
if (message.stop_reason !== undefined) {
|
||||
result.stop_reason = message.stop_reason;
|
||||
if (message.stop_reason && message.stop_reason !== 'end_turn') {
|
||||
console.log(` Stop reason: ${message.stop_reason}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (message.structured_output !== undefined) {
|
||||
result.structuredOutput = message.structured_output;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
|
||||
return {
|
||||
toolName: message.name,
|
||||
parameters: message.input || {},
|
||||
timestamp: formatTimestamp(),
|
||||
};
|
||||
}
|
||||
|
||||
// Truncates long results for display (500 char limit), preserves full content for logging
|
||||
function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
|
||||
const content = message.content;
|
||||
const contentStr = typeof content === 'string' ? content : JSON.stringify(content, null, 2);
|
||||
|
||||
const displayContent =
|
||||
contentStr.length > 500
|
||||
? `${contentStr.slice(0, 500)}...\n[Result truncated - ${contentStr.length} total chars]`
|
||||
: contentStr;
|
||||
|
||||
return {
|
||||
content,
|
||||
displayContent,
|
||||
timestamp: formatTimestamp(),
|
||||
};
|
||||
}
|
||||
|
||||
function outputLines(lines: string[]): void {
|
||||
for (const line of lines) {
|
||||
console.log(line);
|
||||
}
|
||||
}
|
||||
|
||||
export type MessageDispatchAction =
|
||||
| { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
|
||||
| { type: 'complete'; result: string | null; cost: number; structuredOutput?: unknown }
|
||||
| { type: 'throw'; error: Error };
|
||||
|
||||
export interface MessageDispatchDeps {
|
||||
execContext: ExecutionContext;
|
||||
description: string;
|
||||
progress: ProgressManager;
|
||||
auditLogger: AuditLogger;
|
||||
logger: ActivityLogger;
|
||||
}
|
||||
|
||||
// Dispatches SDK messages to appropriate handlers and formatters
|
||||
export async function dispatchMessage(
|
||||
message: { type: string; subtype?: string },
|
||||
turnCount: number,
|
||||
deps: MessageDispatchDeps,
|
||||
): Promise<MessageDispatchAction> {
|
||||
const { execContext, description, progress, auditLogger, logger } = deps;
|
||||
|
||||
switch (message.type) {
|
||||
case 'assistant': {
|
||||
const assistantResult = handleAssistantMessage(message as AssistantMessage, turnCount);
|
||||
|
||||
if (assistantResult.shouldThrow) {
|
||||
return { type: 'throw', error: assistantResult.shouldThrow };
|
||||
}
|
||||
|
||||
if (assistantResult.cleanedContent.trim()) {
|
||||
progress.stop();
|
||||
outputLines(formatAssistantOutput(assistantResult.cleanedContent, execContext, turnCount, description));
|
||||
progress.start();
|
||||
}
|
||||
|
||||
await auditLogger.logLlmResponse(turnCount, assistantResult.content);
|
||||
|
||||
if (assistantResult.apiErrorDetected) {
|
||||
logger.warn('API Error detected in assistant response');
|
||||
return { type: 'continue', apiErrorDetected: true };
|
||||
}
|
||||
|
||||
return { type: 'continue' };
|
||||
}
|
||||
|
||||
case 'system': {
|
||||
if (message.subtype === 'init') {
|
||||
const initMsg = message as SystemInitMessage;
|
||||
if (!execContext.useCleanOutput) {
|
||||
logger.info(`Model: ${initMsg.model}, Permission: ${initMsg.permissionMode}`);
|
||||
}
|
||||
return { type: 'continue', model: initMsg.model };
|
||||
}
|
||||
if (message.subtype === 'model_refusal_fallback') {
|
||||
const fallback = message as ModelRefusalFallbackMessage;
|
||||
const category = fallback.api_refusal_category ?? 'policy';
|
||||
await auditLogger.logNote(
|
||||
'model-fallback',
|
||||
`Model refused (${category}); fell back ${fallback.original_model} → ${fallback.fallback_model}`,
|
||||
);
|
||||
return { type: 'continue' };
|
||||
}
|
||||
return { type: 'continue' };
|
||||
}
|
||||
|
||||
case 'user':
|
||||
case 'tool_progress':
|
||||
case 'tool_use_summary':
|
||||
case 'auth_status':
|
||||
return { type: 'continue' };
|
||||
|
||||
case 'tool_use': {
|
||||
const toolData = handleToolUseMessage(message as unknown as ToolUseMessage);
|
||||
outputLines(formatToolUseOutput(toolData.toolName, toolData.parameters));
|
||||
await auditLogger.logToolStart(toolData.toolName, toolData.parameters);
|
||||
return { type: 'continue' };
|
||||
}
|
||||
|
||||
case 'tool_result': {
|
||||
const toolResultData = handleToolResultMessage(message as unknown as ToolResultMessage);
|
||||
outputLines(formatToolResultOutput(toolResultData.displayContent));
|
||||
await auditLogger.logToolEnd(toolResultData.content);
|
||||
return { type: 'continue' };
|
||||
}
|
||||
|
||||
case 'result': {
|
||||
const resultData = handleResultMessage(message as ResultMessage);
|
||||
outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
|
||||
|
||||
if (resultData.subtype === 'error_max_structured_output_retries') {
|
||||
return {
|
||||
type: 'throw',
|
||||
error: new PentestError(
|
||||
'Structured output validation failed after max retries',
|
||||
'validation',
|
||||
true,
|
||||
{},
|
||||
ErrorCode.OUTPUT_VALIDATION_FAILED,
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'complete' as const,
|
||||
result: resultData.result,
|
||||
cost: resultData.cost,
|
||||
...(resultData.structuredOutput !== undefined && { structuredOutput: resultData.structuredOutput }),
|
||||
};
|
||||
}
|
||||
|
||||
default:
|
||||
logger.info(`Unhandled message type: ${message.type}`);
|
||||
return { type: 'continue' };
|
||||
}
|
||||
}
|
||||
@@ -5,17 +5,28 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Model tier definitions and resolution.
|
||||
* Model tier definitions and resolution for the pi harness.
|
||||
*
|
||||
* Three tiers mapped to capability levels:
|
||||
* - "small" (Haiku — summarization, structured extraction)
|
||||
* - "medium" (Sonnet — tool use, general analysis)
|
||||
* - "large" (Opus — deep reasoning, complex analysis)
|
||||
*
|
||||
* Users override via ANTHROPIC_SMALL_MODEL / ANTHROPIC_MEDIUM_MODEL / ANTHROPIC_LARGE_MODEL,
|
||||
* which works across all providers (direct, Bedrock, Vertex).
|
||||
* Users override per tier via ANTHROPIC_SMALL_MODEL / ANTHROPIC_MEDIUM_MODEL /
|
||||
* ANTHROPIC_LARGE_MODEL, which works across all providers (Anthropic, Bedrock,
|
||||
* Vertex, custom base URL).
|
||||
*
|
||||
* Resolution returns a pi `Model` object via `ModelRegistry.find`, plus the
|
||||
* `thinkingLevel` and an `AuthStorage` primed with runtime credentials. Bedrock
|
||||
* and Vertex authenticate from the process environment (the AWS_ and GOOGLE_ vars
|
||||
* the CLI forwards), so they need no runtime API key.
|
||||
*/
|
||||
|
||||
import type { ThinkingLevel } from '@earendil-works/pi-agent-core';
|
||||
import type { Api, Model } from '@earendil-works/pi-ai';
|
||||
import { AuthStorage, type ModelRegistry } from '@earendil-works/pi-coding-agent';
|
||||
import type { ProviderConfig } from '../types/config.js';
|
||||
|
||||
export type ModelTier = 'small' | 'medium' | 'large';
|
||||
|
||||
const DEFAULT_MODELS: Readonly<Record<ModelTier, string>> = {
|
||||
@@ -24,8 +35,24 @@ const DEFAULT_MODELS: Readonly<Record<ModelTier, string>> = {
|
||||
large: 'claude-opus-4-8',
|
||||
};
|
||||
|
||||
/** Resolve a model tier to a concrete model ID. */
|
||||
export function resolveModel(tier: ModelTier = 'medium'): string {
|
||||
/** pi-ai provider id for a Shannon ProviderConfig.providerType. Default: anthropic. */
|
||||
function piProviderId(providerConfig?: ProviderConfig): string {
|
||||
switch (providerConfig?.providerType) {
|
||||
case 'bedrock':
|
||||
return 'amazon-bedrock';
|
||||
case 'vertex':
|
||||
return 'google-vertex';
|
||||
default:
|
||||
// 'anthropic_api', 'custom_base_url', or unset all resolve to the anthropic
|
||||
// provider; custom_base_url overrides baseUrl/auth below.
|
||||
return 'anthropic';
|
||||
}
|
||||
}
|
||||
|
||||
/** Resolve a model tier to a concrete model ID (env override → providerConfig → default). */
|
||||
export function resolveModelId(tier: ModelTier = 'medium', providerConfig?: ProviderConfig): string {
|
||||
const override = providerConfig?.modelOverrides?.[tier];
|
||||
if (override) return override;
|
||||
switch (tier) {
|
||||
case 'small':
|
||||
return process.env.ANTHROPIC_SMALL_MODEL || DEFAULT_MODELS.small;
|
||||
@@ -36,9 +63,75 @@ export function resolveModel(tier: ModelTier = 'medium'): string {
|
||||
}
|
||||
}
|
||||
|
||||
/** Whether a model supports adaptive thinking. Opus 4.6, 4.7, and 4.8 only. */
|
||||
export function supportsAdaptiveThinking(model: string): boolean {
|
||||
return /opus-4-[678]/.test(model);
|
||||
/**
|
||||
* Resolve the thinking level for a run.
|
||||
*
|
||||
* The Claude Agent SDK enabled "adaptive" thinking only on capable models; pi uses
|
||||
* explicit levels and clamps to model capability internally. We default to 'medium'
|
||||
* and honour the existing CLAUDE_ADAPTIVE_THINKING=false kill switch (→ 'off'). An
|
||||
* explicit CLAUDE_THINKING_LEVEL wins when set.
|
||||
*/
|
||||
export function resolveThinkingLevel(): ThinkingLevel {
|
||||
if (process.env.CLAUDE_ADAPTIVE_THINKING === 'false') return 'off';
|
||||
const explicit = process.env.CLAUDE_THINKING_LEVEL as ThinkingLevel | undefined;
|
||||
if (explicit) return explicit;
|
||||
return 'medium';
|
||||
}
|
||||
|
||||
export interface ModelSelection {
|
||||
model: Model<Api>;
|
||||
thinkingLevel: ThinkingLevel;
|
||||
authStorage: AuthStorage;
|
||||
modelId: string;
|
||||
providerId: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an AuthStorage primed with the right credential for the active provider,
|
||||
* then resolve the tier's model from a fresh ModelRegistry.
|
||||
*
|
||||
* - Anthropic: runtime API key from ContainerConfig.apiKey → ProviderConfig.apiKey
|
||||
* → ANTHROPIC_API_KEY env. OAuth (CLAUDE_CODE_OAUTH_TOKEN) is read from env by pi.
|
||||
* - Custom base URL (custom_base_url): the auth token is set as the anthropic
|
||||
* runtime key and the model's baseUrl is overridden.
|
||||
* - Bedrock / Vertex: authenticate from the process environment (the AWS_ and
|
||||
* GOOGLE_ vars), no runtime key needed.
|
||||
*/
|
||||
export function resolveModelSelection(
|
||||
registryFactory: (authStorage: AuthStorage) => ModelRegistry,
|
||||
modelTier: ModelTier,
|
||||
apiKey?: string,
|
||||
providerConfig?: ProviderConfig,
|
||||
): ModelSelection {
|
||||
const providerId = piProviderId(providerConfig);
|
||||
const modelId = resolveModelId(modelTier, providerConfig);
|
||||
|
||||
const authStorage = AuthStorage.inMemory();
|
||||
|
||||
const anthropicKey = apiKey ?? providerConfig?.apiKey ?? process.env.ANTHROPIC_API_KEY;
|
||||
if (providerId === 'anthropic') {
|
||||
const token =
|
||||
providerConfig?.providerType === 'custom_base_url' ? (providerConfig.authToken ?? anthropicKey) : anthropicKey;
|
||||
if (token) authStorage.setRuntimeApiKey('anthropic', token);
|
||||
}
|
||||
|
||||
const registry = registryFactory(authStorage);
|
||||
const found = registry.find(providerId, modelId);
|
||||
if (!found) {
|
||||
throw new Error(`Model not found in pi registry: provider="${providerId}" model="${modelId}"`);
|
||||
}
|
||||
|
||||
// Custom base URL: override the resolved model's endpoint.
|
||||
const baseUrl = providerConfig?.providerType === 'custom_base_url' ? providerConfig.baseUrl : undefined;
|
||||
const model: Model<Api> = baseUrl ? { ...found, baseUrl } : found;
|
||||
|
||||
return {
|
||||
model,
|
||||
thinkingLevel: resolveThinkingLevel(),
|
||||
authStorage,
|
||||
modelId,
|
||||
providerId,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
+127
-183
@@ -5,196 +5,114 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Zod schema definitions for vulnerability exploitation queue structured outputs.
|
||||
* TypeBox schemas + submit-tool factory for vulnerability exploitation queues.
|
||||
*
|
||||
* Each vuln agent returns a structured JSON response matching its schema.
|
||||
* The SDK validates the output against the JSON Schema generated from these Zod definitions.
|
||||
* pi has no JSON-schema output format, so each vuln agent's structured queue is
|
||||
* captured via a `submit_exploitation_queue` custom tool whose parameters mirror
|
||||
* the per-class schema below. The captured payload is written to
|
||||
* `<class>_exploitation_queue.json` by the caller (agent-execution).
|
||||
*/
|
||||
|
||||
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolDefinition } from '@earendil-works/pi-coding-agent';
|
||||
import { type Static, type TObject, Type } from 'typebox';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
|
||||
// === Common Fields ===
|
||||
|
||||
const ANALYSIS_NOTES_DESCRIPTION = 'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.';
|
||||
|
||||
function notesField(exploit: boolean) {
|
||||
const f = z.string().optional();
|
||||
return exploit ? f : f.describe(ANALYSIS_NOTES_DESCRIPTION);
|
||||
}
|
||||
const optStr = (description?: string) => Type.Optional(Type.String(description ? { description } : {}));
|
||||
|
||||
function makeBase(exploit: boolean) {
|
||||
return z.object({
|
||||
ID: z.string(),
|
||||
vulnerability_type: z.string(),
|
||||
externally_exploitable: z.boolean(),
|
||||
confidence: z.string(),
|
||||
notes: notesField(exploit),
|
||||
});
|
||||
}
|
||||
|
||||
// === Per-Vuln-Type Schemas (used for type inference; notes description is mode-agnostic for types) ===
|
||||
|
||||
const baseVulnerability = makeBase(true);
|
||||
|
||||
const InjectionVulnerability = baseVulnerability.extend({
|
||||
source: z.string().optional(),
|
||||
combined_sources: z.string().optional(),
|
||||
path: z.string().optional(),
|
||||
sink_call: z.string().optional(),
|
||||
slot_type: z.string().optional(),
|
||||
sanitization_observed: z.string().optional(),
|
||||
concat_occurrences: z.string().optional(),
|
||||
verdict: z.string().optional(),
|
||||
mismatch_reason: z.string().optional(),
|
||||
witness_payload: z.string().optional(),
|
||||
});
|
||||
|
||||
const XssVulnerability = baseVulnerability.extend({
|
||||
source: z.string().optional(),
|
||||
source_detail: z.string().optional(),
|
||||
path: z.string().optional(),
|
||||
sink_function: z.string().optional(),
|
||||
render_context: z.string().optional(),
|
||||
encoding_observed: z.string().optional(),
|
||||
verdict: z.string().optional(),
|
||||
mismatch_reason: z.string().optional(),
|
||||
witness_payload: z.string().optional(),
|
||||
});
|
||||
|
||||
const AuthVulnerability = baseVulnerability.extend({
|
||||
source_endpoint: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
missing_defense: z.string().optional(),
|
||||
exploitation_hypothesis: z.string().optional(),
|
||||
suggested_exploit_technique: z.string().optional(),
|
||||
});
|
||||
|
||||
const SsrfVulnerability = baseVulnerability.extend({
|
||||
source_endpoint: z.string().optional(),
|
||||
vulnerable_parameter: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
missing_defense: z.string().optional(),
|
||||
exploitation_hypothesis: z.string().optional(),
|
||||
suggested_exploit_technique: z.string().optional(),
|
||||
});
|
||||
|
||||
const AuthzVulnerability = baseVulnerability.extend({
|
||||
endpoint: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
role_context: z.string().optional(),
|
||||
guard_evidence: z.string().optional(),
|
||||
side_effect: z.string().optional(),
|
||||
reason: z.string().optional(),
|
||||
minimal_witness: z.string().optional(),
|
||||
});
|
||||
|
||||
// === Inferred Entry Types (consumed by renderer) ===
|
||||
|
||||
export type InjectionFinding = z.infer<typeof InjectionVulnerability>;
|
||||
export type XssFinding = z.infer<typeof XssVulnerability>;
|
||||
export type AuthFinding = z.infer<typeof AuthVulnerability>;
|
||||
export type SsrfFinding = z.infer<typeof SsrfVulnerability>;
|
||||
export type AuthzFinding = z.infer<typeof AuthzVulnerability>;
|
||||
|
||||
// === Convert to JSON Schema for SDK ===
|
||||
|
||||
// NOTE: The SDK's AJV validator expects draft-07. Zod defaults to draft-2020-12 which
|
||||
// causes the SDK to silently skip structured output.
|
||||
function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat {
|
||||
return { type: 'json_schema', schema: z.toJSONSchema(zodSchema, { target: 'draft-07' }) as Record<string, unknown> };
|
||||
}
|
||||
|
||||
// === Per-Mode Output Format Builders ===
|
||||
// Two maps cached at module load; the only per-mode difference is the
|
||||
// description on the `notes` field, which steers the LLM's writing.
|
||||
|
||||
function buildOutputFormats(exploit: boolean): Partial<Record<AgentName, JsonSchemaOutputFormat>> {
|
||||
const base = makeBase(exploit);
|
||||
/** Base fields shared by every queue entry. `notes` gains guidance in analysis mode. */
|
||||
function baseFields(exploit: boolean) {
|
||||
return {
|
||||
'injection-vuln': toOutputFormat(
|
||||
z.object({
|
||||
vulnerabilities: z.array(
|
||||
base.extend({
|
||||
source: z.string().optional(),
|
||||
combined_sources: z.string().optional(),
|
||||
path: z.string().optional(),
|
||||
sink_call: z.string().optional(),
|
||||
slot_type: z.string().optional(),
|
||||
sanitization_observed: z.string().optional(),
|
||||
concat_occurrences: z.string().optional(),
|
||||
verdict: z.string().optional(),
|
||||
mismatch_reason: z.string().optional(),
|
||||
witness_payload: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
),
|
||||
'xss-vuln': toOutputFormat(
|
||||
z.object({
|
||||
vulnerabilities: z.array(
|
||||
base.extend({
|
||||
source: z.string().optional(),
|
||||
source_detail: z.string().optional(),
|
||||
path: z.string().optional(),
|
||||
sink_function: z.string().optional(),
|
||||
render_context: z.string().optional(),
|
||||
encoding_observed: z.string().optional(),
|
||||
verdict: z.string().optional(),
|
||||
mismatch_reason: z.string().optional(),
|
||||
witness_payload: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
),
|
||||
'auth-vuln': toOutputFormat(
|
||||
z.object({
|
||||
vulnerabilities: z.array(
|
||||
base.extend({
|
||||
source_endpoint: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
missing_defense: z.string().optional(),
|
||||
exploitation_hypothesis: z.string().optional(),
|
||||
suggested_exploit_technique: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
),
|
||||
'ssrf-vuln': toOutputFormat(
|
||||
z.object({
|
||||
vulnerabilities: z.array(
|
||||
base.extend({
|
||||
source_endpoint: z.string().optional(),
|
||||
vulnerable_parameter: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
missing_defense: z.string().optional(),
|
||||
exploitation_hypothesis: z.string().optional(),
|
||||
suggested_exploit_technique: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
),
|
||||
'authz-vuln': toOutputFormat(
|
||||
z.object({
|
||||
vulnerabilities: z.array(
|
||||
base.extend({
|
||||
endpoint: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
role_context: z.string().optional(),
|
||||
guard_evidence: z.string().optional(),
|
||||
side_effect: z.string().optional(),
|
||||
reason: z.string().optional(),
|
||||
minimal_witness: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
),
|
||||
ID: Type.String(),
|
||||
vulnerability_type: Type.String(),
|
||||
externally_exploitable: Type.Boolean(),
|
||||
confidence: Type.String(),
|
||||
notes: exploit ? optStr() : optStr(ANALYSIS_NOTES_DESCRIPTION),
|
||||
};
|
||||
}
|
||||
|
||||
const OUTPUT_FORMATS_EXPLOIT = buildOutputFormats(true);
|
||||
const OUTPUT_FORMATS_ANALYSIS = buildOutputFormats(false);
|
||||
const injectionFields = {
|
||||
source: optStr(),
|
||||
combined_sources: optStr(),
|
||||
path: optStr(),
|
||||
sink_call: optStr(),
|
||||
slot_type: optStr(),
|
||||
sanitization_observed: optStr(),
|
||||
concat_occurrences: optStr(),
|
||||
verdict: optStr(),
|
||||
mismatch_reason: optStr(),
|
||||
witness_payload: optStr(),
|
||||
};
|
||||
|
||||
const xssFields = {
|
||||
source: optStr(),
|
||||
source_detail: optStr(),
|
||||
path: optStr(),
|
||||
sink_function: optStr(),
|
||||
render_context: optStr(),
|
||||
encoding_observed: optStr(),
|
||||
verdict: optStr(),
|
||||
mismatch_reason: optStr(),
|
||||
witness_payload: optStr(),
|
||||
};
|
||||
|
||||
const authFields = {
|
||||
source_endpoint: optStr(),
|
||||
vulnerable_code_location: optStr(),
|
||||
missing_defense: optStr(),
|
||||
exploitation_hypothesis: optStr(),
|
||||
suggested_exploit_technique: optStr(),
|
||||
};
|
||||
|
||||
const ssrfFields = {
|
||||
source_endpoint: optStr(),
|
||||
vulnerable_parameter: optStr(),
|
||||
vulnerable_code_location: optStr(),
|
||||
missing_defense: optStr(),
|
||||
exploitation_hypothesis: optStr(),
|
||||
suggested_exploit_technique: optStr(),
|
||||
};
|
||||
|
||||
const authzFields = {
|
||||
endpoint: optStr(),
|
||||
vulnerable_code_location: optStr(),
|
||||
role_context: optStr(),
|
||||
guard_evidence: optStr(),
|
||||
side_effect: optStr(),
|
||||
reason: optStr(),
|
||||
minimal_witness: optStr(),
|
||||
};
|
||||
|
||||
const PER_TYPE_FIELDS: Partial<Record<AgentName, Record<string, ReturnType<typeof optStr>>>> = {
|
||||
'injection-vuln': injectionFields,
|
||||
'xss-vuln': xssFields,
|
||||
'auth-vuln': authFields,
|
||||
'ssrf-vuln': ssrfFields,
|
||||
'authz-vuln': authzFields,
|
||||
};
|
||||
|
||||
/** Build the `{ vulnerabilities: [...] }` queue schema for an agent + mode. */
|
||||
function queueSchema(agentName: AgentName, exploit: boolean): TObject | null {
|
||||
const extra = PER_TYPE_FIELDS[agentName];
|
||||
if (!extra) return null;
|
||||
return Type.Object({
|
||||
vulnerabilities: Type.Array(Type.Object({ ...baseFields(exploit), ...extra })),
|
||||
});
|
||||
}
|
||||
|
||||
// === Inferred entry types (consumed by renderers) ===
|
||||
export type InjectionFinding = Static<ReturnType<typeof injectionEntry>>;
|
||||
export type XssFinding = Static<ReturnType<typeof xssEntry>>;
|
||||
export type AuthFinding = Static<ReturnType<typeof authEntry>>;
|
||||
export type SsrfFinding = Static<ReturnType<typeof ssrfEntry>>;
|
||||
export type AuthzFinding = Static<ReturnType<typeof authzEntry>>;
|
||||
|
||||
const injectionEntry = () => Type.Object({ ...baseFields(true), ...injectionFields });
|
||||
const xssEntry = () => Type.Object({ ...baseFields(true), ...xssFields });
|
||||
const authEntry = () => Type.Object({ ...baseFields(true), ...authFields });
|
||||
const ssrfEntry = () => Type.Object({ ...baseFields(true), ...ssrfFields });
|
||||
const authzEntry = () => Type.Object({ ...baseFields(true), ...authzFields });
|
||||
|
||||
const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
|
||||
'injection-vuln': 'injection_exploitation_queue.json',
|
||||
@@ -204,12 +122,38 @@ const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
|
||||
'authz-vuln': 'authz_exploitation_queue.json',
|
||||
};
|
||||
|
||||
/** Returns the structured output format for a vuln agent, or undefined for non-vuln agents. */
|
||||
export function getOutputFormat(agentName: AgentName, exploit = true): JsonSchemaOutputFormat | undefined {
|
||||
return (exploit ? OUTPUT_FORMATS_EXPLOIT : OUTPUT_FORMATS_ANALYSIS)[agentName];
|
||||
}
|
||||
|
||||
/** Returns the queue filename for a vuln agent, or undefined for non-vuln agents. */
|
||||
export function getQueueFilename(agentName: AgentName): string | undefined {
|
||||
return VULN_AGENT_QUEUE_FILENAMES[agentName];
|
||||
}
|
||||
|
||||
export interface QueueSubmitTool {
|
||||
tool: ToolDefinition;
|
||||
getCaptured: () => unknown;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the `submit_exploitation_queue` tool for a vuln agent, or null for
|
||||
* non-vuln agents. The agent calls it once with the full findings list; the
|
||||
* captured payload is the structured queue.
|
||||
*/
|
||||
export function createQueueSubmitTool(agentName: AgentName, exploit: boolean): QueueSubmitTool | null {
|
||||
const schema = queueSchema(agentName, exploit);
|
||||
if (!schema) return null;
|
||||
let captured: unknown;
|
||||
const tool = defineTool({
|
||||
name: 'submit_exploitation_queue',
|
||||
label: 'Submit Exploitation Queue',
|
||||
description:
|
||||
'Submit the final structured list of analyzed vulnerabilities for this class. Call exactly once when ' +
|
||||
'analysis is complete, with every finding included.',
|
||||
promptSnippet: 'submit_exploitation_queue: record the final structured findings list (call once)',
|
||||
parameters: schema,
|
||||
execute: async (_toolCallId, params) => {
|
||||
captured = params;
|
||||
const count = (params as { vulnerabilities?: unknown[] }).vulnerabilities?.length ?? 0;
|
||||
return { content: [{ type: 'text' as const, text: `Recorded ${count} findings.` }], details: {} };
|
||||
},
|
||||
});
|
||||
return { tool, getCaptured: () => captured };
|
||||
}
|
||||
|
||||
@@ -5,37 +5,71 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Writes ~/.claude/settings.json with permissions.deny rules derived from
|
||||
* `code_path` avoid patterns. The SDK reads this via `settingSources: ['user']`;
|
||||
* deny rules fire even in `bypassPermissions` mode.
|
||||
* Writes the @gotgenes/pi-permission-system global config from `code_path` avoid
|
||||
* patterns. The executor loads the extension (see claude-executor) and pi enforces
|
||||
* these path denies at the tool layer for every agent. Written to the global config
|
||||
* dir under `agentDir` — the project-scoped path is gated behind project trust,
|
||||
* which our headless runs do not grant; the global path is not.
|
||||
*/
|
||||
|
||||
import os from 'node:os';
|
||||
import { getAgentDir } from '@earendil-works/pi-coding-agent';
|
||||
import { fs, path } from 'zx';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
|
||||
const FILE_TOOLS = ['Read', 'Edit'] as const;
|
||||
|
||||
function denyEntriesFor(pattern: string): string[] {
|
||||
const arg = `./${pattern.replace(/^[./]+/, '')}`;
|
||||
return FILE_TOOLS.map((tool) => `${tool}(${arg})`);
|
||||
/** Absolute path to the pi-permission-system global config.json. */
|
||||
export function permissionConfigPath(): string {
|
||||
return path.join(getAgentDir(), 'extensions', 'pi-permission-system', 'config.json');
|
||||
}
|
||||
|
||||
export async function writeUserSettingsForCodePathAvoids(config: DistributedConfig | null): Promise<void> {
|
||||
/**
|
||||
* Write (or remove) the pi-permission-system config derived from `code_path`
|
||||
* avoid patterns.
|
||||
*
|
||||
* Each avoid maps to a cross-cutting `path` deny — the strongest surface, blocking
|
||||
* the path across every tool and bash command, and not overridable by a per-tool
|
||||
* allow. `"*": "allow"` keeps everything else permitted so the extension does not
|
||||
* fall back to its default `ask` (which would block all access headlessly). When
|
||||
* there are no avoids the config is removed, so the executor skips loading the
|
||||
* extension entirely.
|
||||
*/
|
||||
export async function writeCodePathPermissionConfig(config: DistributedConfig | null): Promise<void> {
|
||||
const avoidPatterns = (config?.avoid ?? []).filter((r) => r.type === 'code_path').map((r) => r.value);
|
||||
const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');
|
||||
const configPath = permissionConfigPath();
|
||||
|
||||
if (avoidPatterns.length === 0) {
|
||||
await fs.remove(settingsPath);
|
||||
await fs.remove(configPath);
|
||||
return;
|
||||
}
|
||||
|
||||
const settings = {
|
||||
permissions: {
|
||||
deny: avoidPatterns.flatMap(denyEntriesFor),
|
||||
// pi's matcher (wildcard-matcher.ts) has NO `**` globstar — it splits on each `*`
|
||||
// and joins with `.*`, and a single `*` already matches any chars incl. `/`. Tool
|
||||
// paths are compared as absolute (path-utils resolves them against cwd), so we
|
||||
// collapse `**`→`*` and add a `*/`-prefixed variant that matches the path under
|
||||
// any repo prefix. (A bare pattern never matches an absolute path.)
|
||||
const pathDeny: Record<string, 'allow' | 'deny'> = { '*': 'allow' };
|
||||
for (const pattern of avoidPatterns) {
|
||||
const clean = pattern.replace(/^[./]+/, '').replace(/\*\*/g, '*');
|
||||
// Deny the contents (under any repo prefix and as written)...
|
||||
pathDeny[`*/${clean}`] = 'deny';
|
||||
pathDeny[clean] = 'deny';
|
||||
// ...and the folder path itself, so the directory entry is denied too — the
|
||||
// contents patterns (…/*) require a trailing segment and wouldn't match it.
|
||||
if (clean.endsWith('/*')) {
|
||||
const folder = clean.slice(0, -2);
|
||||
if (folder) {
|
||||
pathDeny[`*/${folder}`] = 'deny';
|
||||
pathDeny[folder] = 'deny';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const permissionConfig = {
|
||||
permission: {
|
||||
'*': 'allow',
|
||||
path: pathDeny,
|
||||
},
|
||||
};
|
||||
|
||||
await fs.ensureDir(path.dirname(settingsPath));
|
||||
await fs.writeJson(settingsPath, settings, { spaces: 2 });
|
||||
await fs.ensureDir(path.dirname(configPath));
|
||||
await fs.writeJson(configPath, permissionConfig, { spaces: 2 });
|
||||
}
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Universal custom tools registered for every agent: `task` and `todo_write`.
|
||||
*
|
||||
* These replace the Claude Agent SDK built-ins that pi does not ship. `task`
|
||||
* delegates a focused analysis to an in-process read-only child session (the
|
||||
* Task sub-agent replacement); `todo_write` is a full-state-replace planning
|
||||
* scratchpad mirrored to the workflow log.
|
||||
*/
|
||||
|
||||
import type { ThinkingLevel } from '@earendil-works/pi-agent-core';
|
||||
import type { Api, Model } from '@earendil-works/pi-ai';
|
||||
import {
|
||||
type AuthStorage,
|
||||
createAgentSession,
|
||||
defineTool,
|
||||
type ResourceLoader,
|
||||
SessionManager,
|
||||
SettingsManager,
|
||||
type ToolDefinition,
|
||||
} from '@earendil-works/pi-coding-agent';
|
||||
import { Type } from 'typebox';
|
||||
import type { AuditLogger } from './audit-logger.js';
|
||||
|
||||
/** Read-only tool surface for delegated child sessions. No bash/edit/write. */
|
||||
const CHILD_TOOLS = ['read', 'grep', 'find', 'ls'];
|
||||
|
||||
export interface TaskToolContext {
|
||||
model: Model<Api>;
|
||||
thinkingLevel: ThinkingLevel;
|
||||
authStorage: AuthStorage;
|
||||
cwd: string;
|
||||
/** When set, child sessions inherit the code_path deny policy. */
|
||||
resourceLoader?: ResourceLoader;
|
||||
}
|
||||
|
||||
/**
|
||||
* The `task` tool — delegate a focused, read-only analysis to a sub-agent.
|
||||
*
|
||||
* Spawns an in-process child `createAgentSession` scoped to read-only tools,
|
||||
* drives it to completion, and returns its final text. Marked `parallel` so the
|
||||
* model can fan out multiple delegations in one turn (the pre-recon/recon prompts
|
||||
* rely on this). Children get no `task` tool of their own — delegation is one level.
|
||||
*/
|
||||
export function createTaskTool(ctx: TaskToolContext): ToolDefinition {
|
||||
return defineTool({
|
||||
name: 'task',
|
||||
label: 'Task',
|
||||
description:
|
||||
'Delegate a focused source-code analysis question to a read-only sub-agent and return its findings. ' +
|
||||
'Use for deep code reading, tracing, and attack-surface mapping. Issue multiple task calls in one ' +
|
||||
'message to run analyses in parallel.',
|
||||
promptSnippet: 'task: delegate read-only code analysis to a parallel sub-agent',
|
||||
executionMode: 'parallel',
|
||||
parameters: Type.Object({
|
||||
description: Type.Optional(Type.String({ description: 'Short (3-5 word) label for the delegated analysis.' })),
|
||||
prompt: Type.String({ description: 'The full analysis instruction for the sub-agent.' }),
|
||||
}),
|
||||
execute: async (_toolCallId, params) => {
|
||||
const { session: child } = await createAgentSession({
|
||||
cwd: ctx.cwd,
|
||||
model: ctx.model,
|
||||
thinkingLevel: ctx.thinkingLevel,
|
||||
tools: CHILD_TOOLS,
|
||||
authStorage: ctx.authStorage,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
settingsManager: SettingsManager.inMemory({
|
||||
retry: { enabled: false },
|
||||
compaction: { enabled: true },
|
||||
}),
|
||||
...(ctx.resourceLoader && { resourceLoader: ctx.resourceLoader }),
|
||||
});
|
||||
try {
|
||||
await child.prompt(params.prompt);
|
||||
const text = child.getLastAssistantText() ?? '(sub-agent produced no output)';
|
||||
return { content: [{ type: 'text' as const, text }], details: {} };
|
||||
} finally {
|
||||
child.dispose();
|
||||
}
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
export interface TodoItem {
|
||||
content: string;
|
||||
status: 'pending' | 'in_progress' | 'completed';
|
||||
activeForm: string;
|
||||
}
|
||||
|
||||
/** Render a todo list as a compact checklist for the workflow log. */
|
||||
function renderTodos(todos: readonly TodoItem[]): string {
|
||||
const mark = (s: TodoItem['status']): string => (s === 'completed' ? 'x' : s === 'in_progress' ? '~' : ' ');
|
||||
return todos.map((t) => `[${mark(t.status)}] ${t.content}`).join(' ');
|
||||
}
|
||||
|
||||
/**
|
||||
* The `todo_write` tool — a full-state-replace planning scratchpad.
|
||||
*
|
||||
* Mirrors Claude Code's TodoWrite: each call carries the entire list and replaces
|
||||
* stored state (no append/merge). No deliverable impact; every call is echoed to
|
||||
* the workflow log so `shannon logs` shows the agent's live plan. State is per
|
||||
* tool instance (one per agent execution).
|
||||
*/
|
||||
export function createTodoWriteTool(auditLogger: AuditLogger): ToolDefinition {
|
||||
let current: TodoItem[] = [];
|
||||
return defineTool({
|
||||
name: 'todo_write',
|
||||
label: 'Todo Write',
|
||||
description:
|
||||
'Record or update the task plan. Pass the COMPLETE todo list every call (full replace, not append). ' +
|
||||
'Keep exactly one task in_progress; mark tasks completed as soon as they are done.',
|
||||
promptSnippet: 'todo_write: track a plan as a checklist (pass the full list each call)',
|
||||
parameters: Type.Object({
|
||||
todos: Type.Array(
|
||||
Type.Object({
|
||||
content: Type.String({ description: 'Imperative task description, e.g. "Map SSRF sinks".' }),
|
||||
status: Type.Union([Type.Literal('pending'), Type.Literal('in_progress'), Type.Literal('completed')]),
|
||||
activeForm: Type.String({ description: 'Present-continuous form, e.g. "Mapping SSRF sinks".' }),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
execute: async (_toolCallId, params) => {
|
||||
current = params.todos as TodoItem[];
|
||||
const completed = current.filter((t) => t.status === 'completed').length;
|
||||
await auditLogger.logNote('todo', renderTodos(current));
|
||||
return {
|
||||
content: [{ type: 'text' as const, text: `Todos updated (${current.length} items, ${completed} completed).` }],
|
||||
details: {},
|
||||
};
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -4,9 +4,7 @@
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
// Type definitions for Claude executor message processing pipeline
|
||||
|
||||
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
|
||||
// Shared display/formatting types for the agent executor output layer.
|
||||
|
||||
export interface ExecutionContext {
|
||||
isParallelExecution: boolean;
|
||||
@@ -15,18 +13,6 @@ export interface ExecutionContext {
|
||||
agentKey: string;
|
||||
}
|
||||
|
||||
export interface AssistantResult {
|
||||
content: string;
|
||||
cleanedContent: string;
|
||||
apiErrorDetected: boolean;
|
||||
shouldThrow?: Error;
|
||||
logData: {
|
||||
turn: number;
|
||||
content: string;
|
||||
timestamp: string;
|
||||
};
|
||||
}
|
||||
|
||||
export interface ResultData {
|
||||
result: string | null;
|
||||
cost: number;
|
||||
@@ -36,77 +22,3 @@ export interface ResultData {
|
||||
permissionDenials: number;
|
||||
structuredOutput?: unknown;
|
||||
}
|
||||
|
||||
export interface ToolUseData {
|
||||
toolName: string;
|
||||
parameters: Record<string, unknown>;
|
||||
timestamp: string;
|
||||
}
|
||||
|
||||
export interface ToolResultData {
|
||||
content: unknown;
|
||||
displayContent: string;
|
||||
timestamp: string;
|
||||
}
|
||||
|
||||
export interface ContentBlock {
|
||||
type?: string;
|
||||
text?: string;
|
||||
thinking?: string;
|
||||
data?: string;
|
||||
}
|
||||
|
||||
export interface AssistantMessage {
|
||||
type: 'assistant';
|
||||
error?: SDKAssistantMessageError;
|
||||
message: {
|
||||
content: ContentBlock[] | string;
|
||||
};
|
||||
}
|
||||
|
||||
export interface ResultMessage {
|
||||
type: 'result';
|
||||
result?: string;
|
||||
total_cost_usd?: number;
|
||||
duration_ms?: number;
|
||||
subtype?: string;
|
||||
stop_reason?: string | null;
|
||||
permission_denials?: unknown[];
|
||||
structured_output?: unknown;
|
||||
}
|
||||
|
||||
export interface ToolUseMessage {
|
||||
type: 'tool_use';
|
||||
name: string;
|
||||
input?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface ToolResultMessage {
|
||||
type: 'tool_result';
|
||||
content?: unknown;
|
||||
}
|
||||
|
||||
export interface ApiErrorDetection {
|
||||
detected: boolean;
|
||||
shouldThrow?: Error;
|
||||
}
|
||||
|
||||
export interface SystemInitMessage {
|
||||
type: 'system';
|
||||
subtype: 'init';
|
||||
model?: string;
|
||||
permissionMode?: string;
|
||||
}
|
||||
|
||||
/** Emitted when a model refuses a request and the SDK falls back to another model (e.g. Fable 5 routing cybersecurity tasks to Opus 4.8). */
|
||||
export interface ModelRefusalFallbackMessage {
|
||||
type: 'system';
|
||||
subtype: 'model_refusal_fallback';
|
||||
original_model: string;
|
||||
fallback_model: string;
|
||||
api_refusal_category?: string | null;
|
||||
}
|
||||
|
||||
export interface UserMessage {
|
||||
type: 'user';
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
*/
|
||||
|
||||
import fs from 'node:fs/promises';
|
||||
import { isFableModel, resolveModel } from '../ai/models.js';
|
||||
import { isFableModel, resolveModelId } from '../ai/models.js';
|
||||
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
|
||||
import { LogStream } from './log-stream.js';
|
||||
import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
|
||||
@@ -90,7 +90,7 @@ export class WorkflowLogger {
|
||||
// Surface Fable usage: its safety classifiers route cybersecurity tasks to
|
||||
// Opus 4.8, so those phases run on Opus 4.8 regardless of the tier setting.
|
||||
const fableTiers = (['small', 'medium', 'large'] as const)
|
||||
.map((tier) => ({ tier, model: resolveModel(tier) }))
|
||||
.map((tier) => ({ tier, model: resolveModelId(tier) }))
|
||||
.filter(({ model }) => isFableModel(model));
|
||||
if (fableTiers.length > 0) {
|
||||
const tierList = fableTiers.map(({ tier, model }) => `${tier} (${model})`).join(', ');
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Exploit Collector MCP Server (factory parameterized by vulnerability class
|
||||
* and per-run valid-ID set).
|
||||
* Exploit Collector tool factory (parameterized by vulnerability class and
|
||||
* per-run valid-ID set).
|
||||
*
|
||||
* Exposes a single Zod-validated MCP tool `add_exploit`, called once per
|
||||
* Exposes a single TypeBox-validated tool `add_exploit`, called once per
|
||||
* processed vulnerability by the 5 exploit-* agents (injection, xss, auth,
|
||||
* ssrf, authz). After the agent terminates, the host harvests
|
||||
* collector.getAll() and runs exploit-renderer to produce
|
||||
@@ -16,29 +16,28 @@
|
||||
* output.
|
||||
*
|
||||
* Schema shape:
|
||||
* - The SDK tool() helper consumes a ZodRawShape (flat object), not a
|
||||
* top-level discriminated union. The visible shape is therefore a single
|
||||
* z.object with common fields required, status as a string enum, and
|
||||
* per-status fields marked optional at the SDK layer. Each field's
|
||||
* `.describe()` text explains when it applies.
|
||||
* - The visible parameter schema is a single Type.Object with common fields
|
||||
* required, status as a string union, and per-status fields marked optional
|
||||
* at the tool layer (TypeBox cannot express a top-level discriminated union
|
||||
* as the flat tool parameters). Each field's `description` text explains
|
||||
* when it applies.
|
||||
* - True per-status field enforcement runs inside the tool handler via a
|
||||
* z.discriminatedUnion('status', ...). Missing-field errors come back to
|
||||
* the agent as structured Zod issues with retryable=true so it can fix
|
||||
* and retry the call.
|
||||
* Type.Union([exploited, blocked]) re-validation using the TypeBox `Value`
|
||||
* API. Missing-field errors come back to the agent as structured issues
|
||||
* with retryable=true so it can fix and retry the call.
|
||||
*
|
||||
* Strict queue-ID validation: vulnerability_id is refined against the per-run
|
||||
* queue's known IDs at schema-build time. Hallucinated or typo'd IDs are
|
||||
* rejected with a structured Zod error that includes the valid-ID list,
|
||||
* letting the agent recover locally.
|
||||
* Strict queue-ID validation: vulnerability_id is checked against the per-run
|
||||
* queue's known IDs in the handler. Hallucinated or typo'd IDs are rejected
|
||||
* with a structured error that includes the valid-ID list, letting the agent
|
||||
* recover locally.
|
||||
*
|
||||
* Each Zod schema's field-level descriptions carry the bullet labels and
|
||||
* reproducibility guidance, so the SDK injects it into the agent's tool
|
||||
* catalog.
|
||||
* Each field's description carries the bullet labels and reproducibility
|
||||
* guidance, so the harness injects it into the agent's tool catalog.
|
||||
*/
|
||||
|
||||
import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolDefinition } from '@earendil-works/pi-coding-agent';
|
||||
import { type Static, Type } from 'typebox';
|
||||
import { Value } from 'typebox/value';
|
||||
|
||||
// ============================================================================
|
||||
// CLASS DISCRIMINATOR
|
||||
@@ -103,214 +102,181 @@ export type AddExploitInput = ExploitedExploit | BlockedExploit;
|
||||
// ============================================================================
|
||||
|
||||
function buildSchemas(validIds: ReadonlySet<string>) {
|
||||
const vulnerabilityIdField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const vulnerabilityIdField = Type.String({
|
||||
minLength: 1,
|
||||
description:
|
||||
'Vulnerability identifier (e.g. "INJ-VULN-03"). Must match an ID from this run\'s ' +
|
||||
'{class}_exploitation_queue.json exactly — the collector rejects IDs not in the queue. ' +
|
||||
`Valid IDs for this run: ${formatValidIdsPreview(validIds)}.`,
|
||||
)
|
||||
.refine((id: string) => validIds.has(id), {
|
||||
message:
|
||||
`Vulnerability ID not in this run's queue. Valid IDs: ` +
|
||||
`${formatValidIdsPreview(validIds)}. ` +
|
||||
'Check the queue.json for the canonical ID — likely a typo or hallucinated ID.',
|
||||
});
|
||||
'{class}_exploitation_queue.json exactly — the collector rejects IDs not in the queue. ' +
|
||||
`Valid IDs for this run: ${formatValidIdsPreview(validIds)}.`,
|
||||
});
|
||||
|
||||
const titleField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const titleField = Type.String({
|
||||
minLength: 1,
|
||||
description:
|
||||
'Descriptive vulnerability title (e.g. "SQL Injection — User Search", "IDOR — Unauthorized ' +
|
||||
'Access to User Orders"). Concise; encodes the vulnerability category and where it lives.',
|
||||
);
|
||||
'Access to User Orders"). Concise; encodes the vulnerability category and where it lives.',
|
||||
});
|
||||
|
||||
const vulnerableLocationField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const vulnerableLocationField = Type.String({
|
||||
minLength: 1,
|
||||
description:
|
||||
'Endpoint or mechanism where the vulnerability exists (e.g. "GET /api/products?id=", ' +
|
||||
'"POST /login", or a code location like "controllers/userController.js:42").',
|
||||
);
|
||||
'"POST /login", or a code location like "controllers/userController.js:42").',
|
||||
});
|
||||
|
||||
const overviewField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const overviewField = Type.String({
|
||||
minLength: 1,
|
||||
description:
|
||||
'Brief summary of the exploit itself — what the vulnerability is and how it was demonstrated ' +
|
||||
'(or how it would be demonstrated, for blocked findings). 1-3 sentences.',
|
||||
);
|
||||
'(or how it would be demonstrated, for blocked findings). 1-3 sentences.',
|
||||
});
|
||||
|
||||
const prerequisitesField = z
|
||||
.string()
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'Required setup, tools, or conditions to reproduce the exploit (e.g. authentication, ' +
|
||||
const prerequisitesField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'Required setup, tools, or conditions to reproduce the exploit (e.g. authentication, ' +
|
||||
'specific role, prior application state). Omit or pass null when no prerequisites apply.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const notesField = z
|
||||
.string()
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'Optional supplementary context — caveats, related findings, environmental observations. ' +
|
||||
const notesField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'Optional supplementary context — caveats, related findings, environmental observations. ' +
|
||||
'Free-form Markdown. Omit or pass null when N/A.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const statusField = z
|
||||
.enum(['exploited', 'blocked'])
|
||||
.describe(
|
||||
const statusField = Type.Union([Type.Literal('exploited'), Type.Literal('blocked')], {
|
||||
description:
|
||||
'Verdict bucket. Set to "exploited" only after reaching Proof of Exploitation Level 3+ with ' +
|
||||
'concrete impact evidence (extracted data, executed JavaScript, account takeover, internal ' +
|
||||
'service access). Set to "blocked" only for real vulnerabilities where external factors ' +
|
||||
'(NOT security defenses) prevented full exploitation. Findings where a security defense ' +
|
||||
'successfully prevented exploitation after exhaustive bypass attempts are FALSE POSITIVE — ' +
|
||||
'route those to your workspace tracking file, not this tool.',
|
||||
);
|
||||
'concrete impact evidence (extracted data, executed JavaScript, account takeover, internal ' +
|
||||
'service access). Set to "blocked" only for real vulnerabilities where external factors ' +
|
||||
'(NOT security defenses) prevented full exploitation. Findings where a security defense ' +
|
||||
'successfully prevented exploitation after exhaustive bypass attempts are FALSE POSITIVE — ' +
|
||||
'route those to your workspace tracking file, not this tool.',
|
||||
});
|
||||
|
||||
// Per-status fields. All optional at the SDK shape layer because a single
|
||||
// ZodRawShape cannot express a top-level discriminated union; the handler
|
||||
// Per-status fields. All optional at the flat parameter layer because a single
|
||||
// Type.Object cannot express a top-level discriminated union; the handler
|
||||
// re-validates against the discriminated union below for true enforcement.
|
||||
const severityField = z
|
||||
.enum(SEVERITY_VALUES)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="exploited". Severity of the demonstrated impact. Critical = Level 4 ' +
|
||||
const severityField = Type.Optional(
|
||||
Type.Union([...SEVERITY_VALUES.map((v) => Type.Literal(v)), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="exploited". Severity of the demonstrated impact. Critical = Level 4 ' +
|
||||
'(admin credentials extracted, sensitive data dumped, system commands executed, full account ' +
|
||||
'takeover). High = Level 3 (data extraction proven, authentication bypass confirmed, ' +
|
||||
'internal service access). Medium/Low based on impact narrowness or read-only access. Must ' +
|
||||
'reflect demonstrated impact, not theoretical potential.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const impactField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="exploited". Business/security impact achieved by the exploit ' +
|
||||
const impactField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="exploited". Business/security impact achieved by the exploit ' +
|
||||
'(e.g. "Extracted full user table including bcrypt password hashes for 1,247 users", ' +
|
||||
'"Achieved RCE as the application user; arbitrary shell commands executed"). Must describe ' +
|
||||
'what was actually demonstrated, not what could theoretically happen.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const exploitationStepsField = z
|
||||
.array(z.string().min(1))
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="exploited". Ordered, reproducible exploitation steps — one Markdown ' +
|
||||
const exploitationStepsField = Type.Optional(
|
||||
Type.Union([Type.Array(Type.String()), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="exploited". Ordered, reproducible exploitation steps — one Markdown ' +
|
||||
'blob per numbered step. Each step must include full URLs (protocol + domain + port + path ' +
|
||||
'+ params), complete payloads, and copy-paste-ready commands. Use clear placeholders for ' +
|
||||
'variable values like [SESSION_TOKEN], [DATABASE_NAME], [TABLE_NAME], [TARGET_USER_ID]. ' +
|
||||
'Write each step as natural Markdown — interleave prose with fenced code blocks (```bash, ' +
|
||||
'```http, etc.) as you would in a write-up. Steps must be detailed enough that someone ' +
|
||||
'unfamiliar with the application can follow without additional research.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const proofOfImpactField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="exploited". Concrete evidence of successful exploitation — extracted ' +
|
||||
const proofOfImpactField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="exploited". Concrete evidence of successful exploitation — extracted ' +
|
||||
'data, achieved actions, captured request/response pairs, log excerpts. Markdown blob; ' +
|
||||
'interleave prose with fenced code blocks. Must show what the exploit demonstrably achieved, ' +
|
||||
'not theoretical impact.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const confidenceField = z
|
||||
.enum(CONFIDENCE_VALUES)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". Confidence that this finding is a real vulnerability that ' +
|
||||
const confidenceField = Type.Optional(
|
||||
Type.Union([...CONFIDENCE_VALUES.map((v) => Type.Literal(v)), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". Confidence that this finding is a real vulnerability that ' +
|
||||
'would be exploited if the external blocker were removed. High = code analysis strongly ' +
|
||||
'confirms vulnerability and partial exploitation (Level 1-2) succeeded. Medium = code ' +
|
||||
'analysis confirms but live evidence is partial. Low = signal-only; revisit if blocker is ' +
|
||||
'removed in a future run.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const currentBlockerField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". What prevents full exploitation (e.g. "Server crashes after ' +
|
||||
const currentBlockerField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". What prevents full exploitation (e.g. "Server crashes after ' +
|
||||
'5 requests, blocking enumeration", "OAuth callback requires verified third-party email ' +
|
||||
'account we could not provision"). Must be an external operational constraint, not a ' +
|
||||
'security defense.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const potentialImpactField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". What could be achieved if the blocker were removed (e.g. ' +
|
||||
const potentialImpactField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". What could be achieved if the blocker were removed (e.g. ' +
|
||||
'"Full database read access", "Account takeover of arbitrary user via reset-token leak"). ' +
|
||||
'Distinct from impact — this is the hypothetical outcome, not a demonstrated one.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const evidenceOfVulnerabilityField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". Code snippets, response excerpts, or observed behavior ' +
|
||||
const evidenceOfVulnerabilityField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". Code snippets, response excerpts, or observed behavior ' +
|
||||
'proving the vulnerability is real. Markdown blob; interleave prose with fenced code blocks. ' +
|
||||
'This is what convinces the reader the finding is not a false positive despite incomplete ' +
|
||||
'exploitation.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const whatWeTriedField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". Log of attempted exploitation techniques and why each was ' +
|
||||
const whatWeTriedField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". Log of attempted exploitation techniques and why each was ' +
|
||||
'blocked. Each attempt should document the payload, the observed result, and the inferred ' +
|
||||
'blocker. Markdown blob; multiple attempts as a list or distinct paragraphs. Demonstrates ' +
|
||||
'exhaustive bypass effort per the Bypass Exhaustion Protocol.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const howThisWouldBeExploitedField = z
|
||||
.array(z.string().min(1))
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". Ordered hypothetical exploitation steps assuming the blocker ' +
|
||||
const howThisWouldBeExploitedField = Type.Optional(
|
||||
Type.Union([Type.Array(Type.String()), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". Ordered hypothetical exploitation steps assuming the blocker ' +
|
||||
'is removed — one Markdown blob per numbered step. Same reproducibility requirements as ' +
|
||||
'exploitation_steps: full URLs, complete payloads, copy-paste-ready commands. Frame the ' +
|
||||
'first step as "If [blocker] were removed: …".',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
const expectedImpactField = z
|
||||
.string()
|
||||
.min(1)
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'REQUIRED when status="blocked". Specific data or access that would be compromised if ' +
|
||||
const expectedImpactField = Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'REQUIRED when status="blocked". Specific data or access that would be compromised if ' +
|
||||
'exploitation succeeded (e.g. "Read access to all user profile data including PII; write ' +
|
||||
'access to user-owned resources"). Markdown blob.',
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
// The flat shape passed to tool(). The SDK uses this to build the agent's
|
||||
// tool catalog. Per-status enforcement happens in the handler via the
|
||||
// discriminated union below.
|
||||
const flatShape = {
|
||||
// The flat parameter schema passed to defineTool(). The harness uses this to
|
||||
// build the agent's tool catalog. Per-status enforcement happens in the
|
||||
// handler via the discriminated union below.
|
||||
const flatShape = Type.Object({
|
||||
status: statusField,
|
||||
vulnerability_id: vulnerabilityIdField,
|
||||
title: titleField,
|
||||
@@ -329,59 +295,64 @@ function buildSchemas(validIds: ReadonlySet<string>) {
|
||||
what_we_tried: whatWeTriedField,
|
||||
how_this_would_be_exploited: howThisWouldBeExploitedField,
|
||||
expected_impact: expectedImpactField,
|
||||
};
|
||||
});
|
||||
|
||||
// Strict per-status validation. Re-runs in the handler so missing fields
|
||||
// for the chosen status return a retryable Zod error to the agent.
|
||||
const ExploitedSchema = z.object({
|
||||
status: z.literal('exploited'),
|
||||
// for the chosen status return a retryable error to the agent.
|
||||
const ExploitedSchema = Type.Object({
|
||||
status: Type.Literal('exploited'),
|
||||
vulnerability_id: vulnerabilityIdField,
|
||||
title: titleField,
|
||||
vulnerable_location: vulnerableLocationField,
|
||||
overview: overviewField,
|
||||
prerequisites: prerequisitesField,
|
||||
severity: z.enum(SEVERITY_VALUES),
|
||||
impact: z.string().min(1),
|
||||
exploitation_steps: z.array(z.string().min(1)).min(1),
|
||||
proof_of_impact: z.string().min(1),
|
||||
severity: Type.Union(SEVERITY_VALUES.map((v) => Type.Literal(v))),
|
||||
impact: Type.String({ minLength: 1 }),
|
||||
exploitation_steps: Type.Array(Type.String({ minLength: 1 }), { minItems: 1 }),
|
||||
proof_of_impact: Type.String({ minLength: 1 }),
|
||||
notes: notesField,
|
||||
});
|
||||
|
||||
const BlockedSchema = z.object({
|
||||
status: z.literal('blocked'),
|
||||
const BlockedSchema = Type.Object({
|
||||
status: Type.Literal('blocked'),
|
||||
vulnerability_id: vulnerabilityIdField,
|
||||
title: titleField,
|
||||
vulnerable_location: vulnerableLocationField,
|
||||
prerequisites: prerequisitesField,
|
||||
confidence: z.enum(CONFIDENCE_VALUES),
|
||||
current_blocker: z.string().min(1),
|
||||
potential_impact: z.string().min(1),
|
||||
evidence_of_vulnerability: z.string().min(1),
|
||||
what_we_tried: z.string().min(1),
|
||||
how_this_would_be_exploited: z.array(z.string().min(1)).min(1),
|
||||
expected_impact: z.string().min(1),
|
||||
confidence: Type.Union(CONFIDENCE_VALUES.map((v) => Type.Literal(v))),
|
||||
current_blocker: Type.String({ minLength: 1 }),
|
||||
potential_impact: Type.String({ minLength: 1 }),
|
||||
evidence_of_vulnerability: Type.String({ minLength: 1 }),
|
||||
what_we_tried: Type.String({ minLength: 1 }),
|
||||
how_this_would_be_exploited: Type.Array(Type.String({ minLength: 1 }), { minItems: 1 }),
|
||||
expected_impact: Type.String({ minLength: 1 }),
|
||||
notes: notesField,
|
||||
});
|
||||
|
||||
const StrictSchema = z.discriminatedUnion('status', [ExploitedSchema, BlockedSchema]);
|
||||
const StrictSchema = Type.Union([ExploitedSchema, BlockedSchema]);
|
||||
|
||||
return { flatShape, StrictSchema };
|
||||
}
|
||||
|
||||
type FlatInput = Static<ReturnType<typeof buildSchemas>['flatShape']>;
|
||||
type StrictInput = Static<ReturnType<typeof buildSchemas>['StrictSchema']>;
|
||||
|
||||
// ============================================================================
|
||||
// RESPONSE HELPERS
|
||||
// ============================================================================
|
||||
|
||||
interface ToolResult {
|
||||
[x: string]: unknown;
|
||||
content: Array<{ type: 'text'; text: string }>;
|
||||
isError: boolean;
|
||||
details: Record<string, unknown>;
|
||||
isError?: boolean;
|
||||
}
|
||||
|
||||
function createToolResult(response: { status: string; [key: string]: unknown }): ToolResult {
|
||||
const isError = response.status === 'error';
|
||||
return {
|
||||
content: [{ type: 'text', text: JSON.stringify(response, null, 2) }],
|
||||
isError: response.status === 'error',
|
||||
content: [{ type: 'text' as const, text: JSON.stringify(response, null, 2) }],
|
||||
details: {},
|
||||
...(isError && { isError: true }),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -393,21 +364,21 @@ function errorResult(message: string, errorType = 'ValidationError', retryable =
|
||||
return createToolResult({ status: 'error', message, errorType, retryable });
|
||||
}
|
||||
|
||||
function formatZodIssues(error: z.ZodError): string {
|
||||
return error.issues
|
||||
function formatValueErrors(schema: ReturnType<typeof buildSchemas>['StrictSchema'], value: unknown): string {
|
||||
return [...Value.Errors(schema, value)]
|
||||
.map((issue) => {
|
||||
const path = issue.path.length > 0 ? issue.path.join('.') : '(root)';
|
||||
const path = issue.instancePath.length > 0 ? issue.instancePath.replace(/^\//, '').replace(/\//g, '.') : '(root)';
|
||||
return `- ${path}: ${issue.message}`;
|
||||
})
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SERVER FACTORY
|
||||
// TOOL FACTORY
|
||||
// ============================================================================
|
||||
|
||||
export interface ExploitCollectorServer {
|
||||
server: McpSdkServerConfigWithInstance;
|
||||
tools: ToolDefinition[];
|
||||
getAll(): AddExploitInput[];
|
||||
}
|
||||
|
||||
@@ -421,9 +392,11 @@ export function createExploitCollector(options: CreateExploitCollectorOptions):
|
||||
const exploits: AddExploitInput[] = [];
|
||||
const { flatShape, StrictSchema } = buildSchemas(validIds);
|
||||
|
||||
const addExploitTool = tool(
|
||||
'add_exploit',
|
||||
`Record a single processed ${vulnClass} vulnerability as structured exploitation evidence. ` +
|
||||
const addExploitTool = defineTool({
|
||||
name: 'add_exploit',
|
||||
label: 'Add Exploit',
|
||||
description:
|
||||
`Record a single processed ${vulnClass} vulnerability as structured exploitation evidence. ` +
|
||||
'Call this once per vulnerability in your queue.json after reaching a definitive verdict ' +
|
||||
'(either successfully exploited or potential-but-blocked). The status field discriminates the ' +
|
||||
"two report buckets; required sub-fields differ per status (see each field's description for " +
|
||||
@@ -432,20 +405,34 @@ export function createExploitCollector(options: CreateExploitCollectorOptions):
|
||||
'IDs. FALSE POSITIVE findings do NOT use this tool — they go to your workspace tracking file. ' +
|
||||
'After all queue vulnerabilities have been emitted, the host renderer assembles the ' +
|
||||
'deliverable Markdown from your recorded calls.',
|
||||
flatShape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
// Re-validate against the strict discriminated union for per-status enforcement.
|
||||
const parsed = StrictSchema.safeParse(input);
|
||||
if (!parsed.success) {
|
||||
parameters: flatShape,
|
||||
execute: async (_toolCallId, args): Promise<ToolResult> => {
|
||||
const input = args as FlatInput;
|
||||
|
||||
// Strict queue-ID validation: reject hallucinated or typo'd IDs with the valid-ID list.
|
||||
if (!validIds.has(input.vulnerability_id)) {
|
||||
return errorResult(
|
||||
`Schema validation failed for status="${(input as { status?: string }).status}". ` +
|
||||
'Required-field issues:\n' +
|
||||
formatZodIssues(parsed.error),
|
||||
`Vulnerability ID not in this run's queue. Valid IDs: ` +
|
||||
`${formatValidIdsPreview(validIds)}. ` +
|
||||
'Check the queue.json for the canonical ID — likely a typo or hallucinated ID.',
|
||||
'ValidationError',
|
||||
true,
|
||||
);
|
||||
}
|
||||
const typed = parsed.data as AddExploitInput;
|
||||
|
||||
// Re-validate against the strict discriminated union for per-status enforcement.
|
||||
if (!Value.Check(StrictSchema, input)) {
|
||||
return errorResult(
|
||||
`Schema validation failed for status="${(input as { status?: string }).status}". ` +
|
||||
'Required-field issues:\n' +
|
||||
formatValueErrors(StrictSchema, input),
|
||||
'ValidationError',
|
||||
true,
|
||||
);
|
||||
}
|
||||
// Strip excess properties from the flat input so only the chosen status's
|
||||
// fields survive (mirrors the prior discriminated-union parse).
|
||||
const typed = Value.Clean(StrictSchema, structuredClone(input)) as StrictInput as AddExploitInput;
|
||||
const existing = exploits.find((e) => e.vulnerability_id === typed.vulnerability_id);
|
||||
if (existing) {
|
||||
return errorResult(
|
||||
@@ -458,16 +445,10 @@ export function createExploitCollector(options: CreateExploitCollectorOptions):
|
||||
exploits.push(typed);
|
||||
return successResult({ added: [typed.vulnerability_id], recorded_status: typed.status });
|
||||
},
|
||||
);
|
||||
|
||||
const server: McpSdkServerConfigWithInstance = createSdkMcpServer({
|
||||
name: 'exploit-collector',
|
||||
version: '1.0.0',
|
||||
tools: [addExploitTool],
|
||||
});
|
||||
|
||||
return {
|
||||
server,
|
||||
tools: [addExploitTool] as ToolDefinition[],
|
||||
getAll: (): AddExploitInput[] => [...exploits],
|
||||
};
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Pre-Recon Collector MCP Server
|
||||
* Pre-Recon Collector tools
|
||||
*
|
||||
* Exposes seven Zod-validated MCP tools, one per section of the
|
||||
* Exposes seven TypeBox-validated tools, one per section of the
|
||||
* pre_recon_deliverable.md report. Every tool is one-shot (write-once;
|
||||
* duplicate calls return DuplicateError). A skipped tool renders a placeholder
|
||||
* rather than failing the activity. After the agent finishes, the host calls
|
||||
@@ -15,386 +15,331 @@
|
||||
* per-run call pattern, and runs the deterministic renderer to produce the
|
||||
* deliverable Markdown.
|
||||
*
|
||||
* Each Zod schema's field-level descriptions carry the section guidance, so
|
||||
* the SDK injects it into the agent's tool catalog.
|
||||
* Each TypeBox schema's field-level descriptions carry the section guidance, so
|
||||
* the harness injects it into the agent's tool catalog.
|
||||
*/
|
||||
|
||||
import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolDefinition } from '@earendil-works/pi-coding-agent';
|
||||
import { type Static, Type } from 'typebox';
|
||||
|
||||
// ============================================================================
|
||||
// SHARED SCHEMA
|
||||
// ============================================================================
|
||||
|
||||
export const SinkRefSchema = z.object({
|
||||
location: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const SinkRefSchema = Type.Object({
|
||||
location: Type.String({
|
||||
description:
|
||||
'File path with line number (e.g., "templates/render.js:34") or richer prose ' +
|
||||
'(e.g., "innerHTML at templates/render.js:34", "lines 45-67"). Must contain enough ' +
|
||||
'detail for a downstream agent to find the exact location.',
|
||||
),
|
||||
sink_function: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('The sink function or property name (e.g., "innerHTML", "axios.get", "eval", "document.write").'),
|
||||
notes: z
|
||||
.string()
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'Optional context — render-context detail, attribute name, scope hints, or anything ' +
|
||||
'(e.g., "innerHTML at templates/render.js:34", "lines 45-67"). Must contain enough ' +
|
||||
'detail for a downstream agent to find the exact location.',
|
||||
}),
|
||||
sink_function: Type.String({
|
||||
description: 'The sink function or property name (e.g., "innerHTML", "axios.get", "eval", "document.write").',
|
||||
}),
|
||||
notes: Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'Optional context — render-context detail, attribute name, scope hints, or anything ' +
|
||||
'a downstream agent needs to act on this sink. Omit when the location and sink_function ' +
|
||||
'are sufficient on their own.',
|
||||
),
|
||||
}),
|
||||
),
|
||||
});
|
||||
|
||||
export type SinkRef = z.infer<typeof SinkRefSchema>;
|
||||
export type SinkRef = Static<typeof SinkRefSchema>;
|
||||
|
||||
// ============================================================================
|
||||
// PER-TOOL INPUT SCHEMAS
|
||||
// ============================================================================
|
||||
|
||||
export const ExecutiveSummaryInputSchema = z.object({
|
||||
text: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const ExecutiveSummaryInputSchema = Type.Object({
|
||||
text: Type.String({
|
||||
description:
|
||||
"Provide a 2-3 paragraph overview of the application's security posture, highlighting " +
|
||||
'the most critical attack surfaces and architectural security decisions. Becomes ' +
|
||||
'Section 1 of the rendered deliverable.',
|
||||
),
|
||||
'the most critical attack surfaces and architectural security decisions. Becomes ' +
|
||||
'Section 1 of the rendered deliverable.',
|
||||
}),
|
||||
});
|
||||
|
||||
const ArchitectureSchema = z.object({
|
||||
framework_and_language: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Framework and language details with their security implications.'),
|
||||
architectural_pattern: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Architectural pattern (monolith, microservices, hybrid) with trust boundary analysis.'),
|
||||
critical_security_components: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Critical security components with focus on auth, authz, and data protection.'),
|
||||
const ArchitectureSchema = Type.Object({
|
||||
framework_and_language: Type.String({
|
||||
description: 'Framework and language details with their security implications.',
|
||||
}),
|
||||
architectural_pattern: Type.String({
|
||||
description: 'Architectural pattern (monolith, microservices, hybrid) with trust boundary analysis.',
|
||||
}),
|
||||
critical_security_components: Type.String({
|
||||
description: 'Critical security components with focus on auth, authz, and data protection.',
|
||||
}),
|
||||
});
|
||||
|
||||
const DataSecuritySchema = z.object({
|
||||
database_security: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Analyze encryption, access controls, and query safety in database interactions.'),
|
||||
data_flow_security: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Identify sensitive data paths and the protection mechanisms applied along them.'),
|
||||
multi_tenant_isolation: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const DataSecuritySchema = Type.Object({
|
||||
database_security: Type.String({
|
||||
description: 'Analyze encryption, access controls, and query safety in database interactions.',
|
||||
}),
|
||||
data_flow_security: Type.String({
|
||||
description: 'Identify sensitive data paths and the protection mechanisms applied along them.',
|
||||
}),
|
||||
multi_tenant_isolation: Type.String({
|
||||
description:
|
||||
'Assess tenant separation effectiveness. If the application is single-tenant, state that ' +
|
||||
'explicitly rather than leaving the field thin.',
|
||||
),
|
||||
'explicitly rather than leaving the field thin.',
|
||||
}),
|
||||
});
|
||||
|
||||
const AttackSurfaceSchema = z.object({
|
||||
external_entry_points: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Detailed analysis of each public interface that is network-accessible.'),
|
||||
internal_service_communication: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const AttackSurfaceSchema = Type.Object({
|
||||
external_entry_points: Type.String({
|
||||
description: 'Detailed analysis of each public interface that is network-accessible.',
|
||||
}),
|
||||
internal_service_communication: Type.String({
|
||||
description:
|
||||
'Trust relationships and security assumptions between network-reachable services. ' +
|
||||
'If the application is a single service with no internal RPC fabric, state that.',
|
||||
),
|
||||
input_validation_patterns: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('How user input is handled and validated in network-accessible endpoints.'),
|
||||
background_processing: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'If the application is a single service with no internal RPC fabric, state that.',
|
||||
}),
|
||||
input_validation_patterns: Type.String({
|
||||
description: 'How user input is handled and validated in network-accessible endpoints.',
|
||||
}),
|
||||
background_processing: Type.String({
|
||||
description:
|
||||
'Async job security and privilege models for jobs triggered by network requests. ' +
|
||||
'If no async/background processing exists, state that.',
|
||||
),
|
||||
'If no async/background processing exists, state that.',
|
||||
}),
|
||||
});
|
||||
|
||||
const InfrastructureSchema = z.object({
|
||||
secrets_management: z.string().min(1).describe('How secrets are stored, rotated, and accessed.'),
|
||||
configuration_security: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const InfrastructureSchema = Type.Object({
|
||||
secrets_management: Type.String({ description: 'How secrets are stored, rotated, and accessed.' }),
|
||||
configuration_security: Type.String({
|
||||
description:
|
||||
'Environment separation and secret handling. Specifically search for infrastructure ' +
|
||||
'configuration (e.g., Nginx, Kubernetes Ingress, CDN settings) that defines security ' +
|
||||
'headers like Strict-Transport-Security (HSTS) and Cache-Control, and report what was found.',
|
||||
),
|
||||
external_dependencies: z.string().min(1).describe('Third-party services and their security implications.'),
|
||||
monitoring_and_logging: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Security event visibility — what is logged, where it goes, and who can see it.'),
|
||||
'configuration (e.g., Nginx, Kubernetes Ingress, CDN settings) that defines security ' +
|
||||
'headers like Strict-Transport-Security (HSTS) and Cache-Control, and report what was found.',
|
||||
}),
|
||||
external_dependencies: Type.String({ description: 'Third-party services and their security implications.' }),
|
||||
monitoring_and_logging: Type.String({
|
||||
description: 'Security event visibility — what is logged, where it goes, and who can see it.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const ApplicationIntelligenceInputSchema = z.object({
|
||||
architecture: ArchitectureSchema.describe(
|
||||
'Architecture & Technology Stack — driven by the Architecture Scanner sub-agent. ' +
|
||||
export const ApplicationIntelligenceInputSchema = Type.Object({
|
||||
architecture: Type.Object(ArchitectureSchema.properties, {
|
||||
description:
|
||||
'Architecture & Technology Stack — driven by the Architecture Scanner sub-agent. ' +
|
||||
'Becomes Section 2 of the rendered deliverable.',
|
||||
),
|
||||
data_security: DataSecuritySchema.describe(
|
||||
'Data Security & Storage — driven by the Data Security Auditor sub-agent. ' +
|
||||
}),
|
||||
data_security: Type.Object(DataSecuritySchema.properties, {
|
||||
description:
|
||||
'Data Security & Storage — driven by the Data Security Auditor sub-agent. ' +
|
||||
'Becomes Section 4 of the rendered deliverable.',
|
||||
),
|
||||
attack_surface: AttackSurfaceSchema.describe(
|
||||
'Attack Surface Analysis — driven by Entry Point Mapper + Architecture Scanner sub-agents. ' +
|
||||
}),
|
||||
attack_surface: Type.Object(AttackSurfaceSchema.properties, {
|
||||
description:
|
||||
'Attack Surface Analysis — driven by Entry Point Mapper + Architecture Scanner sub-agents. ' +
|
||||
'Only include entry points confirmed to be in-scope (network-reachable). ' +
|
||||
'Becomes Section 5 of the rendered deliverable.',
|
||||
),
|
||||
infrastructure: InfrastructureSchema.describe(
|
||||
'Infrastructure & Operational Security. Becomes Section 6 of the rendered deliverable.',
|
||||
),
|
||||
}),
|
||||
infrastructure: Type.Object(InfrastructureSchema.properties, {
|
||||
description: 'Infrastructure & Operational Security. Becomes Section 6 of the rendered deliverable.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const AuthDeepDiveInputSchema = z.object({
|
||||
authentication_mechanisms: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const AuthDeepDiveInputSchema = Type.Object({
|
||||
authentication_mechanisms: Type.String({
|
||||
description:
|
||||
'Authentication mechanisms and their security properties. MUST include an exhaustive list of ' +
|
||||
'all API endpoints used for authentication (e.g., login, logout, token refresh, password reset).',
|
||||
),
|
||||
session_management: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'all API endpoints used for authentication (e.g., login, logout, token refresh, password reset).',
|
||||
}),
|
||||
session_management: Type.String({
|
||||
description:
|
||||
'Session management and token security. Pinpoint the exact file and line(s) of code where ' +
|
||||
'session cookie flags (HttpOnly, Secure, SameSite) are configured.',
|
||||
),
|
||||
authz_model: z.string().min(1).describe('Authorization model and potential bypass scenarios.'),
|
||||
multi_tenancy: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('Multi-tenancy security implementation. If the application is single-tenant, state that explicitly.'),
|
||||
sso_oauth_oidc: z
|
||||
.string()
|
||||
.nullable()
|
||||
.describe(
|
||||
'session cookie flags (HttpOnly, Secure, SameSite) are configured.',
|
||||
}),
|
||||
authz_model: Type.String({ description: 'Authorization model and potential bypass scenarios.' }),
|
||||
multi_tenancy: Type.String({
|
||||
description: 'Multi-tenancy security implementation. If the application is single-tenant, state that explicitly.',
|
||||
}),
|
||||
sso_oauth_oidc: Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'SSO/OAuth/OIDC flows: identify the callback endpoints and locate the specific code that ' +
|
||||
'validates the state and nonce parameters. Set null only if the application has no SSO/OAuth/OIDC ' +
|
||||
'integration at all.',
|
||||
),
|
||||
'validates the state and nonce parameters. Set null only if the application has no SSO/OAuth/OIDC ' +
|
||||
'integration at all.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const CodebaseIndexingInputSchema = z.object({
|
||||
text: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const CodebaseIndexingInputSchema = Type.Object({
|
||||
text: Type.String({
|
||||
description:
|
||||
"A detailed, multi-sentence paragraph describing the codebase's directory structure, " +
|
||||
'organization, and significant tools or conventions used (e.g., build orchestration, code ' +
|
||||
'generation, testing frameworks). Focus on how this structure impacts discoverability of ' +
|
||||
'security-relevant components.',
|
||||
),
|
||||
'organization, and significant tools or conventions used (e.g., build orchestration, code ' +
|
||||
'generation, testing frameworks). Focus on how this structure impacts discoverability of ' +
|
||||
'security-relevant components.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const CriticalFilePathsInputSchema = z.object({
|
||||
configuration: z
|
||||
.array(z.string().min(1))
|
||||
.describe('Configuration files (e.g., config/server.yaml, Dockerfile, docker-compose.yml).'),
|
||||
authentication_and_authorization: z
|
||||
.array(z.string().min(1))
|
||||
.describe(
|
||||
export const CriticalFilePathsInputSchema = Type.Object({
|
||||
configuration: Type.Array(Type.String(), {
|
||||
description: 'Configuration files (e.g., config/server.yaml, Dockerfile, docker-compose.yml).',
|
||||
}),
|
||||
authentication_and_authorization: Type.Array(Type.String(), {
|
||||
description:
|
||||
'Auth/authz files (e.g., auth/jwt_middleware.go, internal/user/permissions.go, ' +
|
||||
'config/initializers/session_store.rb, src/services/oauth_callback.js).',
|
||||
),
|
||||
api_and_routing: z
|
||||
.array(z.string().min(1))
|
||||
.describe(
|
||||
'config/initializers/session_store.rb, src/services/oauth_callback.js).',
|
||||
}),
|
||||
api_and_routing: Type.Array(Type.String(), {
|
||||
description:
|
||||
'API and routing files (e.g., cmd/api/main.go, internal/handlers/user_routes.go, ' +
|
||||
'ts/graphql/schema.graphql).',
|
||||
),
|
||||
data_models_and_db: z
|
||||
.array(z.string().min(1))
|
||||
.describe(
|
||||
'ts/graphql/schema.graphql).',
|
||||
}),
|
||||
data_models_and_db: Type.Array(Type.String(), {
|
||||
description:
|
||||
'Data model and DB interaction files (e.g., db/migrations/001_initial.sql, ' +
|
||||
'internal/models/user.go, internal/repository/sql_queries.go).',
|
||||
),
|
||||
dependency_manifests: z
|
||||
.array(z.string().min(1))
|
||||
.describe('Dependency manifests (e.g., go.mod, package.json, requirements.txt).'),
|
||||
sensitive_data_and_secrets: z
|
||||
.array(z.string().min(1))
|
||||
.describe(
|
||||
'internal/models/user.go, internal/repository/sql_queries.go).',
|
||||
}),
|
||||
dependency_manifests: Type.Array(Type.String(), {
|
||||
description: 'Dependency manifests (e.g., go.mod, package.json, requirements.txt).',
|
||||
}),
|
||||
sensitive_data_and_secrets: Type.Array(Type.String(), {
|
||||
description:
|
||||
'Sensitive data and secrets handling (e.g., internal/utils/encryption.go, ' + 'internal/secrets/manager.go).',
|
||||
),
|
||||
middleware_and_input_validation: z
|
||||
.array(z.string().min(1))
|
||||
.describe(
|
||||
}),
|
||||
middleware_and_input_validation: Type.Array(Type.String(), {
|
||||
description:
|
||||
'Middleware and input validation (e.g., internal/middleware/validator.go, ' +
|
||||
'internal/handlers/input_parsers.go).',
|
||||
),
|
||||
logging_and_monitoring: z
|
||||
.array(z.string().min(1))
|
||||
.describe('Logging and monitoring (e.g., internal/logging/logger.go, config/monitoring.yaml).'),
|
||||
infrastructure_and_deployment: z
|
||||
.array(z.string().min(1))
|
||||
.describe(
|
||||
'internal/handlers/input_parsers.go).',
|
||||
}),
|
||||
logging_and_monitoring: Type.Array(Type.String(), {
|
||||
description: 'Logging and monitoring (e.g., internal/logging/logger.go, config/monitoring.yaml).',
|
||||
}),
|
||||
infrastructure_and_deployment: Type.Array(Type.String(), {
|
||||
description:
|
||||
'Infrastructure and deployment (e.g., infra/pulumi/main.go, kubernetes/deploy.yaml, ' +
|
||||
'nginx.conf, gateway-ingress.yaml).',
|
||||
),
|
||||
'nginx.conf, gateway-ingress.yaml).',
|
||||
}),
|
||||
});
|
||||
|
||||
export const XssSinksInputSchema = z.object({
|
||||
applicable: z
|
||||
.boolean()
|
||||
.describe(
|
||||
export const XssSinksInputSchema = Type.Object({
|
||||
applicable: Type.Boolean({
|
||||
description:
|
||||
'False only if the application has no web frontend at all. Otherwise true, even if no ' +
|
||||
'sinks were found in a given category — empty arrays mean "scanned this category, no sinks found".',
|
||||
),
|
||||
html_body: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'sinks were found in a given category — empty arrays mean "scanned this category, no sinks found".',
|
||||
}),
|
||||
html_body: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'HTML Body Context sinks: element.innerHTML, element.outerHTML, document.write(), ' +
|
||||
'document.writeln(), element.insertAdjacentHTML(), Range.createContextualFragment(), ' +
|
||||
'and jQuery sinks like add(), after(), append(), before(), html(), prepend(), replaceWith(), wrap().',
|
||||
),
|
||||
html_attribute: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'document.writeln(), element.insertAdjacentHTML(), Range.createContextualFragment(), ' +
|
||||
'and jQuery sinks like add(), after(), append(), before(), html(), prepend(), replaceWith(), wrap().',
|
||||
}),
|
||||
html_attribute: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'HTML Attribute Context sinks: event handlers (onclick, onerror, onmouseover, onload, onfocus), ' +
|
||||
'URL-based attributes (href, src, formaction, action, background, data), the style attribute, ' +
|
||||
'iframe srcdoc, and general attributes (value, id, class, name, alt) when quotes are escaped.',
|
||||
),
|
||||
javascript: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'URL-based attributes (href, src, formaction, action, background, data), the style attribute, ' +
|
||||
'iframe srcdoc, and general attributes (value, id, class, name, alt) when quotes are escaped.',
|
||||
}),
|
||||
javascript: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'JavaScript Context sinks: eval(), Function() constructor, setTimeout() / setInterval() ' +
|
||||
'with string arguments, and direct writes of user data into a <script> tag.',
|
||||
),
|
||||
css: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'with string arguments, and direct writes of user data into a <script> tag.',
|
||||
}),
|
||||
css: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'CSS Context sinks: element.style properties (e.g., element.style.backgroundImage) and ' +
|
||||
'direct writes of user data into a <style> tag.',
|
||||
),
|
||||
url: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'direct writes of user data into a <style> tag.',
|
||||
}),
|
||||
url: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'URL Context sinks: location / window.location, location.href, location.replace(), ' +
|
||||
'location.assign(), window.open(), history.pushState(), history.replaceState(), ' +
|
||||
'URL.createObjectURL(), and jQuery selector $(userInput) in older versions.',
|
||||
),
|
||||
'location.assign(), window.open(), history.pushState(), history.replaceState(), ' +
|
||||
'URL.createObjectURL(), and jQuery selector $(userInput) in older versions.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const SsrfSinksInputSchema = z.object({
|
||||
applicable: z
|
||||
.boolean()
|
||||
.describe(
|
||||
export const SsrfSinksInputSchema = Type.Object({
|
||||
applicable: Type.Boolean({
|
||||
description:
|
||||
'False only if the application makes no outbound requests at all. Otherwise true, even if ' +
|
||||
'no sinks were found in a given category — empty arrays mean "scanned this category, no sinks found".',
|
||||
),
|
||||
http_clients: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'no sinks were found in a given category — empty arrays mean "scanned this category, no sinks found".',
|
||||
}),
|
||||
http_clients: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'HTTP(S) clients: curl, requests (Python), axios (Node.js), fetch (JavaScript/Node.js), ' +
|
||||
'net/http (Go), HttpClient (Java/.NET), urllib (Python), RestTemplate, WebClient, OkHttp, Apache HttpClient.',
|
||||
),
|
||||
raw_sockets: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'net/http (Go), HttpClient (Java/.NET), urllib (Python), RestTemplate, WebClient, OkHttp, Apache HttpClient.',
|
||||
}),
|
||||
raw_sockets: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Raw sockets and connect APIs: Socket.connect, net.Dial (Go), socket.connect (Python), ' +
|
||||
'TcpClient, UdpClient, NetworkStream, java.net.Socket, java.net.URL.openConnection().',
|
||||
),
|
||||
url_openers: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'TcpClient, UdpClient, NetworkStream, java.net.Socket, java.net.URL.openConnection().',
|
||||
}),
|
||||
url_openers: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'URL openers and file includes: file_get_contents (PHP), fopen, include_once, require_once, ' +
|
||||
'new URL().openStream() (Java), urllib.urlopen (Python), fs.readFile with URLs, ' +
|
||||
'import() with dynamic URLs, loadHTML / loadXML with external sources.',
|
||||
),
|
||||
redirect_handlers: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'new URL().openStream() (Java), urllib.urlopen (Python), fs.readFile with URLs, ' +
|
||||
'import() with dynamic URLs, loadHTML / loadXML with external sources.',
|
||||
}),
|
||||
redirect_handlers: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Redirect and "next URL" handlers: auto-follow redirects in HTTP clients, framework Location ' +
|
||||
'handlers (response.redirect), URL validation in redirect chains, "Continue to" / "Return URL" parameters.',
|
||||
),
|
||||
headless_browsers: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'handlers (response.redirect), URL validation in redirect chains, "Continue to" / "Return URL" parameters.',
|
||||
}),
|
||||
headless_browsers: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Headless browsers and render engines: Puppeteer (page.goto, page.setContent), ' +
|
||||
'Playwright (page.navigate, page.route), Selenium WebDriver navigation, html-to-pdf converters ' +
|
||||
'(wkhtmltopdf, Puppeteer PDF), and SSR with external content.',
|
||||
),
|
||||
media_processors: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'Playwright (page.navigate, page.route), Selenium WebDriver navigation, html-to-pdf converters ' +
|
||||
'(wkhtmltopdf, Puppeteer PDF), and SSR with external content.',
|
||||
}),
|
||||
media_processors: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Media processors: ImageMagick (convert, identify with URLs), GraphicsMagick, FFmpeg with ' +
|
||||
'network sources, wkhtmltopdf, Ghostscript with URL inputs, image optimization services with URL parameters.',
|
||||
),
|
||||
link_preview: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'network sources, wkhtmltopdf, Ghostscript with URL inputs, image optimization services with URL parameters.',
|
||||
}),
|
||||
link_preview: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Link preview and unfurlers: chat application link expanders, CMS link preview generators, ' +
|
||||
'oEmbed endpoint fetchers, social media card generators, URL metadata extractors.',
|
||||
),
|
||||
webhook_testers: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'oEmbed endpoint fetchers, social media card generators, URL metadata extractors.',
|
||||
}),
|
||||
webhook_testers: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Webhook testers and callback verifiers: "ping my webhook" functionality, outbound callback ' +
|
||||
'verification, health check notifications, event delivery confirmations, API endpoint validation tools.',
|
||||
),
|
||||
sso_oidc_discovery: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'verification, health check notifications, event delivery confirmations, API endpoint validation tools.',
|
||||
}),
|
||||
sso_oidc_discovery: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'SSO/OIDC discovery and JWKS fetchers: OpenID Connect discovery endpoints, JWKS fetchers, ' +
|
||||
'OAuth authorization server metadata, SAML metadata fetchers, federation metadata retrievers.',
|
||||
),
|
||||
importers: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'OAuth authorization server metadata, SAML metadata fetchers, federation metadata retrievers.',
|
||||
}),
|
||||
importers: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Importers and data loaders: "import from URL" functionality, CSV/JSON/XML remote loaders, ' +
|
||||
'RSS/Atom feed readers, API data synchronization, configuration file fetchers.',
|
||||
),
|
||||
package_installers: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'RSS/Atom feed readers, API data synchronization, configuration file fetchers.',
|
||||
}),
|
||||
package_installers: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Package/plugin/theme installers: "install from URL" features, package managers with remote ' +
|
||||
'sources, plugin/theme downloaders, update mechanisms with remote checks, dependency resolution ' +
|
||||
'with external repos.',
|
||||
),
|
||||
monitoring_and_health: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'sources, plugin/theme downloaders, update mechanisms with remote checks, dependency resolution ' +
|
||||
'with external repos.',
|
||||
}),
|
||||
monitoring_and_health: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Monitoring and health check frameworks: URL pingers and uptime checkers, health check ' +
|
||||
'endpoints, monitoring probe systems, alerting webhook senders, performance testing tools.',
|
||||
),
|
||||
cloud_metadata: z
|
||||
.array(SinkRefSchema)
|
||||
.describe(
|
||||
'endpoints, monitoring probe systems, alerting webhook senders, performance testing tools.',
|
||||
}),
|
||||
cloud_metadata: Type.Array(SinkRefSchema, {
|
||||
description:
|
||||
'Cloud metadata helpers: AWS/GCP/Azure instance metadata callers, cloud service discovery ' +
|
||||
'mechanisms, container orchestration API clients, infrastructure metadata fetchers, service mesh ' +
|
||||
'configuration retrievers.',
|
||||
),
|
||||
'mechanisms, container orchestration API clients, infrastructure metadata fetchers, service mesh ' +
|
||||
'configuration retrievers.',
|
||||
}),
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// EXPORTED TYPES
|
||||
// ============================================================================
|
||||
|
||||
export type ExecutiveSummaryInput = z.infer<typeof ExecutiveSummaryInputSchema>;
|
||||
export type ApplicationIntelligenceInput = z.infer<typeof ApplicationIntelligenceInputSchema>;
|
||||
export type AuthDeepDiveInput = z.infer<typeof AuthDeepDiveInputSchema>;
|
||||
export type CodebaseIndexingInput = z.infer<typeof CodebaseIndexingInputSchema>;
|
||||
export type CriticalFilePathsInput = z.infer<typeof CriticalFilePathsInputSchema>;
|
||||
export type XssSinksInput = z.infer<typeof XssSinksInputSchema>;
|
||||
export type SsrfSinksInput = z.infer<typeof SsrfSinksInputSchema>;
|
||||
export type ExecutiveSummaryInput = Static<typeof ExecutiveSummaryInputSchema>;
|
||||
export type ApplicationIntelligenceInput = Static<typeof ApplicationIntelligenceInputSchema>;
|
||||
export type AuthDeepDiveInput = Static<typeof AuthDeepDiveInputSchema>;
|
||||
export type CodebaseIndexingInput = Static<typeof CodebaseIndexingInputSchema>;
|
||||
export type CriticalFilePathsInput = Static<typeof CriticalFilePathsInputSchema>;
|
||||
export type XssSinksInput = Static<typeof XssSinksInputSchema>;
|
||||
export type SsrfSinksInput = Static<typeof SsrfSinksInputSchema>;
|
||||
|
||||
export interface PreReconData {
|
||||
readonly executive_summary?: ExecutiveSummaryInput;
|
||||
@@ -427,32 +372,27 @@ export type PreReconCallStatus = Readonly<Record<PreReconToolName, PreReconToolS
|
||||
// ============================================================================
|
||||
|
||||
interface ToolResult {
|
||||
[x: string]: unknown;
|
||||
content: Array<{ type: 'text'; text: string }>;
|
||||
isError: boolean;
|
||||
}
|
||||
|
||||
function createToolResult(response: { status: string; [key: string]: unknown }): ToolResult {
|
||||
return {
|
||||
content: [{ type: 'text', text: JSON.stringify(response, null, 2) }],
|
||||
isError: response.status === 'error',
|
||||
};
|
||||
details: Record<string, unknown>;
|
||||
isError?: boolean;
|
||||
}
|
||||
|
||||
function successResult(data: Record<string, unknown>): ToolResult {
|
||||
return createToolResult({ status: 'success', ...data });
|
||||
const response = { status: 'success', ...data };
|
||||
return { content: [{ type: 'text' as const, text: JSON.stringify(response, null, 2) }], details: {} };
|
||||
}
|
||||
|
||||
function errorResult(message: string, errorType = 'ValidationError', retryable = true): ToolResult {
|
||||
return createToolResult({ status: 'error', message, errorType, retryable });
|
||||
const response = { status: 'error', message, errorType, retryable };
|
||||
return { content: [{ type: 'text' as const, text: JSON.stringify(response, null, 2) }], details: {}, isError: true };
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SERVER FACTORY
|
||||
// TOOLS FACTORY
|
||||
// ============================================================================
|
||||
|
||||
export interface PreReconCollectorServer {
|
||||
server: McpSdkServerConfigWithInstance;
|
||||
tools: ToolDefinition[];
|
||||
getAll(): PreReconData;
|
||||
getCallStatus(): PreReconCallStatus;
|
||||
}
|
||||
@@ -476,113 +416,123 @@ export function createPreReconCollectorServer(): PreReconCollectorServer {
|
||||
);
|
||||
}
|
||||
|
||||
const setExecutiveSummary = tool(
|
||||
'set_executive_summary',
|
||||
"Record the application's overall security posture as a short executive summary. " +
|
||||
const setExecutiveSummary = defineTool({
|
||||
name: 'set_executive_summary',
|
||||
label: 'Set Executive Summary',
|
||||
description:
|
||||
"Record the application's overall security posture as a short executive summary. " +
|
||||
'Call exactly once before terminating. Becomes Section 1 of the rendered deliverable. ' +
|
||||
'Duplicate calls are rejected.',
|
||||
ExecutiveSummaryInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: ExecutiveSummaryInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.executive_summary) return alreadyCalled('set_executive_summary');
|
||||
state.executive_summary = input;
|
||||
return successResult({ set: 'set_executive_summary' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setApplicationIntelligence = tool(
|
||||
'set_application_intelligence',
|
||||
'Record the composite application intelligence — architecture, data security, attack surface, ' +
|
||||
const setApplicationIntelligence = defineTool({
|
||||
name: 'set_application_intelligence',
|
||||
label: 'Set Application Intelligence',
|
||||
description:
|
||||
'Record the composite application intelligence — architecture, data security, attack surface, ' +
|
||||
'and infrastructure — in a single call. Call exactly once before terminating. ' +
|
||||
'Becomes Sections 2, 4, 5, and 6 of the rendered deliverable. Duplicate calls are rejected.',
|
||||
ApplicationIntelligenceInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: ApplicationIntelligenceInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.application_intelligence) return alreadyCalled('set_application_intelligence');
|
||||
state.application_intelligence = input;
|
||||
return successResult({ set: 'set_application_intelligence' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setAuthDeepDive = tool(
|
||||
'set_auth_deep_dive',
|
||||
'Record the authentication & authorization deep dive. Call exactly once before terminating. ' +
|
||||
const setAuthDeepDive = defineTool({
|
||||
name: 'set_auth_deep_dive',
|
||||
label: 'Set Auth Deep Dive',
|
||||
description:
|
||||
'Record the authentication & authorization deep dive. Call exactly once before terminating. ' +
|
||||
'Becomes Section 3 of the rendered deliverable. Duplicate calls are rejected.',
|
||||
AuthDeepDiveInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: AuthDeepDiveInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.auth_deep_dive) return alreadyCalled('set_auth_deep_dive');
|
||||
state.auth_deep_dive = input;
|
||||
return successResult({ set: 'set_auth_deep_dive' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setCodebaseIndexing = tool(
|
||||
'set_codebase_indexing',
|
||||
'Record the overall codebase indexing narrative. Call exactly once before terminating. ' +
|
||||
const setCodebaseIndexing = defineTool({
|
||||
name: 'set_codebase_indexing',
|
||||
label: 'Set Codebase Indexing',
|
||||
description:
|
||||
'Record the overall codebase indexing narrative. Call exactly once before terminating. ' +
|
||||
'Becomes Section 7 of the rendered deliverable. Duplicate calls are rejected.',
|
||||
CodebaseIndexingInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: CodebaseIndexingInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.codebase_indexing) return alreadyCalled('set_codebase_indexing');
|
||||
state.codebase_indexing = input;
|
||||
return successResult({ set: 'set_codebase_indexing' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setCriticalFilePaths = tool(
|
||||
'set_critical_file_paths',
|
||||
'Record the catalog of critical file paths grouped by security relevance. Call exactly once ' +
|
||||
const setCriticalFilePaths = defineTool({
|
||||
name: 'set_critical_file_paths',
|
||||
label: 'Set Critical File Paths',
|
||||
description:
|
||||
'Record the catalog of critical file paths grouped by security relevance. Call exactly once ' +
|
||||
'before terminating. Becomes Section 8 of the rendered deliverable. The next agent uses this ' +
|
||||
'as a starting point for manual review. Duplicate calls are rejected.',
|
||||
CriticalFilePathsInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: CriticalFilePathsInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.critical_file_paths) return alreadyCalled('set_critical_file_paths');
|
||||
state.critical_file_paths = input;
|
||||
return successResult({ set: 'set_critical_file_paths' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setXssSinks = tool(
|
||||
'set_xss_sinks',
|
||||
'Record discovered XSS sinks grouped by render context. Call exactly once before terminating. ' +
|
||||
const setXssSinks = defineTool({
|
||||
name: 'set_xss_sinks',
|
||||
label: 'Set Xss Sinks',
|
||||
description:
|
||||
'Record discovered XSS sinks grouped by render context. Call exactly once before terminating. ' +
|
||||
'If the application has no web frontend at all, set applicable=false; otherwise populate each ' +
|
||||
'render-context array (empty arrays mean "scanned, no sinks of this kind"). This list drives ' +
|
||||
"the vuln-xss agent's testing todos downstream. Becomes Section 9 of the rendered deliverable. " +
|
||||
'Duplicate calls are rejected.',
|
||||
XssSinksInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: XssSinksInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.xss_sinks) return alreadyCalled('set_xss_sinks');
|
||||
state.xss_sinks = input;
|
||||
return successResult({ set: 'set_xss_sinks' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setSsrfSinks = tool(
|
||||
'set_ssrf_sinks',
|
||||
'Record discovered SSRF sinks grouped by sink category. Call exactly once before terminating. ' +
|
||||
const setSsrfSinks = defineTool({
|
||||
name: 'set_ssrf_sinks',
|
||||
label: 'Set Ssrf Sinks',
|
||||
description:
|
||||
'Record discovered SSRF sinks grouped by sink category. Call exactly once before terminating. ' +
|
||||
'If the application makes no outbound requests at all, set applicable=false; otherwise populate ' +
|
||||
'each category array (empty arrays mean "scanned, no sinks of this kind"). This list drives ' +
|
||||
"the vuln-ssrf agent's testing todos downstream. Becomes Section 10 of the rendered deliverable. " +
|
||||
'Duplicate calls are rejected.',
|
||||
SsrfSinksInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: SsrfSinksInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.ssrf_sinks) return alreadyCalled('set_ssrf_sinks');
|
||||
state.ssrf_sinks = input;
|
||||
return successResult({ set: 'set_ssrf_sinks' });
|
||||
},
|
||||
);
|
||||
|
||||
const server: McpSdkServerConfigWithInstance = createSdkMcpServer({
|
||||
name: 'pre-recon-collector',
|
||||
version: '1.0.0',
|
||||
tools: [
|
||||
setExecutiveSummary,
|
||||
setApplicationIntelligence,
|
||||
setAuthDeepDive,
|
||||
setCodebaseIndexing,
|
||||
setCriticalFilePaths,
|
||||
setXssSinks,
|
||||
setSsrfSinks,
|
||||
],
|
||||
});
|
||||
|
||||
const tools: ToolDefinition[] = [
|
||||
setExecutiveSummary,
|
||||
setApplicationIntelligence,
|
||||
setAuthDeepDive,
|
||||
setCodebaseIndexing,
|
||||
setCriticalFilePaths,
|
||||
setXssSinks,
|
||||
setSsrfSinks,
|
||||
];
|
||||
|
||||
function statusOf<K extends PreReconToolName>(key: K): PreReconToolStatus {
|
||||
const flagMap: Record<PreReconToolName, unknown> = {
|
||||
set_executive_summary: state.executive_summary,
|
||||
@@ -597,7 +547,7 @@ export function createPreReconCollectorServer(): PreReconCollectorServer {
|
||||
}
|
||||
|
||||
return {
|
||||
server,
|
||||
tools,
|
||||
getAll: (): PreReconData => ({
|
||||
...(state.executive_summary && { executive_summary: state.executive_summary }),
|
||||
...(state.application_intelligence && { application_intelligence: state.application_intelligence }),
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,9 +5,9 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Vuln Collector MCP Server (factory parameterized by vulnerability class).
|
||||
* Vuln Collector tools (factory parameterized by vulnerability class).
|
||||
*
|
||||
* Exposes 4 one-shot, Zod-validated MCP tools per vuln agent (injection, xss,
|
||||
* Exposes 4 one-shot, TypeBox-validated tools per vuln agent (injection, xss,
|
||||
* auth, ssrf, authz) that feed a deterministic renderer producing
|
||||
* {class}_analysis_deliverable.md:
|
||||
* - set_findings_summary — §1 executive summary + §2 dominant patterns
|
||||
@@ -20,14 +20,13 @@
|
||||
* across classes.
|
||||
*
|
||||
* Skipped tools surface as renderer placeholders, not activity failures.
|
||||
* getCallStatus() exposes the per-run call pattern for logging. Each Zod
|
||||
* schema's field-level descriptions carry the section guidance, so the SDK
|
||||
* injects it into the agent's tool catalog.
|
||||
* getCallStatus() exposes the per-run call pattern for logging. Each schema's
|
||||
* field-level descriptions carry the section guidance, so the agent's tool
|
||||
* catalog surfaces it.
|
||||
*/
|
||||
|
||||
import type { McpSdkServerConfigWithInstance } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { type ZodRawShape, z } from 'zod';
|
||||
import { defineTool, type ToolDefinition } from '@earendil-works/pi-coding-agent';
|
||||
import { type Static, Type } from 'typebox';
|
||||
|
||||
// ============================================================================
|
||||
// CLASS DISCRIMINATOR
|
||||
@@ -46,286 +45,237 @@ export const BLIND_SPOTS_CLASSES: ReadonlySet<VulnClass> = new Set<VulnClass>(['
|
||||
// SHARED SCHEMAS — set_findings_summary, set_safe_vectors, set_blind_spots
|
||||
// ============================================================================
|
||||
|
||||
const PatternSchema = z.object({
|
||||
name: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const PatternSchema = Type.Object({
|
||||
name: Type.String({
|
||||
description:
|
||||
'Concise pattern name, e.g. "Weak Session Management", "Reflected XSS in Search Parameter", ' +
|
||||
'"Insufficient URL Validation".',
|
||||
),
|
||||
description: z.string().min(1).describe('One- to two-sentence description of the pattern observed in the codebase.'),
|
||||
implication: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe('One- to two-sentence implication for exploitation — what does this pattern enable an attacker to do.'),
|
||||
representative_finding_ids: z
|
||||
.array(z.string().min(1))
|
||||
.min(1)
|
||||
.describe(
|
||||
'"Insufficient URL Validation".',
|
||||
}),
|
||||
description: Type.String({
|
||||
description: 'One- to two-sentence description of the pattern observed in the codebase.',
|
||||
}),
|
||||
implication: Type.String({
|
||||
description: 'One- to two-sentence implication for exploitation — what does this pattern enable an attacker to do.',
|
||||
}),
|
||||
representative_finding_ids: Type.Array(Type.String(), {
|
||||
description:
|
||||
'IDs of findings that exhibit this pattern (e.g. ["AUTH-VULN-01", "AUTH-VULN-02"]). Must match ' +
|
||||
'IDs the agent has assigned in the structured-output exploitation queue.',
|
||||
),
|
||||
'IDs the agent has assigned in the structured-output exploitation queue.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const FindingsSummaryInputSchema = z.object({
|
||||
key_outcome: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const FindingsSummaryInputSchema = Type.Object({
|
||||
key_outcome: Type.String({
|
||||
description:
|
||||
'One to two sentences capturing the headline result of your analysis — what was found and its ' +
|
||||
'severity profile (e.g. "Several high-confidence SQL injection vulnerabilities were identified; ' +
|
||||
'all findings have been passed to the exploitation phase"). Becomes Section 1 of the rendered ' +
|
||||
'deliverable.',
|
||||
),
|
||||
patterns: z
|
||||
.array(PatternSchema)
|
||||
.describe(
|
||||
'severity profile (e.g. "Several high-confidence SQL injection vulnerabilities were identified; ' +
|
||||
'all findings have been passed to the exploitation phase"). Becomes Section 1 of the rendered ' +
|
||||
'deliverable.',
|
||||
}),
|
||||
patterns: Type.Array(PatternSchema, {
|
||||
description:
|
||||
'Complete list of dominant patterns observed across findings. Pass all patterns in one call. ' +
|
||||
'Empty array is acceptable if no recurring patterns were observed — the deliverable will render ' +
|
||||
'"No dominant patterns identified" for Section 2 in that case.',
|
||||
),
|
||||
'Empty array is acceptable if no recurring patterns were observed — the deliverable will render ' +
|
||||
'"No dominant patterns identified" for Section 2 in that case.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const SafeVectorInputSchema = z.object({
|
||||
subject: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const SafeVectorInputSchema = Type.Object({
|
||||
subject: Type.String({
|
||||
description:
|
||||
'The specific subject of analysis. For injection/xss runs, the input parameter name (e.g. ' +
|
||||
'"username", "redirect_url"). For auth/ssrf runs, the component or flow name (e.g. ' +
|
||||
'"Password Hashing", "Webhook Configuration"). For authz runs, the endpoint (e.g. ' +
|
||||
'"POST /api/auth/logout"). The renderer maps this to the class-appropriate column header.',
|
||||
),
|
||||
location: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'"username", "redirect_url"). For auth/ssrf runs, the component or flow name (e.g. ' +
|
||||
'"Password Hashing", "Webhook Configuration"). For authz runs, the endpoint (e.g. ' +
|
||||
'"POST /api/auth/logout"). The renderer maps this to the class-appropriate column header.',
|
||||
}),
|
||||
location: Type.String({
|
||||
description:
|
||||
'File path with line number (e.g. "controllers/authController.js:45") or endpoint URL (e.g. ' +
|
||||
'"/profile"). For authz runs, this is the guard location specifically (e.g. ' +
|
||||
'"middleware/auth.js:45"). The renderer maps this to the class-appropriate column header.',
|
||||
),
|
||||
defense_mechanism: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'"/profile"). For authz runs, this is the guard location specifically (e.g. ' +
|
||||
'"middleware/auth.js:45"). The renderer maps this to the class-appropriate column header.',
|
||||
}),
|
||||
defense_mechanism: Type.String({
|
||||
description:
|
||||
'The robust defense observed (e.g. "Prepared Statement (Parameter Binding)", "HTML Entity ' +
|
||||
'Encoding", "Strict URL Whitelist Validation", "bcrypt.compare for constant-time check").',
|
||||
),
|
||||
render_context: z
|
||||
.string()
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe(
|
||||
'XSS-only: the DOM render context for the validated vector — one of HTML_BODY, HTML_ATTRIBUTE, ' +
|
||||
'Encoding", "Strict URL Whitelist Validation", "bcrypt.compare for constant-time check").',
|
||||
}),
|
||||
render_context: Type.Optional(
|
||||
Type.Union([Type.String(), Type.Null()], {
|
||||
description:
|
||||
'XSS-only: the DOM render context for the validated vector — one of HTML_BODY, HTML_ATTRIBUTE, ' +
|
||||
'JAVASCRIPT_STRING, URL_PARAM, CSS_VALUE. Omit (or pass null) for non-XSS classes; the renderer ' +
|
||||
'only emits this column for the XSS deliverable.',
|
||||
),
|
||||
}),
|
||||
),
|
||||
});
|
||||
|
||||
export const SafeVectorsInputSchema = z.object({
|
||||
vectors: z
|
||||
.array(SafeVectorInputSchema)
|
||||
.describe(
|
||||
export const SafeVectorsInputSchema = Type.Object({
|
||||
vectors: Type.Array(SafeVectorInputSchema, {
|
||||
description:
|
||||
'All input vectors / components / endpoints that were analyzed and confirmed to have robust, ' +
|
||||
'context-appropriate defenses. Empty array is acceptable but unusual — the deliverable will ' +
|
||||
'render "No vectors confirmed secure during analysis" for Section 4 in that case. Becomes ' +
|
||||
'Section 4 of the rendered deliverable. The renderer sorts by (subject, location) before ' +
|
||||
'rendering, so emission order does not affect output.',
|
||||
),
|
||||
'context-appropriate defenses. Empty array is acceptable but unusual — the deliverable will ' +
|
||||
'render "No vectors confirmed secure during analysis" for Section 4 in that case. Becomes ' +
|
||||
'Section 4 of the rendered deliverable. The renderer sorts by (subject, location) before ' +
|
||||
'rendering, so emission order does not affect output.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const BlindSpotItemSchema = z.object({
|
||||
heading: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
export const BlindSpotItemSchema = Type.Object({
|
||||
heading: Type.String({
|
||||
description:
|
||||
'Short heading for the blind spot (e.g. "Untraced Asynchronous Flows", ' +
|
||||
'"Limited Visibility into Stored Procedures", "Minified JavaScript Bundle").',
|
||||
),
|
||||
description: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'"Limited Visibility into Stored Procedures", "Minified JavaScript Bundle").',
|
||||
}),
|
||||
description: Type.String({
|
||||
description:
|
||||
'One to three sentences describing the analysis gap — what could not be traced, why, and what ' +
|
||||
'the residual risk is.',
|
||||
),
|
||||
'the residual risk is.',
|
||||
}),
|
||||
});
|
||||
|
||||
export const BlindSpotsInputSchema = z.object({
|
||||
items: z
|
||||
.array(BlindSpotItemSchema)
|
||||
.describe(
|
||||
export const BlindSpotsInputSchema = Type.Object({
|
||||
items: Type.Array(BlindSpotItemSchema, {
|
||||
description:
|
||||
'Analysis constraints, untraced code paths, or other coverage gaps that should be noted. ' +
|
||||
'Empty array is acceptable on high-coverage runs — the deliverable will render "No analysis ' +
|
||||
'constraints or blind spots identified" for Section 5 in that case. Becomes Section 5 of the ' +
|
||||
'rendered deliverable.',
|
||||
),
|
||||
'Empty array is acceptable on high-coverage runs — the deliverable will render "No analysis ' +
|
||||
'constraints or blind spots identified" for Section 5 in that case. Becomes Section 5 of the ' +
|
||||
'rendered deliverable.',
|
||||
}),
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// PER-CLASS set_strategic_intelligence SCHEMAS (flat — no nesting)
|
||||
// ============================================================================
|
||||
|
||||
const InjectionStrategicIntelSchema = z.object({
|
||||
defensive_evasion_waf: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const InjectionStrategicIntelSchema = Type.Object({
|
||||
defensive_evasion_waf: Type.String({
|
||||
description:
|
||||
'WAF behavior observed during analysis: active rules, common payloads blocked, identified ' +
|
||||
'bypasses (e.g. "WAF blocks UNION SELECT but not time-based blind injection"). Write ' +
|
||||
'"Not applicable — no WAF observed" if none was detected.',
|
||||
),
|
||||
error_based_potential: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'bypasses (e.g. "WAF blocks UNION SELECT but not time-based blind injection"). Write ' +
|
||||
'"Not applicable — no WAF observed" if none was detected.',
|
||||
}),
|
||||
error_based_potential: Type.String({
|
||||
description:
|
||||
'Whether endpoints leak verbose database errors that enable error-based injection (e.g. ' +
|
||||
'"/api/products returns verbose PostgreSQL error messages, prime target for error-based ' +
|
||||
'exploitation"). Write "Not applicable" if no injection findings exist.',
|
||||
),
|
||||
confirmed_database_technology: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'"/api/products returns verbose PostgreSQL error messages, prime target for error-based ' +
|
||||
'exploitation"). Write "Not applicable" if no injection findings exist.',
|
||||
}),
|
||||
confirmed_database_technology: Type.String({
|
||||
description:
|
||||
'Database engine(s) confirmed via error syntax or function calls (e.g. "PostgreSQL, confirmed ' +
|
||||
'via pg_sleep() and verbose error syntax"). Drives payload selection downstream. Write ' +
|
||||
'"Not applicable" if no DB sinks in scope.',
|
||||
),
|
||||
'via pg_sleep() and verbose error syntax"). Drives payload selection downstream. Write ' +
|
||||
'"Not applicable" if no DB sinks in scope.',
|
||||
}),
|
||||
});
|
||||
|
||||
const XssStrategicIntelSchema = z.object({
|
||||
csp_analysis: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const XssStrategicIntelSchema = Type.Object({
|
||||
csp_analysis: Type.String({
|
||||
description:
|
||||
'Content Security Policy observed and its bypassability: current policy text, critical bypasses ' +
|
||||
"(e.g. \"script-src 'self' https://trusted-cdn.com — the trusted CDN hosts vulnerable AngularJS, " +
|
||||
'enabling client-side template injection bypass"). Write "Not applicable — no CSP header served" ' +
|
||||
'if none.',
|
||||
),
|
||||
cookie_security: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
"(e.g. \"script-src 'self' https://trusted-cdn.com — the trusted CDN hosts vulnerable AngularJS, " +
|
||||
'enabling client-side template injection bypass"). Write "Not applicable — no CSP header served" ' +
|
||||
'if none.',
|
||||
}),
|
||||
cookie_security: Type.String({
|
||||
description:
|
||||
'Session cookie security observations: HttpOnly, Secure, SameSite flags, and storage mechanism ' +
|
||||
'(e.g. "Primary session cookie `sessionid` is missing HttpOnly; tokens are also stored in ' +
|
||||
'localStorage, both accessible to JavaScript"). Drives exfiltration strategy.',
|
||||
),
|
||||
'(e.g. "Primary session cookie `sessionid` is missing HttpOnly; tokens are also stored in ' +
|
||||
'localStorage, both accessible to JavaScript"). Drives exfiltration strategy.',
|
||||
}),
|
||||
});
|
||||
|
||||
const AuthStrategicIntelSchema = z.object({
|
||||
authentication_method: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const AuthStrategicIntelSchema = Type.Object({
|
||||
authentication_method: Type.String({
|
||||
description:
|
||||
'How users authenticate: JWT, session cookie, OAuth, SAML, etc. Include any algorithm or library ' +
|
||||
'details (e.g. "JWT (RS256) with hardcoded private key in lib/insecurity.ts:23").',
|
||||
),
|
||||
session_token_details: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'details (e.g. "JWT (RS256) with hardcoded private key in lib/insecurity.ts:23").',
|
||||
}),
|
||||
session_token_details: Type.String({
|
||||
description:
|
||||
'Where tokens live and how they are protected: cookie name, storage mechanism (cookie vs ' +
|
||||
'localStorage), cookie flags, expiration (e.g. "JWT stored in localStorage under key `token`; ' +
|
||||
'cookie copy lacks HttpOnly/Secure/SameSite; 6-hour TTL with no revocation").',
|
||||
),
|
||||
password_policy: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'localStorage), cookie flags, expiration (e.g. "JWT stored in localStorage under key `token`; ' +
|
||||
'cookie copy lacks HttpOnly/Secure/SameSite; 6-hour TTL with no revocation").',
|
||||
}),
|
||||
password_policy: Type.String({
|
||||
description:
|
||||
'Observed server-side password policy and storage: complexity rules, hashing algorithm, salt, ' +
|
||||
'(e.g. "MD5 without salt via crypto.createHash; no server-side complexity policy; client-side ' +
|
||||
'5-char minimum trivially bypassed").',
|
||||
),
|
||||
'(e.g. "MD5 without salt via crypto.createHash; no server-side complexity policy; client-side ' +
|
||||
'5-char minimum trivially bypassed").',
|
||||
}),
|
||||
});
|
||||
|
||||
const SsrfStrategicIntelSchema = z.object({
|
||||
http_client_library: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const SsrfStrategicIntelSchema = Type.Object({
|
||||
http_client_library: Type.String({
|
||||
description:
|
||||
'HTTP client library/libraries used for outbound requests (e.g. "axios 1.6", "node-fetch", ' +
|
||||
'"requests", "HttpClient (Spring)"). Include version where it informs known bypass techniques.',
|
||||
),
|
||||
request_architecture: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'"requests", "HttpClient (Spring)"). Include version where it informs known bypass techniques.',
|
||||
}),
|
||||
request_architecture: Type.String({
|
||||
description:
|
||||
'How outbound requests are constructed and routed: proxy/middleware patterns, internal routing ' +
|
||||
'rules (e.g. "Webhook URLs are POSTed directly without an outbound proxy; redirects are ' +
|
||||
'followed by default with no maxRedirects limit").',
|
||||
),
|
||||
internal_services: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'rules (e.g. "Webhook URLs are POSTed directly without an outbound proxy; redirects are ' +
|
||||
'followed by default with no maxRedirects limit").',
|
||||
}),
|
||||
internal_services: Type.String({
|
||||
description:
|
||||
'Internal endpoints, services, or cloud-metadata addresses discovered during analysis that an ' +
|
||||
'SSRF could reach (e.g. "169.254.169.254 (AWS IMDS), internal admin API at admin.internal:8443, ' +
|
||||
'PostgreSQL on localhost:5432").',
|
||||
),
|
||||
'SSRF could reach (e.g. "169.254.169.254 (AWS IMDS), internal admin API at admin.internal:8443, ' +
|
||||
'PostgreSQL on localhost:5432").',
|
||||
}),
|
||||
});
|
||||
|
||||
const AuthzStrategicIntelSchema = z.object({
|
||||
session_management_architecture: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
const AuthzStrategicIntelSchema = Type.Object({
|
||||
session_management_architecture: Type.String({
|
||||
description:
|
||||
'Session and authentication architecture relevant to authorization decisions: where user identity ' +
|
||||
'comes from, whether the user ID is trusted by downstream guards (e.g. "JWT tokens in cookies; ' +
|
||||
'user ID extracted from `req.user.id` and used directly in DB queries without ownership ' +
|
||||
're-validation").',
|
||||
),
|
||||
role_permission_model: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'comes from, whether the user ID is trusted by downstream guards (e.g. "JWT tokens in cookies; ' +
|
||||
'user ID extracted from `req.user.id` and used directly in DB queries without ownership ' +
|
||||
're-validation").',
|
||||
}),
|
||||
role_permission_model: Type.String({
|
||||
description:
|
||||
'Roles, capabilities, and where they live: identified roles, their privilege levels, and where ' +
|
||||
'role/permission data is stored (e.g. "Three roles: user, moderator, admin. Role embedded in ' +
|
||||
'JWT and database; checks inconsistent — many admin routes only check `req.user` presence").',
|
||||
),
|
||||
resource_access_patterns: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'role/permission data is stored (e.g. "Three roles: user, moderator, admin. Role embedded in ' +
|
||||
'JWT and database; checks inconsistent — many admin routes only check `req.user` presence").',
|
||||
}),
|
||||
resource_access_patterns: Type.String({
|
||||
description:
|
||||
'How resource IDs flow through the system and ownership patterns: e.g. "Most endpoints use path ' +
|
||||
'parameters for resource IDs (/api/users/{id}); IDs are passed to DB queries without ownership ' +
|
||||
'validation". Critical for IDOR exploitation.',
|
||||
),
|
||||
workflow_implementation: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
'parameters for resource IDs (/api/users/{id}); IDs are passed to DB queries without ownership ' +
|
||||
'validation". Critical for IDOR exploitation.',
|
||||
}),
|
||||
workflow_implementation: Type.String({
|
||||
description:
|
||||
'Multi-step processes and state transitions: how workflow stages are tracked, whether prior-state ' +
|
||||
'checks are enforced (e.g. "Multi-step processes use status fields in database; status ' +
|
||||
'transitions do not verify prior state completion"). Drives context-based authz exploitation.',
|
||||
),
|
||||
'checks are enforced (e.g. "Multi-step processes use status fields in database; status ' +
|
||||
'transitions do not verify prior state completion"). Drives context-based authz exploitation.',
|
||||
}),
|
||||
});
|
||||
|
||||
const STRATEGIC_INTEL_SCHEMAS: Record<VulnClass, z.ZodObject<ZodRawShape>> = {
|
||||
const STRATEGIC_INTEL_SCHEMAS = {
|
||||
injection: InjectionStrategicIntelSchema,
|
||||
xss: XssStrategicIntelSchema,
|
||||
auth: AuthStrategicIntelSchema,
|
||||
ssrf: SsrfStrategicIntelSchema,
|
||||
authz: AuthzStrategicIntelSchema,
|
||||
};
|
||||
} as const;
|
||||
|
||||
// ============================================================================
|
||||
// EXPORTED TYPES
|
||||
// ============================================================================
|
||||
|
||||
export type Pattern = z.infer<typeof PatternSchema>;
|
||||
export type FindingsSummaryInput = z.infer<typeof FindingsSummaryInputSchema>;
|
||||
export type SafeVectorInput = z.infer<typeof SafeVectorInputSchema>;
|
||||
export type SafeVectorsInput = z.infer<typeof SafeVectorsInputSchema>;
|
||||
export type BlindSpotItem = z.infer<typeof BlindSpotItemSchema>;
|
||||
export type BlindSpotsInput = z.infer<typeof BlindSpotsInputSchema>;
|
||||
export type Pattern = Static<typeof PatternSchema>;
|
||||
export type FindingsSummaryInput = Static<typeof FindingsSummaryInputSchema>;
|
||||
export type SafeVectorInput = Static<typeof SafeVectorInputSchema>;
|
||||
export type SafeVectorsInput = Static<typeof SafeVectorsInputSchema>;
|
||||
export type BlindSpotItem = Static<typeof BlindSpotItemSchema>;
|
||||
export type BlindSpotsInput = Static<typeof BlindSpotsInputSchema>;
|
||||
|
||||
export type InjectionStrategicIntel = z.infer<typeof InjectionStrategicIntelSchema>;
|
||||
export type XssStrategicIntel = z.infer<typeof XssStrategicIntelSchema>;
|
||||
export type AuthStrategicIntel = z.infer<typeof AuthStrategicIntelSchema>;
|
||||
export type SsrfStrategicIntel = z.infer<typeof SsrfStrategicIntelSchema>;
|
||||
export type AuthzStrategicIntel = z.infer<typeof AuthzStrategicIntelSchema>;
|
||||
export type InjectionStrategicIntel = Static<typeof InjectionStrategicIntelSchema>;
|
||||
export type XssStrategicIntel = Static<typeof XssStrategicIntelSchema>;
|
||||
export type AuthStrategicIntel = Static<typeof AuthStrategicIntelSchema>;
|
||||
export type SsrfStrategicIntel = Static<typeof SsrfStrategicIntelSchema>;
|
||||
export type AuthzStrategicIntel = Static<typeof AuthzStrategicIntelSchema>;
|
||||
|
||||
// Discriminated by the agent class context — the renderer reads only the
|
||||
// sub-fields that apply to the active class.
|
||||
@@ -363,12 +313,14 @@ export type VulnCallStatus = Readonly<Record<VulnToolName, VulnToolStatus>>;
|
||||
interface ToolResult {
|
||||
[x: string]: unknown;
|
||||
content: Array<{ type: 'text'; text: string }>;
|
||||
details: Record<string, unknown>;
|
||||
isError: boolean;
|
||||
}
|
||||
|
||||
function createToolResult(response: { status: string; [key: string]: unknown }): ToolResult {
|
||||
return {
|
||||
content: [{ type: 'text', text: JSON.stringify(response, null, 2) }],
|
||||
content: [{ type: 'text' as const, text: JSON.stringify(response, null, 2) }],
|
||||
details: {},
|
||||
isError: response.status === 'error',
|
||||
};
|
||||
}
|
||||
@@ -382,11 +334,11 @@ function errorResult(message: string, errorType = 'ValidationError', retryable =
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SERVER FACTORY
|
||||
// COLLECTOR FACTORY
|
||||
// ============================================================================
|
||||
|
||||
export interface VulnCollectorServer {
|
||||
server: McpSdkServerConfigWithInstance;
|
||||
tools: ToolDefinition[];
|
||||
getAll(): VulnCollectorData;
|
||||
getCallStatus(): VulnCallStatus;
|
||||
}
|
||||
@@ -407,68 +359,76 @@ export function createVulnCollector(vulnClass: VulnClass): VulnCollectorServer {
|
||||
);
|
||||
}
|
||||
|
||||
const setFindingsSummary = tool(
|
||||
'set_findings_summary',
|
||||
'Record the executive summary headline and the dominant vulnerability patterns observed across ' +
|
||||
const setFindingsSummary = defineTool({
|
||||
name: 'set_findings_summary',
|
||||
label: 'Set Findings Summary',
|
||||
description:
|
||||
'Record the executive summary headline and the dominant vulnerability patterns observed across ' +
|
||||
'your findings. Call exactly once before terminating. Becomes Section 1 (key outcome) and ' +
|
||||
'Section 2 (patterns) of the rendered deliverable — this is the load-bearing emission for the ' +
|
||||
'narrative .md and is required. Duplicate calls return "already called" and are no-ops. Empty ' +
|
||||
'patterns array is acceptable (renders as "No dominant patterns identified") but key_outcome ' +
|
||||
'is always required.',
|
||||
FindingsSummaryInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: FindingsSummaryInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.findings_summary) return alreadyCalled('set_findings_summary');
|
||||
state.findings_summary = input;
|
||||
return successResult({ set: 'set_findings_summary' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const intelSchema = STRATEGIC_INTEL_SCHEMAS[vulnClass];
|
||||
const setStrategicIntelligence = tool(
|
||||
'set_strategic_intelligence',
|
||||
`Record the environmental and defensive intelligence relevant to exploiting the ${vulnClass} ` +
|
||||
const setStrategicIntelligence = defineTool({
|
||||
name: 'set_strategic_intelligence',
|
||||
label: 'Set Strategic Intelligence',
|
||||
description:
|
||||
`Record the environmental and defensive intelligence relevant to exploiting the ${vulnClass} ` +
|
||||
'findings. Call exactly once before terminating. Becomes Section 3 of the rendered deliverable ' +
|
||||
`and is the section the downstream exploit-${vulnClass} agent reads for strategic context. ` +
|
||||
'Required. Duplicate calls return "already called" and are no-ops. Write "Not applicable" as ' +
|
||||
'the field value when a sub-field does not apply to this run (rather than omitting).',
|
||||
intelSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: intelSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.strategic_intelligence) return alreadyCalled('set_strategic_intelligence');
|
||||
state.strategic_intelligence = input as unknown as StrategicIntelligenceInput;
|
||||
return successResult({ set: 'set_strategic_intelligence' });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setSafeVectors = tool(
|
||||
'set_safe_vectors',
|
||||
'Record the input vectors, components, or endpoints that were analyzed and confirmed to have ' +
|
||||
const setSafeVectors = defineTool({
|
||||
name: 'set_safe_vectors',
|
||||
label: 'Set Safe Vectors',
|
||||
description:
|
||||
'Record the input vectors, components, or endpoints that were analyzed and confirmed to have ' +
|
||||
'robust, context-appropriate defenses. Call exactly once before terminating. Becomes Section 4 ' +
|
||||
'of the rendered deliverable. Recommended (empty array is acceptable on runs where no vectors ' +
|
||||
'were validated as safe, but explicit emission is preferred). The renderer sorts by ' +
|
||||
'(subject, location) before rendering, so emission order does not affect output. Duplicate ' +
|
||||
'calls return "already called" and are no-ops.',
|
||||
SafeVectorsInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: SafeVectorsInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.safe_vectors) return alreadyCalled('set_safe_vectors');
|
||||
state.safe_vectors = input;
|
||||
return successResult({ set: 'set_safe_vectors', count: input.vectors.length });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
const setBlindSpots = tool(
|
||||
'set_blind_spots',
|
||||
'Record analysis constraints, untraced code paths, or other coverage gaps. Call exactly once ' +
|
||||
const setBlindSpots = defineTool({
|
||||
name: 'set_blind_spots',
|
||||
label: 'Set Blind Spots',
|
||||
description:
|
||||
'Record analysis constraints, untraced code paths, or other coverage gaps. Call exactly once ' +
|
||||
'before terminating. Becomes Section 5 of the rendered deliverable. Recommended (empty array ' +
|
||||
'is acceptable on high-coverage runs, but explicit emission is preferred — readers expect ' +
|
||||
'either documented gaps or an explicit "no gaps" signal). Duplicate calls return "already ' +
|
||||
'called" and are no-ops.',
|
||||
BlindSpotsInputSchema.shape,
|
||||
async (input): Promise<ToolResult> => {
|
||||
parameters: BlindSpotsInputSchema,
|
||||
execute: async (_toolCallId, input): Promise<ToolResult> => {
|
||||
if (state.blind_spots) return alreadyCalled('set_blind_spots');
|
||||
state.blind_spots = input;
|
||||
return successResult({ set: 'set_blind_spots', count: input.items.length });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
// set_blind_spots is withheld from classes without a Section 5 (auth, ssrf).
|
||||
const tools = [
|
||||
@@ -478,12 +438,6 @@ export function createVulnCollector(vulnClass: VulnClass): VulnCollectorServer {
|
||||
...(BLIND_SPOTS_CLASSES.has(vulnClass) ? [setBlindSpots] : []),
|
||||
];
|
||||
|
||||
const server: McpSdkServerConfigWithInstance = createSdkMcpServer({
|
||||
name: 'vuln-collector',
|
||||
version: '1.0.0',
|
||||
tools,
|
||||
});
|
||||
|
||||
function statusOf<K extends VulnToolName>(key: K): VulnToolStatus {
|
||||
const flagMap: Record<VulnToolName, unknown> = {
|
||||
set_findings_summary: state.findings_summary,
|
||||
@@ -495,7 +449,7 @@ export function createVulnCollector(vulnClass: VulnClass): VulnCollectorServer {
|
||||
}
|
||||
|
||||
return {
|
||||
server,
|
||||
tools: tools as ToolDefinition[],
|
||||
getAll: (): VulnCollectorData => ({
|
||||
...(state.findings_summary && { findings_summary: state.findings_summary }),
|
||||
...(state.strategic_intelligence && { strategic_intelligence: state.strategic_intelligence }),
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js';
|
||||
import { getOutputFormat, getQueueFilename } from '../ai/queue-schemas.js';
|
||||
import { createQueueSubmitTool, getQueueFilename } from '../ai/queue-schemas.js';
|
||||
import type { AuditSession } from '../audit/index.js';
|
||||
import { authStateFile } from '../audit/utils.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
@@ -54,7 +54,7 @@ export interface AgentExecutionInput {
|
||||
apiKey?: string | undefined;
|
||||
promptDir?: string | undefined;
|
||||
providerConfig?: import('../types/config.js').ProviderConfig | undefined;
|
||||
mcpServers?: Record<string, import('@anthropic-ai/claude-agent-sdk').McpServerConfig>;
|
||||
customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[];
|
||||
}
|
||||
|
||||
interface FailAgentOpts {
|
||||
@@ -109,7 +109,7 @@ export class AgentExecutionService {
|
||||
apiKey,
|
||||
promptDir,
|
||||
providerConfig,
|
||||
mcpServers,
|
||||
customTools,
|
||||
} = input;
|
||||
|
||||
// 1. Load config (pre-parsed configData → raw YAML → file path)
|
||||
@@ -163,8 +163,10 @@ export class AgentExecutionService {
|
||||
// 4. Start audit logging
|
||||
await auditSession.startAgent(agentName, prompt, attemptNumber);
|
||||
|
||||
// 5. Execute agent
|
||||
const outputFormat = getOutputFormat(agentName, distributedConfig?.exploit ?? true);
|
||||
// 5. Execute agent. Vuln agents get a submit tool that captures the structured
|
||||
// exploitation queue (pi has no JSON-schema output format).
|
||||
const submitTool = createQueueSubmitTool(agentName, distributedConfig?.exploit ?? true);
|
||||
const callerTools = [...(customTools ?? []), ...(submitTool ? [submitTool.tool] : [])];
|
||||
const result: ClaudePromptResult = await runClaudePrompt(
|
||||
prompt,
|
||||
repoPath,
|
||||
@@ -174,11 +176,10 @@ export class AgentExecutionService {
|
||||
auditSession,
|
||||
logger,
|
||||
AGENTS[agentName].modelTier,
|
||||
outputFormat,
|
||||
callerTools,
|
||||
apiKey,
|
||||
path.relative(repoPath, deliverablesPath),
|
||||
providerConfig,
|
||||
mcpServers,
|
||||
);
|
||||
|
||||
// 6. Spending cap check - defense-in-depth
|
||||
@@ -212,13 +213,17 @@ export class AgentExecutionService {
|
||||
});
|
||||
}
|
||||
|
||||
// 8. Write structured output to disk (vuln agents only)
|
||||
// 8. Write structured output to disk (vuln agents only) from the submit-tool capture
|
||||
const queueFilename = getQueueFilename(agentName);
|
||||
if (result.structuredOutput !== undefined && queueFilename) {
|
||||
await fs.ensureDir(deliverablesPath);
|
||||
const queuePath = path.join(deliverablesPath, queueFilename);
|
||||
await fs.writeFile(queuePath, JSON.stringify(result.structuredOutput, null, 2), 'utf8');
|
||||
logger.info(`Wrote structured output queue to ${queueFilename}`);
|
||||
if (submitTool && queueFilename) {
|
||||
const captured = submitTool.getCaptured();
|
||||
if (captured !== undefined) {
|
||||
result.structuredOutput = captured; // carry for the validation gate below
|
||||
await fs.ensureDir(deliverablesPath);
|
||||
const queuePath = path.join(deliverablesPath, queueFilename);
|
||||
await fs.writeFile(queuePath, JSON.stringify(captured, null, 2), 'utf8');
|
||||
logger.info(`Wrote structured output queue to ${queueFilename}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 9. Validate output
|
||||
|
||||
@@ -25,16 +25,23 @@ import fs from 'node:fs/promises';
|
||||
import http from 'node:http';
|
||||
import https from 'node:https';
|
||||
import net, { type LookupFunction } from 'node:net';
|
||||
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import os from 'node:os';
|
||||
import {
|
||||
AuthStorage,
|
||||
createAgentSession,
|
||||
ModelRegistry,
|
||||
SessionManager,
|
||||
SettingsManager,
|
||||
} from '@earendil-works/pi-coding-agent';
|
||||
import { glob } from 'zx';
|
||||
import { resolveModel } from '../ai/models.js';
|
||||
import { resolveModelId } from '../ai/models.js';
|
||||
import { parseConfig } from '../config-parser.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { Config, Rule } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { err, ok, type Result } from '../types/result.js';
|
||||
import { isRetryableError, PentestError } from './error-handling.js';
|
||||
import { err, isErr, ok, type Result } from '../types/result.js';
|
||||
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
|
||||
const TARGET_URL_TIMEOUT_MS = 10_000;
|
||||
|
||||
@@ -241,82 +248,106 @@ async function validateCodePathsExist(
|
||||
// === Credential Validation ===
|
||||
|
||||
/** Map SDK error type to a human-readable preflight PentestError. */
|
||||
function classifySdkError(sdkError: SDKAssistantMessageError, authType: string): Result<void, PentestError> {
|
||||
switch (sdkError) {
|
||||
case 'authentication_failed':
|
||||
return err(
|
||||
new PentestError(
|
||||
`Invalid ${authType}. Check your credentials in .env and try again.`,
|
||||
'config',
|
||||
false,
|
||||
{ authType, sdkError },
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
case 'billing_error':
|
||||
return err(
|
||||
new PentestError(
|
||||
`Anthropic account has a billing issue. Add credits or check your billing dashboard.`,
|
||||
'billing',
|
||||
true,
|
||||
{ authType, sdkError },
|
||||
ErrorCode.BILLING_ERROR,
|
||||
),
|
||||
);
|
||||
case 'rate_limit':
|
||||
return err(
|
||||
new PentestError(
|
||||
`Anthropic rate limit or spending cap reached. Wait a few minutes and try again.`,
|
||||
'billing',
|
||||
true,
|
||||
{ authType, sdkError },
|
||||
ErrorCode.BILLING_ERROR,
|
||||
),
|
||||
);
|
||||
case 'server_error':
|
||||
return err(
|
||||
new PentestError(`Anthropic API is temporarily unavailable. Try again shortly.`, 'network', true, {
|
||||
authType,
|
||||
sdkError,
|
||||
}),
|
||||
);
|
||||
case 'overloaded':
|
||||
return err(
|
||||
new PentestError(`Anthropic API is overloaded. Wait a few moments and try again.`, 'network', true, {
|
||||
authType,
|
||||
sdkError,
|
||||
}),
|
||||
);
|
||||
case 'model_not_found':
|
||||
return err(
|
||||
new PentestError(
|
||||
`Configured model is not available for this account. Check ANTHROPIC_*_MODEL in .env.`,
|
||||
'config',
|
||||
false,
|
||||
{ authType, sdkError },
|
||||
),
|
||||
);
|
||||
case 'oauth_org_not_allowed':
|
||||
return err(
|
||||
new PentestError(
|
||||
`This credential's organization is not allowed. Check your ${authType} in .env.`,
|
||||
'config',
|
||||
false,
|
||||
{ authType, sdkError },
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
default:
|
||||
return err(
|
||||
new PentestError(
|
||||
`${authType} validation failed unexpectedly. Check your credentials in .env.`,
|
||||
'config',
|
||||
false,
|
||||
{ authType, sdkError },
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
/** Classify a provider error message (thrown or from a failed turn) into a PentestError. */
|
||||
function classifyCredentialError(text: string, authType: string): Result<void, PentestError> {
|
||||
const lower = text.toLowerCase();
|
||||
if (matchesBillingTextPattern(text)) {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Anthropic account has a billing or rate-limit issue during ${authType} validation. Add credits or wait and retry.`,
|
||||
'billing',
|
||||
true,
|
||||
{ authType },
|
||||
ErrorCode.BILLING_ERROR,
|
||||
),
|
||||
);
|
||||
}
|
||||
if (/401|403|invalid[ _-]?api[ _-]?key|unauthorized|authentication|forbidden|not allowed|x-api-key/.test(lower)) {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Invalid ${authType}. Check your credentials in .env and try again.`,
|
||||
'config',
|
||||
false,
|
||||
{ authType },
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
}
|
||||
if (/model/.test(lower) && /not found|not available|unknown/.test(lower)) {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Configured model is not available for this account. Check ANTHROPIC_*_MODEL in .env.`,
|
||||
'config',
|
||||
false,
|
||||
{ authType },
|
||||
),
|
||||
);
|
||||
}
|
||||
if (
|
||||
/network|timeout|enotfound|econnrefused|fetch failed|getaddrinfo|socket|overloaded|unavailable|50\d/.test(lower)
|
||||
) {
|
||||
return err(
|
||||
new PentestError(`Anthropic API unreachable or temporarily unavailable. Try again shortly.`, 'network', true, {
|
||||
authType,
|
||||
}),
|
||||
);
|
||||
}
|
||||
return err(
|
||||
new PentestError(
|
||||
`${authType} validation failed: ${text.slice(0, 150)}`,
|
||||
'config',
|
||||
false,
|
||||
{ authType },
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/** Minimal pi session probe to validate credentials. An optional baseUrl overrides the endpoint. */
|
||||
async function probeCredentialsWithPi(authType: string, baseUrl?: string): Promise<Result<void, PentestError>> {
|
||||
const authStorage = AuthStorage.inMemory();
|
||||
const token =
|
||||
process.env.ANTHROPIC_AUTH_TOKEN ?? process.env.ANTHROPIC_API_KEY ?? process.env.CLAUDE_CODE_OAUTH_TOKEN;
|
||||
if (token) authStorage.setRuntimeApiKey('anthropic', token);
|
||||
|
||||
const baseModel = ModelRegistry.create(authStorage).find('anthropic', resolveModelId('small'));
|
||||
if (!baseModel) {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Model not found in pi registry: ${resolveModelId('small')}`,
|
||||
'config',
|
||||
false,
|
||||
{},
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
}
|
||||
const model = baseUrl ? { ...baseModel, baseUrl } : baseModel;
|
||||
|
||||
let errText: string | undefined;
|
||||
try {
|
||||
const { session } = await createAgentSession({
|
||||
cwd: os.tmpdir(),
|
||||
model,
|
||||
thinkingLevel: 'off',
|
||||
noTools: 'all',
|
||||
authStorage,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
settingsManager: SettingsManager.inMemory({ retry: { enabled: false }, compaction: { enabled: false } }),
|
||||
});
|
||||
session.subscribe((e) => {
|
||||
if (e.type === 'turn_end' && e.message.role === 'assistant' && e.message.stopReason === 'error') {
|
||||
errText = e.message.errorMessage ?? 'unknown provider error';
|
||||
}
|
||||
});
|
||||
await session.prompt('hi');
|
||||
session.dispose();
|
||||
} catch (error) {
|
||||
errText = error instanceof Error ? error.message : String(error);
|
||||
}
|
||||
|
||||
if (errText) return classifyCredentialError(errText, authType);
|
||||
return ok(undefined);
|
||||
}
|
||||
|
||||
/** Validate credentials via a minimal Claude Agent SDK query. */
|
||||
@@ -339,35 +370,14 @@ async function validateCredentials(
|
||||
if (apiKey) {
|
||||
process.env.ANTHROPIC_API_KEY = apiKey;
|
||||
}
|
||||
// 1. Custom base URL — validate endpoint is reachable via SDK query
|
||||
// 1. Custom base URL — validate the endpoint via a minimal pi session
|
||||
if (process.env.ANTHROPIC_BASE_URL && process.env.ANTHROPIC_AUTH_TOKEN) {
|
||||
const baseUrl = process.env.ANTHROPIC_BASE_URL;
|
||||
logger.info('Validating custom base URL');
|
||||
|
||||
try {
|
||||
for await (const message of query({ prompt: 'hi', options: { model: resolveModel('small'), maxTurns: 1 } })) {
|
||||
if (message.type === 'assistant' && message.error) {
|
||||
return classifySdkError(message.error, `custom endpoint (${baseUrl})`);
|
||||
}
|
||||
if (message.type === 'result') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('Custom base URL OK');
|
||||
return ok(undefined);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
return err(
|
||||
new PentestError(
|
||||
`Custom base URL unreachable: ${baseUrl} — ${message}`,
|
||||
'network',
|
||||
false,
|
||||
{ baseUrl },
|
||||
ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
}
|
||||
const probe = await probeCredentialsWithPi(`custom endpoint (${baseUrl})`, baseUrl);
|
||||
if (isErr(probe)) return probe;
|
||||
logger.info('Custom base URL OK');
|
||||
return ok(undefined);
|
||||
}
|
||||
|
||||
// 2. Bedrock mode — validate required AWS credentials are present
|
||||
@@ -459,38 +469,13 @@ async function validateCredentials(
|
||||
);
|
||||
}
|
||||
|
||||
// 5. Validate via SDK query
|
||||
// 5. Validate via a minimal pi session
|
||||
const authType = process.env.CLAUDE_CODE_OAUTH_TOKEN ? 'OAuth token' : 'API key';
|
||||
logger.info(`Validating ${authType} via SDK...`);
|
||||
|
||||
try {
|
||||
for await (const message of query({ prompt: 'hi', options: { model: resolveModel('small'), maxTurns: 1 } })) {
|
||||
if (message.type === 'assistant' && message.error) {
|
||||
return classifySdkError(message.error, authType);
|
||||
}
|
||||
if (message.type === 'result') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`${authType} OK`);
|
||||
return ok(undefined);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const retryable = isRetryableError(error instanceof Error ? error : new Error(message));
|
||||
|
||||
return err(
|
||||
new PentestError(
|
||||
retryable
|
||||
? `Failed to reach Anthropic API. Check your network connection.`
|
||||
: `${authType} validation failed: ${message}`,
|
||||
retryable ? 'network' : 'config',
|
||||
retryable,
|
||||
{ authType },
|
||||
retryable ? undefined : ErrorCode.AUTH_FAILED,
|
||||
),
|
||||
);
|
||||
}
|
||||
logger.info(`Validating ${authType} via pi...`);
|
||||
const probe = await probeCredentialsWithPi(authType);
|
||||
if (isErr(probe)) return probe;
|
||||
logger.info(`${authType} OK`);
|
||||
return ok(undefined);
|
||||
}
|
||||
|
||||
// === Target URL Validation ===
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
*/
|
||||
|
||||
import { readFile, rm } from 'node:fs/promises';
|
||||
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolDefinition } from '@earendil-works/pi-coding-agent';
|
||||
import { Type } from 'typebox';
|
||||
import { runClaudePrompt } from '../ai/claude-executor.js';
|
||||
import type { AuditSession } from '../audit/index.js';
|
||||
import { authStateFile } from '../audit/utils.js';
|
||||
@@ -33,26 +33,38 @@ function isAuthFailurePoint(v: unknown): v is AuthFailurePoint {
|
||||
return typeof v === 'string' && (FAILURE_POINTS as readonly string[]).includes(v);
|
||||
}
|
||||
|
||||
// NOTE: SDK's AJV validator expects draft-07; Zod defaults to draft-2020-12,
|
||||
// which causes the SDK to silently skip structured output.
|
||||
const AuthValidationSchema = z.object({
|
||||
login_success: z.boolean(),
|
||||
failure_point: z.enum(FAILURE_POINTS).optional(),
|
||||
failure_detail: z
|
||||
.string()
|
||||
.max(250)
|
||||
.optional()
|
||||
.describe(
|
||||
'Free-form 1-2 sentence diagnostic of what the page showed (error messages, page state) when login failed. Required when login_success is false. Mask any sensitive values.',
|
||||
),
|
||||
});
|
||||
interface AuthValidationVerdict {
|
||||
login_success: boolean;
|
||||
failure_point?: AuthFailurePoint;
|
||||
failure_detail?: string;
|
||||
}
|
||||
|
||||
type AuthValidationVerdict = z.infer<typeof AuthValidationSchema>;
|
||||
|
||||
const VALIDATION_SCHEMA: JsonSchemaOutputFormat = {
|
||||
type: 'json_schema',
|
||||
schema: z.toJSONSchema(AuthValidationSchema, { target: 'draft-07' }) as Record<string, unknown>,
|
||||
};
|
||||
/** Submit tool capturing the login verdict (pi has no JSON-schema output format). */
|
||||
function createAuthSubmitTool(): { tool: ToolDefinition; getCaptured: () => AuthValidationVerdict | undefined } {
|
||||
let captured: AuthValidationVerdict | undefined;
|
||||
const tool = defineTool({
|
||||
name: 'submit_auth_result',
|
||||
label: 'Submit Auth Result',
|
||||
description: 'Report the login outcome. Call exactly once when the login attempt has concluded.',
|
||||
parameters: Type.Object({
|
||||
login_success: Type.Boolean(),
|
||||
failure_point: Type.Optional(
|
||||
Type.Union([Type.Literal('username_or_password'), Type.Literal('totp_secret'), Type.Literal('out_of_band')]),
|
||||
),
|
||||
failure_detail: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
'Free-form 1-2 sentence diagnostic of what the page showed (error messages, page state) when login failed. Required when login_success is false. Mask any sensitive values.',
|
||||
}),
|
||||
),
|
||||
}),
|
||||
execute: async (_toolCallId, params) => {
|
||||
captured = params as AuthValidationVerdict;
|
||||
return { content: [{ type: 'text' as const, text: 'Auth result recorded.' }], details: {} };
|
||||
},
|
||||
});
|
||||
return { tool, getCaptured: () => captured };
|
||||
}
|
||||
|
||||
const AGENT_NAME = 'validate-authentication';
|
||||
|
||||
@@ -110,6 +122,7 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise<
|
||||
await auditSession.startAgent(AGENT_NAME, prompt, attemptNumber);
|
||||
const startTime = Date.now();
|
||||
|
||||
const submit = createAuthSubmitTool();
|
||||
const result = await runClaudePrompt(
|
||||
prompt,
|
||||
repoPath,
|
||||
@@ -119,11 +132,13 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise<
|
||||
auditSession,
|
||||
logger,
|
||||
'medium',
|
||||
VALIDATION_SCHEMA,
|
||||
[submit.tool],
|
||||
apiKey,
|
||||
deliverablesSubdir,
|
||||
providerConfig,
|
||||
);
|
||||
const verdict = submit.getCaptured();
|
||||
if (verdict !== undefined) result.structuredOutput = verdict;
|
||||
|
||||
let classification = classifyResult(result, authentication);
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
|
||||
import { writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js';
|
||||
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
|
||||
import { writeCodePathPermissionConfig } from '../ai/settings-writer.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
import { authStateFile, generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
|
||||
@@ -137,7 +137,7 @@ function buildContainerConfig(input: ActivityInput): ContainerConfig {
|
||||
async function runAgentActivity(
|
||||
agentName: AgentName,
|
||||
input: ActivityInput,
|
||||
mcpServers?: Record<string, import('@anthropic-ai/claude-agent-sdk').McpServerConfig>,
|
||||
customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[],
|
||||
): Promise<AgentMetrics> {
|
||||
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
|
||||
|
||||
@@ -192,7 +192,7 @@ async function runAgentActivity(
|
||||
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
|
||||
...(input.promptDir !== undefined && { promptDir: input.promptDir }),
|
||||
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
|
||||
...(mcpServers && { mcpServers }),
|
||||
...(customTools && { customTools }),
|
||||
},
|
||||
auditSession,
|
||||
logger,
|
||||
@@ -256,7 +256,7 @@ export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetri
|
||||
const { renderPreRecon } = await import('../services/pre-recon-renderer.js');
|
||||
|
||||
const collector = createPreReconCollectorServer();
|
||||
const metrics = await runAgentActivity('pre-recon', input, { 'pre-recon-collector': collector.server });
|
||||
const metrics = await runAgentActivity('pre-recon', input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
@@ -285,7 +285,7 @@ export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics>
|
||||
const { renderRecon } = await import('../services/recon-renderer.js');
|
||||
|
||||
const collector = createReconCollectorServer();
|
||||
const metrics = await runAgentActivity('recon', input, { 'recon-collector': collector.server });
|
||||
const metrics = await runAgentActivity('recon', input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
@@ -318,7 +318,7 @@ async function runVulnAgentWithCollector(
|
||||
const { renderVulnDeliverable } = await import('../services/vuln-renderer.js');
|
||||
|
||||
const collector = createVulnCollector(vulnClass);
|
||||
const metrics = await runAgentActivity(agentName, input, { 'vuln-collector': collector.server });
|
||||
const metrics = await runAgentActivity(agentName, input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
@@ -399,7 +399,7 @@ async function runExploitAgentWithCollector(
|
||||
const { validIds, idToType } = await readExploitQueue(queuePath);
|
||||
|
||||
const collector = createExploitCollector({ vulnClass, validIds });
|
||||
const metrics = await runAgentActivity(agentName, input, { 'exploit-collector': collector.server });
|
||||
const metrics = await runAgentActivity(agentName, input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
@@ -661,12 +661,13 @@ export async function syncPlaywrightStealthConfig(input: ActivityInput): Promise
|
||||
}
|
||||
|
||||
/**
|
||||
* Sync code_path avoid rules into Claude's user-scope settings.json so the
|
||||
* SDK enforces them at the tool layer for every agent in this run.
|
||||
* Sync code_path avoid rules into the @gotgenes/pi-permission-system global config
|
||||
* so pi enforces them at the tool layer for every agent in this run. The executor
|
||||
* loads the extension when this config is present (see claude-executor).
|
||||
*
|
||||
* Runs once per workflow before any agent fires. Config is fixed for the
|
||||
* lifetime of the workflow, so writing once avoids the parallel-agent race
|
||||
* on the global ~/.claude/settings.json file.
|
||||
* Runs once per workflow before any analysis agent fires. Config is fixed for the
|
||||
* lifetime of the workflow, so writing once avoids a parallel-agent race on the
|
||||
* global config file.
|
||||
*/
|
||||
export async function syncCodePathDenyRules(input: ActivityInput): Promise<void> {
|
||||
const logger = createActivityLogger();
|
||||
@@ -680,8 +681,12 @@ export async function syncCodePathDenyRules(input: ActivityInput): Promise<void>
|
||||
|
||||
const config = configResult.value;
|
||||
const denyCount = (config?.avoid ?? []).filter((r) => r.type === 'code_path').length;
|
||||
await writeUserSettingsForCodePathAvoids(config);
|
||||
logger.info(`Synced code_path deny rules to user settings (${denyCount} entries)`);
|
||||
await writeCodePathPermissionConfig(config);
|
||||
logger.info(
|
||||
denyCount > 0
|
||||
? `Synced ${denyCount} code_path deny rule(s) to the pi-permission-system config`
|
||||
: 'No code_path deny rules; pi-permission-system config cleared',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -94,8 +94,9 @@ export interface DistributedConfig {
|
||||
/**
|
||||
* LLM provider configuration for multi-provider support.
|
||||
*
|
||||
* Maps to SDK environment variables at execution time. When providerType
|
||||
* is omitted or 'anthropic_api', falls back to apiKey + ANTHROPIC_API_KEY.
|
||||
* Resolved by the pi model/provider layer at execution time. Recognized
|
||||
* providerType values: 'bedrock', 'vertex', 'custom_base_url', 'anthropic_api'.
|
||||
* When omitted or 'anthropic_api', falls back to apiKey + ANTHROPIC_API_KEY.
|
||||
*/
|
||||
export interface ProviderConfig {
|
||||
readonly providerType?: string;
|
||||
|
||||
Generated
+1254
-145
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,2 @@
|
||||
packages:
|
||||
- "apps/*"
|
||||
|
||||
catalog:
|
||||
"@anthropic-ai/claude-agent-sdk": ^0.3.173
|
||||
|
||||
Reference in New Issue
Block a user