From 32c01a39b1245c1bc1ce1fac6bf264e3d7c70b07 Mon Sep 17 00:00:00 2001 From: ezl-keygraph Date: Thu, 21 May 2026 00:23:46 +0530 Subject: [PATCH] feat(preflight): block cloud metadata range in target URL check (#337) * chore(docker): pin temporal image to 1.7.0 * feat(preflight): block link-local metadata range in target URL check * style: apply biome formatting and import sorting --- apps/cli/infra/compose.yml | 2 +- apps/worker/src/ai/claude-executor.ts | 3 +- apps/worker/src/ai/queue-schemas.ts | 127 +++++++++++------- .../src/interfaces/checkpoint-provider.ts | 13 +- .../src/interfaces/findings-provider.ts | 6 +- apps/worker/src/interfaces/index.ts | 2 +- apps/worker/src/services/agent-execution.ts | 23 +++- apps/worker/src/services/config-loader.ts | 8 +- apps/worker/src/services/container.ts | 6 +- apps/worker/src/services/findings-renderer.ts | 18 +-- apps/worker/src/services/index.ts | 5 +- apps/worker/src/services/preflight.ts | 91 ++++++++++--- apps/worker/src/services/reporting.ts | 5 +- apps/worker/src/temporal/pipeline.ts | 4 +- docker-compose.yml | 2 +- 15 files changed, 202 insertions(+), 113 deletions(-) diff --git a/apps/cli/infra/compose.yml b/apps/cli/infra/compose.yml index 2ff839d..68b2c02 100644 --- a/apps/cli/infra/compose.yml +++ b/apps/cli/infra/compose.yml @@ -4,7 +4,7 @@ networks: services: temporal: - image: temporalio/temporal:latest + image: temporalio/temporal:1.7.0 container_name: shannon-temporal command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"] ports: diff --git a/apps/worker/src/ai/claude-executor.ts b/apps/worker/src/ai/claude-executor.ts index 497e201..b189a88 100644 --- a/apps/worker/src/ai/claude-executor.ts +++ b/apps/worker/src/ai/claude-executor.ts @@ -177,7 +177,8 @@ export async function runClaudePrompt( sdkEnv.CLAUDE_CODE_USE_VERTEX = '1'; if (providerConfig.gcpRegion) sdkEnv.CLOUD_ML_REGION = providerConfig.gcpRegion; if (providerConfig.gcpProjectId) sdkEnv.ANTHROPIC_VERTEX_PROJECT_ID = providerConfig.gcpProjectId; - if (providerConfig.gcpCredentialsPath) sdkEnv.GOOGLE_APPLICATION_CREDENTIALS = providerConfig.gcpCredentialsPath; + if (providerConfig.gcpCredentialsPath) + sdkEnv.GOOGLE_APPLICATION_CREDENTIALS = providerConfig.gcpCredentialsPath; break; case 'litellm_router': if (providerConfig.baseUrl) sdkEnv.ANTHROPIC_BASE_URL = providerConfig.baseUrl; diff --git a/apps/worker/src/ai/queue-schemas.ts b/apps/worker/src/ai/queue-schemas.ts index 311cfdd..f29b8a7 100644 --- a/apps/worker/src/ai/queue-schemas.ts +++ b/apps/worker/src/ai/queue-schemas.ts @@ -17,8 +17,7 @@ import type { AgentName } from '../types/agents.js'; // === Common Fields === -const ANALYSIS_NOTES_DESCRIPTION = - 'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.'; +const ANALYSIS_NOTES_DESCRIPTION = 'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.'; function notesField(exploit: boolean) { const f = z.string().optional(); @@ -114,53 +113,83 @@ function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat { function buildOutputFormats(exploit: boolean): Partial> { const base = makeBase(exploit); return { - 'injection-vuln': toOutputFormat(z.object({ vulnerabilities: z.array(base.extend({ - source: z.string().optional(), - combined_sources: z.string().optional(), - path: z.string().optional(), - sink_call: z.string().optional(), - slot_type: z.string().optional(), - sanitization_observed: z.string().optional(), - concat_occurrences: z.string().optional(), - verdict: z.string().optional(), - mismatch_reason: z.string().optional(), - witness_payload: z.string().optional(), - })) })), - 'xss-vuln': toOutputFormat(z.object({ vulnerabilities: z.array(base.extend({ - source: z.string().optional(), - source_detail: z.string().optional(), - path: z.string().optional(), - sink_function: z.string().optional(), - render_context: z.string().optional(), - encoding_observed: z.string().optional(), - verdict: z.string().optional(), - mismatch_reason: z.string().optional(), - witness_payload: z.string().optional(), - })) })), - 'auth-vuln': toOutputFormat(z.object({ vulnerabilities: z.array(base.extend({ - source_endpoint: z.string().optional(), - vulnerable_code_location: z.string().optional(), - missing_defense: z.string().optional(), - exploitation_hypothesis: z.string().optional(), - suggested_exploit_technique: z.string().optional(), - })) })), - 'ssrf-vuln': toOutputFormat(z.object({ vulnerabilities: z.array(base.extend({ - source_endpoint: z.string().optional(), - vulnerable_parameter: z.string().optional(), - vulnerable_code_location: z.string().optional(), - missing_defense: z.string().optional(), - exploitation_hypothesis: z.string().optional(), - suggested_exploit_technique: z.string().optional(), - })) })), - 'authz-vuln': toOutputFormat(z.object({ vulnerabilities: z.array(base.extend({ - endpoint: z.string().optional(), - vulnerable_code_location: z.string().optional(), - role_context: z.string().optional(), - guard_evidence: z.string().optional(), - side_effect: z.string().optional(), - reason: z.string().optional(), - minimal_witness: z.string().optional(), - })) })), + 'injection-vuln': toOutputFormat( + z.object({ + vulnerabilities: z.array( + base.extend({ + source: z.string().optional(), + combined_sources: z.string().optional(), + path: z.string().optional(), + sink_call: z.string().optional(), + slot_type: z.string().optional(), + sanitization_observed: z.string().optional(), + concat_occurrences: z.string().optional(), + verdict: z.string().optional(), + mismatch_reason: z.string().optional(), + witness_payload: z.string().optional(), + }), + ), + }), + ), + 'xss-vuln': toOutputFormat( + z.object({ + vulnerabilities: z.array( + base.extend({ + source: z.string().optional(), + source_detail: z.string().optional(), + path: z.string().optional(), + sink_function: z.string().optional(), + render_context: z.string().optional(), + encoding_observed: z.string().optional(), + verdict: z.string().optional(), + mismatch_reason: z.string().optional(), + witness_payload: z.string().optional(), + }), + ), + }), + ), + 'auth-vuln': toOutputFormat( + z.object({ + vulnerabilities: z.array( + base.extend({ + source_endpoint: z.string().optional(), + vulnerable_code_location: z.string().optional(), + missing_defense: z.string().optional(), + exploitation_hypothesis: z.string().optional(), + suggested_exploit_technique: z.string().optional(), + }), + ), + }), + ), + 'ssrf-vuln': toOutputFormat( + z.object({ + vulnerabilities: z.array( + base.extend({ + source_endpoint: z.string().optional(), + vulnerable_parameter: z.string().optional(), + vulnerable_code_location: z.string().optional(), + missing_defense: z.string().optional(), + exploitation_hypothesis: z.string().optional(), + suggested_exploit_technique: z.string().optional(), + }), + ), + }), + ), + 'authz-vuln': toOutputFormat( + z.object({ + vulnerabilities: z.array( + base.extend({ + endpoint: z.string().optional(), + vulnerable_code_location: z.string().optional(), + role_context: z.string().optional(), + guard_evidence: z.string().optional(), + side_effect: z.string().optional(), + reason: z.string().optional(), + minimal_witness: z.string().optional(), + }), + ), + }), + ), }; } diff --git a/apps/worker/src/interfaces/checkpoint-provider.ts b/apps/worker/src/interfaces/checkpoint-provider.ts index a066043..2fdb067 100644 --- a/apps/worker/src/interfaces/checkpoint-provider.ts +++ b/apps/worker/src/interfaces/checkpoint-provider.ts @@ -30,22 +30,13 @@ export interface CheckpointProvider { * Return { skip: true, metrics } to skip the agent (e.g., output files already exist). * Return { skip: false } to run normally. */ - shouldSkipAgent( - agentName: string, - repoPath: string, - deliverablesSubdir: string, - ): Promise; + shouldSkipAgent(agentName: string, repoPath: string, deliverablesSubdir: string): Promise; /** * Called after an agent activity succeeds. * Receives pipeline state and optional file context for artifact persistence. */ - onAgentComplete( - agentName: string, - phase: string, - state: PipelineState, - context?: CheckpointContext, - ): Promise; + onAgentComplete(agentName: string, phase: string, state: PipelineState, context?: CheckpointContext): Promise; } /** Default no-op implementation — no external checkpointing. */ diff --git a/apps/worker/src/interfaces/findings-provider.ts b/apps/worker/src/interfaces/findings-provider.ts index 1b351dc..5770b0b 100644 --- a/apps/worker/src/interfaces/findings-provider.ts +++ b/apps/worker/src/interfaces/findings-provider.ts @@ -11,11 +11,7 @@ import type { ActivityInput } from '../temporal/activities.js'; import type { VulnType } from '../types/agents.js'; export interface FindingsProvider { - mergeFindingsIntoQueue( - repoPath: string, - vulnType: VulnType, - input: ActivityInput, - ): Promise<{ mergedCount: number }>; + mergeFindingsIntoQueue(repoPath: string, vulnType: VulnType, input: ActivityInput): Promise<{ mergedCount: number }>; } /** Default no-op implementation — no external findings to merge. */ diff --git a/apps/worker/src/interfaces/index.ts b/apps/worker/src/interfaces/index.ts index fac478f..7825c29 100644 --- a/apps/worker/src/interfaces/index.ts +++ b/apps/worker/src/interfaces/index.ts @@ -5,7 +5,7 @@ * Consumers can provide alternate implementations via the DI container. */ -export type { CheckpointProvider, CheckpointContext, SkipDecision } from './checkpoint-provider.js'; +export type { CheckpointContext, CheckpointProvider, SkipDecision } from './checkpoint-provider.js'; export { NoOpCheckpointProvider } from './checkpoint-provider.js'; export type { FindingsProvider } from './findings-provider.js'; export { NoOpFindingsProvider } from './findings-provider.js'; diff --git a/apps/worker/src/services/agent-execution.ts b/apps/worker/src/services/agent-execution.ts index cac686c..ba5fdd0 100644 --- a/apps/worker/src/services/agent-execution.ts +++ b/apps/worker/src/services/agent-execution.ts @@ -95,7 +95,19 @@ export class AgentExecutionService { auditSession: AuditSession, logger: ActivityLogger, ): Promise> { - const { webUrl, repoPath, deliverablesPath, configPath, configData, configYAML, pipelineTestingMode = false, attemptNumber, apiKey, promptDir, providerConfig } = input; + const { + webUrl, + repoPath, + deliverablesPath, + configPath, + configData, + configYAML, + pipelineTestingMode = false, + attemptNumber, + apiKey, + promptDir, + providerConfig, + } = input; // 1. Load config (pre-parsed configData → raw YAML → file path) const configResult = await this.configLoader.loadOptional(configPath, configData, configYAML); @@ -108,7 +120,14 @@ export class AgentExecutionService { const promptTemplate = AGENTS[agentName].promptTemplate; let prompt: string; try { - prompt = await loadPrompt(promptTemplate, { webUrl, repoPath }, distributedConfig, pipelineTestingMode, logger, promptDir); + prompt = await loadPrompt( + promptTemplate, + { webUrl, repoPath }, + distributedConfig, + pipelineTestingMode, + logger, + promptDir, + ); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); return err( diff --git a/apps/worker/src/services/config-loader.ts b/apps/worker/src/services/config-loader.ts index 7f2200c..c38e723 100644 --- a/apps/worker/src/services/config-loader.ts +++ b/apps/worker/src/services/config-loader.ts @@ -81,7 +81,13 @@ export class ConfigLoaderService { } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); return err( - new PentestError(`Failed to parse config YAML: ${errorMessage}`, 'config', false, { originalError: errorMessage }, ErrorCode.CONFIG_PARSE_ERROR), + new PentestError( + `Failed to parse config YAML: ${errorMessage}`, + 'config', + false, + { originalError: errorMessage }, + ErrorCode.CONFIG_PARSE_ERROR, + ), ); } } diff --git a/apps/worker/src/services/container.ts b/apps/worker/src/services/container.ts index a9586b3..7f0de9c 100644 --- a/apps/worker/src/services/container.ts +++ b/apps/worker/src/services/container.ts @@ -99,11 +99,7 @@ const DEFAULT_CONFIG: ContainerConfig = { * setContainerFactory() at worker startup to inject custom provider * implementations into every container. */ -type ContainerFactory = ( - workflowId: string, - sessionMetadata: SessionMetadata, - config: ContainerConfig, -) => Container; +type ContainerFactory = (workflowId: string, sessionMetadata: SessionMetadata, config: ContainerConfig) => Container; let containerFactory: ContainerFactory = (_workflowId, sessionMetadata, config) => new Container({ sessionMetadata, config }); diff --git a/apps/worker/src/services/findings-renderer.ts b/apps/worker/src/services/findings-renderer.ts index 88189b2..6dfb5cc 100644 --- a/apps/worker/src/services/findings-renderer.ts +++ b/apps/worker/src/services/findings-renderer.ts @@ -17,13 +17,7 @@ */ import { fs, path } from 'zx'; -import type { - AuthFinding, - AuthzFinding, - InjectionFinding, - SsrfFinding, - XssFinding, -} from '../ai/queue-schemas.js'; +import type { AuthFinding, AuthzFinding, InjectionFinding, SsrfFinding, XssFinding } from '../ai/queue-schemas.js'; import { deliverablesDir } from '../paths.js'; import type { ActivityLogger } from '../types/activity-logger.js'; import type { VulnClass } from '../types/config.js'; @@ -125,10 +119,7 @@ function renderInjectionEntry(e: InjectionFinding): string { return buildEntry( e.ID, e.vulnerability_type, - [ - summaryRow('Vulnerable location', location), - summaryRow('Overview', e.mismatch_reason), - ], + [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)], e.notes, ); } @@ -138,10 +129,7 @@ function renderXssEntry(e: XssFinding): string { return buildEntry( e.ID, e.vulnerability_type, - [ - summaryRow('Vulnerable location', location), - summaryRow('Overview', e.mismatch_reason), - ], + [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)], e.notes, ); } diff --git a/apps/worker/src/services/index.ts b/apps/worker/src/services/index.ts index b864d27..48a0019 100644 --- a/apps/worker/src/services/index.ts +++ b/apps/worker/src/services/index.ts @@ -11,14 +11,13 @@ * Services are pure domain logic with no Temporal dependencies. */ +export type { ClaudePromptResult } from '../ai/claude-executor.js'; +export { runClaudePrompt } from '../ai/claude-executor.js'; export type { AgentExecutionInput } from './agent-execution.js'; export { AgentExecutionService } from './agent-execution.js'; - export { ConfigLoaderService } from './config-loader.js'; export type { ContainerDependencies } from './container.js'; export { Container, getContainer, getOrCreateContainer, removeContainer, setContainerFactory } from './container.js'; export { ExploitationCheckerService } from './exploitation-checker.js'; export { loadPrompt } from './prompt-manager.js'; export { assembleFinalReport, injectModelIntoReport } from './reporting.js'; -export type { ClaudePromptResult } from '../ai/claude-executor.js'; -export { runClaudePrompt } from '../ai/claude-executor.js'; diff --git a/apps/worker/src/services/preflight.ts b/apps/worker/src/services/preflight.ts index afbf7d7..1c0f2f5 100644 --- a/apps/worker/src/services/preflight.ts +++ b/apps/worker/src/services/preflight.ts @@ -16,13 +16,15 @@ * 2. Config file parses and validates (if provided) * 3. code_path rules match real entries in the repo (filesystem only) * 4. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI) - * 5. Target URL is reachable from the container (DNS + HTTP) + * 5. Target URL resolves, is not link-local (cloud metadata), and is reachable (DNS + HTTP) */ +import type { LookupAddress } from 'node:dns'; import { lookup } from 'node:dns/promises'; import fs from 'node:fs/promises'; import http from 'node:http'; import https from 'node:https'; +import net, { type LookupFunction } from 'node:net'; import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk'; import { query } from '@anthropic-ai/claude-agent-sdk'; import { glob } from 'zx'; @@ -40,9 +42,47 @@ function isLoopbackAddress(address: string): boolean { return address === '127.0.0.1' || address === '::1' || address === '0.0.0.0'; } +// 169.254.0.0/16 hosts the cloud metadata service. RFC1918 and loopback are +// intentionally allowed — scanning local targets is a supported Shannon use case. +const metadataBlockList = new net.BlockList(); +metadataBlockList.addSubnet('169.254.0.0', 16, 'ipv4'); + +function isBlockedAddress(address: string): boolean { + switch (net.isIP(address)) { + case 4: + return metadataBlockList.check(address, 'ipv4'); + case 6: + return metadataBlockList.check(address, 'ipv6'); + default: + return false; + } +} + +/** DNS lookup pinned to already-validated `addresses`, so the socket cannot be re-pointed after validation (DNS rebinding). */ +function pinnedLookup(addresses: LookupAddress[]): LookupFunction { + return (hostname, options, callback) => { + const matching = options.family ? addresses.filter((a) => a.family === options.family) : addresses; + const pool = matching.length > 0 ? matching : addresses; + if (options.all) { + callback(null, pool); + return; + } + const first = pool[0]; + if (!first) { + callback(new Error(`no resolved address for ${hostname}`), '', 0); + return; + } + callback(null, first.address, first.family); + }; +} + // === Repository Validation === -async function validateRepo(repoPath: string, logger: ActivityLogger, skipGitCheck?: boolean): Promise> { +async function validateRepo( + repoPath: string, + logger: ActivityLogger, + skipGitCheck?: boolean, +): Promise> { logger.info('Checking repository path...', { repoPath }); // 1. Check repo directory exists @@ -254,11 +294,17 @@ function classifySdkError(sdkError: SDKAssistantMessageError, authType: string): } /** Validate credentials via a minimal Claude Agent SDK query. */ -async function validateCredentials(logger: ActivityLogger, apiKey?: string, providerConfig?: import('../types/config.js').ProviderConfig): Promise> { +async function validateCredentials( + logger: ActivityLogger, + apiKey?: string, + providerConfig?: import('../types/config.js').ProviderConfig, +): Promise> { // 0. If providerConfig is present, credentials are managed by the caller. // The executor will map providerConfig directly to sdkEnv — no process.env needed. if (providerConfig) { - logger.info(`Provider config present (type: ${providerConfig.providerType || 'anthropic_api'}) — skipping env-based credential validation`); + logger.info( + `Provider config present (type: ${providerConfig.providerType || 'anthropic_api'}) — skipping env-based credential validation`, + ); return ok(undefined); } @@ -424,7 +470,7 @@ async function validateCredentials(logger: ActivityLogger, apiKey?: string, prov // === Target URL Validation === /** HTTP HEAD with TLS verification disabled — we check reachability, not certificate validity. */ -function httpHead(url: string, timeoutMs: number): Promise { +function httpHead(url: string, timeoutMs: number, addresses: LookupAddress[]): Promise { return new Promise((resolve, reject) => { const parsed = new URL(url); const isHttps = parsed.protocol === 'https:'; @@ -435,6 +481,7 @@ function httpHead(url: string, timeoutMs: number): Promise { { method: 'HEAD', timeout: timeoutMs, + lookup: pinnedLookup(addresses), ...(isHttps && { rejectUnauthorized: false }), }, (res) => { @@ -472,12 +519,11 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro ); } - // 2. DNS lookup — detect loopback addresses early for a better hint + // 2. Resolve all records once — reused (pinned) for the connection below. const hostname = parsed.hostname; - let resolvedAddress: string | undefined; + let addresses: LookupAddress[]; try { - const result = await lookup(hostname); - resolvedAddress = result.address; + addresses = await lookup(hostname, { all: true }); } catch { return err( new PentestError( @@ -490,25 +536,40 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro ); } - // 3. HTTP reachability check + // 3. Reject the link-local metadata range (169.254.0.0/16). + const blocked = addresses.find((entry) => isBlockedAddress(entry.address)); + if (blocked) { + return err( + new PentestError( + `Target URL ${targetUrl} resolves to ${blocked.address}, a link-local address ` + + `(169.254.0.0/16). This range hosts the cloud instance metadata service and cannot be scanned.`, + 'config', + false, + { targetUrl, hostname, address: blocked.address }, + ErrorCode.TARGET_UNREACHABLE, + ), + ); + } + + // 4. HTTP reachability check (socket pinned to the resolved addresses). try { - await httpHead(targetUrl, TARGET_URL_TIMEOUT_MS); + await httpHead(targetUrl, TARGET_URL_TIMEOUT_MS, addresses); logger.info('Target URL OK'); return ok(undefined); } catch (error) { - const isLoopback = isLoopbackAddress(resolvedAddress); const detail = error instanceof Error ? error.message : String(error); + const isLoopback = addresses.some((entry) => isLoopbackAddress(entry.address)); if (isLoopback) { const suggestion = targetUrl.replace(hostname, 'host.docker.internal'); return err( new PentestError( - `Target URL ${targetUrl} resolves to ${resolvedAddress} (loopback) and is not reachable. ` + + `Target URL ${targetUrl} resolves to a loopback address and is not reachable. ` + `For local services, use host.docker.internal instead of ${hostname} (e.g., ${suggestion})`, 'network', false, - { targetUrl, resolvedAddress, hostname }, + { targetUrl, hostname }, ErrorCode.TARGET_UNREACHABLE, ), ); @@ -519,7 +580,7 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro `Target URL ${targetUrl} is not reachable: ${detail}`, 'network', false, - { targetUrl, resolvedAddress }, + { targetUrl }, ErrorCode.TARGET_UNREACHABLE, ), ); diff --git a/apps/worker/src/services/reporting.ts b/apps/worker/src/services/reporting.ts index 574f25c..8919a0d 100644 --- a/apps/worker/src/services/reporting.ts +++ b/apps/worker/src/services/reporting.ts @@ -129,7 +129,10 @@ export async function injectModelIntoReport( logger.info(`Injecting model info into report: ${modelStr}`); // 3. Read the final report - const reportPath = path.join(deliverablesDir(repoPath, deliverablesSubdir), 'comprehensive_security_assessment_report.md'); + const reportPath = path.join( + deliverablesDir(repoPath, deliverablesSubdir), + 'comprehensive_security_assessment_report.md', + ); if (!(await fs.pathExists(reportPath))) { logger.warn('Final report not found, skipping model injection'); diff --git a/apps/worker/src/temporal/pipeline.ts b/apps/worker/src/temporal/pipeline.ts index 7f74aa6..911ffc3 100644 --- a/apps/worker/src/temporal/pipeline.ts +++ b/apps/worker/src/temporal/pipeline.ts @@ -5,7 +5,7 @@ * within their own workflow context. */ -export { pentestPipeline } from './workflows.js'; +export type { ActivityInput } from './activities.js'; export type { AgentMetrics, PipelineInput, @@ -14,4 +14,4 @@ export type { ResumeState, VulnExploitPipelineResult, } from './shared.js'; -export type { ActivityInput } from './activities.js'; +export { pentestPipeline } from './workflows.js'; diff --git a/docker-compose.yml b/docker-compose.yml index bf86b69..f791e67 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ networks: services: temporal: - image: temporalio/temporal:latest + image: temporalio/temporal:1.7.0 container_name: shannon-temporal command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"] ports: