diff --git a/CLAUDE.md b/CLAUDE.md index 665c15b..00bea06 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -144,8 +144,8 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig 5. **Reporting** (`report`) — Executive-level security report ### Supporting Systems -- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters. Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`) -- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts` +- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are written into `~/.claude/settings.json` `permissions.deny` (`Read`/`Edit`) once per workflow by `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` so the SDK enforces them at the tool layer even in `bypassPermissions` mode. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`) +- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). 
Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: "false"`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop - **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth - **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. 
WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive - **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`) @@ -227,7 +227,7 @@ Comments must be **timeless** — no references to this conversation, refactorin **Entry Points:** `apps/worker/src/temporal/workflows.ts`, `apps/worker/src/temporal/activities.ts`, `apps/worker/src/temporal/worker.ts` -**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/config-parser.ts`, `apps/worker/src/services/`, `apps/worker/src/audit/` +**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to `~/.claude/settings.json`), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/` **Config:** `docker-compose.yml`, `apps/cli/infra/compose.yml`, `apps/worker/configs/`, `apps/worker/prompts/`, `tsconfig.base.json` (shared compiler options), `turbo.json`, `biome.json` diff --git a/README.md b/README.md index c7dfd11..1997582 100644 --- a/README.md +++ b/README.md @@ -374,9 +374,21 @@ cp configs/example-config.yaml ./my-app-config.yaml ##### Basic Configuration Structure ```yaml -# Optional: describe your target environment (max 500 chars) +# Describe your target environment (optional, max 500 chars) description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production." 
+# Limit which vulnerability classes run end-to-end (optional, default: all five) +# vuln_classes: [injection, xss, auth, authz, ssrf] + +# Whether to run the exploitation phase (optional, default: "true"); set "false" to skip it +# exploit: "false" + +# Free-form rules of engagement (optional) +# rules_of_engagement: | +# - No password brute-force; cap login attempts at 5 per account. +# - Throttle to under 5 requests per second per endpoint; back off 60s on any 429. +# - Use placeholders like [order_id] in deliverables — no real data values. + authentication: login_type: form login_url: "https://your-app.com/login" @@ -395,15 +407,28 @@ authentication: value: "/dashboard" rules: + # Supported types: url_path, subdomain, domain, method, header, parameter, code_path avoid: - description: "AI should avoid testing logout functionality" - type: path - url_path: "/logout" + type: url_path + value: "/logout" + + # code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "src/vendor/**"). + # - description: "Out-of-scope vendored libraries" + # type: code_path + # value: "src/vendor/**" focus: - description: "AI should emphasize testing API endpoints" - type: path - url_path: "/api" + type: url_path + value: "/api" + +# Filters applied by the report agent when assembling the final report (optional). +# report: +# min_severity: low # drop findings below this severity (low | medium | high | critical) +# min_confidence: low # drop findings below this confidence (low | medium | high) +# guidance: | +# Drop findings about missing security headers and rate-limit gaps. ``` Run with: diff --git a/apps/worker/configs/config-schema.json b/apps/worker/configs/config-schema.json index 757f083..229dca8 100644 --- a/apps/worker/configs/config-schema.json +++ b/apps/worker/configs/config-schema.json @@ -118,6 +118,51 @@ }, "additionalProperties": false }, + "vuln_classes": { + "type": "array", + "description": "Vulnerability classes to test. When omitted, all five classes run. 
When set, only listed classes run; their vuln+exploit agents and report sections are included.", + "items": { + "type": "string", + "enum": ["injection", "xss", "auth", "authz", "ssrf"] + }, + "minItems": 1, + "maxItems": 5, + "uniqueItems": true + }, + "exploit": { + "type": "string", + "enum": ["true", "false"], + "description": "Whether to run the exploitation phase (default true). Set false to run only analysis." + }, + "report": { + "type": "object", + "description": "Report filtering and guidance applied by the report agent.", + "properties": { + "min_severity": { + "type": "string", + "enum": ["low", "medium", "high", "critical"], + "description": "Minimum severity threshold; findings below are dropped by the report agent." + }, + "min_confidence": { + "type": "string", + "enum": ["low", "medium", "high"], + "description": "Minimum confidence threshold; findings below are dropped by the report agent." + }, + "guidance": { + "type": "string", + "minLength": 1, + "maxLength": 500, + "description": "Free-text guidance to the report agent (e.g., 'Drop findings about missing security headers')." + } + }, + "additionalProperties": false + }, + "rules_of_engagement": { + "type": "string", + "minLength": 1, + "maxLength": 1000, + "description": "Free-text instructions to the agent that render into every prompt." 
+ }, "login": { "type": "object", "description": "Deprecated: Use 'authentication' section instead", @@ -135,7 +180,11 @@ { "required": ["authentication"] }, { "required": ["rules"] }, { "required": ["authentication", "rules"] }, - { "required": ["description"] } + { "required": ["description"] }, + { "required": ["vuln_classes"] }, + { "required": ["exploit"] }, + { "required": ["report"] }, + { "required": ["rules_of_engagement"] } ], "additionalProperties": false, "$defs": { @@ -151,17 +200,17 @@ }, "type": { "type": "string", - "enum": ["path", "subdomain", "domain", "method", "header", "parameter"], - "description": "Type of rule (what aspect of requests to match against)" + "enum": ["url_path", "subdomain", "domain", "method", "header", "parameter", "code_path"], + "description": "Type of rule (what aspect of requests or source code to match against)" }, - "url_path": { + "value": { "type": "string", "minLength": 1, "maxLength": 1000, - "description": "URL path pattern or value to match" + "description": "Value to match" } }, - "required": ["description", "type", "url_path"], + "required": ["description", "type", "value"], "additionalProperties": false } } diff --git a/apps/worker/configs/example-config.yaml b/apps/worker/configs/example-config.yaml index e46e8e6..3e6f2e8 100644 --- a/apps/worker/configs/example-config.yaml +++ b/apps/worker/configs/example-config.yaml @@ -4,6 +4,27 @@ # Description of the target environment (optional, max 500 chars) description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production." +# Limit which vulnerability classes run end-to-end (optional, default: all five) +# vuln_classes: [injection, xss, auth, authz, ssrf] + +# Skip the exploitation phase (optional, default: "true") +# exploit: "false" + +# Free-form engagement rules applied to analysis and exploitation agents (optional). 
+# Example below is illustrative; edit, remove, or add sections as needed. +# rules_of_engagement: | +# Forbidden techniques: +# - No password brute-force or credential stuffing. Cap login attempts at 5 per account. +# - ... +# +# Operational: +# - Throttle to under 5 requests per second per endpoint. Back off 60 seconds on any 429 response. +# - ... +# +# Data handling: +# - Do not include actual values in deliverables — use placeholders like [order_id] or [user_email]. +# - ... + authentication: login_type: form # Options: 'form' or 'sso' login_url: "https://example.com/login" @@ -25,27 +46,55 @@ authentication: value: "/dashboard" rules: + # Supported types: url_path, subdomain, domain, method, header, parameter, code_path avoid: - description: "Do not test the marketing site subdomain" type: subdomain - url_path: "www" + value: "www" - description: "Skip logout functionality" - type: path - url_path: "/logout" + type: url_path + value: "/logout" - description: "No DELETE operations on user API" - type: path - url_path: "/api/v1/users/*" + type: url_path + value: "/api/v1/users/*" + # code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "test/**"). + # - description: "Test fixtures and specs (not production code)" + # type: code_path + # value: "test/**" + # + # - description: "Generated migrations" + # type: code_path + # value: "db/migrations/**" + focus: - description: "Prioritize beta admin panel subdomain" type: subdomain - url_path: "beta-admin" + value: "beta-admin" - description: "Focus on user profile updates" - type: path - url_path: "/api/v2/user-profile" + type: url_path + value: "/api/v2/user-profile" + + # code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "routes/*.ts"). 
+ # - description: "Express route handlers" + # type: code_path + # value: "routes/*.ts" + # + # - description: "Sequelize ORM model definitions" + # type: code_path + # value: "models/*.ts" + +# Report filters applied by the report agent when assembling the final report (optional). +# Example below is illustrative; edit, remove, or add sections as needed. +# report: +# min_severity: low +# min_confidence: low +# guidance: | +# Drop findings about missing security headers and rate-limit gaps. +# ... # Pipeline execution settings (optional) # pipeline: diff --git a/apps/worker/prompts/exploit-auth.txt b/apps/worker/prompts/exploit-auth.txt index 870a674..a9807d3 100644 --- a/apps/worker/prompts/exploit-auth.txt +++ b/apps/worker/prompts/exploit-auth.txt @@ -105,6 +105,8 @@ If you only confirmed a vulnerability without attempting to weaponize it, you ha {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + - Your **actionable exploitation queue** is located at `.shannon/deliverables/auth_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue. diff --git a/apps/worker/prompts/exploit-authz.txt b/apps/worker/prompts/exploit-authz.txt index 0c218c3..e131cd5 100644 --- a/apps/worker/prompts/exploit-authz.txt +++ b/apps/worker/prompts/exploit-authz.txt @@ -92,6 +92,8 @@ After exhaustive bypass attempts, determine: {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + - Your **actionable exploitation queue** is located at `.shannon/deliverables/authz_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue. 
diff --git a/apps/worker/prompts/exploit-injection.txt b/apps/worker/prompts/exploit-injection.txt index ae5bfd3..62fe542 100644 --- a/apps/worker/prompts/exploit-injection.txt +++ b/apps/worker/prompts/exploit-injection.txt @@ -69,6 +69,8 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + - Your **actionable exploitation queue** is located at `.shannon/deliverables/injection_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue. diff --git a/apps/worker/prompts/exploit-ssrf.txt b/apps/worker/prompts/exploit-ssrf.txt index d7e795e..2230a80 100644 --- a/apps/worker/prompts/exploit-ssrf.txt +++ b/apps/worker/prompts/exploit-ssrf.txt @@ -92,6 +92,8 @@ After exhaustive bypass attempts, determine: {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + - Your **actionable exploitation queue** is located at `.shannon/deliverables/ssrf_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue. diff --git a/apps/worker/prompts/exploit-xss.txt b/apps/worker/prompts/exploit-xss.txt index 2a805ff..85e9e5d 100644 --- a/apps/worker/prompts/exploit-xss.txt +++ b/apps/worker/prompts/exploit-xss.txt @@ -90,6 +90,8 @@ After exhaustive bypass attempts, determine: {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + - Your **actionable exploitation queue** is located at `.shannon/deliverables/xss_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue. 
diff --git a/apps/worker/prompts/pre-recon-code.txt b/apps/worker/prompts/pre-recon-code.txt index a24d1b9..1974382 100644 --- a/apps/worker/prompts/pre-recon-code.txt +++ b/apps/worker/prompts/pre-recon-code.txt @@ -31,6 +31,14 @@ Read `.gitignore` and run `git ls-files --others --ignored --exclude-standard -- {{DESCRIPTION}} + +Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}. + + +@include(shared/_code-path-rules.txt) + +@include(shared/_rules-of-engagement.txt) + **PENTESTING WORKFLOW - YOUR POSITION:** diff --git a/apps/worker/prompts/recon.txt b/apps/worker/prompts/recon.txt index ce1f16c..f4a0006 100644 --- a/apps/worker/prompts/recon.txt +++ b/apps/worker/prompts/recon.txt @@ -20,6 +20,10 @@ Filesystem: - {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc. + +Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}. Map only what supports these classes. + + Rules to Avoid: {{RULES_AVOID}} @@ -28,10 +32,14 @@ Areas to Focus On: {{RULES_FOCUS}} +@include(shared/_code-path-rules.txt) + {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + # Penetration Test Scope & Boundaries diff --git a/apps/worker/prompts/report-executive.txt b/apps/worker/prompts/report-executive.txt index edf0d8a..6cefe2b 100644 --- a/apps/worker/prompts/report-executive.txt +++ b/apps/worker/prompts/report-executive.txt @@ -9,11 +9,11 @@ Technical leadership (CTOs, CISOs, Engineering VPs) who need both technical accu -The orchestrator has already concatenated all specialist reports into `comprehensive_security_assessment_report.md`. +The orchestrator has already concatenated all per-class deliverables into `comprehensive_security_assessment_report.md`. Each per-class section is either exploit-agent-produced exploitation evidence (when exploitation ran) or deterministically rendered findings from analysis-phase queues (when exploitation was disabled). 
The cleanup rules below apply uniformly to either source. Your task is to: 1. Read this existing concatenated report 2. Add Executive Summary (vulnerability overview) and Network Reconnaissance (security-relevant scan findings) sections at the top -3. Clean up ALL exploitation evidence sections by removing hallucinated content +3. Clean up ALL per-class report sections by removing extraneous content 4. Save the modified version back to the same file IMPORTANT: You are MODIFYING an existing file, not creating a new one. @@ -32,8 +32,13 @@ Filesystem: Authentication Context: {{AUTH_CONTEXT}} + +Vulnerability classes tested in this run: {{VULN_CLASSES_TESTED}} +Exploitation: {{EXPLOITATION}} +{{REPORT_FILTERS_BLOCK}} + You will analyze the following deliverable files: - `.shannon/deliverables/pre_recon_deliverable.md` - Initial reconnaissance and technology stack @@ -51,26 +56,14 @@ MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessmen ## Executive Summary - Target: {{WEB_URL}} - Assessment Date: {current date} -- Scope: Authentication, XSS, SQL and Command Injection, SSRF, Authorization testing +- Scope: {{VULN_CLASSES_TESTED}} +- Exploitation: {{EXPLOITATION}} ## Summary by Vulnerability Type {For each vulnerability type below, examine the comprehensive_security_assessment_report.md sections and provide a summary. If no section exists for that type or no vulnerabilities are listed, explicitly state that none were found.} -**Authentication Vulnerabilities:** -{Check for "Authentication Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authentication vulnerabilities were found."} - -**Authorization Vulnerabilities:** -{Check for "Authorization Exploitation Evidence" section. 
Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authorization vulnerabilities were found."} - -**Cross-Site Scripting (XSS) Vulnerabilities:** -{Check for "XSS Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No XSS vulnerabilities were found."} - -**SQL/Command Injection Vulnerabilities:** -{Check for "Injection Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SQL or command injection vulnerabilities were found."} - -**Server-Side Request Forgery (SSRF) Vulnerabilities:** -{Check for "SSRF Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SSRF vulnerabilities were found."} +{{VULN_SUMMARY_SUBSECTIONS}} ## Network Reconnaissance {Extract security-relevant findings from automated scanning tools: @@ -80,14 +73,14 @@ MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessmen - Any other security-relevant findings from the automated tools SKIP stack details - technical leaders know their infrastructure} -2. KEEPING the existing exploitation evidence sections but CLEANING them according to the rules below +2. 
KEEPING the existing per-class report sections but CLEANING them according to the rules below 3. The final structure should be: - Executive Summary (new) - Network Reconnaissance (new) - - All existing exploitation evidence sections (cleaned) + - All existing per-class report sections (cleaned) -IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above. +IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above. @@ -103,11 +96,12 @@ IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain - Executive Summary: Technical overview with actionable findings for engineering leaders - Network Reconnaissance: Focus on security-relevant discoveries from automated scans -3. Clean the exploitation evidence sections from `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules: +3. Clean the per-class report sections in `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules: - KEEP these specific section headings: NOTE: these sections will contain vulnerability lists with IDs matching pattern `### [TYPE]-VULN-[NUMBER]` - * `# [Type] Exploitation Evidence` - * `## Successfully Exploited Vulnerabilities` + * `# [Type] {{REPORT_VULN_HEADING}}` + * `## {{REPORT_VULN_SUBHEADING}}` +{{REPORT_FILTER_RULES}} - REMOVE ANY OTHER SECTIONS (even if they contain vulnerability IDs), such as: * `## Potential Vulnerabilities (Validation Blocked)` (All agents) * Standalone "Recommendations" sections @@ -119,11 +113,11 @@ IMPORTANT: Do NOT reorder the existing exploitation evidence sections. 
Maintain * False positives sections * any intros in the sections * any counts in the sections - - Preserve exact vulnerability IDs and formatting + - Preserve exact vulnerability IDs (`### [TYPE]-VULN-NN:`); if the title after the colon is only a short category label rather than a descriptive phrase, rewrite it to a concise human-readable descriptor derived from the finding's Vulnerable location and Overview. 4. Combine the content: - Place the Executive Summary and Network Reconnaissance sections at the top - - Follow with the cleaned exploitation evidence sections + - Follow with the cleaned per-class report sections - Save as the modified `.shannon/deliverables/comprehensive_security_assessment_report.md` CRITICAL: You are modifying the existing concatenated report at `.shannon/deliverables/comprehensive_security_assessment_report.md` IN-PLACE, not creating a separate file. diff --git a/apps/worker/prompts/shared/_code-path-rules.txt b/apps/worker/prompts/shared/_code-path-rules.txt new file mode 100644 index 0000000..eb95b47 --- /dev/null +++ b/apps/worker/prompts/shared/_code-path-rules.txt @@ -0,0 +1,13 @@ + +Source-code routing. Each rule is tagged `[FILE]` (literal path) or `[GLOB]` (pattern). All paths are repository-relative. + +How to apply (focus rules): +- For `[FILE]` entries — delegate analysis to the Task tool. +- For `[GLOB]` entries — invoke the Glob tool to enumerate matches, then delegate analysis of every match to the Task tool. + +Avoid — out of scope. Skip entirely; the tool layer will block any access attempts. +{{CODE_RULES_AVOID}} + +Focus — priority work assignments. Analyze every entry. 
+{{CODE_RULES_FOCUS}} + diff --git a/apps/worker/prompts/shared/_rules-of-engagement.txt b/apps/worker/prompts/shared/_rules-of-engagement.txt new file mode 100644 index 0000000..1107520 --- /dev/null +++ b/apps/worker/prompts/shared/_rules-of-engagement.txt @@ -0,0 +1,3 @@ + +{{RULES_OF_ENGAGEMENT}} + diff --git a/apps/worker/prompts/vuln-auth.txt b/apps/worker/prompts/vuln-auth.txt index 5cdc41d..71933c5 100644 --- a/apps/worker/prompts/vuln-auth.txt +++ b/apps/worker/prompts/vuln-auth.txt @@ -20,10 +20,14 @@ Success criterion: A complete, code-backed analysis of every potential authentic @include(shared/_rules.txt) +@include(shared/_code-path-rules.txt) + {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + **Your Professional Standard** - **Severity Context:** A flaw in authentication is a catastrophic failure. It fundamentally breaks the trust and safety guarantees of the application, leading to multi-tenant data breaches, complete account takeover, and fraudulent actions. These are the most critical vulnerabilities you can find. diff --git a/apps/worker/prompts/vuln-authz.txt b/apps/worker/prompts/vuln-authz.txt index b1c1406..ecadf04 100644 --- a/apps/worker/prompts/vuln-authz.txt +++ b/apps/worker/prompts/vuln-authz.txt @@ -19,10 +19,14 @@ Success criterion: A complete, code-backed analysis of every potential authoriza @include(shared/_rules.txt) +@include(shared/_code-path-rules.txt) + {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + **Your Professional Standard** - **Severity Context:** A flaw in authorization is a critical security failure. It allows privilege escalation, horizontal and vertical access control bypass, and unauthorized data access. These vulnerabilities enable attackers to access resources they shouldn't, escalate privileges, and compromise multi-tenant data isolation. 
diff --git a/apps/worker/prompts/vuln-injection.txt b/apps/worker/prompts/vuln-injection.txt index 2f7b330..c70219f 100644 --- a/apps/worker/prompts/vuln-injection.txt +++ b/apps/worker/prompts/vuln-injection.txt @@ -20,10 +20,14 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si @include(shared/_rules.txt) +@include(shared/_code-path-rules.txt) + {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + **Your Professional Standard** - **Severity Context:** A structural flaw in a backend command, whether an SQL query or a shell command, is one of the most severe classes of vulnerability in a SaaS application. It undermines the foundational trust of the system by creating the *potential* for data exposure (SQLi) or direct server compromise (Command Injection). diff --git a/apps/worker/prompts/vuln-ssrf.txt b/apps/worker/prompts/vuln-ssrf.txt index b55e2f5..d3dbe64 100644 --- a/apps/worker/prompts/vuln-ssrf.txt +++ b/apps/worker/prompts/vuln-ssrf.txt @@ -19,10 +19,14 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul @include(shared/_rules.txt) +@include(shared/_code-path-rules.txt) + {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + **Your Professional Standard** - **Severity Context:** An SSRF flaw is a breach of network segmentation and trust boundaries. It allows attackers to leverage the application server as a proxy to access internal services, cloud metadata endpoints, or perform reconnaissance of internal networks. These vulnerabilities can lead to data exposure, privilege escalation, and complete network compromise. 
diff --git a/apps/worker/prompts/vuln-xss.txt b/apps/worker/prompts/vuln-xss.txt index 2776d3d..bfbef11 100644 --- a/apps/worker/prompts/vuln-xss.txt +++ b/apps/worker/prompts/vuln-xss.txt @@ -19,10 +19,14 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th @include(shared/_rules.txt) +@include(shared/_code-path-rules.txt) + {{LOGIN_INSTRUCTIONS}} +@include(shared/_rules-of-engagement.txt) + **Your Professional Standard** - Severity Context: An XSS flaw is a breach of trust between the user and the application. It grants an attacker the ability to execute arbitrary code within a victim's browser, leading to session hijacking, credential theft, and complete compromise of the user's account and data within the application. diff --git a/apps/worker/src/ai/queue-schemas.ts b/apps/worker/src/ai/queue-schemas.ts index be78932..5daf422 100644 --- a/apps/worker/src/ai/queue-schemas.ts +++ b/apps/worker/src/ai/queue-schemas.ts @@ -79,6 +79,14 @@ const AuthzVulnerability = baseVulnerability.extend({ minimal_witness: z.string().optional(), }); +// === Inferred Entry Types (consumed by renderer) === + +export type InjectionFinding = z.infer; +export type XssFinding = z.infer; +export type AuthFinding = z.infer; +export type SsrfFinding = z.infer; +export type AuthzFinding = z.infer; + // === Queue Wrapper Schemas === const InjectionQueueSchema = z.object({ vulnerabilities: z.array(InjectionVulnerability) }); diff --git a/apps/worker/src/ai/settings-writer.ts b/apps/worker/src/ai/settings-writer.ts new file mode 100644 index 0000000..dea5380 --- /dev/null +++ b/apps/worker/src/ai/settings-writer.ts @@ -0,0 +1,41 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. 
+ +/** + * Writes ~/.claude/settings.json with permissions.deny rules derived from + * `code_path` avoid patterns. The SDK reads this via `settingSources: ['user']`; + * deny rules fire even in `bypassPermissions` mode. + */ + +import os from 'node:os'; +import { fs, path } from 'zx'; +import type { DistributedConfig } from '../types/config.js'; + +const FILE_TOOLS = ['Read', 'Edit'] as const; + +function denyEntriesFor(pattern: string): string[] { + const arg = `./${pattern.replace(/^[./]+/, '')}`; + return FILE_TOOLS.map((tool) => `${tool}(${arg})`); +} + +export async function writeUserSettingsForCodePathAvoids(config: DistributedConfig | null): Promise { + const avoidPatterns = (config?.avoid ?? []).filter((r) => r.type === 'code_path').map((r) => r.value); + const settingsPath = path.join(os.homedir(), '.claude', 'settings.json'); + + if (avoidPatterns.length === 0) { + await fs.remove(settingsPath); + return; + } + + const settings = { + permissions: { + deny: avoidPatterns.flatMap(denyEntriesFor), + }, + }; + + await fs.ensureDir(path.dirname(settingsPath)); + await fs.writeJson(settingsPath, settings, { spaces: 2 }); +} diff --git a/apps/worker/src/config-parser.ts b/apps/worker/src/config-parser.ts index f6bfd15..f46532d 100644 --- a/apps/worker/src/config-parser.ts +++ b/apps/worker/src/config-parser.ts @@ -10,7 +10,13 @@ import type { FormatsPlugin } from 'ajv-formats'; import yaml from 'js-yaml'; import { fs } from 'zx'; import { PentestError } from './services/error-handling.js'; -import type { Authentication, Config, DistributedConfig, Rule } from './types/config.js'; +import { + ALL_VULN_CLASSES, + type Authentication, + type Config, + type DistributedConfig, + type Rule, +} from './types/config.js'; import { ErrorCode } from './types/errors.js'; // Handle ESM/CJS interop for ajv-formats using require @@ -306,6 +312,39 @@ export const parseConfigYAML = (yamlContent: string): Config => { return config as Config; }; +function 
checkDeprecatedFields(config: Config): void { + const messages: string[] = []; + + const checkRules = (rules: unknown, where: string): void => { + if (!Array.isArray(rules)) return; + rules.forEach((rule, idx) => { + if (typeof rule !== 'object' || rule === null) return; + const r = rule as Record; + if (r.type === 'path') { + messages.push(`rules.${where}[${idx}].type: 'path' has been renamed to 'url_path'.`); + } + if ('url_path' in r && !('value' in r)) { + messages.push(`rules.${where}[${idx}]: the rule field 'url_path' has been renamed to 'value'.`); + } + }); + }; + + const raw = config as Record; + const rules = raw.rules as { avoid?: unknown; focus?: unknown } | undefined; + checkRules(rules?.avoid, 'avoid'); + checkRules(rules?.focus, 'focus'); + + if (messages.length > 0) { + throw new PentestError( + `Configuration uses deprecated fields. Please update:\n - ${messages.join('\n - ')}`, + 'config', + false, + { deprecatedFields: messages }, + ErrorCode.CONFIG_VALIDATION_FAILED, + ); + } +} + const validateConfig = (config: Config): void => { if (!config || typeof config !== 'object') { throw new PentestError( @@ -327,6 +366,8 @@ const validateConfig = (config: Config): void => { ); } + checkDeprecatedFields(config); + const isValid = validateSchema(config); if (!isValid) { const errors = validateSchema.errors || []; @@ -342,10 +383,16 @@ const validateConfig = (config: Config): void => { performSecurityValidation(config); - if (!config.rules && !config.authentication && !config.description) { - console.warn( - '⚠️ Configuration file contains no rules, authentication, or description. 
The pentest will run without any scoping restrictions or login capabilities.', - ); + const hasAnySteering = + !!config.rules || + !!config.authentication || + !!config.description || + !!config.vuln_classes || + config.exploit !== undefined || + !!config.report || + !!config.rules_of_engagement; + if (!hasAnySteering) { + console.warn('⚠️ Configuration file contains no steering fields. The pentest will run with all defaults.'); } else if (config.rules && !config.rules.avoid && !config.rules.focus) { console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.'); } @@ -432,6 +479,34 @@ const performSecurityValidation = (config: Config): void => { } } } + + if (config.rules_of_engagement) { + for (const pattern of DANGEROUS_PATTERNS) { + if (pattern.test(config.rules_of_engagement)) { + throw new PentestError( + `rules_of_engagement contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'rules_of_engagement', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED, + ); + } + } + } + + if (config.report?.guidance) { + for (const pattern of DANGEROUS_PATTERNS) { + if (pattern.test(config.report.guidance)) { + throw new PentestError( + `report.guidance contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'report.guidance', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED, + ); + } + } + } }; const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => { @@ -439,12 +514,12 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi rules.forEach((rule, index) => { for (const pattern of DANGEROUS_PATTERNS) { - if (pattern.test(rule.url_path)) { + if (pattern.test(rule.value)) { throw new PentestError( - `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`, + `rules.${ruleType}[${index}].value contains potentially dangerous pattern: 
${pattern.source}`, 'config', false, - { field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source }, + { field: `rules.${ruleType}[${index}].value`, pattern: pattern.source }, ErrorCode.CONFIG_VALIDATION_FAILED, ); } @@ -464,13 +539,25 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi }; const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => { - const field = `rules.${ruleType}[${index}].url_path`; + const field = `rules.${ruleType}[${index}].value`; switch (rule.type) { - case 'path': - if (!rule.url_path.startsWith('/')) { + case 'url_path': + if (!rule.value.startsWith('/')) { throw new PentestError( - `${field} for type 'path' must start with '/'`, + `${field} for type 'url_path' must start with '/'`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED, + ); + } + break; + + case 'code_path': + if (rule.value.includes('://')) { + throw new PentestError( + `${field} for type 'code_path' must not contain a URL protocol (got '${rule.value}')`, 'config', false, { field, ruleType: rule.type }, @@ -482,7 +569,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'subdomain': case 'domain': // Basic domain validation - no slashes allowed - if (rule.url_path.includes('/')) { + if (rule.value.includes('/')) { throw new PentestError( `${field} for type '${rule.type}' cannot contain '/' characters`, 'config', @@ -492,7 +579,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): ); } // Must contain at least one dot for domains - if (rule.type === 'domain' && !rule.url_path.includes('.')) { + if (rule.type === 'domain' && !rule.value.includes('.')) { throw new PentestError( `${field} for type 'domain' must be a valid domain name`, 'config', @@ -505,7 +592,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'method': { const allowedMethods = ['GET', 
'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']; - if (!allowedMethods.includes(rule.url_path.toUpperCase())) { + if (!allowedMethods.includes(rule.value.toUpperCase())) { throw new PentestError( `${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`, 'config', @@ -518,7 +605,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): } case 'header': - if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { + if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) { throw new PentestError( `${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`, 'config', @@ -530,7 +617,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): break; case 'parameter': - if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { + if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) { throw new PentestError( `${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`, 'config', @@ -546,13 +633,13 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): const checkForDuplicates = (rules: Rule[], ruleType: string): void => { const seen = new Set(); rules.forEach((rule, index) => { - const key = `${rule.type}:${rule.url_path}`; + const key = `${rule.type}:${rule.value}`; if (seen.has(key)) { throw new PentestError( - `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`, + `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.value}'`, 'config', false, - { field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path }, + { field: `rules.${ruleType}[${index}]`, ruleType: rule.type, value: rule.value }, ErrorCode.CONFIG_VALIDATION_FAILED, ); } @@ -561,16 +648,16 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => { }; const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => { - const avoidSet = new 
Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`)); + const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.value}`)); focusRules.forEach((rule, index) => { - const key = `${rule.type}:${rule.url_path}`; + const key = `${rule.type}:${rule.value}`; if (avoidSet.has(key)) { throw new PentestError( - `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`, + `Conflicting rule found: rules.focus[${index}] '${rule.value}' also exists in rules.avoid`, 'config', false, - { field: `rules.focus[${index}]`, urlPath: rule.url_path }, + { field: `rules.focus[${index}]`, value: rule.value }, ErrorCode.CONFIG_VALIDATION_FAILED, ); } @@ -581,7 +668,7 @@ const sanitizeRule = (rule: Rule): Rule => { return { description: rule.description.trim(), type: rule.type.toLowerCase().trim() as Rule['type'], - url_path: rule.url_path.trim(), + value: rule.value.trim(), }; }; @@ -591,11 +678,28 @@ export const distributeConfig = (config: Config | null): DistributedConfig => { const authentication = config?.authentication || null; const description = config?.description?.trim() || ''; + const vuln_classes = + config?.vuln_classes && config.vuln_classes.length > 0 ? [...config.vuln_classes] : [...ALL_VULN_CLASSES]; + + const exploit = config?.exploit !== undefined ? config.exploit === 'true' : true; + + const report = { + ...(config?.report?.min_severity && { min_severity: config.report.min_severity }), + ...(config?.report?.min_confidence && { min_confidence: config.report.min_confidence }), + ...(config?.report?.guidance && { guidance: config.report.guidance.trim() }), + }; + + const rules_of_engagement = config?.rules_of_engagement?.trim() ?? ''; + return { avoid: avoid.map(sanitizeRule), focus: focus.map(sanitizeRule), authentication: authentication ? 
sanitizeAuthentication(authentication) : null, description, + vuln_classes, + exploit, + report, + rules_of_engagement, }; }; diff --git a/apps/worker/src/services/findings-renderer.ts b/apps/worker/src/services/findings-renderer.ts new file mode 100644 index 0000000..88189b2 --- /dev/null +++ b/apps/worker/src/services/findings-renderer.ts @@ -0,0 +1,251 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Deterministic queue-JSON to findings-MD renderer. + * + * Used when exploit=false: the exploit agents didn't run, so there is no + * `*_exploitation_evidence.md` to concatenate into the report. This module + * reads each `*_exploitation_queue.json` (already SDK-validated against the + * schemas in ../ai/queue-schemas.ts) and writes a `*_findings.md` per class + * in the canonical body shape that report-executive.txt's cleanup expects. + * + * No LLM in the loop — every field maps directly from a JSON key. + */ + +import { fs, path } from 'zx'; +import type { + AuthFinding, + AuthzFinding, + InjectionFinding, + SsrfFinding, + XssFinding, +} from '../ai/queue-schemas.js'; +import { deliverablesDir } from '../paths.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; +import type { VulnClass } from '../types/config.js'; + +const DISCLAIMER = [ + '> Exploitation phase was not run for this assessment. 
Each entry documents a', + '> vulnerability identified through static analysis; live exploitation steps and', + '> proof of impact are not included.', +].join('\n'); + +interface ClassConfig { + readonly heading: string; + readonly noneFoundLabel: string; + readonly queueFile: string; + readonly findingsFile: string; + readonly renderEntry: (entry: T) => string; +} + +interface QueueDocument { + vulnerabilities?: T[]; +} + +// === Common Render Helpers === + +function summaryRow(label: string, value: string | undefined | null | boolean): string | null { + if (value === undefined || value === null) return null; + if (typeof value === 'string' && value.trim() === '') return null; + return `- **${label}:** ${value}`; +} + +function formatLocation(endpoint: string | undefined, codeLocation: string | undefined): string { + if (endpoint && codeLocation) return `${endpoint} (${codeLocation})`; + return endpoint ?? codeLocation ?? ''; +} + +function buildEntry( + id: string, + title: string, + summaryRows: ReadonlyArray, + notes: string | undefined, +): string { + const lines: string[] = []; + lines.push(`### ${id}: ${title}`); + lines.push(''); + lines.push('**Summary:**'); + for (const row of summaryRows) { + if (row !== null) lines.push(row); + } + lines.push(''); + if (notes && notes.trim() !== '') { + lines.push(`**Notes:** ${notes.trim()}`); + } + return lines.join('\n').trimEnd(); +} + +// === Per-Class Renderers === + +function renderAuthEntry(e: AuthFinding): string { + return buildEntry( + e.ID, + e.vulnerability_type, + [ + summaryRow('Vulnerable location', formatLocation(e.source_endpoint, e.vulnerable_code_location)), + summaryRow('Overview', e.missing_defense), + summaryRow('Impact', e.exploitation_hypothesis), + ], + e.notes, + ); +} + +function renderSsrfEntry(e: SsrfFinding): string { + return buildEntry( + e.ID, + e.vulnerability_type, + [ + summaryRow('Vulnerable location', formatLocation(e.source_endpoint, e.vulnerable_code_location)), + 
summaryRow('Overview', e.missing_defense), + summaryRow('Impact', e.exploitation_hypothesis), + ], + e.notes, + ); +} + +function renderAuthzEntry(e: AuthzFinding): string { + return buildEntry( + e.ID, + e.vulnerability_type, + [ + summaryRow('Vulnerable location', formatLocation(e.endpoint, e.vulnerable_code_location)), + summaryRow('Overview', e.guard_evidence), + summaryRow('Impact', e.side_effect), + ], + e.notes, + ); +} + +function renderInjectionEntry(e: InjectionFinding): string { + const location = e.path && e.sink_call ? `${e.sink_call} (path: ${e.path})` : (e.sink_call ?? e.path); + return buildEntry( + e.ID, + e.vulnerability_type, + [ + summaryRow('Vulnerable location', location), + summaryRow('Overview', e.mismatch_reason), + ], + e.notes, + ); +} + +function renderXssEntry(e: XssFinding): string { + const location = e.path && e.sink_function ? `${e.sink_function} (path: ${e.path})` : (e.sink_function ?? e.path); + return buildEntry( + e.ID, + e.vulnerability_type, + [ + summaryRow('Vulnerable location', location), + summaryRow('Overview', e.mismatch_reason), + ], + e.notes, + ); +} + +// === Class Registry === + +const CLASSES: Record> = { + auth: { + heading: 'Authentication', + noneFoundLabel: 'authentication', + queueFile: 'auth_exploitation_queue.json', + findingsFile: 'auth_findings.md', + renderEntry: (e) => renderAuthEntry(e as AuthFinding), + }, + authz: { + heading: 'Authorization', + noneFoundLabel: 'authorization', + queueFile: 'authz_exploitation_queue.json', + findingsFile: 'authz_findings.md', + renderEntry: (e) => renderAuthzEntry(e as AuthzFinding), + }, + injection: { + heading: 'Injection', + noneFoundLabel: 'injection', + queueFile: 'injection_exploitation_queue.json', + findingsFile: 'injection_findings.md', + renderEntry: (e) => renderInjectionEntry(e as InjectionFinding), + }, + xss: { + heading: 'XSS', + noneFoundLabel: 'XSS', + queueFile: 'xss_exploitation_queue.json', + findingsFile: 'xss_findings.md', + renderEntry: (e) => 
renderXssEntry(e as XssFinding), + }, + ssrf: { + heading: 'SSRF', + noneFoundLabel: 'SSRF', + queueFile: 'ssrf_exploitation_queue.json', + findingsFile: 'ssrf_findings.md', + renderEntry: (e) => renderSsrfEntry(e as SsrfFinding), + }, +}; + +// === Class File Assembly === + +function renderClassFile(config: ClassConfig, entries: readonly unknown[]): string { + const sections: string[] = []; + sections.push(`# ${config.heading} Findings`); + sections.push(''); + sections.push(DISCLAIMER); + sections.push(''); + sections.push('## Identified Vulnerabilities'); + sections.push(''); + if (entries.length === 0) { + sections.push(`No ${config.noneFoundLabel} vulnerabilities were identified.`); + sections.push(''); + } else { + for (const entry of entries) { + sections.push(config.renderEntry(entry)); + sections.push(''); + } + } + return `${sections.join('\n').trimEnd()}\n`; +} + +// === Public Entry Point === + +/** + * Render `*_findings.md` per class from each `*_exploitation_queue.json`. + * + * Idempotent: skips classes whose findings file already exists, or whose queue + * is missing (class out of scope this run). Per-class failures are logged and + * other classes still proceed. + */ +export async function renderFindingsFromQueues( + sourceDir: string, + deliverablesSubdir: string | undefined, + logger: ActivityLogger, +): Promise { + const dir = deliverablesDir(sourceDir, deliverablesSubdir); + + for (const config of Object.values(CLASSES)) { + const queuePath = path.join(dir, config.queueFile); + const findingsPath = path.join(dir, config.findingsFile); + + if (await fs.pathExists(findingsPath)) { + logger.info(`${config.heading}: ${config.findingsFile} already exists, skipping`); + continue; + } + if (!(await fs.pathExists(queuePath))) { + logger.info(`${config.heading}: no queue file (class out of scope), skipping`); + continue; + } + + try { + const doc = (await fs.readJson(queuePath)) as QueueDocument; + const entries = doc.vulnerabilities ?? 
[]; + const markdown = renderClassFile(config, entries); + await fs.writeFile(findingsPath, markdown); + logger.info(`${config.heading}: rendered ${entries.length} finding(s) to ${config.findingsFile}`); + } catch (error) { + const err = error as Error; + logger.warn(`${config.heading}: failed to render findings from ${config.queueFile}: ${err.message}`); + } + } +} diff --git a/apps/worker/src/services/preflight.ts b/apps/worker/src/services/preflight.ts index 3f4a018..d0d4014 100644 --- a/apps/worker/src/services/preflight.ts +++ b/apps/worker/src/services/preflight.ts @@ -14,8 +14,9 @@ * Checks run sequentially, cheapest first: * 1. Repository path exists and contains .git * 2. Config file parses and validates (if provided) - * 3. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI) - * 4. Target URL is reachable from the container (DNS + HTTP) + * 3. code_path rules match real entries in the repo (filesystem only) + * 4. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI) + * 5. 
Target URL is reachable from the container (DNS + HTTP) */ import { lookup } from 'node:dns/promises'; @@ -24,9 +25,11 @@ import http from 'node:http'; import https from 'node:https'; import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk'; import { query } from '@anthropic-ai/claude-agent-sdk'; +import { glob } from 'zx'; import { resolveModel } from '../ai/models.js'; import { parseConfig } from '../config-parser.js'; import type { ActivityLogger } from '../types/activity-logger.js'; +import type { Config, Rule } from '../types/config.js'; import { ErrorCode } from '../types/errors.js'; import { err, ok, type Result } from '../types/result.js'; import { isRetryableError, PentestError } from './error-handling.js'; @@ -104,13 +107,13 @@ async function validateRepo(repoPath: string, logger: ActivityLogger, skipGitChe // === Config Validation === -async function validateConfig(configPath: string, logger: ActivityLogger): Promise> { +async function validateConfig(configPath: string, logger: ActivityLogger): Promise> { logger.info('Validating configuration file...', { configPath }); try { - await parseConfig(configPath); + const config = await parseConfig(configPath); logger.info('Configuration file OK'); - return ok(undefined); + return ok(config); } catch (error) { if (error instanceof PentestError) { return err(error); @@ -128,6 +131,73 @@ async function validateConfig(configPath: string, logger: ActivityLogger): Promi } } +// === code_path Existence Validation === + +const CODE_PATH_IGNORE = ['.git/**', '.shannon/**']; + +async function patternMatchesAny(repoPath: string, pattern: string): Promise { + const stream = glob.globbyStream(pattern, { + cwd: repoPath, + dot: true, + onlyFiles: false, + followSymbolicLinks: false, + ignore: CODE_PATH_IGNORE, + }); + for await (const _ of stream) { + return true; + } + return false; +} + +type RuleKind = 'avoid' | 'focus'; +interface MissingCodePath { + kind: RuleKind; + value: string; + description: 
string; +} + +async function validateCodePathsExist( + config: Config, + repoPath: string, + logger: ActivityLogger, +): Promise> { + const tagged: Array<{ kind: RuleKind; rule: Rule }> = [ + ...(config.rules?.avoid ?? []).map((rule) => ({ kind: 'avoid' as const, rule })), + ...(config.rules?.focus ?? []).map((rule) => ({ kind: 'focus' as const, rule })), + ].filter(({ rule }) => rule.type === 'code_path'); + + if (tagged.length === 0) { + return ok(undefined); + } + + logger.info(`Validating ${tagged.length} code_path rule(s) against repo...`); + + // ≥1 match is the only property enforced — malformed globs simply match nothing. + const missing: MissingCodePath[] = []; + for (const { kind, rule } of tagged) { + if (!(await patternMatchesAny(repoPath, rule.value))) { + missing.push({ kind, value: rule.value, description: rule.description }); + } + } + + if (missing.length > 0) { + const lines = missing.map((m) => `[${m.kind}] '${m.value}' — ${m.description}`); + return err( + new PentestError( + `code_path rules don't match any file or directory in the repo:\n - ${lines.join('\n - ')}\n` + + `Fix the patterns or remove the rules.`, + 'config', + false, + { missing }, + ErrorCode.CONFIG_VALIDATION_FAILED, + ), + ); + } + + logger.info('All code_path rules matched'); + return ok(undefined); +} + // === Credential Validation === /** Map SDK error type to a human-readable preflight PentestError. */ @@ -463,8 +533,9 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro * * 1. Repository path exists and contains .git * 2. Config file parses and validates (if configPath provided) - * 3. Credentials validate (API key, OAuth, Bedrock, or Vertex AI) - * 4. Target URL is reachable from the container + * 3. code_path rules match at least one entry in the repo (skipped without config) + * 4. Credentials validate (API key, OAuth, Bedrock, or Vertex AI) + * 5. Target URL is reachable from the container * * Returns on first failure. 
*/ @@ -484,20 +555,31 @@ export async function runPreflightChecks( } // 2. Config check (free — filesystem + CPU) + let parsedConfig: Config | null = null; if (configPath) { const configResult = await validateConfig(configPath, logger); if (!configResult.ok) { return configResult; } + parsedConfig = configResult.value; } - // 3. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present) + // 3. code_path rules must match real entries in the repo (filesystem only). + // Runs after both repo and config are valid, before any network round-trip. + if (parsedConfig) { + const codePathResult = await validateCodePathsExist(parsedConfig, repoPath, logger); + if (!codePathResult.ok) { + return codePathResult; + } + } + + // 4. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present) const credResult = await validateCredentials(logger, apiKey, providerConfig); if (!credResult.ok) { return credResult; } - // 4. Target URL reachability check (cheap — 1 HTTP round-trip) + // 5. 
Target URL reachability check (cheap — 1 HTTP round-trip) const urlResult = await validateTargetUrl(targetUrl, logger); if (!urlResult.ok) { return urlResult; diff --git a/apps/worker/src/services/prompt-manager.ts b/apps/worker/src/services/prompt-manager.ts index 19e7efe..4c69876 100644 --- a/apps/worker/src/services/prompt-manager.ts +++ b/apps/worker/src/services/prompt-manager.ts @@ -8,9 +8,113 @@ import { fs, path } from 'zx'; import { PROMPTS_DIR } from '../paths.js'; import { PLAYWRIGHT_SESSION_MAPPING } from '../session-manager.js'; import type { ActivityLogger } from '../types/activity-logger.js'; -import type { Authentication, DistributedConfig } from '../types/config.js'; +import type { Authentication, DistributedConfig, ReportConfig, Rule, VulnClass } from '../types/config.js'; +import { isGlobPattern } from '../utils/glob.js'; import { handlePromptError, PentestError } from './error-handling.js'; +function renderCodePathRules(rules: Rule[]): string { + const filtered = rules.filter((r) => r.type === 'code_path'); + if (filtered.length === 0) return 'None'; + return filtered + .map((r) => { + const kind = isGlobPattern(r.value) ? 
'[GLOB]' : '[FILE]'; + return `- ${r.value} ${kind} — ${r.description}`; + }) + .join('\n'); +} + +interface VulnSummarySpec { + readonly heading: string; + readonly evidenceSection: string; + readonly noneFoundLabel: string; +} + +const VULN_SUMMARY_SPECS: Record = { + auth: { + heading: 'Authentication Vulnerabilities', + evidenceSection: 'Authentication Exploitation Evidence', + noneFoundLabel: 'authentication', + }, + authz: { + heading: 'Authorization Vulnerabilities', + evidenceSection: 'Authorization Exploitation Evidence', + noneFoundLabel: 'authorization', + }, + xss: { + heading: 'Cross-Site Scripting (XSS) Vulnerabilities', + evidenceSection: 'XSS Exploitation Evidence', + noneFoundLabel: 'XSS', + }, + injection: { + heading: 'SQL/Command Injection Vulnerabilities', + evidenceSection: 'Injection Exploitation Evidence', + noneFoundLabel: 'SQL or command injection', + }, + ssrf: { + heading: 'Server-Side Request Forgery (SSRF) Vulnerabilities', + evidenceSection: 'SSRF Exploitation Evidence', + noneFoundLabel: 'SSRF', + }, +}; + +function renderVulnSummarySubsections(selected: readonly VulnClass[]): string { + const classes = selected.length > 0 ? selected : (Object.keys(VULN_SUMMARY_SPECS) as VulnClass[]); + return classes + .map((cls) => { + const spec = VULN_SUMMARY_SPECS[cls]; + return `**${spec.heading}:**\n{Check for "${spec.evidenceSection}" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No ${spec.noneFoundLabel} vulnerabilities were found."}`; + }) + .join('\n\n'); +} + +/** + * Renders the top-level block. Empty when no filters are set — + * each filter is included only when the operator configured it, so the agent + * never sees `none` placeholders or instructions for filters that don't apply. 
+ */ +function renderReportFiltersBlock(report: ReportConfig | undefined): string { + if (!report) return ''; + const guidance = report.guidance?.trim(); + if (!report.min_severity && !report.min_confidence && !guidance) return ''; + + const lines: string[] = [ + '', + 'The filters below are user-supplied and binding for this assessment. Honor each strictly when assembling the final report.', + '', + ]; + if (report.min_severity) { + lines.push( + `- Minimum severity: ${report.min_severity} — keep only findings rated this severity or higher (scale: low < medium < high < critical).`, + ); + } + if (report.min_confidence) { + lines.push( + `- Minimum confidence: ${report.min_confidence} — keep only findings rated this confidence or higher (scale: low < medium < high).`, + ); + } + if (guidance) { + lines.push(''); + lines.push('User guidance — apply throughout the report as binding directives for finding selection:'); + lines.push(guidance); + } + lines.push(''); + return lines.join('\n'); +} + +/** + * Renders the per-finding DROP rules used inside the cleanup step. Severity and + * confidence inline as concrete thresholds; guidance is referenced by pointer + * so the actual text only lives in , avoiding double-statement. 
+ */ +function renderReportFilterRules(report: ReportConfig | undefined): string { + const drops: string[] = []; + if (report?.min_severity) drops.push(`* severity is below ${report.min_severity}`); + if (report?.min_confidence) drops.push(`* confidence is below ${report.min_confidence}`); + if (report?.guidance?.trim()) drops.push('* topic matches an exclusion in the user guidance'); + if (drops.length === 0) return ''; + return [' - DROP any `### [TYPE]-VULN-[NUMBER]` finding whose:', ...drops.map((d) => ` ${d}`)].join('\n'); +} + interface PromptVariables { webUrl: string; repoPath: string; @@ -175,36 +279,63 @@ async function interpolateVariables( .replace(/{{AUTH_CONTEXT}}/g, buildAuthContext(config)) .replace(/{{DESCRIPTION}}/g, config?.description ? `Description: ${config.description}` : ''); - if (config) { - // Handle rules section - if both are empty, use cleaner messaging - const hasAvoidRules = config.avoid && config.avoid.length > 0; - const hasFocusRules = config.focus && config.focus.length > 0; - - if (!hasAvoidRules && !hasFocusRules) { - // Replace the entire rules section with a clean message - const cleanRulesSection = '\nNo specific rules or focus areas provided for this test.\n'; - result = result.replace(/[\s\S]*?<\/rules>/g, cleanRulesSection); - } else { - const avoidRules = hasAvoidRules ? config.avoid?.map((r) => `- ${r.description}`).join('\n') : 'None'; - const focusRules = hasFocusRules ? 
config.focus?.map((r) => `- ${r.description}`).join('\n') : 'None'; - - result = result.replace(/{{RULES_AVOID}}/g, avoidRules).replace(/{{RULES_FOCUS}}/g, focusRules); - } - - // Extract and inject login instructions from config - if (config.authentication?.login_flow) { - const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir); - result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions); - } else { - result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, ''); - } + const avoidUrlRules = config?.avoid?.filter((r) => r.type !== 'code_path') ?? []; + const focusUrlRules = config?.focus?.filter((r) => r.type !== 'code_path') ?? []; + if (avoidUrlRules.length === 0 && focusUrlRules.length === 0) { + result = result.replace(/[\s\S]*?<\/rules>\s*/g, ''); + } else { + const avoidStr = avoidUrlRules.length > 0 ? avoidUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None'; + const focusStr = focusUrlRules.length > 0 ? focusUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None'; + result = result.replace(/{{RULES_AVOID}}/g, avoidStr).replace(/{{RULES_FOCUS}}/g, focusStr); + } + + const avoidCodeRules = (config?.avoid ?? []).filter((r) => r.type === 'code_path'); + const focusCodeRules = (config?.focus ?? []).filter((r) => r.type === 'code_path'); + if (avoidCodeRules.length === 0 && focusCodeRules.length === 0) { + result = result.replace(/[\s\S]*?<\/code_path_rules>\s*/g, ''); + } else { + result = result + .replace(/{{CODE_RULES_AVOID}}/g, renderCodePathRules(config?.avoid ?? [])) + .replace(/{{CODE_RULES_FOCUS}}/g, renderCodePathRules(config?.focus ?? [])); + } + + const roe = config?.rules_of_engagement?.trim() ?? 
''; + if (roe) { + result = result.replace(/{{RULES_OF_ENGAGEMENT}}/g, roe); + } else { + result = result.replace(/[\s\S]*?<\/rules_of_engagement>\s*/g, ''); + } + + if (config?.authentication?.login_flow) { + const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir); + result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions); } else { - // Replace the entire rules section with a clean message when no config provided - const cleanRulesSection = '\nNo specific rules or focus areas provided for this test.\n'; - result = result.replace(/[\s\S]*?<\/rules>/g, cleanRulesSection); result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, ''); } + const vulnClasses = config?.vuln_classes ?? []; + result = result.replace( + /{{VULN_CLASSES_TESTED}}/g, + vulnClasses.length > 0 ? vulnClasses.join(', ') : 'injection, xss, auth, authz, ssrf', + ); + result = result.replace(/{{VULN_SUMMARY_SUBSECTIONS}}/g, renderVulnSummarySubsections(vulnClasses)); + + const exploitEnabled = config?.exploit ?? true; + result = result + .replace(/{{EXPLOITATION}}/g, exploitEnabled ? 'enabled' : 'disabled') + .replace(/{{REPORT_VULN_HEADING}}/g, exploitEnabled ? 'Exploitation Evidence' : 'Findings') + .replace( + /{{REPORT_VULN_SUBHEADING}}/g, + exploitEnabled ? 'Successfully Exploited Vulnerabilities' : 'Identified Vulnerabilities', + ); + + result = result + .replace(/{{REPORT_FILTERS_BLOCK}}/g, renderReportFiltersBlock(config?.report)) + .replace(/{{REPORT_FILTER_RULES}}/g, renderReportFilterRules(config?.report)); + + // Collapse runs of 3+ newlines (left behind by tag-strip and empty-fragment substitutions). 
+ result = result.replace(/\n{3,}/g, '\n\n'); + // Validate that all placeholders have been replaced (excluding instructional text) const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g); if (remainingPlaceholders) { diff --git a/apps/worker/src/services/reporting.ts b/apps/worker/src/services/reporting.ts index 1cb11a6..574f25c 100644 --- a/apps/worker/src/services/reporting.ts +++ b/apps/worker/src/services/reporting.ts @@ -12,60 +12,66 @@ import { PentestError } from './error-handling.js'; interface DeliverableFile { name: string; - path: string; + /** Candidate filenames in priority order. First one that exists wins. */ + paths: readonly string[]; required: boolean; } -// Pure function: Assemble final report from specialist deliverables +// Pure function: Assemble final report from specialist deliverables. +// Per class, prefer the exploit-agent's evidence file; fall back to renderer-produced findings. +// Both never coexist for a workspace because scope (exploit flag) is locked. 
export async function assembleFinalReport( sourceDir: string, deliverablesSubdir: string | undefined, logger: ActivityLogger, ): Promise { - const deliverableFiles: DeliverableFile[] = [ - { name: 'Injection', path: 'injection_exploitation_evidence.md', required: false }, - { name: 'XSS', path: 'xss_exploitation_evidence.md', required: false }, - { name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false }, - { name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false }, - { name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }, + const deliverableFiles: readonly DeliverableFile[] = [ + { name: 'Injection', paths: ['injection_exploitation_evidence.md', 'injection_findings.md'], required: false }, + { name: 'XSS', paths: ['xss_exploitation_evidence.md', 'xss_findings.md'], required: false }, + { name: 'Authentication', paths: ['auth_exploitation_evidence.md', 'auth_findings.md'], required: false }, + { name: 'SSRF', paths: ['ssrf_exploitation_evidence.md', 'ssrf_findings.md'], required: false }, + { name: 'Authorization', paths: ['authz_exploitation_evidence.md', 'authz_findings.md'], required: false }, ]; + const dir = deliverablesDir(sourceDir, deliverablesSubdir); const sections: string[] = []; for (const file of deliverableFiles) { - const filePath = path.join(deliverablesDir(sourceDir, deliverablesSubdir), file.path); - try { - if (await fs.pathExists(filePath)) { - const content = await fs.readFile(filePath, 'utf8'); - sections.push(content); - logger.info(`Added ${file.name} findings`); - } else if (file.required) { + let added = false; + for (const candidate of file.paths) { + const filePath = path.join(dir, candidate); + try { + if (await fs.pathExists(filePath)) { + const content = await fs.readFile(filePath, 'utf8'); + sections.push(content); + logger.info(`Added ${file.name} section from ${candidate}`); + added = true; + break; + } + } catch (error) { + const err = error as Error; + 
logger.warn(`Could not read ${candidate}: ${err.message}`); + } + } + if (!added) { + if (file.required) { throw new PentestError( - `Required deliverable file not found: ${file.path}`, + `Required deliverable file not found: ${file.paths.join(' or ')}`, 'filesystem', false, - { deliverableFile: file.path, sourceDir }, + { deliverableFile: file.paths, sourceDir }, ErrorCode.DELIVERABLE_NOT_FOUND, ); - } else { - logger.info(`No ${file.name} deliverable found`); } - } catch (error) { - if (file.required) { - throw error; - } - const err = error as Error; - logger.warn(`Could not read ${file.path}: ${err.message}`); + logger.info(`No ${file.name} deliverable found`); } } const finalContent = sections.join('\n\n'); - const outputDir = deliverablesDir(sourceDir, deliverablesSubdir); - const finalReportPath = path.join(outputDir, 'comprehensive_security_assessment_report.md'); + const finalReportPath = path.join(dir, 'comprehensive_security_assessment_report.md'); try { - // Ensure deliverables directory exists - await fs.ensureDir(outputDir); + await fs.ensureDir(dir); await fs.writeFile(finalReportPath, finalContent); logger.info(`Final report assembled at ${finalReportPath}`); } catch (error) { diff --git a/apps/worker/src/temporal/activities.ts b/apps/worker/src/temporal/activities.ts index de6063f..1c1d43b 100644 --- a/apps/worker/src/temporal/activities.ts +++ b/apps/worker/src/temporal/activities.ts @@ -18,26 +18,28 @@ import fs from 'node:fs/promises'; import path from 'node:path'; import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity'; +import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js'; import { AuditSession } from '../audit/index.js'; import type { ResumeAttempt } from '../audit/metrics-tracker.js'; -import type { SessionMetadata } from '../audit/utils.js'; +import { generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js'; import type { WorkflowSummary } from '../audit/workflow-logger.js'; 
-import type { ContainerConfig, ProviderConfig } from '../types/config.js'; import type { CheckpointContext } from '../interfaces/checkpoint-provider.js'; +import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js'; import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js'; import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js'; import { ExploitationCheckerService } from '../services/exploitation-checker.js'; import { executeGitCommandWithRetry } from '../services/git-manager.js'; import { runPreflightChecks } from '../services/preflight.js'; import type { ExploitationDecision, VulnType } from '../services/queue-validation.js'; +import { renderFindingsFromQueues } from '../services/findings-renderer.js'; import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js'; import { AGENTS } from '../session-manager.js'; import type { AgentName } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js'; +import type { ContainerConfig, ProviderConfig, VulnClass } from '../types/config.js'; import { ErrorCode } from '../types/errors.js'; import { isErr } from '../types/result.js'; -import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js'; -import { fileExists, readJson } from '../utils/file-io.js'; +import { atomicWrite, fileExists, readJson } from '../utils/file-io.js'; import { createActivityLogger } from './activity-logger.js'; import type { AgentMetrics, PipelineState, ResumeState } from './shared.js'; @@ -135,7 +137,8 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro // Skip guard: the checkpoint provider decides whether to run the agent. // The default NoOp provider always returns { skip: false }. - const skipContainer = getContainer(workflowId) ?? + const skipContainer = + getContainer(workflowId) ?? 
getOrCreateContainer(workflowId, buildSessionMetadata(input), buildContainerConfig(input)); const decision = await skipContainer.checkpointProvider.shouldSkipAgent( agentName, @@ -321,7 +324,15 @@ export async function runPreflightValidation(input: ActivityInput): Promise { } /** - * Assemble the final report by concatenating exploitation evidence files. + * Sync code_path avoid rules into Claude's user-scope settings.json so the + * SDK enforces them at the tool layer for every agent in this run. + * + * Runs once per workflow before any agent fires. Config is fixed for the + * lifetime of the workflow, so writing once avoids the parallel-agent race + * on the global ~/.claude/settings.json file. */ -export async function assembleReportActivity(input: ActivityInput): Promise { +export async function syncCodePathDenyRules(input: ActivityInput): Promise { + const logger = createActivityLogger(); + const container = getOrCreateContainer(input.workflowId, buildSessionMetadata(input), buildContainerConfig(input)); + + const configResult = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML); + if (isErr(configResult)) { + logger.warn(`syncCodePathDenyRules: skipping (config load failed: ${configResult.error.message})`); + return; + } + + const config = configResult.value; + const denyCount = (config?.avoid ?? []).filter((r) => r.type === 'code_path').length; + await writeUserSettingsForCodePathAvoids(config); + logger.info(`Synced code_path deny rules to user settings (${denyCount} entries)`); +} + +/** + * Assemble the final report by concatenating per-class deliverables. + * + * Under exploit=true, each exploit agent has produced `*_exploitation_evidence.md` + * directly. Under exploit=false, exploit agents didn't run; we deterministically + * render `*_findings.md` from each `*_exploitation_queue.json` first, then assemble. 
+ */ +export async function assembleReportActivity(input: ActivityInput, exploit: boolean): Promise { const { repoPath, deliverablesSubdir } = input; const logger = createActivityLogger(); + + if (!exploit) { + logger.info('Rendering per-class findings from analysis queues...'); + try { + await renderFindingsFromQueues(repoPath, deliverablesSubdir, logger); + } catch (error) { + const err = error as Error; + logger.warn(`Error rendering findings from queues: ${err.message}`); + } + } + logger.info('Assembling deliverables from specialist agents...'); try { await assembleFinalReport(repoPath, deliverablesSubdir, logger); @@ -438,6 +488,11 @@ export async function checkExploitationQueue(input: ActivityInput, vulnType: Vul return checker.checkQueue(vulnType, delivPath, logger); } +interface RunScope { + vulnClasses: VulnClass[]; + exploit: boolean; +} + interface SessionJson { session: { id: string; @@ -445,6 +500,7 @@ interface SessionJson { repoPath?: string; originalWorkflowId?: string; resumeAttempts?: ResumeAttempt[]; + scope?: RunScope; }; metrics: { agents: Record< @@ -562,6 +618,42 @@ export async function loadResumeState( }; } +/** First run records scope into session.json; resume runs throw if it differs. 
*/ +export async function persistOrValidateRunScope( + input: ActivityInput, + vulnClasses: VulnClass[], + exploit: boolean, +): Promise { + const sessionMetadata = buildSessionMetadata(input); + const auditSession = new AuditSession(sessionMetadata); + await auditSession.initialize(input.workflowId); + + const sessionPath = generateSessionJsonPath(sessionMetadata); + const session = await readJson(sessionPath); + + if (session.session.scope) { + const recorded = session.session.scope; + const sameClasses = + recorded.vulnClasses.length === vulnClasses.length && + recorded.vulnClasses.every((c) => vulnClasses.includes(c)) && + vulnClasses.every((c) => recorded.vulnClasses.includes(c)); + + if (!sameClasses || recorded.exploit !== exploit) { + throw ApplicationFailure.nonRetryable( + `Resume scope mismatch for workspace ${input.sessionId}.\n` + + ` Original: vuln_classes=[${recorded.vulnClasses.join(', ')}], exploit=${recorded.exploit}\n` + + ` Provided: vuln_classes=[${vulnClasses.join(', ')}], exploit=${exploit}\n` + + `Resume requires the same scope as the original run. 
Start a new workspace if you want different scope.`, + 'ScopeMismatchError', + ); + } + return; + } + + session.session.scope = { vulnClasses: [...vulnClasses], exploit }; + await atomicWrite(sessionPath, session); +} + async function findLatestCommit(gitDir: string, commitHashes: string[]): Promise { if (commitHashes.length === 1) { const hash = commitHashes[0]; @@ -605,7 +697,7 @@ export async function restoreGitCheckpoint( await executeGitCommandWithRetry( ['git', 'rev-parse', '--verify', checkpointHash], repoPath, - 'verify checkpoint hash exists' + 'verify checkpoint hash exists', ); } catch { logger.info(`Checkpoint hash not found in clone, skipping git reset: ${checkpointHash}`); diff --git a/apps/worker/src/temporal/shared.ts b/apps/worker/src/temporal/shared.ts index 32519cf..a293310 100644 --- a/apps/worker/src/temporal/shared.ts +++ b/apps/worker/src/temporal/shared.ts @@ -2,7 +2,7 @@ import { defineQuery } from '@temporalio/workflow'; export type { AgentMetrics } from '../types/metrics.js'; -import type { DistributedConfig, PipelineConfig, ProviderConfig } from '../types/config.js'; +import type { DistributedConfig, PipelineConfig, ProviderConfig, VulnClass } from '../types/config.js'; import type { ErrorCode } from '../types/errors.js'; import type { AgentMetrics } from '../types/metrics.js'; @@ -29,6 +29,8 @@ export interface PipelineInput { checkpointsEnabled?: boolean; // Enable checkpoint activities (default: false) skipGitCheck?: boolean; // Skip .git directory validation in preflight (e.g. when .git is removed after clone) providerConfig?: ProviderConfig; // LLM provider configuration (Bedrock, Vertex, etc.) 
+ vulnClasses?: VulnClass[]; // omitted = all five + exploit?: boolean; // false skips the exploitation phase } export interface ResumeState { diff --git a/apps/worker/src/temporal/worker.ts b/apps/worker/src/temporal/worker.ts index 78c5633..5ca35ec 100644 --- a/apps/worker/src/temporal/worker.ts +++ b/apps/worker/src/temporal/worker.ts @@ -36,7 +36,7 @@ import dotenv from 'dotenv'; import { sanitizeHostname } from '../audit/utils.js'; import { parseConfig } from '../config-parser.js'; import { deliverablesDir } from '../paths.js'; -import type { PipelineConfig } from '../types/config.js'; +import type { PipelineConfig, VulnClass } from '../types/config.js'; import { fileExists, readJson } from '../utils/file-io.js'; import * as activities from './activities.js'; import type { PipelineInput, PipelineProgress, PipelineState } from './shared.js'; @@ -275,30 +275,39 @@ async function resolveWorkspace(client: Client, args: CliArgs): Promise { - if (!configPath) return {}; +interface OrchestrationConfig { + pipelineConfig: PipelineConfig; + vulnClasses?: VulnClass[]; + exploit?: boolean; +} + +async function loadOrchestrationConfig(configPath: string | undefined): Promise { + if (!configPath) return { pipelineConfig: {} }; try { const config = await parseConfig(configPath); - const raw = config.pipeline; - if (!raw) return {}; - const result: PipelineConfig = {}; - if (raw.retry_preset !== undefined) { - result.retry_preset = raw.retry_preset; + const pipelineConfig: PipelineConfig = {}; + if (config.pipeline?.retry_preset !== undefined) { + pipelineConfig.retry_preset = config.pipeline.retry_preset; } - if (raw.max_concurrent_pipelines !== undefined) { - result.max_concurrent_pipelines = Number(raw.max_concurrent_pipelines); + if (config.pipeline?.max_concurrent_pipelines !== undefined) { + pipelineConfig.max_concurrent_pipelines = Number(config.pipeline.max_concurrent_pipelines); } - return result; + + return { + pipelineConfig, + ...(config.vuln_classes && 
config.vuln_classes.length > 0 && { vulnClasses: [...config.vuln_classes] }), + ...(config.exploit !== undefined && { exploit: config.exploit === 'true' }), + }; } catch { - return {}; + return { pipelineConfig: {} }; } } function buildPipelineInput( args: CliArgs, workspace: WorkspaceResolution, - pipelineConfig: PipelineConfig, + orchestration: OrchestrationConfig, ): PipelineInput { return { webUrl: args.webUrl, @@ -309,7 +318,9 @@ function buildPipelineInput( ...(args.pipelineTestingMode && { pipelineTestingMode: args.pipelineTestingMode }), ...(workspace.isResume && args.resumeFromWorkspace && { resumeFromWorkspace: args.resumeFromWorkspace }), ...(workspace.terminatedWorkflows.length > 0 && { terminatedWorkflows: workspace.terminatedWorkflows }), - ...(Object.keys(pipelineConfig).length > 0 && { pipelineConfig }), + ...(Object.keys(orchestration.pipelineConfig).length > 0 && { pipelineConfig: orchestration.pipelineConfig }), + ...(orchestration.vulnClasses && { vulnClasses: orchestration.vulnClasses }), + ...(orchestration.exploit !== undefined && { exploit: orchestration.exploit }), }; } @@ -417,8 +428,8 @@ async function run(): Promise { // 4. Resolve workspace and build pipeline input const workspace = await resolveWorkspace(client, args); - const pipelineConfig = await loadPipelineConfig(args.configPath); - const input = buildPipelineInput(args, workspace, pipelineConfig); + const orchestration = await loadOrchestrationConfig(args.configPath); + const input = buildPipelineInput(args, workspace, orchestration); // 5. 
Start worker polling in the background const workerDone = worker.run(); diff --git a/apps/worker/src/temporal/workflows.ts b/apps/worker/src/temporal/workflows.ts index d4f26d8..5cc3d1c 100644 --- a/apps/worker/src/temporal/workflows.ts +++ b/apps/worker/src/temporal/workflows.ts @@ -33,6 +33,7 @@ import { } from '@temporalio/workflow'; import type { AgentName, VulnType } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js'; +import { ALL_VULN_CLASSES, type VulnClass } from '../types/config.js'; import type * as activities from './activities.js'; import type { ActivityInput } from './activities.js'; import { @@ -48,6 +49,19 @@ import { import { toWorkflowSummary } from './summary-mapper.js'; import { classifyErrorCode, formatWorkflowError } from './workflow-errors.js'; +/** Agents this run is expected to produce — drives the resume short-circuit. */ +function computeExpectedAgents(vulnClasses: readonly VulnClass[], exploit: boolean): string[] { + const expected: string[] = ['pre-recon', 'recon']; + for (const cls of vulnClasses) { + expected.push(`${cls}-vuln`); + if (exploit) { + expected.push(`${cls}-exploit`); + } + } + expected.push('report'); + return expected; +} + // Retry configuration for production (long intervals for billing recovery) const PRODUCTION_RETRY = { initialInterval: '5 minutes', @@ -215,22 +229,42 @@ export async function pentestPipeline(input: PipelineInput): Promise 0 ? input.vulnClasses : ALL_VULN_CLASSES; + const selectedClassSet = new Set(selectedVulnClasses); + const exploit: boolean = input.exploit ?? true; + const expectedAgents = computeExpectedAgents(selectedVulnClasses, exploit); + + await a.persistOrValidateRunScope(activityInput, [...selectedVulnClasses], exploit); + let resumeState: ResumeState | null = null; if (input.resumeFromWorkspace) { // 1. 
Load resume state (validates workspace, cross-checks deliverables) - resumeState = await a.loadResumeState(input.resumeFromWorkspace, input.webUrl, input.repoPath, input.deliverablesSubdir); + resumeState = await a.loadResumeState( + input.resumeFromWorkspace, + input.webUrl, + input.repoPath, + input.deliverablesSubdir, + ); // 2. Restore git workspace and clean up incomplete deliverables const incompleteAgents = ALL_AGENTS.filter( (agentName) => !resumeState?.completedAgents.includes(agentName), ) as AgentName[]; - await a.restoreGitCheckpoint(input.repoPath, resumeState.checkpointHash, incompleteAgents, input.deliverablesSubdir); + await a.restoreGitCheckpoint( + input.repoPath, + resumeState.checkpointHash, + incompleteAgents, + input.deliverablesSubdir, + ); - // 3. Short-circuit if all agents already completed - if (resumeState.completedAgents.length === ALL_AGENTS.length) { - log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`); + // 3. Short-circuit when every agent expected by this run is done. + // Uses dynamic expectedAgents (not ALL_AGENTS) so a class-scoped run completes sooner. + const allExpectedDone = expectedAgents.every((a) => resumeState?.completedAgents.includes(a)); + if (allExpectedDone) { + log.info(`All ${expectedAgents.length} expected agents already completed. Nothing to resume.`); state.status = 'completed'; state.completedAgents = [...resumeState.completedAgents]; state.summary = computeSummary(state); @@ -389,6 +423,11 @@ export async function pentestPipeline(input: PipelineInput): Promise Promise> = []; for (const config of pipelineConfigs) { + // Excluded classes drop entirely; any prior deliverables stay on disk but don't count this run. 
+ if (!selectedClassSet.has(config.vulnType)) { + log.info(`Skipping ${config.vulnType} pipeline (class not selected this run)`); + continue; + } if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) { pipelineThunks.push(() => runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)); } else { @@ -487,8 +529,8 @@ export async function pentestPipeline(input: PipelineInput): Promise