13 Commits

Author SHA1 Message Date
ezl-keygraph 35f59f30f6 feat(docker): forward /etc/hosts entries to worker containers (#346) 2026-05-28 23:12:11 +05:30
ezl-keygraph 7813baf16a feat: share preflight authenticated session across agents (#345)
* feat(auth): reuse preflight's authenticated session across agents

* fix(preflight): verify saved auth state parses and has cookies or origins

* fix(prompts): strip shared-session block when no auth is configured

* fix(shannon): store shared auth state in the per-session audit dir

* fix(prompts): write stub auth-state in pipeline-testing preflight

* fix(preflight): clear stale auth-state.json before validate-authentication

* fix(preflight): drop auth-state.json on workflow completion

* docs(claude): refresh auth-state.json description for new layout and cleanup

* refactor(prompts): drop unused PLAYWRIGHT_SESSION resolve in login instructions

* style(prompts): collapse verifySavedAuthState signature per biome

* refactor(prompts): require AUTH_STATE_FILE on authenticated runs

* style(prompts): trim numbered-step comments back to step headers
2026-05-28 03:23:09 +05:30
ezl-keygraph 8f5d639f0d fix(deps): bump fast-uri to 3.1.2 (CVE-2026-6321) (#344) 2026-05-27 13:16:55 +05:30
ezl-keygraph 32c01a39b1 feat(preflight): block cloud metadata range in target URL check (#337)
* chore(docker): pin temporal image to 1.7.0

* feat(preflight): block link-local metadata range in target URL check

* style: apply biome formatting and import sorting
2026-05-21 00:23:46 +05:30
ezl-keygraph 72c424f687 fix(docker): pin --ignore-scripts on global npm installs (#338) 2026-05-21 00:23:14 +05:30
ezl-keygraph 1af42339b9 feat(auth): auth-validation preflight + email_login credentials (#335)
* feat(preflight): add credential validation activity

* refactor(preflight): tighten error retryability and dedup failure-point enum

* refactor(preflight): extract resolvePromptDir helper and cap failure_detail at 250 chars

* refactor(preflight): inline validator rules into intro paragraph

* refactor(preflight): restyle validator prompt with XML tags and tool list

* chore(preflight): bump auth validation timeout to 10 minutes

* feat: provision playwright stealth config for browser auto-discovery

* feat(stealth): strengthen browser fingerprint with chrome.runtime and realistic plugins

* feat(prompts): add pipeline-testing stub for validate-authentication

* refactor(stealth): swap zx for node:fs in playwright-config-writer

* feat(auth): add email_login credentials with login-flow substitution

* fix(auth): propagate email_login through credentials sanitizer

* fix(config): drop dangerous-pattern check on credentials.password

* feat(auth-validation): instruct agent to mask sensitive values in failure_detail

* docs(auth): document email_login credentials for magic-link and email-OTP flows

* docs(auth): add login_flow authoring guide with placeholder reference

* feat(auth): make credentials.password optional for passwordless flows

* docs(auth): drop redundant placeholder hint from login_flow examples
2026-05-20 03:46:56 +05:30
ezl-keygraph ca86c839cc feat(ai): steer notes field for analysis-only mode (#329) 2026-05-06 04:07:38 +05:30
ezl-keygraph 0a57b062fd feat(scripts): add --help to save-deliverable and generate-totp (#328) 2026-05-06 04:07:25 +05:30
ezl-keygraph 46be49c175 chore: remove unused scan tools and dead error type (#327)
* chore: remove unused scan tools and dead error type

* chore(logs): redact base URL and target URL from preflight info logs
2026-05-04 21:51:45 +05:30
ezl-keygraph 95998d1a44 feat: add config-driven run scoping and report filtering (#326)
* feat(steerability): add config-driven profile with code_path avoid enforcement

* fix(steerability): write SDK deny rules once per workflow to avoid parallel-agent race

* fix(steerability): reference guidance by pointer in report DROP rules

* fix(steerability): tighten code_path avoid enforcement

* chore(steerability): use shared ALL_VULN_CLASSES const and tighten RunScope type

* fix(steerability): validate run scope before resume short-circuit

* fix(steerability): emit only documented Read/Edit deny rules for code_path

* fix(steerability): assemble report from analysis deliverables when exploit is disabled

* feat(steerability): preflight check that code_path rules match at least one repo entry

* fix(steerability): tag missing code_path entries with avoid/focus kind

* revert(steerability): assemble report from analysis deliverables when exploit is disabled

* feat(steerability): render per-class findings from queue JSON when exploit is disabled

* refactor(steerability): trim findings renderer to common mappable rows

* feat(steerability): allow report agent to rewrite category-label finding titles

* docs(steerability): document new config fields in README and CLAUDE.md

* docs(steerability): comment out optional config sections in examples
2026-05-01 23:56:15 +05:30
ezl-keygraph 6c8135d031 feat(ai): upgrade to Opus 4.7 with adaptive thinking (#325) 2026-04-28 21:52:13 +05:30
ezl-keygraph 03a3d764af feat(cli): block running shannon with sudo or as root (#323) 2026-04-28 12:43:07 +05:30
ezl-keygraph 79caada539 fix(deps): bump protobufjs to 7.5.5 to patch CVE-2026-41242 (#314) 2026-04-23 20:42:06 +05:30
71 changed files with 3216 additions and 599 deletions
-1
View File
@@ -135,7 +135,6 @@ shannon <URL> <REPO> --pipeline-testing
|-------------------|---------|------------|
| `config` | Configuration file issues | No |
| `network` | Connection/timeout issues | Yes |
| `tool` | External tool (nmap, etc.) failed | Yes |
| `prompt` | Claude SDK/API issues | Sometimes |
| `filesystem` | File read/write errors | Sometimes |
| `validation` | Deliverable validation failed | Yes (via retry) |
+9 -3
View File
@@ -4,6 +4,12 @@
# Recommended output token configuration for larger tool outputs
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
# Adaptive thinking is enabled automatically on Opus 4.6/4.7. Set to false to disable.
# CLAUDE_ADAPTIVE_THINKING=false
# Shannon forwards your machine's /etc/hosts entries into the worker container. Set to false to disable.
# SHANNON_FORWARD_HOSTS=false
# =============================================================================
# OPTION 1: Direct Anthropic
# =============================================================================
@@ -26,7 +32,7 @@ ANTHROPIC_API_KEY=your-api-key-here
# Optional for direct Anthropic and custom base URL modes. Required for Bedrock/Vertex.
# ANTHROPIC_SMALL_MODEL=... # Small tier (default: claude-haiku-4-5-20251001)
# ANTHROPIC_MEDIUM_MODEL=... # Medium tier (default: claude-sonnet-4-6)
# ANTHROPIC_LARGE_MODEL=... # Large tier (default: claude-opus-4-6)
# ANTHROPIC_LARGE_MODEL=... # Large tier (default: claude-opus-4-7)
# =============================================================================
# OPTION 3: AWS Bedrock
@@ -36,7 +42,7 @@ ANTHROPIC_API_KEY=your-api-key-here
# Example Bedrock model IDs for us-east-1:
# ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
# ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
# ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
# ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
# CLAUDE_CODE_USE_BEDROCK=1
# AWS_REGION=us-east-1
@@ -52,7 +58,7 @@ ANTHROPIC_API_KEY=your-api-key-here
# Example Vertex AI model IDs:
# ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
# ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
# ANTHROPIC_LARGE_MODEL=claude-opus-4-6
# ANTHROPIC_LARGE_MODEL=claude-opus-4-7
# CLAUDE_CODE_USE_VERTEX=1
# CLOUD_ML_REGION=us-east5
+6 -6
View File
@@ -116,6 +116,7 @@ Infra (Temporal) runs via `docker-compose.yml`. Workers are ephemeral `docker ru
- `docker-compose.yml` — Infra only: `shannon-temporal` (port 7233/8233). Network: `shannon-net`
- `Dockerfile` — 2-stage build (builder + Chainguard Wolfi runtime). Uses pnpm. Entrypoint: `CMD ["node", "apps/worker/dist/temporal/worker.js"]`
- No `docker-compose.docker.yml` — host gateway handled via `--add-host` flag in CLI
- `/etc/hosts` forwarding — at worker spawn, `forwardEtcHostsFlags` in `apps/cli/src/docker.ts` reads the host's `/etc/hosts` and emits one `--add-host` flag per valid user-added entry. Loopback IPs (`127.x`, `::1`) are rewritten to `host-gateway`; IPv6 addresses are bracketed. Disable per-scan via `SHANNON_FORWARD_HOSTS=false`. No-op on Windows native (WSL2 reads its own `/etc/hosts` via the Linux path).
### Worker Package (`apps/worker/`)
- `apps/worker/src/paths.ts` — Centralized path constants (`PROMPTS_DIR`, `CONFIGS_DIR`, `WORKSPACES_DIR`)
@@ -137,16 +138,16 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
- `apps/worker/src/temporal/shared.ts` — Types, interfaces, query definitions
### Five-Phase Pipeline
1. **Pre-Recon** (`pre-recon`) — External scans (nmap, subfinder, whatweb) + source code analysis
1. **Pre-Recon** (`pre-recon`) — Source code analysis to build the architectural baseline
2. **Recon** (`recon`) — Attack surface mapping from initial findings
3. **Vulnerability Analysis** (5 parallel agents) — injection, xss, auth, authz, ssrf
4. **Exploitation** (5 parallel agents, conditional) — Exploits confirmed vulnerabilities
5. **Reporting** (`report`) — Executive-level security report
### Supporting Systems
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters. Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are written into `~/.claude/settings.json` `permissions.deny` (`Read`/`Edit`) once per workflow by `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` so the SDK enforces them at the tool layer even in `bypassPermissions` mode. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans. Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`)
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts`
@@ -227,7 +228,7 @@ Comments must be **timeless** — no references to this conversation, refactorin
**Entry Points:** `apps/worker/src/temporal/workflows.ts`, `apps/worker/src/temporal/activities.ts`, `apps/worker/src/temporal/worker.ts`
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/config-parser.ts`, `apps/worker/src/services/`, `apps/worker/src/audit/`
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to `~/.claude/settings.json`), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/`
**Config:** `docker-compose.yml`, `apps/cli/infra/compose.yml`, `apps/worker/configs/`, `apps/worker/prompts/`, `tsconfig.base.json` (shared compiler options), `turbo.json`, `biome.json`
@@ -244,5 +245,4 @@ Package managers are configured with a minimum release age (7 days). Requires pn
- **Worker not processing** — Check `docker ps --filter "name=shannon-worker-"`
- **Reset state** — `./shannon stop --clean`
- **Local apps unreachable** — Use `host.docker.internal` instead of `localhost`
- **Missing tools** — Use `--pipeline-testing` to skip nmap/subfinder/whatweb (graceful degradation)
- **Container permissions** — On Linux, may need `sudo` for docker commands
+2 -53
View File
@@ -13,46 +13,14 @@ RUN apk update && apk add --no-cache \
curl \
wget \
ca-certificates \
# Network libraries for Go tools
libpcap-dev \
linux-headers \
# Language runtimes
go \
nodejs-22 \
npm \
python3 \
py3-pip \
ruby \
ruby-dev \
# Security tools available in Wolfi
nmap \
# Additional utilities
bash
# Set environment variables for Go
ENV GOPATH=/go
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
ENV CGO_ENABLED=1
# Create directories
RUN mkdir -p $GOPATH/bin
# Install Go-based security tools
RUN go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@v2.13.0
# Install WhatWeb from release tarball (Ruby-based tool)
RUN curl -sL https://github.com/urbanadventurer/WhatWeb/archive/refs/tags/v0.6.3.tar.gz | tar xz -C /opt && \
mv /opt/WhatWeb-0.6.3 /opt/whatweb && \
chmod +x /opt/whatweb/whatweb && \
gem install addressable -v 2.8.9 && \
echo '#!/bin/bash' > /usr/local/bin/whatweb && \
echo 'cd /opt/whatweb && exec ./whatweb "$@"' >> /usr/local/bin/whatweb && \
chmod +x /usr/local/bin/whatweb
# Install Python-based tools
RUN pip3 install --no-cache-dir schemathesis==4.13.0
# Install pnpm
RUN npm install -g pnpm@10.33.0
RUN npm install -g --ignore-scripts pnpm@10.33.0
# Build Node.js application in builder to avoid QEMU emulation failures in CI
WORKDIR /app
@@ -84,15 +52,10 @@ RUN apk update && apk add --no-cache \
curl \
ca-certificates \
shadow \
# Network libraries (runtime)
libpcap \
# Security tools
nmap \
# Language runtimes (minimal)
nodejs-22 \
npm \
python3 \
ruby \
# Chromium browser and dependencies for Playwright
chromium \
# Additional libraries Chromium needs
@@ -110,20 +73,6 @@ RUN apk update && apk add --no-cache \
# Font rendering
fontconfig
# Copy Go binaries from builder
COPY --from=builder /go/bin/subfinder /usr/local/bin/
# Copy WhatWeb from builder
COPY --from=builder /opt/whatweb /opt/whatweb
COPY --from=builder /usr/local/bin/whatweb /usr/local/bin/whatweb
# Install WhatWeb Ruby dependencies in runtime stage
RUN gem install addressable -v 2.8.9
# Copy Python packages from builder
COPY --from=builder /usr/lib/python3.*/site-packages /usr/lib/python3.12/site-packages
COPY --from=builder /usr/bin/schemathesis /usr/bin/
# Create non-root user
RUN addgroup -g 1001 pentest && \
adduser -u 1001 -G pentest -s /bin/bash -D pentest
@@ -142,7 +91,7 @@ COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/apps/worker /app/apps/worker
COPY --from=builder /app/apps/cli/package.json /app/apps/cli/package.json
RUN npm install -g @anthropic-ai/claude-code@2.1.84 @playwright/cli@0.1.1
RUN npm install -g --ignore-scripts @anthropic-ai/claude-code@2.1.84 @playwright/cli@0.1.1
RUN mkdir -p /tmp/.claude/skills && \
playwright-cli install --skills && \
cp -r .claude/skills/playwright-cli /tmp/.claude/skills/ && \
+84 -18
View File
@@ -44,7 +44,6 @@ Shannon identified 20+ vulnerabilities in OWASP Juice Shop, including authentica
- **Reproducible Proof-of-Concept Exploits**: The final report contains only proven, exploitable findings with copy-and-paste PoCs. Vulnerabilities that cannot be exploited are not reported.
- **OWASP Vulnerability Coverage**: Identifies and validates Injection, XSS, SSRF, and Broken Authentication/Authorization, with additional categories in development.
- **Code-Aware Dynamic Testing**: Analyzes source code to guide attack strategy, then validates findings with live browser and CLI-based exploits against the running application.
- **Integrated Security Tooling**: Leverages Nmap, Subfinder, WhatWeb, and Schemathesis during reconnaissance and discovery phases.
- **Parallel Processing**: Vulnerability analysis and exploitation phases run concurrently across all attack categories.
## Product Line
@@ -374,9 +373,21 @@ cp configs/example-config.yaml ./my-app-config.yaml
##### Basic Configuration Structure
```yaml
# Optional: describe your target environment (max 500 chars)
# Describe your target environment (optional, max 500 chars)
description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production."
# Limit which vulnerability classes run end-to-end (optional, default: all five)
# vuln_classes: [injection, xss, auth, authz, ssrf]
# Skip the exploitation phase (optional, default: "true")
# exploit: "false"
# Free-form rules of engagement (optional)
# rules_of_engagement: |
# - No password brute-force; cap login attempts at 5 per account.
# - Throttle to under 5 requests per second per endpoint; back off 60s on any 429.
# - Use placeholders like [order_id] in deliverables — no real data values.
authentication:
login_type: form
login_url: "https://your-app.com/login"
@@ -385,6 +396,13 @@ authentication:
password: "yourpassword"
totp_secret: "LB2E2RX7XFHSTGCK" # Optional for 2FA
# Optional mailbox credentials for magic-link / email-OTP flows.
# email_login:
# address: "inbox@example.com"
# password: "mailbox-password"
# totp_secret: "JBSWY3DPEHPK3PXP"
# Natural language instructions for login flow
login_flow:
- "Type $username into the email field"
- "Type $password into the password field"
@@ -395,15 +413,28 @@ authentication:
value: "/dashboard"
rules:
# Supported types: url_path, subdomain, domain, method, header, parameter, code_path
avoid:
- description: "AI should avoid testing logout functionality"
type: path
url_path: "/logout"
type: url_path
value: "/logout"
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "src/vendor/**").
# - description: "Out-of-scope vendored libraries"
# type: code_path
# value: "src/vendor/**"
focus:
- description: "AI should emphasize testing API endpoints"
type: path
url_path: "/api"
type: url_path
value: "/api"
# Filters applied by the report agent when assembling the final report (optional).
# report:
# min_severity: low # drop findings below this severity (low | medium | high | critical)
# min_confidence: low # drop findings below this confidence (low | medium | high)
# guidance: |
# Drop findings about missing security headers and rate-limit gaps.
```
Run with:
@@ -421,9 +452,39 @@ npx @keygraph/shannon start -u https://example.com -r /path/to/repo -c ./my-app-
</details>
#### TOTP Setup for 2FA
#### Writing Login Flow
If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
Log in once in a fresh incognito/private window. Write the steps in the same order you perform them:
- When you type into a field, reference the field by its exact label or placeholder.
- When you click a button, reference the exact button text.
Supported placeholders:
- `$username`
- `$password`
- `$totp`
- `$email_address`
- `$email_password`
- `$email_totp`
At runtime, Shannon replaces these placeholders with the credentials passed in the config.
```yaml
login_flow:
- "Type $username in <exact email field label or placeholder>"
- "Click <exact button text>"
- "Type $password in <exact password field label or placeholder>"
- "Click <exact button text>"
- "If prompted for 2FA, type $totp in <exact code field label or placeholder>"
- "Click <exact button text>"
```
#### Adaptive Thinking (Opus 4.6/4.7)
On Opus 4.6 and 4.7, Claude decides when and how deeply to reason. Adaptive thinking is enabled by default whenever a model tier resolves to one of these models.
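- **npx mode** — the `npx @keygraph/shannon setup` wizard asks whether to enable adaptive thinking and saves your answer as `core.adaptive_thinking` in `~/.shannon/config.toml`.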
- **Local mode** — set `CLAUDE_ADAPTIVE_THINKING=false` in `.env` (or as an exported env var) to disable.
#### Subscription Plan Rate Limits
@@ -453,7 +514,7 @@ export AWS_REGION=us-east-1
export AWS_BEARER_TOKEN_BEDROCK=your-bearer-token
export ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
export ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
export ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
export ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
```
<details>
@@ -465,12 +526,12 @@ AWS_REGION=us-east-1
AWS_BEARER_TOKEN_BEDROCK=your-bearer-token
ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
```
</details>
Shannon uses three model tiers: **small** (`claude-haiku-4-5-20251001`) for summarization, **medium** (`claude-sonnet-4-6`) for security analysis, and **large** (`claude-opus-4-6`) for deep reasoning. Set `ANTHROPIC_SMALL_MODEL`, `ANTHROPIC_MEDIUM_MODEL`, and `ANTHROPIC_LARGE_MODEL` to the Bedrock model IDs for your region.
Shannon uses three model tiers: **small** (`claude-haiku-4-5-20251001`) for summarization, **medium** (`claude-sonnet-4-6`) for security analysis, and **large** (`claude-opus-4-7`) for deep reasoning. Set `ANTHROPIC_SMALL_MODEL`, `ANTHROPIC_MEDIUM_MODEL`, and `ANTHROPIC_LARGE_MODEL` to the Bedrock model IDs for your region.
### Google Vertex AI
@@ -491,7 +552,7 @@ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-sa-key.json
export ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
export ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
export ANTHROPIC_LARGE_MODEL=claude-opus-4-6
export ANTHROPIC_LARGE_MODEL=claude-opus-4-7
```
<details>
@@ -504,7 +565,7 @@ ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
GOOGLE_APPLICATION_CREDENTIALS=./credentials/google-sa-key.json
ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
ANTHROPIC_LARGE_MODEL=claude-opus-4-6
ANTHROPIC_LARGE_MODEL=claude-opus-4-7
```
</details>
@@ -531,7 +592,7 @@ export ANTHROPIC_AUTH_TOKEN=your-auth-token
# Optionally override model tiers (defaults are used if not set)
export ANTHROPIC_SMALL_MODEL=claude-haiku-4-5-20251001
export ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
export ANTHROPIC_LARGE_MODEL=claude-opus-4-6
export ANTHROPIC_LARGE_MODEL=claude-opus-4-7
```
<details>
@@ -542,7 +603,7 @@ ANTHROPIC_BASE_URL=https://your-proxy.example.com
ANTHROPIC_AUTH_TOKEN=your-auth-token
ANTHROPIC_SMALL_MODEL=claude-haiku-4-5-20251001
ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
ANTHROPIC_LARGE_MODEL=claude-opus-4-6
ANTHROPIC_LARGE_MODEL=claude-opus-4-7
```
</details>
@@ -624,6 +685,12 @@ npx @keygraph/shannon start -u http://host.docker.internal:3000 -r /path/to/repo
</details>
**Custom hostnames in `/etc/hosts`:**
If your local stack uses custom hostnames mapped in `/etc/hosts`, Shannon forwards those entries into the worker container at scan start.
To disable, add `SHANNON_FORWARD_HOSTS=false` to `.env` (local mode) or export it in your shell: `export SHANNON_FORWARD_HOSTS=false`. In npx mode, the shell export is the only option since there's no `.env`.
### Output and Results
All results are saved to the workspaces directory: `./workspaces/` (local mode) or `~/.shannon/workspaces/` (npx mode). Use `-o <path>` to copy deliverables to a custom output directory after the run completes.
@@ -712,8 +779,7 @@ Shannon uses a multi-agent architecture that combines white-box source code anal
```
┌──────────────────────┐
│ Pre-Reconnaissance │
(nmap, subfinder,
│ whatweb, code scan) │
(source code scan)
└──────────┬───────────┘
@@ -756,7 +822,7 @@ Each scan runs in its own ephemeral Docker container (`docker run --rm`) with a
#### **Phase 1: Pre-Reconnaissance**
External scanning using nmap, subfinder, and whatweb to fingerprint the target's infrastructure and tech stack. Simultaneously performs source code analysis to identify the application framework, entry points, and potential attack surface from the codebase.
Performs source code analysis to identify the application framework, entry points, and potential attack surface from the codebase. Builds the foundational architectural intelligence that all subsequent agents depend on.
#### **Phase 2: Reconnaissance**
+1 -2
View File
@@ -147,7 +147,7 @@ This phase informs everything downstream. If the codebase uses an ORM with param
## Phase 2: Reconnaissance
Bridges static and dynamic analysis using browser automation. The recon agent correlates code findings with the live application, validating that endpoints actually exist, mapping authentication flows, inventorying input vectors (URL parameters, POST fields, headers, cookies), and documenting the real authorization architecture. This phase may also integrate with infrastructure discovery tools including Nmap, Subfinder, and WhatWeb for network perimeter mapping.
Bridges static and dynamic analysis using browser automation. The recon agent correlates code findings with the live application, validating that endpoints actually exist, mapping authentication flows, inventorying input vectors (URL parameters, POST fields, headers, cookies), and documenting the real authorization architecture.
## Phase 3: Vulnerability Analysis
@@ -194,7 +194,6 @@ This correlation means that a data flow vulnerability identified in static analy
- **Fully Autonomous Operation:** Shannon Pro handles complex workflows including 2FA/TOTP logins and SSO (e.g., Sign in with Google) without human intervention. TOTP is handled via a dedicated MCP server tool.
- **White-Box Awareness:** Unlike black-box scanners, Shannon Pro reads the source code to intelligently guide its attack strategy, combining code-level insight with runtime validation.
- **Parallel Processing:** Vulnerability analysis and exploitation phases run concurrently across attack domains, with pipelined parallelism minimizing total execution time.
- **Tool Orchestration:** Shannon Pro orchestrates existing security tools (e.g., Schemathesis for API testing, Nmap for network discovery) while adding LLM reasoning to interpret results.
- **Configurable Login Flows:** Authentication configuration specifies login procedures and credentials, which are interpolated into agent prompts for authenticated testing.
---
+1 -1
View File
@@ -4,7 +4,7 @@ networks:
services:
temporal:
image: temporalio/temporal:latest
image: temporalio/temporal:1.7.0
container_name: shannon-temporal
command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"]
ports:
+24 -7
View File
@@ -32,7 +32,10 @@ export async function setup(): Promise<void> {
const config = await setupProvider(provider as Provider);
// 2. Save config
// 2. Adaptive thinking
await maybePromptAdaptiveThinking(config);
// 3. Save config
saveConfig(config);
const configPath = path.join(SHANNON_HOME, 'config.toml');
@@ -80,7 +83,7 @@ async function setupAnthropic(): Promise<ShannonConfig> {
'Do you want to change the default models?\n' +
' Small - claude-haiku-4-5-20251001\n' +
' Medium - claude-sonnet-4-6\n' +
' Large - claude-opus-4-6',
' Large - claude-opus-4-7',
initialValue: false,
});
if (p.isCancel(customizeModels)) return cancelAndExit();
@@ -102,7 +105,7 @@ async function setupAnthropic(): Promise<ShannonConfig> {
const large = await p.text({
message: 'Large model ID',
initialValue: 'claude-opus-4-6',
initialValue: 'claude-opus-4-7',
validate: required('Large model ID is required'),
});
if (p.isCancel(large)) return cancelAndExit();
@@ -140,7 +143,7 @@ async function setupCustomBaseUrl(): Promise<ShannonConfig> {
'Do you want to change the default models?\n' +
' Small - claude-haiku-4-5-20251001\n' +
' Medium - claude-sonnet-4-6\n' +
' Large - claude-opus-4-6',
' Large - claude-opus-4-7',
initialValue: false,
});
if (p.isCancel(customizeModels)) return cancelAndExit();
@@ -162,7 +165,7 @@ async function setupCustomBaseUrl(): Promise<ShannonConfig> {
const large = await p.text({
message: 'Large model ID',
initialValue: 'claude-opus-4-6',
initialValue: 'claude-opus-4-7',
validate: required('Large model ID is required'),
});
if (p.isCancel(large)) return cancelAndExit();
@@ -199,7 +202,7 @@ async function setupBedrock(): Promise<ShannonConfig> {
const large = await p.text({
message: 'Large model ID',
placeholder: 'us.anthropic.claude-opus-4-6',
placeholder: 'us.anthropic.claude-opus-4-7',
validate: required('Large model ID is required'),
});
if (p.isCancel(large)) return cancelAndExit();
@@ -262,7 +265,7 @@ async function setupVertex(): Promise<ShannonConfig> {
large: () =>
p.text({
message: 'Large model ID',
placeholder: 'claude-opus-4-6',
placeholder: 'claude-opus-4-7',
validate: required('Large model ID is required'),
}),
});
@@ -281,6 +284,20 @@ async function setupVertex(): Promise<ShannonConfig> {
// === Helpers ===
/**
 * Asks whether adaptive thinking should be enabled and records the answer on `config.core`.
 *
 * The prompt is shown when `config.models` is unset, or when any of the small/medium/large
 * model IDs contains `opus-4-6` or `opus-4-7`; otherwise the function returns without
 * prompting and leaves `config` untouched. Cancelling the prompt hands off to `cancelAndExit()`.
 */
async function maybePromptAdaptiveThinking(config: ShannonConfig): Promise<void> {
  const models = config.models;
  if (models) {
    // Explicit model overrides: only ask when at least one tier is an adaptive-capable Opus.
    const adaptiveCapable = /opus-4-[67]/;
    const tierIds = [models.small, models.medium, models.large];
    const anyCapable = tierIds.some((id) => id && adaptiveCapable.test(id));
    if (!anyCapable) return;
  }

  const answer = await p.confirm({
    message: 'Enable adaptive thinking on Opus 4.6/4.7? Claude decides when and how deeply to reason.',
    initialValue: true,
  });
  if (p.isCancel(answer)) return cancelAndExit();

  // Spread keeps any existing core settings and only sets/overrides adaptive_thinking.
  config.core = { ...config.core, adaptive_thinking: answer };
}
async function promptSecret(message: string): Promise<string> {
const value = await p.password({
message,
+2 -1
View File
@@ -65,7 +65,7 @@ export async function start(args: StartArgs): Promise<void> {
const workspacePath = path.join(workspacesDir, workspace);
fs.mkdirSync(workspacePath, { recursive: true });
fs.chmodSync(workspacePath, 0o777);
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli', '.playwright']) {
const dirPath = path.join(workspacePath, dir);
fs.mkdirSync(dirPath, { recursive: true });
fs.chmodSync(dirPath, 0o777);
@@ -76,6 +76,7 @@ export async function start(args: StartArgs): Promise<void> {
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
fs.mkdirSync(path.join(shannonDir, dir), { recursive: true });
}
fs.mkdirSync(path.join(repo.hostPath, '.playwright'), { recursive: true });
const credentialsPath = getCredentialsPath();
const hasCredentials = fs.existsSync(credentialsPath);
+10 -6
View File
@@ -18,12 +18,14 @@ interface ConfigMapping {
readonly env: string;
readonly toml: string;
readonly type: TOMLType;
readonly boolFormat?: 'numeric' | 'literal';
}
/** Maps every supported env var to its TOML path (section.key) and expected type. */
const CONFIG_MAP: readonly ConfigMapping[] = [
// Core
{ env: 'CLAUDE_CODE_MAX_OUTPUT_TOKENS', toml: 'core.max_tokens', type: 'number' },
{ env: 'CLAUDE_ADAPTIVE_THINKING', toml: 'core.adaptive_thinking', type: 'boolean', boolFormat: 'literal' },
// Anthropic
{ env: 'ANTHROPIC_API_KEY', toml: 'anthropic.api_key', type: 'string' },
@@ -56,9 +58,9 @@ type TOMLValue = string | number | boolean;
type TOMLSection = Record<string, TOMLValue>;
type TOMLConfig = Record<string, TOMLSection>;
/** Read a nested TOML value by dotted path (e.g. "anthropic.api_key"). */
function getTomlValue(config: TOMLConfig, path: string): string | undefined {
const [section, key] = path.split('.');
/** Read a nested TOML value for a given mapping. */
function getTomlValue(config: TOMLConfig, mapping: ConfigMapping): string | undefined {
const [section, key] = mapping.toml.split('.');
if (!section || !key) return undefined;
const sectionObj = config[section];
@@ -67,8 +69,10 @@ function getTomlValue(config: TOMLConfig, path: string): string | undefined {
const value = sectionObj[key];
if (value === undefined || value === null) return undefined;
// NOTE: env.ts checks bedrock/vertex via `=== '1'`, so booleans must map to "1"/"0"
if (typeof value === 'boolean') return value ? '1' : '0';
if (typeof value === 'boolean') {
if (mapping.boolFormat === 'literal') return value ? 'true' : 'false';
return value ? '1' : '0';
}
return String(value);
}
@@ -273,7 +277,7 @@ export function resolveConfig(): void {
for (const mapping of CONFIG_MAP) {
if (process.env[mapping.env]) continue;
const value = getTomlValue(toml, mapping.toml);
const value = getTomlValue(toml, mapping);
if (value) {
process.env[mapping.env] = value;
}
+1 -1
View File
@@ -8,7 +8,7 @@ import { getConfigFile } from '../home.js';
// === Types ===
export interface ShannonConfig {
core?: { max_tokens?: number };
core?: { max_tokens?: number; adaptive_thinking?: boolean };
anthropic?: { api_key?: string; oauth_token?: string };
custom_base_url?: { base_url?: string; auth_token?: string };
bedrock?: { use?: boolean; region?: string; token?: string };
+87 -1
View File
@@ -7,6 +7,7 @@
import { type ChildProcess, execFileSync, spawn } from 'node:child_process';
import crypto from 'node:crypto';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { setTimeout as sleep } from 'node:timers/promises';
@@ -145,6 +146,87 @@ function addHostFlag(): string[] {
return [];
}
/**
 * Host names that are never forwarded, matched case-insensitively by
 * `shouldSkipHostsName`.
 *
 * Only names whose standard IPs pass `shouldSkipHostsIp` need to be listed.
 * Loopback names are listed because loopback IPs (127.x, ::1) are rewritten
 * rather than skipped. Names such as `broadcasthost` and `ip6-mcastprefix`
 * are deliberately absent: their IPs (255.255.255.255, ff00::/8) are already
 * rejected by the IP filter.
 */
const HOSTS_SKIP_NAMES = new Set<string>([
  // Loopback and local-net aliases
  'localhost',
  'ip6-localhost',
  'ip6-loopback',
  'ip6-localnet',
  // *.docker.internal names
  'host.docker.internal',
  'gateway.docker.internal',
  'kubernetes.docker.internal',
]);
/**
 * Report whether `ip` is a loopback address: any IPv4 literal beginning with
 * `127.` or the exact IPv6 literal `::1`.
 */
function isLoopbackIp(ip: string): boolean {
  if (ip === '::1') return true;
  return ip.indexOf('127.') === 0;
}
/**
 * Decide whether a hosts-file entry must be dropped because of its IP.
 *
 * Dropped addresses:
 * - `0.0.0.0` and `::` (unspecified) and `255.255.255.255` (broadcast)
 * - `169.254.x.x` (link-local / cloud metadata range)
 * - IPv6 link-local `fe80::/10` and multicast `ff00::/8`
 *
 * Loopback addresses are intentionally NOT dropped here.
 *
 * @param ip - First token of a hosts-file line; matched textually, not parsed.
 * @returns `true` when the entry should not be forwarded.
 */
function shouldSkipHostsIp(ip: string): boolean {
  if (ip === '0.0.0.0' || ip === '255.255.255.255') return true;
  // Cloud metadata range — consistent with Shannon's SSRF guard
  if (ip.startsWith('169.254.')) return true;

  const lower = ip.toLowerCase();
  // IPv6 unspecified address: the counterpart of 0.0.0.0 above.
  if (lower === '::') return true;
  // Link-local is fe80::/10, i.e. first hextet fe80 through febf.
  if (/^fe[89ab][0-9a-f]:/.test(lower)) return true;
  // Multicast ff00::/8.
  if (lower.startsWith('ff')) return true;
  return false;
}
/**
 * Decide whether a hosts-file name must not be forwarded.
 *
 * A name is skipped when, compared case-insensitively, it is listed in
 * `HOSTS_SKIP_NAMES`, equals the given `hostname`, or ends in `.localhost`.
 *
 * @param name - Host name token taken from a hosts-file line.
 * @param hostname - Host name to exclude (the caller passes `os.hostname()`).
 */
function shouldSkipHostsName(name: string, hostname: string): boolean {
  const candidate = name.toLowerCase();
  return (
    HOSTS_SKIP_NAMES.has(candidate) ||
    candidate === hostname.toLowerCase() ||
    candidate.endsWith('.localhost')
  );
}
/**
 * Read the host's /etc/hosts and emit `--add-host` flags so the worker resolves
 * user-added entries the same way. Loopback IPs (127.x, ::1) are rewritten to
 * `host-gateway` so they target the host's loopback instead of the container's.
 *
 * Forwarding is disabled when `SHANNON_FORWARD_HOSTS` is `'false'` and on
 * Windows. An unreadable hosts file yields no flags (best-effort).
 *
 * Identical `name:ip` mappings are emitted once. This matters when a name is
 * listed for both 127.x and ::1, since both collapse to `name:host-gateway`.
 *
 * @returns Flat argument list of alternating `--add-host` and `name:ip` values.
 */
function forwardEtcHostsFlags(): string[] {
  if (process.env.SHANNON_FORWARD_HOSTS === 'false') return [];
  if (os.platform() === 'win32') return [];

  let content: string;
  try {
    content = fs.readFileSync('/etc/hosts', 'utf-8');
  } catch {
    // Deliberate best-effort: nothing to forward if the file cannot be read.
    return [];
  }

  const hostname = os.hostname();
  const seen = new Set<string>();
  const flags: string[] = [];

  for (const rawLine of content.split('\n')) {
    // Drop trailing comments; trim also removes a stray '\r' from CRLF files.
    const hashIdx = rawLine.indexOf('#');
    const line = (hashIdx >= 0 ? rawLine.slice(0, hashIdx) : rawLine).trim();
    if (!line) continue;

    // Fields are separated by any run of whitespace.
    const [ip, ...names] = line.split(/\s+/);
    if (!ip || names.length === 0) continue;
    if (shouldSkipHostsIp(ip)) continue;

    const targetIp = isLoopbackIp(ip) ? 'host-gateway' : ip;
    // IPv6 literals are bracketed so their colons are not confused with the
    // `name:ip` separator.
    const formattedIp = targetIp.includes(':') ? `[${targetIp}]` : targetIp;

    for (const name of names) {
      if (shouldSkipHostsName(name, hostname)) continue;
      const mapping = `${name}:${formattedIp}`;
      if (seen.has(mapping)) continue;
      seen.add(mapping);
      flags.push('--add-host', mapping);
    }
  }
  return flags;
}
export interface WorkerOptions {
version: string;
url: string;
@@ -176,6 +258,9 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
// Add host flag for Linux
args.push(...addHostFlag());
// Forward user-added /etc/hosts entries into the worker
args.push(...forwardEtcHostsFlags());
// UID remapping for Linux bind mounts
if (os.platform() === 'linux' && process.getuid && process.getgid) {
args.push('-e', `SHANNON_HOST_UID=${process.getuid()}`, '-e', `SHANNON_HOST_GID=${process.getgid()}`);
@@ -185,11 +270,12 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
args.push('-v', `${opts.workspacesDir}:/app/workspaces`);
args.push('-v', `${opts.repo.hostPath}:${opts.repo.containerPath}:ro`);
// Writable overlays: shadow .shannon/ inside the :ro repo with workspace-backed dirs
// Writable overlays: shadow .shannon/ and .playwright/ inside the :ro repo with workspace-backed dirs
const workspacePath = path.join(opts.workspacesDir, opts.workspace);
args.push('-v', `${path.join(workspacePath, 'deliverables')}:${opts.repo.containerPath}/.shannon/deliverables`);
args.push('-v', `${path.join(workspacePath, 'scratchpad')}:${opts.repo.containerPath}/.shannon/scratchpad`);
args.push('-v', `${path.join(workspacePath, '.playwright-cli')}:${opts.repo.containerPath}/.shannon/.playwright-cli`);
args.push('-v', `${path.join(workspacePath, '.playwright')}:${opts.repo.containerPath}/.playwright`);
// Local mode: mount prompts for live editing
if (opts.promptsDir) {
+1
View File
@@ -26,6 +26,7 @@ const FORWARD_VARS = [
'ANTHROPIC_MEDIUM_MODEL',
'ANTHROPIC_LARGE_MODEL',
'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
'CLAUDE_ADAPTIVE_THINKING',
] as const;
/**
+21
View File
@@ -25,6 +25,25 @@ import { displaySplash } from './splash.js';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
/**
 * Abort the CLI when it is started through sudo or as root.
 *
 * Returns silently when `SUDO_USER` is unset/empty and the effective UID is
 * not 0 (or cannot be queried because `process.geteuid` is unavailable).
 * Otherwise prints an explanation to stderr — plus a Docker post-install hint
 * on Linux — and exits with status 1.
 */
function blockSudo(): void {
  const viaSudo = Boolean(process.env.SUDO_USER);
  const asRoot = process.geteuid?.() === 0;
  if (!(viaSudo || asRoot)) return;

  // The sudo message takes precedence when both conditions hold.
  const lines = viaSudo
    ? ['ERROR: Shannon must not be run with sudo.', 'Re-run this command as your normal user.']
    : [
        'ERROR: Shannon must not be run as the root user.',
        'Switch to a regular user account and re-run this command.',
      ];

  if (process.platform === 'linux') {
    lines.push(
      'Configure Docker to run without sudo first:',
      'https://docs.docker.com/engine/install/linux-postinstall',
    );
  }

  for (const line of lines) {
    console.error(line);
  }
  process.exit(1);
}
function getVersion(): string {
try {
const pkgPath = path.join(__dirname, '..', 'package.json');
@@ -178,6 +197,8 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
// === Main Dispatch ===
blockSudo();
const args = process.argv.slice(2);
const command = args[0];
+80 -7
View File
@@ -39,9 +39,33 @@
"type": "string",
"pattern": "^[A-Za-z2-7]+=*$",
"description": "TOTP secret for two-factor authentication (Base32 encoded, case insensitive)"
},
"email_login": {
"type": "object",
"description": "Email account credentials for magic-link or OTP follow-through flows",
"properties": {
"address": {
"type": "string",
"format": "email",
"description": "Email address used to receive magic links or OTPs"
},
"password": {
"type": "string",
"minLength": 1,
"maxLength": 255,
"description": "Password for the email account"
},
"totp_secret": {
"type": "string",
"pattern": "^[A-Za-z2-7]+=*$",
"description": "TOTP secret for the email account's two-factor authentication (Base32 encoded)"
}
},
"required": ["address", "password"],
"additionalProperties": false
}
},
"required": ["username", "password"],
"required": ["username"],
"additionalProperties": false
},
"login_flow": {
@@ -118,6 +142,51 @@
},
"additionalProperties": false
},
"vuln_classes": {
"type": "array",
"description": "Vulnerability classes to test. When omitted, all five classes run. When set, only listed classes run; their vuln+exploit agents and report sections are included.",
"items": {
"type": "string",
"enum": ["injection", "xss", "auth", "authz", "ssrf"]
},
"minItems": 1,
"maxItems": 5,
"uniqueItems": true
},
"exploit": {
"type": "string",
"enum": ["true", "false"],
"description": "Whether to run the exploitation phase (default true). Set false to run only analysis."
},
"report": {
"type": "object",
"description": "Report filtering and guidance applied by the report agent.",
"properties": {
"min_severity": {
"type": "string",
"enum": ["low", "medium", "high", "critical"],
"description": "Minimum severity threshold; findings below are dropped by the report agent."
},
"min_confidence": {
"type": "string",
"enum": ["low", "medium", "high"],
"description": "Minimum confidence threshold; findings below are dropped by the report agent."
},
"guidance": {
"type": "string",
"minLength": 1,
"maxLength": 500,
"description": "Free-text guidance to the report agent (e.g., 'Drop findings about missing security headers')."
}
},
"additionalProperties": false
},
"rules_of_engagement": {
"type": "string",
"minLength": 1,
"maxLength": 1000,
"description": "Free-text instructions to the agent that render into every prompt."
},
"login": {
"type": "object",
"description": "Deprecated: Use 'authentication' section instead",
@@ -135,7 +204,11 @@
{ "required": ["authentication"] },
{ "required": ["rules"] },
{ "required": ["authentication", "rules"] },
{ "required": ["description"] }
{ "required": ["description"] },
{ "required": ["vuln_classes"] },
{ "required": ["exploit"] },
{ "required": ["report"] },
{ "required": ["rules_of_engagement"] }
],
"additionalProperties": false,
"$defs": {
@@ -151,17 +224,17 @@
},
"type": {
"type": "string",
"enum": ["path", "subdomain", "domain", "method", "header", "parameter"],
"description": "Type of rule (what aspect of requests to match against)"
"enum": ["url_path", "subdomain", "domain", "method", "header", "parameter", "code_path"],
"description": "Type of rule (what aspect of requests or source code to match against)"
},
"url_path": {
"value": {
"type": "string",
"minLength": 1,
"maxLength": 1000,
"description": "URL path pattern or value to match"
"description": "Value to match"
}
},
"required": ["description", "type", "url_path"],
"required": ["description", "type", "value"],
"additionalProperties": false
}
}
+63 -8
View File
@@ -4,6 +4,27 @@
# Description of the target environment (optional, max 500 chars)
description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production."
# Limit which vulnerability classes run end-to-end (optional, default: all five)
# vuln_classes: [injection, xss, auth, authz, ssrf]
# Whether to run the exploitation phase (optional, default: "true"). Set to "false" to run analysis only.
# exploit: "false"
# Free-form engagement rules applied to analysis and exploitation agents (optional).
# Example below is illustrative; edit, remove, or add sections as needed.
# rules_of_engagement: |
# Forbidden techniques:
# - No password brute-force or credential stuffing. Cap login attempts at 5 per account.
# - ...
#
# Operational:
# - Throttle to under 5 requests per second per endpoint. Back off 60 seconds on any 429 response.
# - ...
#
# Data handling:
# - Do not include actual values in deliverables — use placeholders like [order_id] or [user_email].
# - ...
authentication:
login_type: form # Options: 'form' or 'sso'
login_url: "https://example.com/login"
@@ -12,6 +33,12 @@ authentication:
password: "testpassword"
totp_secret: "JBSWY3DPEHPK3PXP" # Optional TOTP secret for 2FA
# Optional mailbox credentials for magic-link / email-OTP flows.
# email_login:
# address: "inbox@example.com"
# password: "mailbox-password"
# totp_secret: "JBSWY3DPEHPK3PXP"
# Natural language instructions for login flow
login_flow:
- "Type $username into the email field"
@@ -25,27 +52,55 @@ authentication:
value: "/dashboard"
rules:
# Supported types: url_path, subdomain, domain, method, header, parameter, code_path
avoid:
- description: "Do not test the marketing site subdomain"
type: subdomain
url_path: "www"
value: "www"
- description: "Skip logout functionality"
type: path
url_path: "/logout"
type: url_path
value: "/logout"
- description: "No DELETE operations on user API"
type: path
url_path: "/api/v1/users/*"
type: url_path
value: "/api/v1/users/*"
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "test/**").
# - description: "Test fixtures and specs (not production code)"
# type: code_path
# value: "test/**"
#
# - description: "Generated migrations"
# type: code_path
# value: "db/migrations/**"
focus:
- description: "Prioritize beta admin panel subdomain"
type: subdomain
url_path: "beta-admin"
value: "beta-admin"
- description: "Focus on user profile updates"
type: path
url_path: "/api/v2/user-profile"
type: url_path
value: "/api/v2/user-profile"
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "routes/*.ts").
# - description: "Express route handlers"
# type: code_path
# value: "routes/*.ts"
#
# - description: "Sequelize ORM model definitions"
# type: code_path
# value: "models/*.ts"
# Report filters applied by the report agent when assembling the final report (optional).
# Example below is illustrative; edit, remove, or add sections as needed.
# report:
# min_severity: low
# min_confidence: low
# guidance: |
# Drop findings about missing security headers and rate-limit gaps.
# ...
# Pipeline execution settings (optional)
# pipeline:
+2
View File
@@ -105,6 +105,8 @@ If you only confirmed a vulnerability without attempting to weaponize it, you ha
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/auth_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
+4
View File
@@ -88,10 +88,14 @@ After exhaustive bypass attempts, determine:
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/authz_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
+6 -3
View File
@@ -65,10 +65,14 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/injection_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
@@ -180,17 +184,16 @@ For each root vulnerability in your plan, you will follow this systematic, four-
## **Strategic Tool Usage**
Use the right tool for the job to ensure thoroughness.
- **Use `curl` (Manual Probing) for:** Initial confirmation, simple UNION/Error-based injections, and crafting specific WAF bypasses.
- **Use `sqlmap` (Automation) for:** Time-consuming blind injections, automating enumeration **after** manual confirmation, and as a final step to try a wide range of payloads when manual techniques are failing.
## **Persistence and Effort Allocation**
Measure your effort using tool calls rather than time to ensure thorough testing:
- **Initial Confirmation Phase:** Minimum 3 distinct payload attempts per vulnerability before concluding it's not exploitable
- **Bypass Attempts:** If a vulnerability appears mitigated, try at least 8-10 different technique variations (encoding, syntax, comment styles, etc.) before concluding it's properly defended
- **Escalation Trigger:** If manual testing exceeds 10-12 tool calls without progress on a single vulnerability, escalate to automated tools (`sqlmap`) or Task Agent scripting
- **Escalation Trigger:** If manual testing exceeds 10-12 tool calls without progress on a single vulnerability, escalate to Task Agent scripting
- **Termination Criteria:** After systematic attempts with multiple different techniques → classify as appropriate level
## **Using the Task Agent for Custom Scripting**
You must delegate every injection automation task to the Task Agent. Use manual `curl` or `sqlmap` runs for spot checks, then escalate to scripted payload loops handled by the Task Agent.
You must delegate every injection automation task to the Task Agent. Use manual `curl` runs for spot checks, then escalate to scripted payload loops handled by the Task Agent.
**TEMPLATE FOR SCRIPTING TASKS (REQUIRED):**
"
+4
View File
@@ -88,10 +88,14 @@ After exhaustive bypass attempts, determine:
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/ssrf_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
+4
View File
@@ -86,10 +86,14 @@ After exhaustive bypass attempts, determine:
@include(shared/_rules.txt)
</rules>
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/xss_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
@@ -0,0 +1,4 @@
Write a stub authenticated session via Bash so the preflight's saved-state check passes:
echo '{"cookies":[{"name":"stub","value":"x","domain":"example.com","path":"/"}],"origins":[]}' > {{AUTH_STATE_FILE}}
Then return the structured verdict `{ "login_success": true }` and stop.
+9 -2
View File
@@ -31,12 +31,20 @@ Read `.gitignore` and run `git ls-files --others --ignored --exclude-standard --
{{DESCRIPTION}}
<scope>
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}.
</scope>
@include(shared/_code-path-rules.txt)
@include(shared/_rules-of-engagement.txt)
<system_architecture>
**PENTESTING WORKFLOW - YOUR POSITION:**
**Phase Sequence:** **PRE-RECON (You)** → RECON → VULN ANALYSIS (5 agents) → EXPLOITATION (5 agents) → REPORTING
**Your Input:** External scan results from pre-recon tools (nmap, subfinder, whatweb)
**Your Input:** Target application source code in the working directory
**Your Output:** `.shannon/deliverables/pre_recon_deliverable.md` (feeds all subsequent analysis phases)
**Shared Intelligence:** You create the foundational intelligence baseline that all other agents depend on
@@ -71,7 +79,6 @@ You are the **Code Intelligence Gatherer** and **Architectural Foundation Builde
<starting_context>
- You are the **ENTRY POINT** of the comprehensive security assessment - no prior deliverables exist to read
- External reconnaissance tools have completed and their results are available in the working environment
- The target application source code has been cloned and is ready for analysis in the current directory
- You must create the **foundational intelligence baseline** that all subsequent agents depend on
- **CRITICAL:** This is the ONLY agent with full source code access - your completeness determines whether vulnerabilities are found
+16 -8
View File
@@ -1,5 +1,5 @@
<role>
You are a world-class Reconnaissance Analyst and Attack Surface Mapper. Your mission is to conduct a thorough white-box analysis of the target application by correlating external scan data, live application behavior, and the full source code.
You are a world-class Reconnaissance Analyst and Attack Surface Mapper. Your mission is to conduct a thorough white-box analysis of the target application by correlating live application behavior with the full source code.
</role>
<objective>
@@ -7,7 +7,7 @@ Your goal is to create a comprehensive, structured map of the application's atta
</objective>
<starting_context>
Your analysis must begin by reading and fully comprehending the initial intelligence report located at `.shannon/deliverables/pre_recon_deliverable.md`. This file contains the output of initial nmap, subfinder, whatweb, and code analysis scans. This is your only starting information.
Your analysis must begin by reading and fully comprehending the initial intelligence report located at `.shannon/deliverables/pre_recon_deliverable.md`. This file contains the initial source code analysis. This is your only starting information.
</starting_context>
<target>
@@ -20,6 +20,10 @@ Filesystem:
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
</target>
<scope>
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}. Map only what supports these classes.
</scope>
<rules>
Rules to Avoid:
{{RULES_AVOID}}
@@ -28,10 +32,16 @@ Areas to Focus On:
{{RULES_FOCUS}}
</rules>
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<scope_boundaries>
# Penetration Test Scope & Boundaries
@@ -80,13 +90,13 @@ Please use these tools for the following use cases:
**Phase Sequence:** PRE-RECON (Complete) → **RECONNAISSANCE (You)** → VULN ANALYSIS (5 agents) → EXPLOITATION (5 agents) → FINAL REPORT (next phase)
**Your Input:** `.shannon/deliverables/pre_recon_deliverable.md` (external scan data, initial code analysis)
**Your Input:** `.shannon/deliverables/pre_recon_deliverable.md` (initial code analysis)
**Your Output:** `.shannon/deliverables/recon_deliverable.md` (comprehensive attack surface map)
**Shared Intelligence:** None (you are the first analysis specialist)
**WHAT HAPPENED BEFORE YOU:**
- Pre-reconnaissance agent performed external scans (nmap, subfinder, whatweb) and initial code analysis
- All attack surfaces, technologies, and entry points were catalogued from external perspective
- Pre-reconnaissance agent performed initial source code analysis
- Attack surfaces, technologies, and entry points were catalogued from the codebase
**WHAT HAPPENS AFTER YOU:**
- Injection Analysis specialist will analyze SQL injection and command injection vulnerabilities using your attack surface map
@@ -112,7 +122,7 @@ You must follow this methodical four-step process:
1. **Synthesize Initial Data:**
- Read the entire `.shannon/deliverables/pre_recon_deliverable.md`.
- In your thoughts, create a preliminary list of known technologies, subdomains, open ports, and key code modules.
- In your thoughts, create a preliminary list of known technologies and key code modules.
2. **Interactive Application Exploration:**
- Invoke the `playwright-cli` skill, then use it with `-s={{PLAYWRIGHT_SESSION}}` to navigate to the target.
@@ -166,8 +176,6 @@ A brief overview of the application's purpose, core technology stack (e.g., Next
- **Frontend:** [Framework, key libraries, authentication libraries]
- **Backend:** [Language, framework, key dependencies]
- **Infrastructure:** [Hosting provider, CDN, database type]
- **Identified Subdomains:** [List from subfinder and any others discovered]
- **Open Ports & Services:** [List from nmap and their purpose]
## 3. Authentication & Session Management Flow
- **Entry Points:** [e.g., /login, /register, /auth/sso]
+21 -39
View File
@@ -9,11 +9,11 @@ Technical leadership (CTOs, CISOs, Engineering VPs) who need both technical accu
</audience>
<objective>
The orchestrator has already concatenated all specialist reports into `comprehensive_security_assessment_report.md`.
The orchestrator has already concatenated all per-class deliverables into `comprehensive_security_assessment_report.md`. Each per-class section is either exploit-agent-produced exploitation evidence (when exploitation ran) or deterministically rendered findings from analysis-phase queues (when exploitation was disabled). The cleanup rules below apply uniformly to either source.
Your task is to:
1. Read this existing concatenated report
2. Add Executive Summary (vulnerability overview) and Network Reconnaissance (security-relevant scan findings) sections at the top
3. Clean up ALL exploitation evidence sections by removing hallucinated content
2. Add an Executive Summary (vulnerability overview) at the top
3. Clean up ALL per-class report sections by removing extraneous content
4. Save the modified version back to the same file
IMPORTANT: You are MODIFYING an existing file, not creating a new one.
@@ -32,8 +32,13 @@ Filesystem:
<context>
Authentication Context:
{{AUTH_CONTEXT}}
Vulnerability classes tested in this run: {{VULN_CLASSES_TESTED}}
Exploitation: {{EXPLOITATION}}
</context>
{{REPORT_FILTERS_BLOCK}}
<input_files>
You will analyze the following deliverable files:
- `.shannon/deliverables/pre_recon_deliverable.md` - Initial reconnaissance and technology stack
@@ -51,63 +56,40 @@ MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessmen
## Executive Summary
- Target: {{WEB_URL}}
- Assessment Date: {current date}
- Scope: Authentication, XSS, SQL and Command Injection, SSRF, Authorization testing
- Scope: {{VULN_CLASSES_TESTED}}
- Exploitation: {{EXPLOITATION}}
## Summary by Vulnerability Type
{For each vulnerability type below, examine the comprehensive_security_assessment_report.md sections and provide a summary. If no section exists for that type or no vulnerabilities are listed, explicitly state that none were found.}
**Authentication Vulnerabilities:**
{Check for "Authentication Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authentication vulnerabilities were found."}
{{VULN_SUMMARY_SUBSECTIONS}}
**Authorization Vulnerabilities:**
{Check for "Authorization Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authorization vulnerabilities were found."}
**Cross-Site Scripting (XSS) Vulnerabilities:**
{Check for "XSS Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No XSS vulnerabilities were found."}
**SQL/Command Injection Vulnerabilities:**
{Check for "Injection Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SQL or command injection vulnerabilities were found."}
**Server-Side Request Forgery (SSRF) Vulnerabilities:**
{Check for "SSRF Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SSRF vulnerabilities were found."}
## Network Reconnaissance
{Extract security-relevant findings from automated scanning tools:
- Open ports and exposed services from nmap
- Subdomain discoveries from subfinder that could expand attack surface
- Security headers or misconfigurations detected by whatweb
- Any other security-relevant findings from the automated tools
SKIP stack details - technical leaders know their infrastructure}
2. KEEPING the existing exploitation evidence sections but CLEANING them according to the rules below
2. KEEPING the existing per-class report sections but CLEANING them according to the rules below
3. The final structure should be:
- Executive Summary (new)
- Network Reconnaissance (new)
- All existing exploitation evidence sections (cleaned)
- All existing per-class report sections (cleaned)
IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the KEEP criteria defined in the instructions below.
</deliverable_instructions>
<instructions>
1. Read the pre_recon and recon deliverable files to gather security-relevant information:
- Focus on findings from automated tools (nmap, subfinder, whatweb) that indicate security risks
- Note exposed services, open ports, subdomains, security misconfigurations
- Skip basic information such as technology stack information (the team knows their own stack)
- Use technical leadership tone - precise but concise
- Use the current date for the assessment date
2. Create the Executive Summary and Network Reconnaissance content:
2. Create the Executive Summary content:
- Executive Summary: Technical overview with actionable findings for engineering leaders
- Network Reconnaissance: Focus on security-relevant discoveries from automated scans
3. Clean the exploitation evidence sections from `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
3. Clean the per-class report sections in `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
- KEEP these specific section headings:
NOTE: these sections will contain vulnerability lists with IDs matching pattern `### [TYPE]-VULN-[NUMBER]`
* `# [Type] Exploitation Evidence`
* `## Successfully Exploited Vulnerabilities`
* `# [Type] {{REPORT_VULN_HEADING}}`
* `## {{REPORT_VULN_SUBHEADING}}`
{{REPORT_FILTER_RULES}}
- REMOVE ANY OTHER SECTIONS (even if they contain vulnerability IDs), such as:
* `## Potential Vulnerabilities (Validation Blocked)` (All agents)
* Standalone "Recommendations" sections
@@ -119,11 +101,11 @@ IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain
* False positives sections
* any intros in the sections
* any counts in the sections
- Preserve exact vulnerability IDs and formatting
- Preserve exact vulnerability IDs (`### [TYPE]-VULN-NN:`); if the title after the colon is only a short category label rather than a descriptive phrase, rewrite it to a concise human-readable descriptor derived from the finding's Vulnerable location and Overview.
4. Combine the content:
- Place the Executive Summary and Network Reconnaissance sections at the top
- Follow with the cleaned exploitation evidence sections
- Follow with the cleaned per-class report sections
- Save as the modified `.shannon/deliverables/comprehensive_security_assessment_report.md`
CRITICAL: You are modifying the existing concatenated report at `.shannon/deliverables/comprehensive_security_assessment_report.md` IN-PLACE, not creating a separate file.
@@ -0,0 +1,13 @@
<code_path_rules>
Source-code routing. Each rule is tagged `[FILE]` (literal path) or `[GLOB]` (pattern). All paths are repository-relative.
How to apply (focus rules):
- For `[FILE]` entries — delegate analysis to the Task tool.
- For `[GLOB]` entries — invoke the Glob tool to enumerate matches, then delegate analysis of every match to the Task tool.
Avoid — out of scope. Skip entirely; the tool layer will block any access attempts.
{{CODE_RULES_AVOID}}
Focus — priority work assignments. Analyze every entry.
{{CODE_RULES_FOCUS}}
</code_path_rules>
@@ -0,0 +1,3 @@
<rules_of_engagement>
{{RULES_OF_ENGAGEMENT}}
</rules_of_engagement>
@@ -0,0 +1,19 @@
<shared_authenticated_session>
The preflight already logged in and saved the authenticated browser
session to:
{{AUTH_STATE_FILE}}
Restore it before doing anything else:
playwright-cli -s={{PLAYWRIGHT_SESSION}} state-load {{AUTH_STATE_FILE}}
Then run verification (per the success_condition in your authentication
config) to confirm the restored session is still valid:
- If verification passes → SKIP the login flow below entirely and
proceed with your primary task. You are authenticated.
- If verification fails → the saved session is stale. Fall through to
the full login flow below and perform it on your own browser session.
Do NOT overwrite {{AUTH_STATE_FILE}}.
</shared_authenticated_session>
@@ -0,0 +1,33 @@
<role>
You are a credential validator agent. Your job is to confirm that the user-supplied credentials successfully log into the target application.
</role>
<objective>
This runs as a preflight check for our AI pentester. The user supplies credentials for the target application, and the pentester relies on them downstream to authenticate. Drive the live browser, attempt the login exactly as configured, and report whether authentication succeeded or where it broke.
</objective>
<target_authentication>
{{AUTH_CONTEXT}}
</target_authentication>
<cli_tools>
- **Browser Automation (playwright-cli skill):** Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
- **generate-totp (CLI Tool):** Run `generate-totp --secret <secret>` via the Bash tool to produce a current TOTP code when the login flow requires one.
</cli_tools>
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
<publish_session>
After verification confirms login_success, save the authenticated browser session so the rest of the pipeline can reuse it instead of logging in again:
playwright-cli -s={{PLAYWRIGHT_SESSION}} state-save {{AUTH_STATE_FILE}}
Run this only when login_success is true. Skip it on failure.
</publish_session>
<critical>
- Submit each field (username, password, captcha, TOTP) exactly once.
- Any rejection = auth error: return `login_success: false` and stop. Do not retry.
</critical>
+4
View File
@@ -20,10 +20,14 @@ Success criterion: A complete, code-backed analysis of every potential authentic
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** A flaw in authentication is a catastrophic failure. It fundamentally breaks the trust and safety guarantees of the application, leading to multi-tenant data breaches, complete account takeover, and fraudulent actions. These are the most critical vulnerabilities you can find.
+6
View File
@@ -19,10 +19,16 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** A flaw in authorization is a critical security failure. It allows privilege escalation, horizontal and vertical access control bypass, and unauthorized data access. These vulnerabilities enable attackers to access resources they shouldn't, escalate privileges, and compromise multi-tenant data isolation.
+6
View File
@@ -20,10 +20,16 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** A structural flaw in a backend command, whether an SQL query or a shell command, is one of the most severe classes of vulnerability in a SaaS application. It undermines the foundational trust of the system by creating the *potential* for data exposure (SQLi) or direct server compromise (Command Injection).
+6
View File
@@ -19,10 +19,16 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** An SSRF flaw is a breach of network segmentation and trust boundaries. It allows attackers to leverage the application server as a proxy to access internal services, cloud metadata endpoints, or perform reconnaissance of internal networks. These vulnerabilities can lead to data exposure, privilege escalation, and complete network compromise.
+6
View File
@@ -19,10 +19,16 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
@include(shared/_shared-session.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- Severity Context: An XSS flaw is a breach of trust between the user and the application. It grants an attacker the ability to execute arbitrary code within a victim's browser, leading to session hijacking, credential theft, and complete compromise of the user's account and data within the application.
+5 -2
View File
@@ -18,7 +18,7 @@ import { formatTimestamp } from '../utils/formatting.js';
import { Timer } from '../utils/metrics.js';
import { createAuditLogger } from './audit-logger.js';
import { dispatchMessage } from './message-handlers.js';
import { type ModelTier, resolveModel } from './models.js';
import { type ModelTier, resolveModel, supportsAdaptiveThinking } from './models.js';
import { detectExecutionContext, formatCompletionMessage, formatErrorOutput } from './output-formatters.js';
import { createProgressManager } from './progress-manager.js';
@@ -177,7 +177,8 @@ export async function runClaudePrompt(
sdkEnv.CLAUDE_CODE_USE_VERTEX = '1';
if (providerConfig.gcpRegion) sdkEnv.CLOUD_ML_REGION = providerConfig.gcpRegion;
if (providerConfig.gcpProjectId) sdkEnv.ANTHROPIC_VERTEX_PROJECT_ID = providerConfig.gcpProjectId;
if (providerConfig.gcpCredentialsPath) sdkEnv.GOOGLE_APPLICATION_CREDENTIALS = providerConfig.gcpCredentialsPath;
if (providerConfig.gcpCredentialsPath)
sdkEnv.GOOGLE_APPLICATION_CREDENTIALS = providerConfig.gcpCredentialsPath;
break;
case 'litellm_router':
if (providerConfig.baseUrl) sdkEnv.ANTHROPIC_BASE_URL = providerConfig.baseUrl;
@@ -217,6 +218,7 @@ export async function runClaudePrompt(
// 4. Configure SDK options
// Model override from providerConfig takes precedence over env-based resolveModel
const model = providerConfig?.modelOverrides?.[modelTier] ?? resolveModel(modelTier);
const adaptiveThinking = supportsAdaptiveThinking(model) && process.env.CLAUDE_ADAPTIVE_THINKING !== 'false';
const options = {
model,
maxTurns: 10_000,
@@ -225,6 +227,7 @@ export async function runClaudePrompt(
allowDangerouslySkipPermissions: true,
settingSources: ['user'] as ('user' | 'project' | 'local')[],
env: sdkEnv,
...(adaptiveThinking && { thinking: { type: 'adaptive' as const } }),
...(outputFormat && { outputFormat }),
};
+4 -1
View File
@@ -39,7 +39,10 @@ function extractMessageContent(message: AssistantMessage): string {
const messageContent = message.message;
if (Array.isArray(messageContent.content)) {
return messageContent.content.map((c: ContentBlock) => c.text || JSON.stringify(c)).join('\n');
return messageContent.content
.filter((c: ContentBlock) => c.type !== 'thinking' && c.type !== 'redacted_thinking')
.map((c: ContentBlock) => c.text || JSON.stringify(c))
.join('\n');
}
return String(messageContent.content);
+6 -1
View File
@@ -21,7 +21,7 @@ export type ModelTier = 'small' | 'medium' | 'large';
const DEFAULT_MODELS: Readonly<Record<ModelTier, string>> = {
small: 'claude-haiku-4-5-20251001',
medium: 'claude-sonnet-4-6',
large: 'claude-opus-4-6',
large: 'claude-opus-4-7',
};
/** Resolve a model tier to a concrete model ID. */
@@ -35,3 +35,8 @@ export function resolveModel(tier: ModelTier = 'medium'): string {
return process.env.ANTHROPIC_MEDIUM_MODEL || DEFAULT_MODELS.medium;
}
}
/**
 * Reports whether the given model ID is one that supports adaptive thinking.
 * Only IDs containing `opus-4-6` or `opus-4-7` qualify.
 */
export function supportsAdaptiveThinking(model: string): boolean {
  const adaptiveFamily = model.match(/opus-4-[67]/);
  return adaptiveFamily !== null;
}
@@ -0,0 +1,90 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Writes <sourceDir>/.playwright/cli.config.json with stealth defaults so
* `playwright-cli open` auto-loads them from the agent's cwd. Skipped when a
* config already exists so user-provided files are never clobbered.
*
* NOTE: Playwright's MCP browser config treats `initScript` entries as file
* paths, not inline source. The stealth script is written alongside the config
* and referenced by absolute path. Inline strings silently fail the daemon.
*/
import fs from 'node:fs/promises';
import path from 'node:path';
/**
 * Resolves to `true` when `fs.access` succeeds for `p`, and to `false` when it
 * fails for any reason. Never rejects.
 */
async function pathExists(p: string): Promise<boolean> {
  let reachable = true;
  try {
    await fs.access(p);
  } catch {
    reachable = false;
  }
  return reachable;
}
// Browser-side script text that is written to disk and referenced from the
// generated config's `initScript` list. It does three things in the page:
//   1. deletes `webdriver` from the navigator prototype,
//   2. redefines `navigator.plugins` as a getter returning three plugin-like
//      entries whose prototype is set to `PluginArray.prototype`,
//   3. makes sure `window.chrome` and `window.chrome.runtime` exist, filling
//      `runtime` with enum-style constant tables when it is missing.
// The template literal below is emitted verbatim, so its contents are runtime
// data, not TypeScript that gets type-checked.
const STEALTH_INIT_SCRIPT = `delete Object.getPrototypeOf(navigator).webdriver;
Object.defineProperty(navigator, 'plugins', {
get: () => {
const arr = [
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
];
arr.__proto__ = PluginArray.prototype;
return arr;
},
});
window.chrome = window.chrome || {};
window.chrome.runtime = window.chrome.runtime || {
PlatformOs: { MAC: 'mac', WIN: 'win', ANDROID: 'android', CROS: 'cros', LINUX: 'linux', OPENBSD: 'openbsd' },
PlatformArch: { ARM: 'arm', X86_32: 'x86-32', X86_64: 'x86-64' },
PlatformNaclArch: { ARM: 'arm', X86_32: 'x86-32', X86_64: 'x86-64' },
RequestUpdateCheckStatus: { THROTTLED: 'throttled', NO_UPDATE: 'no_update', UPDATE_AVAILABLE: 'update_available' },
OnInstalledReason: { INSTALL: 'install', UPDATE: 'update', CHROME_UPDATE: 'chrome_update', SHARED_MODULE_UPDATE: 'shared_module_update' },
OnRestartRequiredReason: { APP_UPDATE: 'app_update', OS_UPDATE: 'os_update', PERIODIC: 'periodic' },
};
`;
/**
 * Assembles the config object that gets serialized to `cli.config.json`.
 *
 * @param initScriptPath Path of the init script file; placed as the single
 *   entry of `browser.initScript`.
 * @returns An object with one `browser` key holding the browser name, launch
 *   options, context options and init-script list.
 */
function buildStealthConfig(initScriptPath: string) {
  // Chromium launch flags.
  const launchOptions = {
    headless: true,
    args: ['--disable-blink-features=AutomationControlled'],
    ignoreDefaultArgs: ['--enable-automation'],
  };

  // Fixed viewport, locale, language header and user agent for every context.
  const contextOptions = {
    viewport: { width: 1920, height: 1080 },
    locale: 'en-US',
    extraHTTPHeaders: { 'Accept-Language': 'en-US,en;q=0.9' },
    userAgent:
      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  };

  const browser = {
    browserName: 'chromium',
    launchOptions,
    contextOptions,
    initScript: [initScriptPath],
  };

  return { browser };
}
export type StealthConfigWriteResult = 'wrote' | 'skipped-existing';
/**
 * Writes `<sourceDir>/.playwright/cli.config.json` and its companion init
 * script, unless a config file is already present.
 *
 * @param sourceDir Directory that will contain the `.playwright` folder.
 * @returns `result: 'skipped-existing'` when the config file already exists
 *   (nothing is written), otherwise `result: 'wrote'`. `configPath` is the
 *   config file location in both cases.
 */
export async function writePlaywrightStealthConfig(
  sourceDir: string,
): Promise<{ result: StealthConfigWriteResult; configPath: string }> {
  const playwrightDir = path.join(sourceDir, '.playwright');
  const configPath = path.join(playwrightDir, 'cli.config.json');

  // Never clobber a config that is already there.
  if (await pathExists(configPath)) {
    return { result: 'skipped-existing', configPath };
  }

  // The config must reference the init script by absolute path. `path.resolve`
  // guarantees that even when `sourceDir` is relative; for an absolute
  // `sourceDir` it yields the same string `path.join` would.
  const initScriptPath = path.resolve(playwrightDir, 'scripts', 'stealth.js');

  // Creating the scripts directory recursively also creates `.playwright`.
  await fs.mkdir(path.dirname(initScriptPath), { recursive: true });
  await fs.writeFile(initScriptPath, STEALTH_INIT_SCRIPT);
  await fs.writeFile(configPath, JSON.stringify(buildStealthConfig(initScriptPath), null, 2));

  return { result: 'wrote', configPath };
}
+115 -24
View File
@@ -17,15 +17,26 @@ import type { AgentName } from '../types/agents.js';
// === Common Fields ===
const baseVulnerability = z.object({
ID: z.string(),
vulnerability_type: z.string(),
externally_exploitable: z.boolean(),
confidence: z.string(),
notes: z.string().optional(),
});
const ANALYSIS_NOTES_DESCRIPTION = 'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.';
// === Per-Vuln-Type Schemas ===
/**
 * Builds the optional-string schema used for the `notes` field.
 * In exploit mode the schema is returned bare; otherwise it carries
 * ANALYSIS_NOTES_DESCRIPTION as its description.
 */
function notesField(exploit: boolean) {
  const optionalNotes = z.string().optional();
  if (exploit) {
    return optionalNotes;
  }
  return optionalNotes.describe(ANALYSIS_NOTES_DESCRIPTION);
}
/**
 * Builds the object schema holding the fields common to every vulnerability
 * entry. Only the `notes` field depends on `exploit` (see `notesField`).
 */
function makeBase(exploit: boolean) {
  const notes = notesField(exploit);
  return z.object({
    ID: z.string(),
    vulnerability_type: z.string(),
    externally_exploitable: z.boolean(),
    confidence: z.string(),
    notes,
  });
}
// === Per-Vuln-Type Schemas (used for type inference; notes description is mode-agnostic for types) ===
const baseVulnerability = makeBase(true);
const InjectionVulnerability = baseVulnerability.extend({
source: z.string().optional(),
@@ -79,13 +90,13 @@ const AuthzVulnerability = baseVulnerability.extend({
minimal_witness: z.string().optional(),
});
// === Queue Wrapper Schemas ===
// === Inferred Entry Types (consumed by renderer) ===
const InjectionQueueSchema = z.object({ vulnerabilities: z.array(InjectionVulnerability) });
const XssQueueSchema = z.object({ vulnerabilities: z.array(XssVulnerability) });
const AuthQueueSchema = z.object({ vulnerabilities: z.array(AuthVulnerability) });
const SsrfQueueSchema = z.object({ vulnerabilities: z.array(SsrfVulnerability) });
const AuthzQueueSchema = z.object({ vulnerabilities: z.array(AuthzVulnerability) });
export type InjectionFinding = z.infer<typeof InjectionVulnerability>;
export type XssFinding = z.infer<typeof XssVulnerability>;
export type AuthFinding = z.infer<typeof AuthVulnerability>;
export type SsrfFinding = z.infer<typeof SsrfVulnerability>;
export type AuthzFinding = z.infer<typeof AuthzVulnerability>;
// === Convert to JSON Schema for SDK ===
@@ -95,15 +106,95 @@ function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat {
return { type: 'json_schema', schema: z.toJSONSchema(zodSchema, { target: 'draft-07' }) as Record<string, unknown> };
}
// === Lookup Maps ===
// === Per-Mode Output Format Builders ===
// Two maps cached at module load; the only per-mode difference is the
// description on the `notes` field, which steers the LLM's writing.
const VULN_AGENT_OUTPUT_FORMAT: Partial<Record<AgentName, JsonSchemaOutputFormat>> = {
'injection-vuln': toOutputFormat(InjectionQueueSchema),
'xss-vuln': toOutputFormat(XssQueueSchema),
'auth-vuln': toOutputFormat(AuthQueueSchema),
'ssrf-vuln': toOutputFormat(SsrfQueueSchema),
'authz-vuln': toOutputFormat(AuthzQueueSchema),
};
/**
 * Builds the structured-output format for each vuln agent.
 *
 * Every agent's schema is `{ vulnerabilities: [...] }`, where each entry is the
 * shared base (from `makeBase(exploit)`) extended with that agent's optional
 * string fields.
 *
 * @param exploit Forwarded to `makeBase`.
 * @returns A map from vuln agent name to its output format.
 */
function buildOutputFormats(exploit: boolean): Partial<Record<AgentName, JsonSchemaOutputFormat>> {
  const common = makeBase(exploit);

  const injectionQueue = z.object({
    vulnerabilities: z.array(
      common.extend({
        source: z.string().optional(),
        combined_sources: z.string().optional(),
        path: z.string().optional(),
        sink_call: z.string().optional(),
        slot_type: z.string().optional(),
        sanitization_observed: z.string().optional(),
        concat_occurrences: z.string().optional(),
        verdict: z.string().optional(),
        mismatch_reason: z.string().optional(),
        witness_payload: z.string().optional(),
      }),
    ),
  });

  const xssQueue = z.object({
    vulnerabilities: z.array(
      common.extend({
        source: z.string().optional(),
        source_detail: z.string().optional(),
        path: z.string().optional(),
        sink_function: z.string().optional(),
        render_context: z.string().optional(),
        encoding_observed: z.string().optional(),
        verdict: z.string().optional(),
        mismatch_reason: z.string().optional(),
        witness_payload: z.string().optional(),
      }),
    ),
  });

  const authQueue = z.object({
    vulnerabilities: z.array(
      common.extend({
        source_endpoint: z.string().optional(),
        vulnerable_code_location: z.string().optional(),
        missing_defense: z.string().optional(),
        exploitation_hypothesis: z.string().optional(),
        suggested_exploit_technique: z.string().optional(),
      }),
    ),
  });

  const ssrfQueue = z.object({
    vulnerabilities: z.array(
      common.extend({
        source_endpoint: z.string().optional(),
        vulnerable_parameter: z.string().optional(),
        vulnerable_code_location: z.string().optional(),
        missing_defense: z.string().optional(),
        exploitation_hypothesis: z.string().optional(),
        suggested_exploit_technique: z.string().optional(),
      }),
    ),
  });

  const authzQueue = z.object({
    vulnerabilities: z.array(
      common.extend({
        endpoint: z.string().optional(),
        vulnerable_code_location: z.string().optional(),
        role_context: z.string().optional(),
        guard_evidence: z.string().optional(),
        side_effect: z.string().optional(),
        reason: z.string().optional(),
        minimal_witness: z.string().optional(),
      }),
    ),
  });

  return {
    'injection-vuln': toOutputFormat(injectionQueue),
    'xss-vuln': toOutputFormat(xssQueue),
    'auth-vuln': toOutputFormat(authQueue),
    'ssrf-vuln': toOutputFormat(ssrfQueue),
    'authz-vuln': toOutputFormat(authzQueue),
  };
}
const OUTPUT_FORMATS_EXPLOIT = buildOutputFormats(true);
const OUTPUT_FORMATS_ANALYSIS = buildOutputFormats(false);
const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
'injection-vuln': 'injection_exploitation_queue.json',
@@ -114,8 +205,8 @@ const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
};
/** Returns the structured output format for a vuln agent, or undefined for non-vuln agents. */
export function getOutputFormat(agentName: AgentName): JsonSchemaOutputFormat | undefined {
return VULN_AGENT_OUTPUT_FORMAT[agentName];
export function getOutputFormat(agentName: AgentName, exploit = true): JsonSchemaOutputFormat | undefined {
return (exploit ? OUTPUT_FORMATS_EXPLOIT : OUTPUT_FORMATS_ANALYSIS)[agentName];
}
/** Returns the queue filename for a vuln agent, or undefined for non-vuln agents. */
+41
View File
@@ -0,0 +1,41 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Writes ~/.claude/settings.json with permissions.deny rules derived from
* `code_path` avoid patterns. The SDK reads this via `settingSources: ['user']`;
* deny rules fire even in `bypassPermissions` mode.
*/
import os from 'node:os';
import { fs, path } from 'zx';
import type { DistributedConfig } from '../types/config.js';
// Tools for which a deny rule is emitted per avoided path.
const FILE_TOOLS = ['Read', 'Edit'] as const;

/**
 * Turns one avoid pattern into deny-rule strings, one per tool in FILE_TOOLS,
 * each shaped `Tool(./<pattern>)`.
 *
 * The pattern is first made relative by dropping leading `/`, `./` and `../`
 * segments. Only whole segments are dropped: a leading dot that belongs to a
 * name (`.env`, `.github/...`) is kept, so the rule targets the dotfile itself
 * rather than a differently named path.
 *
 * @param pattern Avoid pattern as written in the config.
 * @returns The deny entries for that pattern.
 */
function denyEntriesFor(pattern: string): string[] {
  // Each repetition consumes an empty, `.` or `..` segment followed by `/`
  // (or the end of the string, so a bare `.` / `..` still collapses to `./`).
  const relative = pattern.replace(/^(?:\.{0,2}(?:\/|$))+/, '');
  const arg = `./${relative}`;
  return FILE_TOOLS.map((tool) => `${tool}(${arg})`);
}
/**
 * Synchronizes `~/.claude/settings.json` with the config's `code_path` avoid
 * rules.
 *
 * With at least one such rule, the file is written with a `permissions.deny`
 * list built from `denyEntriesFor`. With none (or a null config), the file is
 * removed.
 *
 * @param config Distributed config, or null when there is none.
 */
export async function writeUserSettingsForCodePathAvoids(config: DistributedConfig | null): Promise<void> {
  const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');

  // Collect deny entries for every avoid rule that targets source paths.
  const deny: string[] = [];
  for (const rule of config?.avoid ?? []) {
    if (rule.type === 'code_path') {
      deny.push(...denyEntriesFor(rule.value));
    }
  }

  // Nothing to deny: drop the settings file instead of writing an empty list.
  if (deny.length === 0) {
    await fs.remove(settingsPath);
    return;
  }

  await fs.ensureDir(path.dirname(settingsPath));
  await fs.writeJson(settingsPath, { permissions: { deny } }, { spaces: 2 });
}
+2
View File
@@ -52,6 +52,8 @@ export interface ToolResultData {
/** One entry of a message's `content` array. Every field is optional. */
export interface ContentBlock {
// Block kind discriminator (string-typed, so any value is accepted).
type?: string;
// Text payload, when the block has one.
text?: string;
// NOTE(review): presumably the reasoning text of a 'thinking' block — confirm against the SDK types.
thinking?: string;
// NOTE(review): presumably the opaque payload of a 'redacted_thinking' block — confirm against the SDK types.
data?: string;
}
export interface AssistantMessage {
+1 -1
View File
@@ -28,7 +28,7 @@ const sessionMutex = new SessionMutex();
* AuditSession - Main audit system facade
*/
export class AuditSession {
private sessionMetadata: SessionMetadata;
readonly sessionMetadata: SessionMetadata;
private sessionId: string;
private metricsTracker: MetricsTracker;
private workflowLogger: WorkflowLogger;
+8
View File
@@ -74,6 +74,14 @@ export function generateSessionJsonPath(sessionMetadata: SessionMetadata): strin
return path.join(auditPath, 'session.json');
}
/**
 * Location of `auth-state.json` inside this session's audit directory: the
 * shared authenticated browser session that the preflight validator saves and
 * downstream agents consume via `_shared-session.txt`.
 */
export function authStateFile(sessionMetadata: SessionMetadata): string {
  const auditDir = generateAuditPath(sessionMetadata);
  return path.join(auditDir, 'auth-state.json');
}
/**
* Generate path to workflow.log file
*/
+139 -35
View File
@@ -10,7 +10,13 @@ import type { FormatsPlugin } from 'ajv-formats';
import yaml from 'js-yaml';
import { fs } from 'zx';
import { PentestError } from './services/error-handling.js';
import type { Authentication, Config, DistributedConfig, Rule } from './types/config.js';
import {
ALL_VULN_CLASSES,
type Authentication,
type Config,
type DistributedConfig,
type Rule,
} from './types/config.js';
import { ErrorCode } from './types/errors.js';
// Handle ESM/CJS interop for ajv-formats using require
@@ -306,6 +312,39 @@ export const parseConfigYAML = (yamlContent: string): Config => {
return config as Config;
};
/**
 * Rejects configs that still use renamed rule fields.
 *
 * Scans `rules.avoid` and then `rules.focus`. For each object entry it flags
 * `type: 'path'` (now `url_path`) and a `url_path` key with no `value` key
 * (now `value`). Non-array lists and non-object entries are ignored.
 *
 * @param config Parsed configuration, inspected structurally.
 * @throws PentestError (CONFIG_VALIDATION_FAILED) listing every deprecated
 *   usage found, when there is at least one.
 */
function checkDeprecatedFields(config: Config): void {
  const rulesSection = (config as Record<string, unknown>).rules as
    | { avoid?: unknown; focus?: unknown }
    | undefined;
  const messages: string[] = [];

  for (const where of ['avoid', 'focus'] as const) {
    const list = rulesSection?.[where];
    if (!Array.isArray(list)) continue;

    for (let idx = 0; idx < list.length; idx++) {
      const entry: unknown = list[idx];
      if (typeof entry !== 'object' || entry === null) continue;

      const fields = entry as Record<string, unknown>;
      if (fields.type === 'path') {
        messages.push(`rules.${where}[${idx}].type: 'path' has been renamed to 'url_path'.`);
      }
      if ('url_path' in fields && !('value' in fields)) {
        messages.push(`rules.${where}[${idx}]: the rule field 'url_path' has been renamed to 'value'.`);
      }
    }
  }

  if (messages.length === 0) return;

  throw new PentestError(
    `Configuration uses deprecated fields. Please update:\n - ${messages.join('\n - ')}`,
    'config',
    false,
    { deprecatedFields: messages },
    ErrorCode.CONFIG_VALIDATION_FAILED,
  );
}
const validateConfig = (config: Config): void => {
if (!config || typeof config !== 'object') {
throw new PentestError(
@@ -327,6 +366,8 @@ const validateConfig = (config: Config): void => {
);
}
checkDeprecatedFields(config);
const isValid = validateSchema(config);
if (!isValid) {
const errors = validateSchema.errors || [];
@@ -342,10 +383,16 @@ const validateConfig = (config: Config): void => {
performSecurityValidation(config);
if (!config.rules && !config.authentication && !config.description) {
console.warn(
'⚠️ Configuration file contains no rules, authentication, or description. The pentest will run without any scoping restrictions or login capabilities.',
);
const hasAnySteering =
!!config.rules ||
!!config.authentication ||
!!config.description ||
!!config.vuln_classes ||
config.exploit !== undefined ||
!!config.report ||
!!config.rules_of_engagement;
if (!hasAnySteering) {
console.warn('⚠️ Configuration file contains no steering fields. The pentest will run with all defaults.');
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
}
@@ -381,15 +428,6 @@ const performSecurityValidation = (config: Config): void => {
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
if (pattern.test(auth.credentials.password)) {
throw new PentestError(
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
@@ -432,6 +470,34 @@ const performSecurityValidation = (config: Config): void => {
}
}
}
if (config.rules_of_engagement) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(config.rules_of_engagement)) {
throw new PentestError(
`rules_of_engagement contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'rules_of_engagement', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
if (config.report?.guidance) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(config.report.guidance)) {
throw new PentestError(
`report.guidance contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'report.guidance', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
};
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
@@ -439,12 +505,12 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
rules.forEach((rule, index) => {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(rule.url_path)) {
if (pattern.test(rule.value)) {
throw new PentestError(
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
`rules.${ruleType}[${index}].value contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
{ field: `rules.${ruleType}[${index}].value`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -464,13 +530,25 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
};
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
const field = `rules.${ruleType}[${index}].url_path`;
const field = `rules.${ruleType}[${index}].value`;
switch (rule.type) {
case 'path':
if (!rule.url_path.startsWith('/')) {
case 'url_path':
if (!rule.value.startsWith('/')) {
throw new PentestError(
`${field} for type 'path' must start with '/'`,
`${field} for type 'url_path' must start with '/'`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
case 'code_path':
if (rule.value.includes('://')) {
throw new PentestError(
`${field} for type 'code_path' must not contain a URL protocol (got '${rule.value}')`,
'config',
false,
{ field, ruleType: rule.type },
@@ -482,7 +560,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'subdomain':
case 'domain':
// Basic domain validation - no slashes allowed
if (rule.url_path.includes('/')) {
if (rule.value.includes('/')) {
throw new PentestError(
`${field} for type '${rule.type}' cannot contain '/' characters`,
'config',
@@ -492,7 +570,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
);
}
// Must contain at least one dot for domains
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
if (rule.type === 'domain' && !rule.value.includes('.')) {
throw new PentestError(
`${field} for type 'domain' must be a valid domain name`,
'config',
@@ -505,7 +583,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'method': {
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
if (!allowedMethods.includes(rule.value.toUpperCase())) {
throw new PentestError(
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
'config',
@@ -518,7 +596,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
}
case 'header':
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new PentestError(
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
'config',
@@ -530,7 +608,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
break;
case 'parameter':
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new PentestError(
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
'config',
@@ -546,13 +624,13 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
const seen = new Set<string>();
rules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`;
const key = `${rule.type}:${rule.value}`;
if (seen.has(key)) {
throw new PentestError(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.value}'`,
'config',
false,
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, value: rule.value },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -561,16 +639,16 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
};
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.value}`));
focusRules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`;
const key = `${rule.type}:${rule.value}`;
if (avoidSet.has(key)) {
throw new PentestError(
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
`Conflicting rule found: rules.focus[${index}] '${rule.value}' also exists in rules.avoid`,
'config',
false,
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
{ field: `rules.focus[${index}]`, value: rule.value },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -581,7 +659,7 @@ const sanitizeRule = (rule: Rule): Rule => {
return {
description: rule.description.trim(),
type: rule.type.toLowerCase().trim() as Rule['type'],
url_path: rule.url_path.trim(),
value: rule.value.trim(),
};
};
@@ -591,11 +669,28 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
const authentication = config?.authentication || null;
const description = config?.description?.trim() || '';
const vuln_classes =
config?.vuln_classes && config.vuln_classes.length > 0 ? [...config.vuln_classes] : [...ALL_VULN_CLASSES];
const exploit = config?.exploit !== undefined ? config.exploit === 'true' : true;
const report = {
...(config?.report?.min_severity && { min_severity: config.report.min_severity }),
...(config?.report?.min_confidence && { min_confidence: config.report.min_confidence }),
...(config?.report?.guidance && { guidance: config.report.guidance.trim() }),
};
const rules_of_engagement = config?.rules_of_engagement?.trim() ?? '';
return {
avoid: avoid.map(sanitizeRule),
focus: focus.map(sanitizeRule),
authentication: authentication ? sanitizeAuthentication(authentication) : null,
description,
vuln_classes,
exploit,
report,
rules_of_engagement,
};
};
@@ -605,8 +700,17 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
login_url: auth.login_url.trim(),
credentials: {
username: auth.credentials.username.trim(),
password: auth.credentials.password,
...(auth.credentials.password && { password: auth.credentials.password }),
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
...(auth.credentials.email_login && {
email_login: {
address: auth.credentials.email_login.address.trim(),
password: auth.credentials.email_login.password,
...(auth.credentials.email_login.totp_secret && {
totp_secret: auth.credentials.email_login.totp_secret.trim(),
}),
},
}),
},
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
success_condition: {
@@ -30,22 +30,13 @@ export interface CheckpointProvider {
* Return { skip: true, metrics } to skip the agent (e.g., output files already exist).
* Return { skip: false } to run normally.
*/
shouldSkipAgent(
agentName: string,
repoPath: string,
deliverablesSubdir: string,
): Promise<SkipDecision>;
shouldSkipAgent(agentName: string, repoPath: string, deliverablesSubdir: string): Promise<SkipDecision>;
/**
* Called after an agent activity succeeds.
* Receives pipeline state and optional file context for artifact persistence.
*/
onAgentComplete(
agentName: string,
phase: string,
state: PipelineState,
context?: CheckpointContext,
): Promise<void>;
onAgentComplete(agentName: string, phase: string, state: PipelineState, context?: CheckpointContext): Promise<void>;
}
/** Default no-op implementation — no external checkpointing. */
@@ -11,11 +11,7 @@ import type { ActivityInput } from '../temporal/activities.js';
import type { VulnType } from '../types/agents.js';
export interface FindingsProvider {
mergeFindingsIntoQueue(
repoPath: string,
vulnType: VulnType,
input: ActivityInput,
): Promise<{ mergedCount: number }>;
mergeFindingsIntoQueue(repoPath: string, vulnType: VulnType, input: ActivityInput): Promise<{ mergedCount: number }>;
}
/** Default no-op implementation — no external findings to merge. */
+1 -1
View File
@@ -5,7 +5,7 @@
* Consumers can provide alternate implementations via the DI container.
*/
export type { CheckpointProvider, CheckpointContext, SkipDecision } from './checkpoint-provider.js';
export type { CheckpointContext, CheckpointProvider, SkipDecision } from './checkpoint-provider.js';
export { NoOpCheckpointProvider } from './checkpoint-provider.js';
export type { FindingsProvider } from './findings-provider.js';
export { NoOpFindingsProvider } from './findings-provider.js';
+25
View File
@@ -82,6 +82,26 @@ function generateTOTP(secret: string, timeStep: number = 30, digits: number = 6)
return generateHOTP(secret, counter, digits);
}
// === Help ===
/**
 * Prints the usage text for the generate-totp CLI to stdout.
 *
 * The text documents the `--secret` and `--help` options and the JSON shapes
 * the tool emits on success and on error.
 */
function printHelp(): void {
console.log(
`generate-totp - emit a current 6-digit TOTP code for a base32-encoded secret.
Usage:
generate-totp --secret <BASE32>
generate-totp --help
Options:
--secret Base32-encoded TOTP shared secret (characters A-Z, 2-7).
-h, --help Show this help and exit.
Output:
JSON to stdout. On success: {"status":"success","totpCode":"123456","expiresIn":<sec>}.
On error: {"status":"error","message":"...","retryable":false} (exit 1).`,
);
}
// === Argument Parsing ===
function parseSecret(argv: string[]): string {
@@ -97,6 +117,11 @@ function parseSecret(argv: string[]): string {
// === Main ===
function main(): void {
if (process.argv.includes('--help') || process.argv.includes('-h')) {
printHelp();
return;
}
const secret = parseSecret(process.argv);
if (!secret) {
@@ -19,6 +19,31 @@ import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join, resolve } from 'node:path';
import { DELIVERABLE_FILENAMES, type DeliverableType } from '../types/deliverables.js';
// === Help ===
/**
 * Prints the usage text for the save-deliverable CLI to stdout.
 *
 * The list of accepted `--type` values is derived from the keys of
 * DELIVERABLE_FILENAMES, so the help stays in sync with that map.
 */
function printHelp(): void {
// Comma-separated list of every deliverable type key.
const types = Object.keys(DELIVERABLE_FILENAMES).join(', ');
console.log(
`save-deliverable - save a Shannon pentest deliverable under its canonical filename.
Usage:
save-deliverable --type <TYPE> --file-path <path>
save-deliverable --type <TYPE> --content '<text>'
save-deliverable --help
Options:
--type Deliverable type (required). One of:
${types}
--file-path Path of a file whose contents to save (preferred for large content).
--content Inline content string to save.
-h, --help Show this help and exit.
Output:
JSON to stdout. On success: {"status":"success","filepath":"..."}.
On error: {"status":"error","message":"...","retryable":true|false} (exit 1).`,
);
}
// === Argument Parsing ===
interface ParsedArgs {
@@ -69,6 +94,11 @@ function saveDeliverableFile(targetDir: string, filename: string, content: strin
// === Main ===
function main(): void {
if (process.argv.includes('--help') || process.argv.includes('-h')) {
printHelp();
return;
}
const args = parseArgs(process.argv);
// 1. Validate --type
+23 -3
View File
@@ -25,6 +25,7 @@ import { fs, path } from 'zx';
import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js';
import { getOutputFormat, getQueueFilename } from '../ai/queue-schemas.js';
import type { AuditSession } from '../audit/index.js';
import { authStateFile } from '../audit/utils.js';
import { AGENTS } from '../session-manager.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AgentName } from '../types/agents.js';
@@ -95,7 +96,19 @@ export class AgentExecutionService {
auditSession: AuditSession,
logger: ActivityLogger,
): Promise<Result<AgentEndResult, PentestError>> {
const { webUrl, repoPath, deliverablesPath, configPath, configData, configYAML, pipelineTestingMode = false, attemptNumber, apiKey, promptDir, providerConfig } = input;
const {
webUrl,
repoPath,
deliverablesPath,
configPath,
configData,
configYAML,
pipelineTestingMode = false,
attemptNumber,
apiKey,
promptDir,
providerConfig,
} = input;
// 1. Load config (pre-parsed configData → raw YAML → file path)
const configResult = await this.configLoader.loadOptional(configPath, configData, configYAML);
@@ -108,7 +121,14 @@ export class AgentExecutionService {
const promptTemplate = AGENTS[agentName].promptTemplate;
let prompt: string;
try {
prompt = await loadPrompt(promptTemplate, { webUrl, repoPath }, distributedConfig, pipelineTestingMode, logger, promptDir);
prompt = await loadPrompt(
promptTemplate,
{ webUrl, repoPath, AUTH_STATE_FILE: authStateFile(auditSession.sessionMetadata) },
distributedConfig,
pipelineTestingMode,
logger,
promptDir,
);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
return err(
@@ -142,7 +162,7 @@ export class AgentExecutionService {
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 5. Execute agent
const outputFormat = getOutputFormat(agentName);
const outputFormat = getOutputFormat(agentName, distributedConfig?.exploit ?? true);
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
+7 -1
View File
@@ -81,7 +81,13 @@ export class ConfigLoaderService {
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
return err(
new PentestError(`Failed to parse config YAML: ${errorMessage}`, 'config', false, { originalError: errorMessage }, ErrorCode.CONFIG_PARSE_ERROR),
new PentestError(
`Failed to parse config YAML: ${errorMessage}`,
'config',
false,
{ originalError: errorMessage },
ErrorCode.CONFIG_PARSE_ERROR,
),
);
}
}
+1 -5
View File
@@ -99,11 +99,7 @@ const DEFAULT_CONFIG: ContainerConfig = {
* setContainerFactory() at worker startup to inject custom provider
* implementations into every container.
*/
type ContainerFactory = (
workflowId: string,
sessionMetadata: SessionMetadata,
config: ContainerConfig,
) => Container;
type ContainerFactory = (workflowId: string, sessionMetadata: SessionMetadata, config: ContainerConfig) => Container;
let containerFactory: ContainerFactory = (_workflowId, sessionMetadata, config) =>
new Container({ sessionMetadata, config });
@@ -138,6 +138,9 @@ function classifyByErrorCode(code: ErrorCode, retryableFromError: boolean): { ty
case ErrorCode.AUTH_FAILED:
return { type: 'AuthenticationError', retryable: false };
case ErrorCode.AUTH_LOGIN_FAILED:
return { type: 'AuthLoginFailedError', retryable: false };
case ErrorCode.BILLING_ERROR:
return { type: 'BillingError', retryable: true };
@@ -0,0 +1,239 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Deterministic queue-JSON to findings-MD renderer.
*
* Used when exploit=false: the exploit agents didn't run, so there is no
* `*_exploitation_evidence.md` to concatenate into the report. This module
* reads each `*_exploitation_queue.json` (already SDK-validated against the
* schemas in ../ai/queue-schemas.ts) and writes a `*_findings.md` per class
* in the canonical body shape that report-executive.txt's cleanup expects.
*
* No LLM is in the loop: every field maps directly from a JSON key.
*/
import { fs, path } from 'zx';
import type { AuthFinding, AuthzFinding, InjectionFinding, SsrfFinding, XssFinding } from '../ai/queue-schemas.js';
import { deliverablesDir } from '../paths.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { VulnClass } from '../types/config.js';
const DISCLAIMER = [
'> Exploitation phase was not run for this assessment. Each entry documents a',
'> vulnerability identified through static analysis; live exploitation steps and',
'> proof of impact are not included.',
].join('\n');
/** Static rendering configuration for one vulnerability class. */
interface ClassConfig<T> {
// Title used in the `# <heading> Findings` line and as the log-message prefix.
readonly heading: string;
// Inserted into "No <label> vulnerabilities were identified." when the queue is empty.
readonly noneFoundLabel: string;
// Queue JSON filename, resolved relative to the deliverables directory.
readonly queueFile: string;
// Output markdown filename, resolved relative to the deliverables directory.
readonly findingsFile: string;
// Converts one queue entry into its markdown section.
readonly renderEntry: (entry: T) => string;
}
/** Shape read from a `*_exploitation_queue.json` file; a missing list is treated as empty by the reader. */
interface QueueDocument<T> {
vulnerabilities?: T[];
}
// === Common Render Helpers ===
/**
 * Formats one bullet of a finding's summary list.
 *
 * @param label - Bold label shown before the value.
 * @param value - Value to display; booleans are rendered via string interpolation.
 * @returns `- **label:** value`, or `null` when the value is `undefined`,
 *   `null`, or a whitespace-only string (so the caller can omit the row).
 */
function summaryRow(label: string, value: string | undefined | null | boolean): string | null {
  const isAbsent = value == null;
  const isBlank = typeof value === 'string' && value.trim().length === 0;
  return isAbsent || isBlank ? null : `- **${label}:** ${value}`;
}
/**
 * Combines an endpoint and a code location into one display string.
 *
 * @param endpoint - Endpoint of the finding, if known.
 * @param codeLocation - Source location of the vulnerable code, if known.
 * @returns `endpoint (codeLocation)` when both are non-empty, otherwise
 *   whichever one is non-empty, otherwise `''`.
 */
function formatLocation(endpoint: string | undefined, codeLocation: string | undefined): string {
  if (endpoint && codeLocation) return `${endpoint} (${codeLocation})`;
  // `||` rather than `??`: an empty-string endpoint must still fall back to the code location.
  return endpoint || codeLocation || '';
}
/**
 * Assembles the markdown section for a single finding.
 *
 * Layout: a `### id: title` heading, a blank line, a `**Summary:**` label,
 * the non-null summary rows, a blank line, and an optional `**Notes:**` line.
 * Trailing whitespace of the assembled text is removed.
 *
 * @param id - Finding identifier shown in the heading.
 * @param title - Finding title shown after the identifier.
 * @param summaryRows - Pre-formatted rows; `null` entries are skipped.
 * @param notes - Free-form notes; omitted when missing or whitespace-only.
 */
function buildEntry(
  id: string,
  title: string,
  summaryRows: ReadonlyArray<string | null>,
  notes: string | undefined,
): string {
  const presentRows = summaryRows.filter((row): row is string => row !== null);
  const trimmedNotes = notes ? notes.trim() : '';
  const notesBlock = trimmedNotes === '' ? [] : [`**Notes:** ${trimmedNotes}`];
  return [`### ${id}: ${title}`, '', '**Summary:**', ...presentRows, '', ...notesBlock].join('\n').trimEnd();
}
// === Per-Class Renderers ===
/** Renders one authentication queue entry as a findings-markdown section. */
function renderAuthEntry(e: AuthFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/** Renders one SSRF queue entry as a findings-markdown section. */
function renderSsrfEntry(e: SsrfFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/** Renders one authorization queue entry as a findings-markdown section. */
function renderAuthzEntry(e: AuthzFinding): string {
  const location = formatLocation(e.endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.guard_evidence),
    summaryRow('Impact', e.side_effect),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/**
 * Renders one injection queue entry as a findings-markdown section.
 * The location is `sink (path: …)` when both are set, otherwise the sink call
 * if it is defined, otherwise the path.
 */
function renderInjectionEntry(e: InjectionFinding): string {
  const { path: sourcePath, sink_call: sink } = e;
  const location = sourcePath && sink ? `${sink} (path: ${sourcePath})` : (sink ?? sourcePath);
  const rows = [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/**
 * Renders one XSS queue entry as a findings-markdown section.
 * The location is `sink (path: …)` when both are set, otherwise the sink
 * function if it is defined, otherwise the path.
 */
function renderXssEntry(e: XssFinding): string {
  const { path: sourcePath, sink_function: sink } = e;
  const location = sourcePath && sink ? `${sink} (path: ${sourcePath})` : (sink ?? sourcePath);
  const rows = [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
// === Class Registry ===
/**
 * Per-class rendering table, keyed by vulnerability class.
 *
 * Entries are typed as ClassConfig<unknown> so they can be iterated uniformly;
 * each `renderEntry` casts the entry to its class-specific finding type before
 * delegating to the matching renderer. The cast is unchecked at runtime.
 */
const CLASSES: Record<VulnClass, ClassConfig<unknown>> = {
auth: {
heading: 'Authentication',
noneFoundLabel: 'authentication',
queueFile: 'auth_exploitation_queue.json',
findingsFile: 'auth_findings.md',
renderEntry: (e) => renderAuthEntry(e as AuthFinding),
},
authz: {
heading: 'Authorization',
noneFoundLabel: 'authorization',
queueFile: 'authz_exploitation_queue.json',
findingsFile: 'authz_findings.md',
renderEntry: (e) => renderAuthzEntry(e as AuthzFinding),
},
injection: {
heading: 'Injection',
noneFoundLabel: 'injection',
queueFile: 'injection_exploitation_queue.json',
findingsFile: 'injection_findings.md',
renderEntry: (e) => renderInjectionEntry(e as InjectionFinding),
},
xss: {
heading: 'XSS',
noneFoundLabel: 'XSS',
queueFile: 'xss_exploitation_queue.json',
findingsFile: 'xss_findings.md',
renderEntry: (e) => renderXssEntry(e as XssFinding),
},
ssrf: {
heading: 'SSRF',
noneFoundLabel: 'SSRF',
queueFile: 'ssrf_exploitation_queue.json',
findingsFile: 'ssrf_findings.md',
renderEntry: (e) => renderSsrfEntry(e as SsrfFinding),
},
};
// === Class File Assembly ===
/**
 * Builds the full text of one class's findings markdown file.
 *
 * Output: title, disclaimer, an "Identified Vulnerabilities" heading, then
 * either one rendered section per entry or a single "none identified" line.
 * The result always ends with exactly one newline.
 *
 * @param config - Rendering configuration for the class.
 * @param entries - Queue entries to render, in output order.
 */
function renderClassFile(config: ClassConfig<unknown>, entries: readonly unknown[]): string {
  const header = [`# ${config.heading} Findings`, '', DISCLAIMER, '', '## Identified Vulnerabilities', ''];
  const body =
    entries.length > 0
      ? entries.flatMap((entry) => [config.renderEntry(entry), ''])
      : [`No ${config.noneFoundLabel} vulnerabilities were identified.`, ''];
  const text = [...header, ...body].join('\n').trimEnd();
  return `${text}\n`;
}
// === Public Entry Point ===
/**
 * Render `*_findings.md` per class from each `*_exploitation_queue.json`.
 *
 * For every entry in CLASSES, in order:
 * - skip when the findings file already exists (re-running does not overwrite it);
 * - skip when the queue file is missing (class out of scope this run);
 * - otherwise read the queue JSON, render it, and write the findings file.
 *
 * Per-class failures (unreadable or malformed queue, render or write error) are
 * logged as warnings and do not stop the remaining classes; this function does
 * not rethrow them.
 *
 * @param sourceDir - Source root passed to `deliverablesDir`.
 * @param deliverablesSubdir - Optional subdirectory passed to `deliverablesDir`.
 * @param logger - Receives one info or warn line per class.
 */
export async function renderFindingsFromQueues(
  sourceDir: string,
  deliverablesSubdir: string | undefined,
  logger: ActivityLogger,
): Promise<void> {
  const dir = deliverablesDir(sourceDir, deliverablesSubdir);

  for (const config of Object.values(CLASSES)) {
    const queuePath = path.join(dir, config.queueFile);
    const findingsPath = path.join(dir, config.findingsFile);

    if (await fs.pathExists(findingsPath)) {
      logger.info(`${config.heading}: ${config.findingsFile} already exists, skipping`);
      continue;
    }
    if (!(await fs.pathExists(queuePath))) {
      logger.info(`${config.heading}: no queue file (class out of scope), skipping`);
      continue;
    }

    try {
      const doc = (await fs.readJson(queuePath)) as QueueDocument<unknown>;
      const entries = doc.vulnerabilities ?? [];
      // The cast above is unchecked; refuse a non-array rather than iterate arbitrary data.
      if (!Array.isArray(entries)) {
        throw new Error('"vulnerabilities" is not an array');
      }
      const markdown = renderClassFile(config, entries);
      await fs.writeFile(findingsPath, markdown);
      logger.info(`${config.heading}: rendered ${entries.length} finding(s) to ${config.findingsFile}`);
    } catch (error) {
      // Narrow instead of asserting: a non-Error throw would otherwise log "undefined".
      const message = error instanceof Error ? error.message : String(error);
      logger.warn(`${config.heading}: failed to render findings from ${config.queueFile}: ${message}`);
    }
  }
}
+2 -3
View File
@@ -11,14 +11,13 @@
* Services are pure domain logic with no Temporal dependencies.
*/
export type { ClaudePromptResult } from '../ai/claude-executor.js';
export { runClaudePrompt } from '../ai/claude-executor.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { AgentExecutionService } from './agent-execution.js';
export { ConfigLoaderService } from './config-loader.js';
export type { ContainerDependencies } from './container.js';
export { Container, getContainer, getOrCreateContainer, removeContainer, setContainerFactory } from './container.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { loadPrompt } from './prompt-manager.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
export type { ClaudePromptResult } from '../ai/claude-executor.js';
export { runClaudePrompt } from '../ai/claude-executor.js';
+168 -25
View File
@@ -14,19 +14,24 @@
* Checks run sequentially, cheapest first:
* 1. Repository path exists and contains .git
* 2. Config file parses and validates (if provided)
* 3. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
* 4. Target URL is reachable from the container (DNS + HTTP)
* 3. code_path rules match real entries in the repo (filesystem only)
* 4. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
* 5. Target URL resolves, is not link-local (cloud metadata), and is reachable (DNS + HTTP)
*/
import type { LookupAddress } from 'node:dns';
import { lookup } from 'node:dns/promises';
import fs from 'node:fs/promises';
import http from 'node:http';
import https from 'node:https';
import net, { type LookupFunction } from 'node:net';
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { glob } from 'zx';
import { resolveModel } from '../ai/models.js';
import { parseConfig } from '../config-parser.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { Config, Rule } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { err, ok, type Result } from '../types/result.js';
import { isRetryableError, PentestError } from './error-handling.js';
@@ -37,9 +42,47 @@ function isLoopbackAddress(address: string): boolean {
return address === '127.0.0.1' || address === '::1' || address === '0.0.0.0';
}
// Only 169.254.0.0/16, which hosts the cloud metadata service, is blocked.
// RFC1918 and loopback are intentionally allowed — scanning local targets is a
// supported Shannon use case.
const metadataBlockList = new net.BlockList();
metadataBlockList.addSubnet('169.254.0.0', 16, 'ipv4');

/**
 * Reports whether `address` is an IP literal covered by the metadata block list.
 * Strings that are not valid IPv4 or IPv6 literals are never blocked.
 */
function isBlockedAddress(address: string): boolean {
  const family = net.isIP(address);
  if (family === 4) return metadataBlockList.check(address, 'ipv4');
  if (family === 6) return metadataBlockList.check(address, 'ipv6');
  return false;
}
/**
 * Builds a `lookup` function that answers only from the already-validated
 * `addresses`, so the socket cannot be re-pointed after validation (DNS rebinding).
 *
 * Addresses of the requested family are preferred; when none match (or no
 * family is requested) the full list is used. With `options.all` the whole
 * candidate list is returned, otherwise its first entry. An empty list yields
 * an error callback.
 */
function pinnedLookup(addresses: LookupAddress[]): LookupFunction {
  return (hostname, options, callback) => {
    const { family, all } = options;
    const sameFamily = family ? addresses.filter((entry) => entry.family === family) : addresses;
    const candidates = sameFamily.length > 0 ? sameFamily : addresses;

    if (all) {
      callback(null, candidates);
      return;
    }

    const chosen = candidates[0];
    if (chosen) {
      callback(null, chosen.address, chosen.family);
      return;
    }
    callback(new Error(`no resolved address for ${hostname}`), '', 0);
  };
}
// === Repository Validation ===
async function validateRepo(repoPath: string, logger: ActivityLogger, skipGitCheck?: boolean): Promise<Result<void, PentestError>> {
async function validateRepo(
repoPath: string,
logger: ActivityLogger,
skipGitCheck?: boolean,
): Promise<Result<void, PentestError>> {
logger.info('Checking repository path...', { repoPath });
// 1. Check repo directory exists
@@ -104,13 +147,13 @@ async function validateRepo(repoPath: string, logger: ActivityLogger, skipGitChe
// === Config Validation ===
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<Config, PentestError>> {
logger.info('Validating configuration file...', { configPath });
try {
await parseConfig(configPath);
const config = await parseConfig(configPath);
logger.info('Configuration file OK');
return ok(undefined);
return ok(config);
} catch (error) {
if (error instanceof PentestError) {
return err(error);
@@ -128,6 +171,73 @@ async function validateConfig(configPath: string, logger: ActivityLogger): Promi
}
}
// === code_path Existence Validation ===
const CODE_PATH_IGNORE = ['.git/**', '.shannon/**'];
/**
 * Reports whether `pattern` matches at least one entry under `repoPath`.
 *
 * Matches are streamed and the function returns on the first one, so the full
 * match list is never collected. Entries matching CODE_PATH_IGNORE are excluded.
 *
 * @param repoPath - Directory used as the glob working directory.
 * @param pattern - Glob pattern (or plain path) to test.
 * @returns `true` on the first streamed match, `false` if the stream ends without one.
 */
async function patternMatchesAny(repoPath: string, pattern: string): Promise<boolean> {
const stream = glob.globbyStream(pattern, {
cwd: repoPath,
// NOTE(review): presumably lets patterns match dotfiles — confirm against globby docs.
dot: true,
// Directories may satisfy a rule too (the caller's error text says "file or directory").
onlyFiles: false,
followSymbolicLinks: false,
ignore: CODE_PATH_IGNORE,
});
// Only existence matters: leave the loop on the first yielded entry.
for await (const _ of stream) {
return true;
}
return false;
}
// Which rule list a code_path rule came from.
type RuleKind = 'avoid' | 'focus';
/** A code_path rule whose pattern matched nothing in the repo; reported in the preflight error. */
interface MissingCodePath {
// Rule list the rule was declared in.
kind: RuleKind;
// The rule's pattern exactly as configured.
value: string;
// The rule's description, echoed in the error message.
description: string;
}
/**
 * Checks that every `code_path` rule in the config matches at least one entry
 * in the repository.
 *
 * Rules from `rules.avoid` are checked before rules from `rules.focus`, one at
 * a time. All unmatched rules are collected and reported together in a single
 * non-retryable CONFIG_VALIDATION_FAILED error.
 *
 * @param config - Parsed configuration whose rules are inspected.
 * @param repoPath - Repository root the patterns are evaluated against.
 * @param logger - Receives progress messages.
 * @returns `ok` when there are no code_path rules or all of them match.
 */
async function validateCodePathsExist(
  config: Config,
  repoPath: string,
  logger: ActivityLogger,
): Promise<Result<void, PentestError>> {
  const pick = (kind: RuleKind, rules: Rule[] | null | undefined): Array<{ kind: RuleKind; rule: Rule }> =>
    (rules ?? []).filter((rule) => rule.type === 'code_path').map((rule) => ({ kind, rule }));
  const candidates = [...pick('avoid', config.rules?.avoid), ...pick('focus', config.rules?.focus)];

  if (candidates.length === 0) {
    return ok(undefined);
  }

  logger.info(`Validating ${candidates.length} code_path rule(s) against repo...`);

  // Only "at least one match" is enforced — a malformed glob simply matches nothing.
  const missing: MissingCodePath[] = [];
  for (const { kind, rule } of candidates) {
    const { value, description } = rule;
    if (await patternMatchesAny(repoPath, value)) continue;
    missing.push({ kind, value, description });
  }

  if (missing.length === 0) {
    logger.info('All code_path rules matched');
    return ok(undefined);
  }

  const bullets = missing.map((entry) => `[${entry.kind}] '${entry.value}' — ${entry.description}`);
  return err(
    new PentestError(
      `code_path rules don't match any file or directory in the repo:\n - ${bullets.join('\n - ')}\n` +
        `Fix the patterns or remove the rules.`,
      'config',
      false,
      { missing },
      ErrorCode.CONFIG_VALIDATION_FAILED,
    ),
  );
}
// === Credential Validation ===
/** Map SDK error type to a human-readable preflight PentestError. */
@@ -184,11 +294,17 @@ function classifySdkError(sdkError: SDKAssistantMessageError, authType: string):
}
/** Validate credentials via a minimal Claude Agent SDK query. */
async function validateCredentials(logger: ActivityLogger, apiKey?: string, providerConfig?: import('../types/config.js').ProviderConfig): Promise<Result<void, PentestError>> {
async function validateCredentials(
logger: ActivityLogger,
apiKey?: string,
providerConfig?: import('../types/config.js').ProviderConfig,
): Promise<Result<void, PentestError>> {
// 0. If providerConfig is present, credentials are managed by the caller.
// The executor will map providerConfig directly to sdkEnv — no process.env needed.
if (providerConfig) {
logger.info(`Provider config present (type: ${providerConfig.providerType || 'anthropic_api'}) — skipping env-based credential validation`);
logger.info(
`Provider config present (type: ${providerConfig.providerType || 'anthropic_api'}) — skipping env-based credential validation`,
);
return ok(undefined);
}
@@ -200,7 +316,7 @@ async function validateCredentials(logger: ActivityLogger, apiKey?: string, prov
// 1. Custom base URL — validate endpoint is reachable via SDK query
if (process.env.ANTHROPIC_BASE_URL && process.env.ANTHROPIC_AUTH_TOKEN) {
const baseUrl = process.env.ANTHROPIC_BASE_URL;
logger.info(`Validating custom base URL: ${baseUrl}`);
logger.info('Validating custom base URL');
try {
for await (const message of query({ prompt: 'hi', options: { model: resolveModel('small'), maxTurns: 1 } })) {
@@ -354,7 +470,7 @@ async function validateCredentials(logger: ActivityLogger, apiKey?: string, prov
// === Target URL Validation ===
/** HTTP HEAD with TLS verification disabled — we check reachability, not certificate validity. */
function httpHead(url: string, timeoutMs: number): Promise<number> {
function httpHead(url: string, timeoutMs: number, addresses: LookupAddress[]): Promise<number> {
return new Promise((resolve, reject) => {
const parsed = new URL(url);
const isHttps = parsed.protocol === 'https:';
@@ -365,6 +481,7 @@ function httpHead(url: string, timeoutMs: number): Promise<number> {
{
method: 'HEAD',
timeout: timeoutMs,
lookup: pinnedLookup(addresses),
...(isHttps && { rejectUnauthorized: false }),
},
(res) => {
@@ -384,7 +501,7 @@ function httpHead(url: string, timeoutMs: number): Promise<number> {
/** Check that the target URL is reachable from inside the container. */
async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
logger.info('Checking target URL reachability...', { targetUrl });
logger.info('Checking target URL reachability...');
// 1. Parse URL
let parsed: URL;
@@ -402,12 +519,11 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
);
}
// 2. DNS lookup — detect loopback addresses early for a better hint
// 2. Resolve all records once — reused (pinned) for the connection below.
const hostname = parsed.hostname;
let resolvedAddress: string | undefined;
let addresses: LookupAddress[];
try {
const result = await lookup(hostname);
resolvedAddress = result.address;
addresses = await lookup(hostname, { all: true });
} catch {
return err(
new PentestError(
@@ -420,25 +536,40 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
);
}
// 3. HTTP reachability check
// 3. Reject the link-local metadata range (169.254.0.0/16).
const blocked = addresses.find((entry) => isBlockedAddress(entry.address));
if (blocked) {
return err(
new PentestError(
`Target URL ${targetUrl} resolves to ${blocked.address}, a link-local address ` +
`(169.254.0.0/16). This range hosts the cloud instance metadata service and cannot be scanned.`,
'config',
false,
{ targetUrl, hostname, address: blocked.address },
ErrorCode.TARGET_UNREACHABLE,
),
);
}
// 4. HTTP reachability check (socket pinned to the resolved addresses).
try {
await httpHead(targetUrl, TARGET_URL_TIMEOUT_MS);
await httpHead(targetUrl, TARGET_URL_TIMEOUT_MS, addresses);
logger.info('Target URL OK');
return ok(undefined);
} catch (error) {
const isLoopback = isLoopbackAddress(resolvedAddress);
const detail = error instanceof Error ? error.message : String(error);
const isLoopback = addresses.some((entry) => isLoopbackAddress(entry.address));
if (isLoopback) {
const suggestion = targetUrl.replace(hostname, 'host.docker.internal');
return err(
new PentestError(
`Target URL ${targetUrl} resolves to ${resolvedAddress} (loopback) and is not reachable. ` +
`Target URL ${targetUrl} resolves to a loopback address and is not reachable. ` +
`For local services, use host.docker.internal instead of ${hostname} (e.g., ${suggestion})`,
'network',
false,
{ targetUrl, resolvedAddress, hostname },
{ targetUrl, hostname },
ErrorCode.TARGET_UNREACHABLE,
),
);
@@ -449,7 +580,7 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
`Target URL ${targetUrl} is not reachable: ${detail}`,
'network',
false,
{ targetUrl, resolvedAddress },
{ targetUrl },
ErrorCode.TARGET_UNREACHABLE,
),
);
@@ -463,8 +594,9 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
*
* 1. Repository path exists and contains .git
* 2. Config file parses and validates (if configPath provided)
* 3. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
* 4. Target URL is reachable from the container
* 3. code_path rules match at least one entry in the repo (skipped without config)
* 4. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
* 5. Target URL is reachable from the container
*
* Returns on first failure.
*/
@@ -484,20 +616,31 @@ export async function runPreflightChecks(
}
// 2. Config check (free — filesystem + CPU)
let parsedConfig: Config | null = null;
if (configPath) {
const configResult = await validateConfig(configPath, logger);
if (!configResult.ok) {
return configResult;
}
parsedConfig = configResult.value;
}
// 3. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
// 3. code_path rules must match real entries in the repo (filesystem only).
// Runs after both repo and config are valid, before any network round-trip.
if (parsedConfig) {
const codePathResult = await validateCodePathsExist(parsedConfig, repoPath, logger);
if (!codePathResult.ok) {
return codePathResult;
}
}
// 4. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
const credResult = await validateCredentials(logger, apiKey, providerConfig);
if (!credResult.ok) {
return credResult;
}
// 4. Target URL reachability check (cheap — 1 HTTP round-trip)
// 5. Target URL reachability check (cheap — 1 HTTP round-trip)
const urlResult = await validateTargetUrl(targetUrl, logger);
if (!urlResult.ok) {
return urlResult;
+189 -29
View File
@@ -8,12 +8,117 @@ import { fs, path } from 'zx';
import { PROMPTS_DIR } from '../paths.js';
import { PLAYWRIGHT_SESSION_MAPPING } from '../session-manager.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import type { Authentication, DistributedConfig, ReportConfig, Rule, VulnClass } from '../types/config.js';
import { isGlobPattern } from '../utils/glob.js';
import { handlePromptError, PentestError } from './error-handling.js';
/**
 * Renders the `code_path` rules of a rule list as a markdown bullet list.
 *
 * Each bullet has the form `- <value> [GLOB|FILE] — <description>`; the tag is
 * `[GLOB]` when `isGlobPattern` accepts the value and `[FILE]` otherwise.
 * Rules of any other type are ignored.
 *
 * @param rules - Rules to render (any types; only `code_path` is used).
 * @returns The bullet list, or the literal `None` when there are no code_path rules.
 */
function renderCodePathRules(rules: Rule[]): string {
  const codePathRules = rules.filter((rule) => rule.type === 'code_path');
  if (codePathRules.length === 0) return 'None';
  return codePathRules
    .map((rule) => {
      const kind = isGlobPattern(rule.value) ? '[GLOB]' : '[FILE]';
      // Keep the tag and the description apart; previously they fused into "[FILE]description".
      const suffix = rule.description ? ` — ${rule.description}` : '';
      return `- ${rule.value} ${kind}${suffix}`;
    })
    .join('\n');
}
/** Wording used to build one vulnerability class's subsection of the summary prompt. */
interface VulnSummarySpec {
// Bold subsection title.
readonly heading: string;
// Name of the report section the agent is told to look for.
readonly evidenceSection: string;
// Inserted into the fallback sentence "No <label> vulnerabilities were found."
readonly noneFoundLabel: string;
}
/**
 * Summary-subsection wording for every vulnerability class.
 * Key order is significant: it is the rendering order used when the caller
 * selects no classes.
 */
const VULN_SUMMARY_SPECS: Record<VulnClass, VulnSummarySpec> = {
auth: {
heading: 'Authentication Vulnerabilities',
evidenceSection: 'Authentication Exploitation Evidence',
noneFoundLabel: 'authentication',
},
authz: {
heading: 'Authorization Vulnerabilities',
evidenceSection: 'Authorization Exploitation Evidence',
noneFoundLabel: 'authorization',
},
xss: {
heading: 'Cross-Site Scripting (XSS) Vulnerabilities',
evidenceSection: 'XSS Exploitation Evidence',
noneFoundLabel: 'XSS',
},
injection: {
heading: 'SQL/Command Injection Vulnerabilities',
evidenceSection: 'Injection Exploitation Evidence',
noneFoundLabel: 'SQL or command injection',
},
ssrf: {
heading: 'Server-Side Request Forgery (SSRF) Vulnerabilities',
evidenceSection: 'SSRF Exploitation Evidence',
noneFoundLabel: 'SSRF',
},
};
/**
 * Renders one summary-instruction subsection per vulnerability class.
 *
 * @param selected - Classes to render, in order; an empty list means every
 *   class in VULN_SUMMARY_SPECS, in that table's key order.
 * @returns The subsections separated by blank lines.
 */
function renderVulnSummarySubsections(selected: readonly VulnClass[]): string {
  const everyClass = Object.keys(VULN_SUMMARY_SPECS) as VulnClass[];
  const classes = selected.length > 0 ? selected : everyClass;

  const subsections: string[] = [];
  for (const cls of classes) {
    const { heading, evidenceSection, noneFoundLabel } = VULN_SUMMARY_SPECS[cls];
    subsections.push(
      `**${heading}:**\n{Check for "${evidenceSection}" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No ${noneFoundLabel} vulnerabilities were found."}`,
    );
  }
  return subsections.join('\n\n');
}
/**
 * Renders the top-level <report_filters> block.
 *
 * Returns an empty string when no filter is set. Otherwise each filter is
 * included only when the operator configured it, so the agent never sees
 * `none` placeholders or instructions for filters that don't apply.
 * Guidance is trimmed, and whitespace-only guidance counts as unset.
 */
function renderReportFiltersBlock(report: ReportConfig | undefined): string {
  const severity = report?.min_severity;
  const confidence = report?.min_confidence;
  const guidance = report?.guidance?.trim();
  if (!severity && !confidence && !guidance) return '';

  const severityLines = severity
    ? [
        `- Minimum severity: ${severity} — keep only findings rated this severity or higher (scale: low < medium < high < critical).`,
      ]
    : [];
  const confidenceLines = confidence
    ? [
        `- Minimum confidence: ${confidence} — keep only findings rated this confidence or higher (scale: low < medium < high).`,
      ]
    : [];
  const guidanceLines = guidance
    ? ['', 'User guidance — apply throughout the report as binding directives for finding selection:', guidance]
    : [];

  return [
    '<report_filters>',
    'The filters below are user-supplied and binding for this assessment. Honor each strictly when assembling the final report.',
    '',
    ...severityLines,
    ...confidenceLines,
    ...guidanceLines,
    '</report_filters>',
  ].join('\n');
}
/**
 * Builds the per-finding DROP rules used inside the cleanup step.
 *
 * Severity and confidence are inlined as concrete thresholds. Guidance is only
 * referenced by pointer, so its text lives solely in <report_filters> and is
 * not stated twice.
 *
 * @param report The operator's report configuration, if any.
 * @returns The DROP rule list, or '' when no filter is configured.
 */
function renderReportFilterRules(report: ReportConfig | undefined): string {
  const criteria = [
    report?.min_severity ? `* severity is below ${report.min_severity}` : '',
    report?.min_confidence ? `* confidence is below ${report.min_confidence}` : '',
    report?.guidance?.trim() ? '* topic matches an exclusion in the user guidance' : '',
  ].filter((criterion) => criterion !== '');

  if (criteria.length === 0) {
    return '';
  }

  const header = ' - DROP any `### [TYPE]-VULN-[NUMBER]` finding whose:';
  const bullets = criteria.map((criterion) => ` ${criterion}`).join('\n');
  return `${header}\n${bullets}`;
}
/** Caller-supplied values made available to prompt-template interpolation. */
interface PromptVariables {
webUrl: string;
repoPath: string;
// Substituted for the {{AUTH_STATE_FILE}} placeholder when an authentication block is configured.
AUTH_STATE_FILE: string;
// NOTE(review): optional; how this value is consumed is not visible in this section — confirm before relying on it.
PLAYWRIGHT_SESSION?: string;
}
@@ -76,6 +181,21 @@ async function buildLoginInstructions(
`generated TOTP code using secret "${authentication.credentials.totp_secret}"`,
);
}
if (authentication.credentials.email_login?.address) {
userInstructions = userInstructions.replace(/\$email_address/g, authentication.credentials.email_login.address);
}
if (authentication.credentials.email_login?.password) {
userInstructions = userInstructions.replace(
/\$email_password/g,
authentication.credentials.email_login.password,
);
}
if (authentication.credentials.email_login?.totp_secret) {
userInstructions = userInstructions.replace(
/\$email_totp/g,
`generated TOTP code using secret "${authentication.credentials.email_login.totp_secret}"`,
);
}
}
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
@@ -175,36 +295,69 @@ async function interpolateVariables(
.replace(/{{AUTH_CONTEXT}}/g, buildAuthContext(config))
.replace(/{{DESCRIPTION}}/g, config?.description ? `Description: ${config.description}` : '');
if (config) {
// Handle rules section - if both are empty, use cleaner messaging
const hasAvoidRules = config.avoid && config.avoid.length > 0;
const hasFocusRules = config.focus && config.focus.length > 0;
if (!hasAvoidRules && !hasFocusRules) {
// Replace the entire rules section with a clean message
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
} else {
const avoidRules = hasAvoidRules ? config.avoid?.map((r) => `- ${r.description}`).join('\n') : 'None';
const focusRules = hasFocusRules ? config.focus?.map((r) => `- ${r.description}`).join('\n') : 'None';
result = result.replace(/{{RULES_AVOID}}/g, avoidRules).replace(/{{RULES_FOCUS}}/g, focusRules);
}
// Extract and inject login instructions from config
if (config.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
} else {
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
}
const avoidUrlRules = config?.avoid?.filter((r) => r.type !== 'code_path') ?? [];
const focusUrlRules = config?.focus?.filter((r) => r.type !== 'code_path') ?? [];
if (avoidUrlRules.length === 0 && focusUrlRules.length === 0) {
result = result.replace(/<rules>[\s\S]*?<\/rules>\s*/g, '');
} else {
const avoidStr = avoidUrlRules.length > 0 ? avoidUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
const focusStr = focusUrlRules.length > 0 ? focusUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
result = result.replace(/{{RULES_AVOID}}/g, avoidStr).replace(/{{RULES_FOCUS}}/g, focusStr);
}
const avoidCodeRules = (config?.avoid ?? []).filter((r) => r.type === 'code_path');
const focusCodeRules = (config?.focus ?? []).filter((r) => r.type === 'code_path');
if (avoidCodeRules.length === 0 && focusCodeRules.length === 0) {
result = result.replace(/<code_path_rules>[\s\S]*?<\/code_path_rules>\s*/g, '');
} else {
result = result
.replace(/{{CODE_RULES_AVOID}}/g, renderCodePathRules(config?.avoid ?? []))
.replace(/{{CODE_RULES_FOCUS}}/g, renderCodePathRules(config?.focus ?? []));
}
const roe = config?.rules_of_engagement?.trim() ?? '';
if (roe) {
result = result.replace(/{{RULES_OF_ENGAGEMENT}}/g, roe);
} else {
result = result.replace(/<rules_of_engagement>[\s\S]*?<\/rules_of_engagement>\s*/g, '');
}
if (!config?.authentication) {
result = result.replace(/<shared_authenticated_session>[\s\S]*?<\/shared_authenticated_session>\s*/g, '');
} else {
result = result.replace(/{{AUTH_STATE_FILE}}/g, variables.AUTH_STATE_FILE);
}
if (config?.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
} else {
// Replace the entire rules section with a clean message when no config provided
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
}
const vulnClasses = config?.vuln_classes ?? [];
result = result.replace(
/{{VULN_CLASSES_TESTED}}/g,
vulnClasses.length > 0 ? vulnClasses.join(', ') : 'injection, xss, auth, authz, ssrf',
);
result = result.replace(/{{VULN_SUMMARY_SUBSECTIONS}}/g, renderVulnSummarySubsections(vulnClasses));
const exploitEnabled = config?.exploit ?? true;
result = result
.replace(/{{EXPLOITATION}}/g, exploitEnabled ? 'enabled' : 'disabled')
.replace(/{{REPORT_VULN_HEADING}}/g, exploitEnabled ? 'Exploitation Evidence' : 'Findings')
.replace(
/{{REPORT_VULN_SUBHEADING}}/g,
exploitEnabled ? 'Successfully Exploited Vulnerabilities' : 'Identified Vulnerabilities',
);
result = result
.replace(/{{REPORT_FILTERS_BLOCK}}/g, renderReportFiltersBlock(config?.report))
.replace(/{{REPORT_FILTER_RULES}}/g, renderReportFilterRules(config?.report));
// Collapse runs of 3+ newlines (left behind by tag-strip and empty-fragment substitutions).
result = result.replace(/\n{3,}/g, '\n\n');
// Validate that all placeholders have been replaced (excluding instructional text)
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
if (remainingPlaceholders) {
@@ -221,6 +374,14 @@ async function interpolateVariables(
}
}
/**
 * Resolves an optional promptDir override to the directory prompts load from.
 *
 * - No override (undefined or empty): the default PROMPTS_DIR.
 * - Absolute override: returned unchanged.
 * - Relative override: resolved against SHANNON_WORKER_ROOT (falling back to
 *   the current working directory when that variable is unset), so relative
 *   paths from callers stay cwd-independent.
 */
function resolvePromptDir(promptDir: string | undefined): string {
  if (!promptDir) {
    return PROMPTS_DIR;
  }
  if (!path.isAbsolute(promptDir)) {
    const workerRoot = process.env.SHANNON_WORKER_ROOT ?? process.cwd();
    return path.resolve(workerRoot, promptDir);
  }
  return promptDir;
}
// Pure function: Load and interpolate prompt template
export async function loadPrompt(
promptName: string,
@@ -231,8 +392,7 @@ export async function loadPrompt(
promptDir?: string,
): Promise<string> {
try {
// 1. Resolve prompt file path (promptDir override → default PROMPTS_DIR)
const basePromptsDir = promptDir ?? PROMPTS_DIR;
const basePromptsDir = resolvePromptDir(promptDir);
const promptsDir = pipelineTestingMode ? path.join(basePromptsDir, 'pipeline-testing') : basePromptsDir;
const promptPath = path.join(promptsDir, `${promptName}.txt`);
+39 -30
View File
@@ -12,60 +12,66 @@ import { PentestError } from './error-handling.js';
interface DeliverableFile {
name: string;
path: string;
/** Candidate filenames in priority order. First one that exists wins. */
paths: readonly string[];
required: boolean;
}
// Pure function: Assemble final report from specialist deliverables
// Pure function: Assemble final report from specialist deliverables.
// Per class, prefer the exploit-agent's evidence file; fall back to renderer-produced findings.
// Both never coexist for a workspace because scope (exploit flag) is locked.
export async function assembleFinalReport(
sourceDir: string,
deliverablesSubdir: string | undefined,
logger: ActivityLogger,
): Promise<string> {
const deliverableFiles: DeliverableFile[] = [
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false },
const deliverableFiles: readonly DeliverableFile[] = [
{ name: 'Injection', paths: ['injection_exploitation_evidence.md', 'injection_findings.md'], required: false },
{ name: 'XSS', paths: ['xss_exploitation_evidence.md', 'xss_findings.md'], required: false },
{ name: 'Authentication', paths: ['auth_exploitation_evidence.md', 'auth_findings.md'], required: false },
{ name: 'SSRF', paths: ['ssrf_exploitation_evidence.md', 'ssrf_findings.md'], required: false },
{ name: 'Authorization', paths: ['authz_exploitation_evidence.md', 'authz_findings.md'], required: false },
];
const dir = deliverablesDir(sourceDir, deliverablesSubdir);
const sections: string[] = [];
for (const file of deliverableFiles) {
const filePath = path.join(deliverablesDir(sourceDir, deliverablesSubdir), file.path);
try {
if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8');
sections.push(content);
logger.info(`Added ${file.name} findings`);
} else if (file.required) {
let added = false;
for (const candidate of file.paths) {
const filePath = path.join(dir, candidate);
try {
if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8');
sections.push(content);
logger.info(`Added ${file.name} section from ${candidate}`);
added = true;
break;
}
} catch (error) {
const err = error as Error;
logger.warn(`Could not read ${candidate}: ${err.message}`);
}
}
if (!added) {
if (file.required) {
throw new PentestError(
`Required deliverable file not found: ${file.path}`,
`Required deliverable file not found: ${file.paths.join(' or ')}`,
'filesystem',
false,
{ deliverableFile: file.path, sourceDir },
{ deliverableFile: file.paths, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND,
);
} else {
logger.info(`No ${file.name} deliverable found`);
}
} catch (error) {
if (file.required) {
throw error;
}
const err = error as Error;
logger.warn(`Could not read ${file.path}: ${err.message}`);
logger.info(`No ${file.name} deliverable found`);
}
}
const finalContent = sections.join('\n\n');
const outputDir = deliverablesDir(sourceDir, deliverablesSubdir);
const finalReportPath = path.join(outputDir, 'comprehensive_security_assessment_report.md');
const finalReportPath = path.join(dir, 'comprehensive_security_assessment_report.md');
try {
// Ensure deliverables directory exists
await fs.ensureDir(outputDir);
await fs.ensureDir(dir);
await fs.writeFile(finalReportPath, finalContent);
logger.info(`Final report assembled at ${finalReportPath}`);
} catch (error) {
@@ -123,7 +129,10 @@ export async function injectModelIntoReport(
logger.info(`Injecting model info into report: ${modelStr}`);
// 3. Read the final report
const reportPath = path.join(deliverablesDir(repoPath, deliverablesSubdir), 'comprehensive_security_assessment_report.md');
const reportPath = path.join(
deliverablesDir(repoPath, deliverablesSubdir),
'comprehensive_security_assessment_report.md',
);
if (!(await fs.pathExists(reportPath))) {
logger.warn('Final report not found, skipping model injection');
@@ -0,0 +1,262 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Authentication validation service.
*
* Drives a real browser via the playwright-cli skill to confirm
* user-supplied credentials log in successfully, before the pentest
* pipeline burns hours on broken auth.
*/
import { readFile, rm } from 'node:fs/promises';
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
import { z } from 'zod';
import { runClaudePrompt } from '../ai/claude-executor.js';
import type { AuditSession } from '../audit/index.js';
import { authStateFile } from '../audit/utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AgentEndResult } from '../types/audit.js';
import type { DistributedConfig, ProviderConfig } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { err, ok, type Result } from '../types/result.js';
import { PentestError } from './error-handling.js';
import { loadPrompt } from './prompt-manager.js';
/** Allowed values for the validator verdict's `failure_point` field. */
const FAILURE_POINTS = ['username_or_password', 'totp_secret', 'out_of_band'] as const;
type AuthFailurePoint = (typeof FAILURE_POINTS)[number];

/** Type guard: true only when `v` is a string equal to one of FAILURE_POINTS. */
function isAuthFailurePoint(v: unknown): v is AuthFailurePoint {
  if (typeof v !== 'string') {
    return false;
  }
  return FAILURE_POINTS.some((point) => point === v);
}
// Structured verdict the validator agent is asked to return.
// NOTE: SDK's AJV validator expects draft-07; Zod defaults to draft-2020-12,
// which causes the SDK to silently skip structured output.
const AuthValidationSchema = z.object({
login_success: z.boolean(),
failure_point: z.enum(FAILURE_POINTS).optional(),
// Optional at the schema level even though the description asks for it on failure;
// classifyResult substitutes a default message when it is missing or blank.
failure_detail: z
.string()
.max(250)
.optional()
.describe(
'Free-form 1-2 sentence diagnostic of what the page showed (error messages, page state) when login failed. Required when login_success is false. Mask any sensitive values.',
),
});
type AuthValidationVerdict = z.infer<typeof AuthValidationSchema>;
// Output format passed to runClaudePrompt; the JSON Schema is emitted with the
// draft-07 target for the reason given in the NOTE above.
const VALIDATION_SCHEMA: JsonSchemaOutputFormat = {
type: 'json_schema',
schema: z.toJSONSchema(AuthValidationSchema, { target: 'draft-07' }) as Record<string, unknown>,
};
// Name used for the prompt lookup (loadPrompt), the runClaudePrompt call, and the
// audit session's startAgent/endAgent records.
const AGENT_NAME = 'validate-authentication';
/** Inputs for validateAuthentication. */
export interface ValidateAuthInput {
// Parsed config; validation is a no-op when it has no `authentication` block.
readonly distributedConfig: DistributedConfig;
// Passed to loadPrompt as a prompt variable and to runClaudePrompt.
readonly repoPath: string;
// Passed to loadPrompt as a prompt variable.
readonly webUrl: string;
readonly logger: ActivityLogger;
// Supplies the session metadata used to locate the auth-state file, and records agent start/end.
readonly auditSession: AuditSession;
// Forwarded to auditSession.startAgent and included in the end-of-agent record.
readonly attemptNumber: number;
// The next three are forwarded unchanged to runClaudePrompt.
readonly apiKey?: string;
readonly providerConfig?: ProviderConfig;
readonly deliverablesSubdir?: string;
// Optional prompt directory override, forwarded to loadPrompt.
readonly promptDir?: string;
// Forwarded to loadPrompt; treated as false when omitted.
readonly pipelineTestingMode?: boolean;
}
/**
 * Runs the `validate-authentication` agent and reports whether the configured
 * credentials produced a usable authenticated session.
 *
 * Flow: skip when no authentication block is configured; delete any existing
 * auth-state file; load the prompt; run the agent with a structured-output
 * schema; classify the verdict; and, on reported success, confirm the saved
 * auth state is readable and non-empty. The agent run is bracketed by
 * auditSession.startAgent / endAgent.
 *
 * @returns ok(undefined) when validation passes or is skipped; otherwise an
 *   err carrying the PentestError from classification or from the saved-state
 *   check.
 */
export async function validateAuthentication(input: ValidateAuthInput): Promise<Result<void, PentestError>> {
  const { distributedConfig, logger, auditSession, attemptNumber } = input;

  const authentication = distributedConfig.authentication;
  if (!authentication) {
    return ok(undefined);
  }

  logger.info('Validating authentication credentials with live browser...', {
    loginUrl: authentication.login_url,
    loginType: authentication.login_type,
  });

  // Remove any existing state file first so the post-run check can only see
  // what this run wrote.
  const stateFile = authStateFile(auditSession.sessionMetadata);
  await rm(stateFile, { force: true });

  const prompt = await loadPrompt(
    AGENT_NAME,
    { webUrl: input.webUrl, repoPath: input.repoPath, AUTH_STATE_FILE: stateFile },
    distributedConfig,
    input.pipelineTestingMode ?? false,
    logger,
    input.promptDir,
  );

  await auditSession.startAgent(AGENT_NAME, prompt, attemptNumber);
  const startedAt = Date.now();

  const result = await runClaudePrompt(
    prompt,
    input.repoPath,
    '',
    'Authentication validation',
    AGENT_NAME,
    auditSession,
    logger,
    'medium',
    VALIDATION_SCHEMA,
    input.apiKey,
    input.deliverablesSubdir,
    input.providerConfig,
  );

  // A "login succeeded" verdict only counts once the saved session checks out.
  const verdict = classifyResult(result, authentication);
  const outcome: Result<void, PentestError> = verdict.ok ? await verifySavedAuthState(stateFile, logger) : verdict;

  const endResult: AgentEndResult = {
    attemptNumber,
    duration_ms: Date.now() - startedAt,
    cost_usd: result.cost || 0,
    success: outcome.ok,
    ...(result.model !== undefined && { model: result.model }),
    ...(!outcome.ok && { error: outcome.error.message }),
  };
  await auditSession.endAgent(AGENT_NAME, endResult);

  return outcome;
}
/**
 * Confirms the validator agent actually persisted a usable browser session.
 *
 * The file at `stateFile` must be readable, parse as JSON, and contain at
 * least one entry in its `cookies` or `origins` array. Every failure is
 * returned as a retryable 'validation' PentestError with code
 * AGENT_EXECUTION_FAILED.
 *
 * @param stateFile Path the agent was told to save the authenticated session to.
 * @param logger Receives an info entry with the entry counts on success.
 * @returns ok(undefined) when the saved state is usable, otherwise err.
 */
async function verifySavedAuthState(stateFile: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
  let contents: string;
  try {
    contents = await readFile(stateFile, 'utf8');
  } catch (readErr) {
    // Keep the underlying cause (ENOENT, EACCES, ...) in the error context, as
    // the JSON-parse branch below does, so the failure can be diagnosed.
    const detail = readErr instanceof Error ? readErr.message : String(readErr);
    return err(
      new PentestError(
        `Preflight reported login success but did not save the authenticated session to ${stateFile}.`,
        'validation',
        true,
        { stateFile, readError: detail },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(contents);
  } catch (parseErr) {
    const detail = parseErr instanceof Error ? parseErr.message : String(parseErr);
    return err(
      new PentestError(
        `Preflight saved an authenticated session to ${stateFile}, but the file is not valid JSON: ${detail}`,
        'validation',
        true,
        { stateFile, parseError: detail },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  const cookieCount = countStorageEntries(parsed, 'cookies');
  const originCount = countStorageEntries(parsed, 'origins');
  // A state file with neither cookies nor origins carries no session at all.
  if (cookieCount === 0 && originCount === 0) {
    return err(
      new PentestError(
        `Preflight saved an authenticated session to ${stateFile}, but it contains no cookies or origins — the browser was not actually logged in.`,
        'validation',
        true,
        { stateFile, cookieCount, originCount },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  logger.info('Preflight authenticated session saved', { stateFile, cookieCount, originCount });
  return ok(undefined);
}
/**
 * Counts the entries under `key` in a parsed auth-state document.
 *
 * @returns The array length at `parsed[key]`, or 0 when `parsed` is not an
 *   object or the value at `key` is not an array.
 */
function countStorageEntries(parsed: unknown, key: 'cookies' | 'origins'): number {
  if (parsed === null || typeof parsed !== 'object') {
    return 0;
  }
  const entries: unknown = (parsed as Record<string, unknown>)[key];
  if (!Array.isArray(entries)) {
    return 0;
  }
  return entries.length;
}
/**
 * Maps the validator agent's raw result onto a Result.
 *
 * - Agent run failed: 'validation' error, retryable per `result.retryable`
 *   (defaulting to true).
 * - No structured output object: retryable 'validation' error.
 * - `login_success === true`: ok.
 * - Anything else: non-retryable 'config' error with code AUTH_LOGIN_FAILED,
 *   carrying failurePoint and failureDetail in its context. An unrecognized
 *   failure_point falls back to 'out_of_band'; a missing or blank
 *   failure_detail falls back to a generic message.
 */
function classifyResult(
  result: import('../ai/claude-executor.js').ClaudePromptResult,
  authentication: NonNullable<DistributedConfig['authentication']>,
): Result<void, PentestError> {
  if (!result.success) {
    const detail = result.error ?? 'Validator agent terminated unexpectedly.';
    const runFailure = new PentestError(
      `Authentication validator failed to run: ${detail}`,
      'validation',
      result.retryable ?? true,
      { originalError: detail, errorType: result.errorType, cost: result.cost },
      ErrorCode.AGENT_EXECUTION_FAILED,
    );
    return err(runFailure);
  }

  const output = result.structuredOutput;
  if (!output || typeof output !== 'object') {
    const missingVerdict = new PentestError(
      'Authentication validator did not return a structured verdict.',
      'validation',
      true,
      { cost: result.cost },
      ErrorCode.AGENT_EXECUTION_FAILED,
    );
    return err(missingVerdict);
  }

  const { login_success, failure_point, failure_detail } = output as Partial<AuthValidationVerdict>;
  if (login_success === true) {
    return ok(undefined);
  }

  const failurePoint: AuthFailurePoint = isAuthFailurePoint(failure_point) ? failure_point : 'out_of_band';
  // `||` is intentional: a blank detail is treated the same as a missing one.
  const failureDetail =
    failure_detail?.trim() || 'Login failed without a specific diagnostic from the validator agent.';

  const loginFailure = new PentestError(
    `Authentication failed at "${failurePoint}": ${failureDetail}`,
    'config',
    false,
    {
      failurePoint,
      failureDetail,
      loginUrl: authentication.login_url,
      loginType: authentication.login_type,
      cost: result.cost,
    },
    ErrorCode.AUTH_LOGIN_FAILED,
  );
  return err(loginFailure);
}
+3
View File
@@ -151,6 +151,9 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
// Playwright session mapping - assigns each agent to a specific session for browser isolation
// Keys are promptTemplate values from AGENTS registry
export const PLAYWRIGHT_SESSION_MAPPING: Record<string, PlaywrightSession> = Object.freeze({
// Runs before any agent — non-concurrent, so agent1 is safe to share
'validate-authentication': 'agent1',
// Phase 1: Pre-reconnaissance
'pre-recon-code': 'agent1',
+221 -26
View File
@@ -18,26 +18,30 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
import { writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js';
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
import { AuditSession } from '../audit/index.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import type { SessionMetadata } from '../audit/utils.js';
import { authStateFile, generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { ContainerConfig, ProviderConfig } from '../types/config.js';
import type { CheckpointContext } from '../interfaces/checkpoint-provider.js';
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import { renderFindingsFromQueues } from '../services/findings-renderer.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import { runPreflightChecks } from '../services/preflight.js';
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { validateAuthentication } from '../services/validate-authentication.js';
import { AGENTS } from '../session-manager.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { ContainerConfig, ProviderConfig, VulnClass } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { isErr } from '../types/result.js';
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
import { fileExists, readJson } from '../utils/file-io.js';
import { atomicWrite, fileExists, readJson } from '../utils/file-io.js';
import { createActivityLogger } from './activity-logger.js';
import type { AgentMetrics, PipelineState, ResumeState } from './shared.js';
@@ -135,7 +139,8 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
// Skip guard: the checkpoint provider decides whether to run the agent.
// The default NoOp provider always returns { skip: false }.
const skipContainer = getContainer(workflowId) ??
const skipContainer =
getContainer(workflowId) ??
getOrCreateContainer(workflowId, buildSessionMetadata(input), buildContainerConfig(input));
const decision = await skipContainer.checkpointProvider.shouldSkipAgent(
agentName,
@@ -181,11 +186,7 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
attemptNumber,
...(input.apiKey !== undefined && { apiKey: input.apiKey }),
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
...(input.promptDir !== undefined && {
promptDir: path.isAbsolute(input.promptDir)
? input.promptDir
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
}),
...(input.promptDir !== undefined && { promptDir: input.promptDir }),
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
},
auditSession,
@@ -321,7 +322,15 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
const logger = createActivityLogger();
logger.info('Running preflight validation...', { attempt: attemptNumber });
const result = await runPreflightChecks(input.webUrl, input.repoPath, input.configPath, logger, input.skipGitCheck, input.apiKey, input.providerConfig);
const result = await runPreflightChecks(
input.webUrl,
input.repoPath,
input.configPath,
logger,
input.skipGitCheck,
input.apiKey,
input.providerConfig,
);
if (isErr(result)) {
const classified = classifyErrorForTemporal(result.error);
@@ -364,6 +373,95 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
}
}
/**
 * Builds the ApplicationFailure thrown by runAuthenticationValidation.
 *
 * Classifies `error` for Temporal, truncates the message, attaches a single
 * details record (phase, attempt, elapsed, plus any `extraDetails`), and trims
 * the stack trace.
 *
 * NOTE(review): the details array is handed to `ApplicationFailure.nonRetryable`
 * as one argument. If that parameter is variadic, the non-retryable failure
 * carries the record one array level deeper than the retryable one — confirm
 * against the Temporal SDK and whatever reads `details`.
 */
function buildAuthValidationFailure(
  error: unknown,
  rawMessage: string,
  attemptNumber: number,
  startTime: number,
  extraDetails: Record<string, unknown> = {},
): ApplicationFailure {
  const classified = classifyErrorForTemporal(error);
  const message = truncateErrorMessage(rawMessage);
  const details = [{ phase: 'auth-validation', attemptNumber, elapsed: Date.now() - startTime, ...extraDetails }];
  const failure = classified.retryable
    ? ApplicationFailure.create({ message, type: classified.type, details })
    : ApplicationFailure.nonRetryable(message, classified.type, details);
  truncateStackTrace(failure);
  return failure;
}

/**
 * Authentication validation activity. No-ops without an authentication
 * block; otherwise surfaces a classified failure (failurePoint +
 * failureDetail in ApplicationFailure.details) on credential rejection.
 *
 * Heartbeats on a fixed interval for the duration of the activity. A config
 * load failure is logged and treated as "nothing to validate".
 */
export async function runAuthenticationValidation(input: ActivityInput): Promise<void> {
  const startTime = Date.now();
  const attemptNumber = Context.current().info.attempt;

  const heartbeatInterval = setInterval(() => {
    const elapsedSeconds = Math.floor((Date.now() - startTime) / 1000);
    heartbeat({ phase: 'auth-validation', elapsedSeconds, attempt: attemptNumber });
  }, HEARTBEAT_INTERVAL_MS);

  try {
    const logger = createActivityLogger();
    const sessionMetadata = buildSessionMetadata(input);
    const container = getOrCreateContainer(input.workflowId, sessionMetadata, buildContainerConfig(input));

    const configResult = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML);
    if (isErr(configResult)) {
      // runPreflightValidation already validated parsing, so this is unexpected.
      logger.warn(`runAuthenticationValidation: config load failed unexpectedly: ${configResult.error.message}`);
      return;
    }

    const distributedConfig = configResult.value;
    if (!distributedConfig?.authentication) {
      logger.info('No authentication configured — skipping credential validation');
      return;
    }

    const auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize(input.workflowId);

    const result = await validateAuthentication({
      distributedConfig,
      repoPath: input.repoPath,
      webUrl: input.webUrl,
      logger,
      auditSession,
      attemptNumber,
      ...(input.apiKey !== undefined && { apiKey: input.apiKey }),
      ...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
      ...(input.deliverablesSubdir !== undefined && { deliverablesSubdir: input.deliverablesSubdir }),
      ...(input.promptDir !== undefined && { promptDir: input.promptDir }),
      ...(input.pipelineTestingMode !== undefined && { pipelineTestingMode: input.pipelineTestingMode }),
    });
    if (!isErr(result)) {
      return;
    }

    // Carry the classified failure point and detail through to the workflow.
    const ctx = result.error.context;
    throw buildAuthValidationFailure(result.error, result.error.message, attemptNumber, startTime, {
      ...(ctx.failurePoint !== undefined && { failurePoint: ctx.failurePoint }),
      ...(ctx.failureDetail !== undefined && { failureDetail: ctx.failureDetail }),
    });
  } catch (error) {
    // Failures built above are already in their final form.
    if (error instanceof ApplicationFailure) {
      throw error;
    }
    const rawMessage = error instanceof Error ? error.message : String(error);
    throw buildAuthValidationFailure(error, rawMessage, attemptNumber, startTime);
  } finally {
    clearInterval(heartbeatInterval);
  }
}
/**
* Initialize a private git repository inside the workspace deliverables directory.
* Idempotent: skips if .git already exists (resume case).
@@ -390,11 +488,68 @@ export async function initDeliverableGit(input: ActivityInput): Promise<void> {
}
/**
* Assemble the final report by concatenating exploitation evidence files.
* Drop a stealth cli.config.json into the repo's .playwright/ directory so
* `playwright-cli open` auto-loads anti-detection defaults from the agent's
* cwd (disables the Blink AutomationControlled flag, drops the
* --enable-automation default, and overrides the HeadlessChrome user agent).
*
* No-op when the repo already has its own .playwright/cli.config.json.
*/
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
export async function syncPlaywrightStealthConfig(input: ActivityInput): Promise<void> {
  const logger = createActivityLogger();
  const outcome = await writePlaywrightStealthConfig(input.repoPath);

  // 'skipped-existing' means the writer found a config already at that path
  // and left it alone; any other result is reported as a write.
  const message =
    outcome.result === 'skipped-existing'
      ? `Playwright stealth config: leaving existing ${outcome.configPath} in place`
      : `Playwright stealth config: wrote ${outcome.configPath}`;
  logger.info(message);
}
/**
 * Writes the config's code_path avoid rules into Claude's user-scope
 * settings.json so the SDK enforces them at the tool layer for every agent in
 * this run.
 *
 * Intended to run once per workflow before any agent fires: config is fixed
 * for the lifetime of the workflow, and a single write avoids the
 * parallel-agent race on the global ~/.claude/settings.json file.
 *
 * A config load failure is logged as a warning and the sync is skipped.
 */
export async function syncCodePathDenyRules(input: ActivityInput): Promise<void> {
  const logger = createActivityLogger();
  const sessionMetadata = buildSessionMetadata(input);
  const containerConfig = buildContainerConfig(input);
  const container = getOrCreateContainer(input.workflowId, sessionMetadata, containerConfig);

  const loaded = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML);
  if (isErr(loaded)) {
    logger.warn(`syncCodePathDenyRules: skipping (config load failed: ${loaded.error.message})`);
    return;
  }

  const config = loaded.value;
  // Counted for the log line only; the writer receives the whole config.
  let denyCount = 0;
  for (const rule of config?.avoid ?? []) {
    if (rule.type === 'code_path') {
      denyCount += 1;
    }
  }

  await writeUserSettingsForCodePathAvoids(config);
  logger.info(`Synced code_path deny rules to user settings (${denyCount} entries)`);
}
/**
* Assemble the final report by concatenating per-class deliverables.
*
* Under exploit=true, each exploit agent has produced `*_exploitation_evidence.md`
* directly. Under exploit=false, exploit agents didn't run; we deterministically
* render `*_findings.md` from each `*_exploitation_queue.json` first, then assemble.
*/
export async function assembleReportActivity(input: ActivityInput, exploit: boolean): Promise<void> {
const { repoPath, deliverablesSubdir } = input;
const logger = createActivityLogger();
if (!exploit) {
logger.info('Rendering per-class findings from analysis queues...');
try {
await renderFindingsFromQueues(repoPath, deliverablesSubdir, logger);
} catch (error) {
const err = error as Error;
logger.warn(`Error rendering findings from queues: ${err.message}`);
}
}
logger.info('Assembling deliverables from specialist agents...');
try {
await assembleFinalReport(repoPath, deliverablesSubdir, logger);
@@ -438,6 +593,11 @@ export async function checkExploitationQueue(input: ActivityInput, vulnType: Vul
return checker.checkQueue(vulnType, delivPath, logger);
}
/**
 * Scope of a run as recorded under `session.scope` in session.json:
 * which vulnerability classes were selected and whether exploitation was enabled.
 */
interface RunScope {
vulnClasses: VulnClass[];
exploit: boolean;
}
interface SessionJson {
session: {
id: string;
@@ -445,6 +605,7 @@ interface SessionJson {
repoPath?: string;
originalWorkflowId?: string;
resumeAttempts?: ResumeAttempt[];
scope?: RunScope;
};
metrics: {
agents: Record<
@@ -562,6 +723,42 @@ export async function loadResumeState(
};
}
/**
 * Records the run scope in session.json on the first run and enforces it on
 * resume.
 *
 * When session.json has no recorded scope, the provided vuln classes and
 * exploit flag are written to it. When a scope is already recorded, the
 * provided one must match it: the same classes (order-insensitive) and the
 * same exploit flag.
 *
 * @throws ApplicationFailure (non-retryable, type 'ScopeMismatchError') when
 *   the provided scope differs from the recorded one.
 */
export async function persistOrValidateRunScope(
  input: ActivityInput,
  vulnClasses: VulnClass[],
  exploit: boolean,
): Promise<void> {
  const sessionMetadata = buildSessionMetadata(input);
  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize(input.workflowId);

  const sessionPath = generateSessionJsonPath(sessionMetadata);
  const session = await readJson<SessionJson>(sessionPath);
  const recorded = session.session.scope;

  // First run: nothing recorded yet, so persist the scope and finish.
  if (!recorded) {
    session.session.scope = { vulnClasses: [...vulnClasses], exploit };
    await atomicWrite(sessionPath, session);
    return;
  }

  // Same length plus mutual containment makes this an order-insensitive comparison.
  const containsAll = (haystack: readonly VulnClass[], needles: readonly VulnClass[]): boolean =>
    needles.every((c) => haystack.includes(c));
  const classesMatch =
    recorded.vulnClasses.length === vulnClasses.length &&
    containsAll(vulnClasses, recorded.vulnClasses) &&
    containsAll(recorded.vulnClasses, vulnClasses);

  if (classesMatch && recorded.exploit === exploit) {
    return;
  }

  const mismatchMessage = [
    `Resume scope mismatch for workspace ${input.sessionId}.`,
    ` Original: vuln_classes=[${recorded.vulnClasses.join(', ')}], exploit=${recorded.exploit}`,
    ` Provided: vuln_classes=[${vulnClasses.join(', ')}], exploit=${exploit}`,
    `Resume requires the same scope as the original run. Start a new workspace if you want different scope.`,
  ].join('\n');
  throw ApplicationFailure.nonRetryable(mismatchMessage, 'ScopeMismatchError');
}
async function findLatestCommit(gitDir: string, commitHashes: string[]): Promise<string> {
if (commitHashes.length === 1) {
const hash = commitHashes[0];
@@ -605,7 +802,7 @@ export async function restoreGitCheckpoint(
await executeGitCommandWithRetry(
['git', 'rev-parse', '--verify', checkpointHash],
repoPath,
'verify checkpoint hash exists'
'verify checkpoint hash exists',
);
} catch {
logger.info(`Checkpoint hash not found in clone, skipping git reset: ${checkpointHash}`);
@@ -729,7 +926,15 @@ export async function logWorkflowComplete(input: ActivityInput, summary: Workflo
// 5. Write completion entry to workflow.log
await auditSession.logWorkflowComplete(cumulativeSummary);
// 6. Clean up container
// 6. Drop the authenticated browser session
try {
await fs.rm(authStateFile(sessionMetadata), { force: true });
} catch (error) {
const detail = error instanceof Error ? error.message : String(error);
console.warn(`Failed to clean up auth-state.json: ${detail}`);
}
// 7. Clean up container
removeContainer(workflowId);
}
@@ -787,17 +992,7 @@ export async function generateReportOutputActivity(input: ActivityInput): Promis
const logger = createActivityLogger();
// Resolve promptDir against the worker root so providers are cwd-independent.
const resolvedInput: ActivityInput = {
...input,
...(input.promptDir !== undefined && {
promptDir: path.isAbsolute(input.promptDir)
? input.promptDir
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
}),
};
const result = await container.reportOutputProvider.generate(resolvedInput, logger);
const result = await container.reportOutputProvider.generate(input, logger);
if (result.outputPath) {
logger.info(`Report output written to ${result.outputPath}`);
}
+2 -2
View File
@@ -5,7 +5,7 @@
* within their own workflow context.
*/
export { pentestPipeline } from './workflows.js';
export type { ActivityInput } from './activities.js';
export type {
AgentMetrics,
PipelineInput,
@@ -14,4 +14,4 @@ export type {
ResumeState,
VulnExploitPipelineResult,
} from './shared.js';
export type { ActivityInput } from './activities.js';
export { pentestPipeline } from './workflows.js';
+3 -1
View File
@@ -2,7 +2,7 @@ import { defineQuery } from '@temporalio/workflow';
export type { AgentMetrics } from '../types/metrics.js';
import type { DistributedConfig, PipelineConfig, ProviderConfig } from '../types/config.js';
import type { DistributedConfig, PipelineConfig, ProviderConfig, VulnClass } from '../types/config.js';
import type { ErrorCode } from '../types/errors.js';
import type { AgentMetrics } from '../types/metrics.js';
@@ -29,6 +29,8 @@ export interface PipelineInput {
checkpointsEnabled?: boolean; // Enable checkpoint activities (default: false)
skipGitCheck?: boolean; // Skip .git directory validation in preflight (e.g. when .git is removed after clone)
providerConfig?: ProviderConfig; // LLM provider configuration (Bedrock, Vertex, etc.)
vulnClasses?: VulnClass[]; // omitted = all five
exploit?: boolean; // false skips the exploitation phase
}
export interface ResumeState {
+27 -16
View File
@@ -36,7 +36,7 @@ import dotenv from 'dotenv';
import { sanitizeHostname } from '../audit/utils.js';
import { parseConfig } from '../config-parser.js';
import { deliverablesDir } from '../paths.js';
import type { PipelineConfig } from '../types/config.js';
import type { PipelineConfig, VulnClass } from '../types/config.js';
import { fileExists, readJson } from '../utils/file-io.js';
import * as activities from './activities.js';
import type { PipelineInput, PipelineProgress, PipelineState } from './shared.js';
@@ -275,30 +275,39 @@ async function resolveWorkspace(client: Client, args: CliArgs): Promise<Workspac
// === Pipeline Input Construction ===
async function loadPipelineConfig(configPath: string | undefined): Promise<PipelineConfig> {
if (!configPath) return {};
interface OrchestrationConfig {
pipelineConfig: PipelineConfig;
vulnClasses?: VulnClass[];
exploit?: boolean;
}
async function loadOrchestrationConfig(configPath: string | undefined): Promise<OrchestrationConfig> {
if (!configPath) return { pipelineConfig: {} };
try {
const config = await parseConfig(configPath);
const raw = config.pipeline;
if (!raw) return {};
const result: PipelineConfig = {};
if (raw.retry_preset !== undefined) {
result.retry_preset = raw.retry_preset;
const pipelineConfig: PipelineConfig = {};
if (config.pipeline?.retry_preset !== undefined) {
pipelineConfig.retry_preset = config.pipeline.retry_preset;
}
if (raw.max_concurrent_pipelines !== undefined) {
result.max_concurrent_pipelines = Number(raw.max_concurrent_pipelines);
if (config.pipeline?.max_concurrent_pipelines !== undefined) {
pipelineConfig.max_concurrent_pipelines = Number(config.pipeline.max_concurrent_pipelines);
}
return result;
return {
pipelineConfig,
...(config.vuln_classes && config.vuln_classes.length > 0 && { vulnClasses: [...config.vuln_classes] }),
...(config.exploit !== undefined && { exploit: config.exploit === 'true' }),
};
} catch {
return {};
return { pipelineConfig: {} };
}
}
function buildPipelineInput(
args: CliArgs,
workspace: WorkspaceResolution,
pipelineConfig: PipelineConfig,
orchestration: OrchestrationConfig,
): PipelineInput {
return {
webUrl: args.webUrl,
@@ -309,7 +318,9 @@ function buildPipelineInput(
...(args.pipelineTestingMode && { pipelineTestingMode: args.pipelineTestingMode }),
...(workspace.isResume && args.resumeFromWorkspace && { resumeFromWorkspace: args.resumeFromWorkspace }),
...(workspace.terminatedWorkflows.length > 0 && { terminatedWorkflows: workspace.terminatedWorkflows }),
...(Object.keys(pipelineConfig).length > 0 && { pipelineConfig }),
...(Object.keys(orchestration.pipelineConfig).length > 0 && { pipelineConfig: orchestration.pipelineConfig }),
...(orchestration.vulnClasses && { vulnClasses: orchestration.vulnClasses }),
...(orchestration.exploit !== undefined && { exploit: orchestration.exploit }),
};
}
@@ -417,8 +428,8 @@ async function run(): Promise<void> {
// 4. Resolve workspace and build pipeline input
const workspace = await resolveWorkspace(client, args);
const pipelineConfig = await loadPipelineConfig(args.configPath);
const input = buildPipelineInput(args, workspace, pipelineConfig);
const orchestration = await loadOrchestrationConfig(args.configPath);
const input = buildPipelineInput(args, workspace, orchestration);
// 5. Start worker polling in the background
const workerDone = worker.run();
+90 -19
View File
@@ -33,6 +33,7 @@ import {
} from '@temporalio/workflow';
import type { AgentName, VulnType } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import { ALL_VULN_CLASSES, type VulnClass } from '../types/config.js';
import type * as activities from './activities.js';
import type { ActivityInput } from './activities.js';
import {
@@ -48,6 +49,19 @@ import {
import { toWorkflowSummary } from './summary-mapper.js';
import { classifyErrorCode, formatWorkflowError } from './workflow-errors.js';
/**
 * List the agent names a run with the given scope is expected to produce.
 * The result drives the resume short-circuit.
 *
 * The list always starts with `pre-recon` and `recon` and ends with `report`. In between,
 * each selected class contributes `<class>-vuln`, followed by `<class>-exploit` when
 * `exploit` is true, in the order the classes were given.
 *
 * @param vulnClasses - Vulnerability classes selected for the run.
 * @param exploit - Whether exploit agents are part of the run.
 * @returns Expected agent names in pipeline order.
 */
function computeExpectedAgents(vulnClasses: readonly VulnClass[], exploit: boolean): string[] {
  const perClassAgents = vulnClasses.flatMap((cls) =>
    exploit ? [`${cls}-vuln`, `${cls}-exploit`] : [`${cls}-vuln`],
  );
  return ['pre-recon', 'recon', ...perClassAgents, 'report'];
}
// Retry configuration for production (long intervals for billing recovery)
const PRODUCTION_RETRY = {
initialInterval: '5 minutes',
@@ -62,6 +76,7 @@ const PRODUCTION_RETRY = {
'ConfigurationError',
'InvalidTargetError',
'ExecutionLimitError',
'AuthLoginFailedError',
],
};
@@ -120,6 +135,22 @@ const preflightActs = proxyActivities<typeof activities>({
retry: PREFLIGHT_RETRY,
});
// Retry policy for the authentication-validation activity.
// Credential rejection is not retryable; transient SDK errors get 3 attempts.
const AUTH_VALIDATION_RETRY = {
initialInterval: '10 seconds',
maximumInterval: '1 minute',
backoffCoefficient: 2,
// Caps retries for errors whose type is not in the non-retryable list below.
maximumAttempts: 3,
// Reuses the production list so both policies stop retrying on the same error types.
nonRetryableErrorTypes: PRODUCTION_RETRY.nonRetryableErrorTypes,
};
// Browser-driving validation measured at 60–180s; 10 min start-to-close leaves headroom for slow SSO/MFA flows.
// Activity proxy configured with the auth-validation timeouts and AUTH_VALIDATION_RETRY.
const authValidationActs = proxyActivities<typeof activities>({
startToCloseTimeout: '10 minutes',
heartbeatTimeout: '10 minutes',
retry: AUTH_VALIDATION_RETRY,
});
/**
* Compute aggregated metrics from the current pipeline state.
* Called on both success and failure to provide partial metrics.
@@ -215,22 +246,42 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
};
const selectedVulnClasses: readonly VulnClass[] =
input.vulnClasses && input.vulnClasses.length > 0 ? input.vulnClasses : ALL_VULN_CLASSES;
const selectedClassSet = new Set<VulnClass>(selectedVulnClasses);
const exploit: boolean = input.exploit ?? true;
const expectedAgents = computeExpectedAgents(selectedVulnClasses, exploit);
await a.persistOrValidateRunScope(activityInput, [...selectedVulnClasses], exploit);
let resumeState: ResumeState | null = null;
if (input.resumeFromWorkspace) {
// 1. Load resume state (validates workspace, cross-checks deliverables)
resumeState = await a.loadResumeState(input.resumeFromWorkspace, input.webUrl, input.repoPath, input.deliverablesSubdir);
resumeState = await a.loadResumeState(
input.resumeFromWorkspace,
input.webUrl,
input.repoPath,
input.deliverablesSubdir,
);
// 2. Restore git workspace and clean up incomplete deliverables
const incompleteAgents = ALL_AGENTS.filter(
(agentName) => !resumeState?.completedAgents.includes(agentName),
) as AgentName[];
await a.restoreGitCheckpoint(input.repoPath, resumeState.checkpointHash, incompleteAgents, input.deliverablesSubdir);
await a.restoreGitCheckpoint(
input.repoPath,
resumeState.checkpointHash,
incompleteAgents,
input.deliverablesSubdir,
);
// 3. Short-circuit if all agents already completed
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
// 3. Short-circuit when every agent expected by this run is done.
// Uses dynamic expectedAgents (not ALL_AGENTS) so a class-scoped run completes sooner.
const allExpectedDone = expectedAgents.every((a) => resumeState?.completedAgents.includes(a));
if (allExpectedDone) {
log.info(`All ${expectedAgents.length} expected agents already completed. Nothing to resume.`);
state.status = 'completed';
state.completedAgents = [...resumeState.completedAgents];
state.summary = computeSummary(state);
@@ -386,9 +437,26 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
await preflightActs.runPreflightValidation(activityInput);
log.info('Preflight validation passed');
// === Playwright stealth config ===
// Write the playwright-cli config before any browser session opens so the
// validator and downstream agents inherit anti-detection defaults.
await preflightActs.syncPlaywrightStealthConfig(activityInput);
// === Authentication Validation ===
state.currentPhase = 'auth-validation';
state.currentAgent = 'validate-authentication';
await authValidationActs.runAuthenticationValidation(activityInput);
state.currentAgent = null;
log.info('Authentication validation passed');
// === Initialize Deliverables Git ===
await a.initDeliverableGit(activityInput);
// === Sync SDK deny rules ===
await a.syncCodePathDenyRules(activityInput);
log.info(`Run scope: vuln_classes=[${selectedVulnClasses.join(', ')}] exploit=${exploit}`);
// === Phase 1: Pre-Reconnaissance ===
await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
@@ -432,19 +500,17 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
// 2. Check exploitation queue for actionable findings
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// 3. Conditionally run exploitation agent
// 3. Previously-completed exploits are preserved regardless of mode; new exploits gated by mode.
let exploitMetrics: AgentMetrics | null = null;
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {
exploitMetrics = await runExploitAgent();
state.agentMetrics[exploitAgentName] = exploitMetrics;
state.completedAgents.push(exploitAgentName);
if (input.checkpointsEnabled) {
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
}
} else {
log.info(`Skipping ${exploitAgentName} (already complete)`);
state.completedAgents.push(exploitAgentName);
if (shouldSkip(exploitAgentName)) {
log.info(`Skipping ${exploitAgentName} (already complete)`);
state.completedAgents.push(exploitAgentName);
} else if (decision.shouldExploit && exploit) {
exploitMetrics = await runExploitAgent();
state.agentMetrics[exploitAgentName] = exploitMetrics;
state.completedAgents.push(exploitAgentName);
if (input.checkpointsEnabled) {
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
}
}
@@ -466,6 +532,11 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
const pipelineThunks: Array<() => Promise<VulnExploitPipelineResult>> = [];
for (const config of pipelineConfigs) {
// Excluded classes drop entirely; any prior deliverables stay on disk but don't count this run.
if (!selectedClassSet.has(config.vulnType)) {
log.info(`Skipping ${config.vulnType} pipeline (class not selected this run)`);
continue;
}
if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
pipelineThunks.push(() => runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit));
} else {
@@ -487,8 +558,8 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
state.currentAgent = 'report';
await a.logPhaseTransition(activityInput, 'reporting', 'start');
// First, assemble the concatenated report from exploitation evidence files
await a.assembleReportActivity(activityInput);
// First, assemble the concatenated report from per-class deliverables
await a.assembleReportActivity(activityInput, exploit);
// Then run the report agent to add executive summary and clean up
state.agentMetrics.report = await a.runReportAgent(activityInput);
+32 -4
View File
@@ -8,12 +8,12 @@
* Configuration type definitions
*/
export type RuleType = 'path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter';
export type RuleType = 'url_path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter' | 'code_path';
export interface Rule {
description: string;
type: RuleType;
url_path: string;
value: string;
}
export interface Rules {
@@ -21,6 +21,19 @@ export interface Rules {
focus?: Rule[];
}
/**
 * Canonical list of vulnerability classes.
 *
 * Single source of truth: the `VulnClass` union is derived from this tuple, so adding or
 * removing a class here updates the type as well and the two cannot drift apart.
 * The tuple remains assignable to `readonly VulnClass[]`.
 */
export const ALL_VULN_CLASSES = ['injection', 'xss', 'auth', 'authz', 'ssrf'] as const;

/** Name of one vulnerability class; the union of the entries in `ALL_VULN_CLASSES`. */
export type VulnClass = (typeof ALL_VULN_CLASSES)[number];
/** Severity labels accepted by `ReportConfig.min_severity`. */
export type Severity = 'low' | 'medium' | 'high' | 'critical';
/** Confidence labels accepted by `ReportConfig.min_confidence`. */
export type Confidence = 'low' | 'medium' | 'high';
/** Optional report settings; every field may be omitted. */
export interface ReportConfig {
// NOTE(review): presumably the lowest severity a finding needs to be reported — confirm against the report renderer.
min_severity?: Severity;
// NOTE(review): presumably the lowest confidence a finding needs to be reported — confirm against the report renderer.
min_confidence?: Confidence;
// Free-form text. NOTE(review): presumably forwarded to the report agent — confirm against the caller.
guidance?: string;
}
export type LoginType = 'form' | 'sso' | 'api' | 'basic';
export interface SuccessCondition {
@@ -28,12 +41,19 @@ export interface SuccessCondition {
value: string;
}
export interface Credentials {
username: string;
export interface EmailLogin {
address: string;
password: string;
totp_secret?: string;
}
export interface Credentials {
username: string;
password?: string;
totp_secret?: string;
email_login?: EmailLogin;
}
export interface Authentication {
login_type: LoginType;
login_url: string;
@@ -47,6 +67,10 @@ export interface Config {
authentication?: Authentication;
pipeline?: PipelineConfig;
description?: string;
vuln_classes?: VulnClass[];
exploit?: 'true' | 'false';
report?: ReportConfig;
rules_of_engagement?: string;
}
export type RetryPreset = 'default' | 'subscription';
@@ -61,6 +85,10 @@ export interface DistributedConfig {
focus: Rule[];
authentication: Authentication | null;
description: string;
vuln_classes: VulnClass[];
exploit: boolean;
report: ReportConfig;
rules_of_engagement: string;
}
/**
+3 -10
View File
@@ -11,7 +11,7 @@
/**
* Specific error codes for reliable classification.
*
* ErrorCode provides precision within the coarse 8-category PentestErrorType.
* ErrorCode provides precision within the coarse 7-category PentestErrorType.
* Used by classifyErrorForTemporal for code-based classification (preferred)
* with string matching as fallback for external errors.
*/
@@ -44,18 +44,11 @@ export enum ErrorCode {
REPO_NOT_FOUND = 'REPO_NOT_FOUND',
TARGET_UNREACHABLE = 'TARGET_UNREACHABLE',
AUTH_FAILED = 'AUTH_FAILED',
AUTH_LOGIN_FAILED = 'AUTH_LOGIN_FAILED',
BILLING_ERROR = 'BILLING_ERROR',
}
export type PentestErrorType =
| 'config'
| 'network'
| 'tool'
| 'prompt'
| 'filesystem'
| 'validation'
| 'billing'
| 'unknown';
export type PentestErrorType = 'config' | 'network' | 'prompt' | 'filesystem' | 'validation' | 'billing' | 'unknown';
export interface PentestErrorContext {
[key: string]: unknown;
+11
View File
@@ -0,0 +1,11 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { glob } from 'zx';
/**
 * Report whether a string should be treated as a glob pattern rather than a literal value.
 *
 * Delegates the decision entirely to `glob.isDynamicPattern` from `zx`.
 *
 * @param value - Candidate string to inspect.
 * @returns The result of `glob.isDynamicPattern(value)`.
 */
export function isGlobPattern(value: string): boolean {
  const hasGlobSyntax = glob.isDynamicPattern(value);
  return hasGlobSyntax;
}
+1 -1
View File
@@ -4,7 +4,7 @@ networks:
services:
temporal:
image: temporalio/temporal:latest
image: temporalio/temporal:1.7.0
container_name: shannon-temporal
command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"]
ports:
+861 -175
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -2,4 +2,4 @@ packages:
- "apps/*"
catalog:
"@anthropic-ai/claude-agent-sdk": ^0.2.38
"@anthropic-ai/claude-agent-sdk": ^0.2.114