mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-07-01 02:55:37 +02:00
Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ca86c839cc | |||
| 0a57b062fd | |||
| 46be49c175 | |||
| 95998d1a44 | |||
| 6c8135d031 | |||
| 03a3d764af | |||
| 79caada539 | |||
| dcabe6e82e | |||
| ccb5303106 | |||
| 581c208b84 | |||
| 01644ff2ed | |||
| 0ce34c9c27 | |||
| 671d41699e | |||
| 8ca34dad69 | |||
| a111863778 | |||
| 3f83a51e22 | |||
| c78ae0b3b6 | |||
| c0794bccf6 | |||
| 1f6dfd7e17 | |||
| f6fd1edad6 | |||
| 77e300d52a | |||
| 99629c2b66 | |||
| 2a433f090f | |||
| 6a0c8ce710 |
@@ -135,7 +135,6 @@ shannon <URL> <REPO> --pipeline-testing
|
||||
|-------------------|---------|------------|
|
||||
| `config` | Configuration file issues | No |
|
||||
| `network` | Connection/timeout issues | Yes |
|
||||
| `tool` | External tool (nmap, etc.) failed | Yes |
|
||||
| `prompt` | Claude SDK/API issues | Sometimes |
|
||||
| `filesystem` | File read/write errors | Sometimes |
|
||||
| `validation` | Deliverable validation failed | Yes (via retry) |
|
||||
|
||||
+2
-2
@@ -1,5 +1,5 @@
|
||||
# Node.js
|
||||
node_modules/
|
||||
**/node_modules/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
@@ -49,7 +49,7 @@ Thumbs.db
|
||||
# CLI package (runs on host, not in container)
|
||||
# Keep apps/cli/package.json so pnpm workspaces resolve
|
||||
apps/cli/src/
|
||||
apps/cli/dist/
|
||||
**/dist/
|
||||
apps/cli/infra/
|
||||
apps/cli/tsconfig.json
|
||||
apps/cli/tsdown.config.ts
|
||||
|
||||
+9
-26
@@ -4,8 +4,11 @@
|
||||
# Recommended output token configuration for larger tool outputs
|
||||
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
|
||||
|
||||
# Adaptive thinking is enabled automatically on Opus 4.6/4.7. Set to false to disable.
|
||||
# CLAUDE_ADAPTIVE_THINKING=false
|
||||
|
||||
# =============================================================================
|
||||
# OPTION 1: Direct Anthropic (default, no router)
|
||||
# OPTION 1: Direct Anthropic
|
||||
# =============================================================================
|
||||
ANTHROPIC_API_KEY=your-api-key-here
|
||||
|
||||
@@ -19,20 +22,6 @@ ANTHROPIC_API_KEY=your-api-key-here
|
||||
# ANTHROPIC_BASE_URL=https://your-proxy.example.com
|
||||
# ANTHROPIC_AUTH_TOKEN=your-auth-token # Auth token for the custom endpoint
|
||||
|
||||
# =============================================================================
|
||||
# OPTION 3: Router Mode (use alternative providers)
|
||||
# =============================================================================
|
||||
# Enable router mode by running: ./shannon start ... ROUTER=true
|
||||
# Then configure ONE of the providers below:
|
||||
|
||||
# --- OpenAI ---
|
||||
# OPENAI_API_KEY=sk-your-openai-key
|
||||
# ROUTER_DEFAULT=openai,gpt-5.2
|
||||
|
||||
# --- OpenRouter (access Gemini 3 models via single API) ---
|
||||
# OPENROUTER_API_KEY=sk-or-your-openrouter-key
|
||||
# ROUTER_DEFAULT=openrouter,google/gemini-3-flash-preview
|
||||
|
||||
# =============================================================================
|
||||
# Model Tier Overrides (Anthropic API / OAuth / Custom Base URL / Bedrock)
|
||||
# =============================================================================
|
||||
@@ -40,24 +29,24 @@ ANTHROPIC_API_KEY=your-api-key-here
|
||||
# Optional for direct Anthropic and custom base URL modes. Required for Bedrock/Vertex.
|
||||
# ANTHROPIC_SMALL_MODEL=... # Small tier (default: claude-haiku-4-5-20251001)
|
||||
# ANTHROPIC_MEDIUM_MODEL=... # Medium tier (default: claude-sonnet-4-6)
|
||||
# ANTHROPIC_LARGE_MODEL=... # Large tier (default: claude-opus-4-6)
|
||||
# ANTHROPIC_LARGE_MODEL=... # Large tier (default: claude-opus-4-7)
|
||||
|
||||
# =============================================================================
|
||||
# OPTION 4: AWS Bedrock
|
||||
# OPTION 3: AWS Bedrock
|
||||
# =============================================================================
|
||||
# https://aws.amazon.com/blogs/machine-learning/accelerate-ai-development-with-amazon-bedrock-api-keys/
|
||||
# Requires the model tier overrides above to be set with Bedrock-specific model IDs.
|
||||
# Example Bedrock model IDs for us-east-1:
|
||||
# ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
|
||||
# ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
|
||||
# ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
|
||||
# ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
|
||||
|
||||
# CLAUDE_CODE_USE_BEDROCK=1
|
||||
# AWS_REGION=us-east-1
|
||||
# AWS_BEARER_TOKEN_BEDROCK=your-bearer-token
|
||||
|
||||
# =============================================================================
|
||||
# OPTION 5: Google Vertex AI
|
||||
# OPTION 4: Google Vertex AI
|
||||
# =============================================================================
|
||||
# https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models
|
||||
# Requires a GCP service account with roles/aiplatform.user.
|
||||
@@ -66,15 +55,9 @@ ANTHROPIC_API_KEY=your-api-key-here
|
||||
# Example Vertex AI model IDs:
|
||||
# ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
|
||||
# ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
|
||||
# ANTHROPIC_LARGE_MODEL=claude-opus-4-6
|
||||
# ANTHROPIC_LARGE_MODEL=claude-opus-4-7
|
||||
|
||||
# CLAUDE_CODE_USE_VERTEX=1
|
||||
# CLOUD_ML_REGION=us-east5
|
||||
# ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
|
||||
# GOOGLE_APPLICATION_CREDENTIALS=./credentials/google-sa-key.json
|
||||
|
||||
# =============================================================================
|
||||
# Available Models
|
||||
# =============================================================================
|
||||
# OpenAI: gpt-5.2, gpt-5-mini
|
||||
# OpenRouter: google/gemini-3-flash-preview
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
*.sh text eol=lf
|
||||
@@ -55,7 +55,7 @@ body:
|
||||
label: If applicable
|
||||
options:
|
||||
- label: I have included relevant error messages, stack traces, or failure details.
|
||||
- label: I have checked the audit logs and pasted the relevant errors.
|
||||
- label: I have checked the workspaces folder for logs and pasted the relevant errors.
|
||||
- label: I have inspected the failed Temporal workflow run and included the failure reason.
|
||||
- label: I have included clear steps to reproduce the issue.
|
||||
- label: I have redacted any sensitive information (tokens, URLs, repo names).
|
||||
@@ -69,7 +69,9 @@ body:
|
||||
|
||||
Issues without this information may be difficult to triage.
|
||||
|
||||
- Check the logs at: `./workspaces/target_url_shannon-123/workflow.log`
|
||||
- Check the workflow log:
|
||||
- **npx mode:** `~/.shannon/workspaces/<workspace>/workflow.log`
|
||||
- **Local mode:** `./workspaces/<workspace>/workflow.log`
|
||||
Use `grep` or search to identify errors.
|
||||
Paste the relevant error output below.
|
||||
- Temporal:
|
||||
@@ -83,13 +85,13 @@ body:
|
||||
id: debugging-details
|
||||
attributes:
|
||||
label: Debugging details
|
||||
description: Paste any error messages, stack traces, or failure details from the audit logs or Temporal UI.
|
||||
description: Paste any error messages, stack traces, or failure details from the workspace logs or Temporal UI.
|
||||
|
||||
- type: textarea
|
||||
id: screenshots
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: If applicable, add screenshots of the audit logs or Temporal failure details.
|
||||
description: If applicable, add screenshots of the workspace logs or Temporal failure details.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
@@ -99,35 +101,36 @@ body:
|
||||
Provide the following information (redact sensitive data such as repository names, URLs, and tokens):
|
||||
|
||||
- type: dropdown
|
||||
id: auth-method
|
||||
id: cli-mode
|
||||
attributes:
|
||||
label: Authentication method used
|
||||
label: CLI mode
|
||||
options:
|
||||
- CLAUDE_CODE_OAUTH_TOKEN
|
||||
- ANTHROPIC_API_KEY
|
||||
- "npx (@keygraph/shannon)"
|
||||
- "Local (./shannon)"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: provider
|
||||
attributes:
|
||||
label: Provider
|
||||
options:
|
||||
- "Anthropic (API key)"
|
||||
- "Anthropic (OAuth token)"
|
||||
- "Custom base URL (proxy/gateway)"
|
||||
- "AWS Bedrock"
|
||||
- "Google Vertex AI"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: shannon-command
|
||||
attributes:
|
||||
label: Full ./shannon command with all flags used (with redactions)
|
||||
|
||||
- type: dropdown
|
||||
id: experimental-models
|
||||
attributes:
|
||||
label: Are you using any experimental models or providers other than default Anthropic models?
|
||||
options:
|
||||
- "No"
|
||||
- "Yes"
|
||||
label: Full command with all flags used (with redactions)
|
||||
placeholder: "e.g. npx @keygraph/shannon start -u <url> -r my-repo OR ./shannon start -u <url> -r my-repo"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: experimental-model-details
|
||||
attributes:
|
||||
label: If Yes, which one (model/provider)?
|
||||
|
||||
- type: input
|
||||
id: os-version
|
||||
attributes:
|
||||
@@ -136,6 +139,14 @@ body:
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: node-version
|
||||
attributes:
|
||||
label: "Node.js version ('node -v')"
|
||||
placeholder: "e.g. 22.12.0"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: docker-version
|
||||
attributes:
|
||||
|
||||
@@ -20,6 +20,15 @@ body:
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: cli-mode
|
||||
attributes:
|
||||
label: Which CLI mode does this apply to?
|
||||
options:
|
||||
- Both
|
||||
- "npx (@keygraph/shannon)"
|
||||
- "Local (./shannon)"
|
||||
|
||||
- type: textarea
|
||||
id: alternatives-considered
|
||||
attributes:
|
||||
|
||||
@@ -1,2 +1,4 @@
|
||||
auto-install-peers=true
|
||||
strict-peer-dependencies=false
|
||||
minimum-release-age=10080
|
||||
ignore-scripts=true
|
||||
|
||||
@@ -82,7 +82,7 @@ pnpm biome:fix # Auto-fix lint, format, and import sorting
|
||||
|
||||
**Monorepo tooling:** pnpm workspaces, Turborepo for task orchestration, Biome for linting/formatting. TypeScript compiler options shared via `tsconfig.base.json` at the root. All packages extend it, overriding only `rootDir` and `outDir`. Shared devDependencies (`typescript`, `@types/node`, `turbo`, `@biomejs/biome`) are hoisted to the root workspace.
|
||||
|
||||
**Options:** `-c <file>` (YAML config), `-o <path>` (output directory), `-w <name>` (named workspace; auto-resumes if exists), `--pipeline-testing` (minimal prompts, 10s retries), `--router` (multi-model routing via [claude-code-router](https://github.com/musistudio/claude-code-router))
|
||||
**Options:** `-c <file>` (YAML config), `-o <path>` (output directory), `-w <name>` (named workspace; auto-resumes if exists), `--pipeline-testing` (minimal prompts, 10s retries), `--debug` (preserve worker container after exit for log inspection)
|
||||
|
||||
## Architecture
|
||||
|
||||
@@ -106,14 +106,14 @@ Published as `@keygraph/shannon` on npm. Contains only Docker orchestration logi
|
||||
- `apps/cli/src/commands/setup.ts` — Interactive TUI wizard (`@clack/prompts`) for provider credential setup (npx only)
|
||||
- `apps/cli/src/paths.ts` — Repo/config path resolution (bare name → `./repos/<name>`, or any absolute/relative path)
|
||||
- `apps/cli/src/commands/` — Command handlers
|
||||
- `apps/cli/infra/compose.yml` — Bundled Temporal + router compose file for npx mode
|
||||
- `apps/cli/infra/compose.yml` — Bundled Temporal compose file for npx mode
|
||||
- `apps/cli/tsdown.config.ts` — tsdown bundler config
|
||||
- `shannon` — Node.js entry point (`#!/usr/bin/env node`) that delegates to `apps/cli/dist/index.mjs`
|
||||
|
||||
### Docker Architecture
|
||||
Infra (Temporal + router) runs via `docker-compose.yml`. Workers are ephemeral `docker run --rm` containers, one per scan, each with a unique task queue and isolated volume mounts.
|
||||
Infra (Temporal) runs via `docker-compose.yml`. Workers are ephemeral `docker run --rm` containers, one per scan, each with a unique task queue and isolated volume mounts.
|
||||
|
||||
- `docker-compose.yml` — Infra only: `shannon-temporal` (port 7233/8233) and `shannon-router` (port 3456, optional via profile). Network: `shannon-net`
|
||||
- `docker-compose.yml` — Infra only: `shannon-temporal` (port 7233/8233). Network: `shannon-net`
|
||||
- `Dockerfile` — 2-stage build (builder + Chainguard Wolfi runtime). Uses pnpm. Entrypoint: `CMD ["node", "apps/worker/dist/temporal/worker.js"]`
|
||||
- No `docker-compose.docker.yml` — host gateway handled via `--add-host` flag in CLI
|
||||
|
||||
@@ -137,16 +137,16 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
|
||||
- `apps/worker/src/temporal/shared.ts` — Types, interfaces, query definitions
|
||||
### Five-Phase Pipeline
|
||||
|
||||
1. **Pre-Recon** (`pre-recon`) — External scans (nmap, subfinder, whatweb) + source code analysis
|
||||
1. **Pre-Recon** (`pre-recon`) — Source code analysis to build the architectural baseline
|
||||
2. **Recon** (`recon`) — Attack surface mapping from initial findings
|
||||
3. **Vulnerability Analysis** (5 parallel agents) — injection, xss, auth, authz, ssrf
|
||||
4. **Exploitation** (5 parallel agents, conditional) — Exploits confirmed vulnerabilities
|
||||
5. **Reporting** (`report`) — Executive-level security report
|
||||
|
||||
### Supporting Systems
|
||||
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters. Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
|
||||
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`
|
||||
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
|
||||
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are written into `~/.claude/settings.json` `permissions.deny` (`Read`/`Edit`) once per workflow by `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` so the SDK enforces them at the tool layer even in `bypassPermissions` mode. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
|
||||
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop
|
||||
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Adaptive thinking is enabled by default on Opus 4.6/4.7 (`supportsAdaptiveThinking` in `apps/worker/src/ai/models.ts`); disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (env) or `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
|
||||
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive
|
||||
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`)
|
||||
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts`
|
||||
@@ -227,12 +227,16 @@ Comments must be **timeless** — no references to this conversation, refactorin
|
||||
|
||||
**Entry Points:** `apps/worker/src/temporal/workflows.ts`, `apps/worker/src/temporal/activities.ts`, `apps/worker/src/temporal/worker.ts`
|
||||
|
||||
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/config-parser.ts`, `apps/worker/src/services/`, `apps/worker/src/audit/`
|
||||
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to `~/.claude/settings.json`), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/`
|
||||
|
||||
**Config:** `docker-compose.yml`, `apps/cli/infra/compose.yml`, `apps/worker/configs/`, `apps/worker/prompts/`, `tsconfig.base.json` (shared compiler options), `turbo.json`, `biome.json`
|
||||
|
||||
**CI/CD:** `.github/workflows/release.yml` (Docker Hub push + npm publish + GitHub release, manual dispatch)
|
||||
|
||||
## Package Installation
|
||||
|
||||
Package managers are configured with a minimum release age (7 days). Requires pnpm >= 10.16.0. If `pnpm install` fails due to a package being too new, **do not attempt to bypass it** — report the blocked package to the user and stop.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **"Repository not found"** — Pass a bare name (`-r my-repo`) for `./repos/my-repo`, or a path (`-r /path/to/repo`) for any directory
|
||||
@@ -240,5 +244,4 @@ Comments must be **timeless** — no references to this conversation, refactorin
|
||||
- **Worker not processing** — Check `docker ps --filter "name=shannon-worker-"`
|
||||
- **Reset state** — `./shannon stop --clean`
|
||||
- **Local apps unreachable** — Use `host.docker.internal` instead of `localhost`
|
||||
- **Missing tools** — Use `--pipeline-testing` to skip nmap/subfinder/whatweb (graceful degradation)
|
||||
- **Container permissions** — On Linux, may need `sudo` for docker commands
|
||||
|
||||
+5
-54
@@ -13,46 +13,14 @@ RUN apk update && apk add --no-cache \
|
||||
curl \
|
||||
wget \
|
||||
ca-certificates \
|
||||
# Network libraries for Go tools
|
||||
libpcap-dev \
|
||||
linux-headers \
|
||||
# Language runtimes
|
||||
go \
|
||||
nodejs-22 \
|
||||
npm \
|
||||
python3 \
|
||||
py3-pip \
|
||||
ruby \
|
||||
ruby-dev \
|
||||
# Security tools available in Wolfi
|
||||
nmap \
|
||||
# Additional utilities
|
||||
bash
|
||||
|
||||
# Set environment variables for Go
|
||||
ENV GOPATH=/go
|
||||
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
ENV CGO_ENABLED=1
|
||||
|
||||
# Create directories
|
||||
RUN mkdir -p $GOPATH/bin
|
||||
|
||||
# Install Go-based security tools
|
||||
RUN go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@v2.13.0
|
||||
# Install WhatWeb from release tarball (Ruby-based tool)
|
||||
RUN curl -sL https://github.com/urbanadventurer/WhatWeb/archive/refs/tags/v0.6.3.tar.gz | tar xz -C /opt && \
|
||||
mv /opt/WhatWeb-0.6.3 /opt/whatweb && \
|
||||
chmod +x /opt/whatweb/whatweb && \
|
||||
gem install addressable -v 2.8.9 && \
|
||||
echo '#!/bin/bash' > /usr/local/bin/whatweb && \
|
||||
echo 'cd /opt/whatweb && exec ./whatweb "$@"' >> /usr/local/bin/whatweb && \
|
||||
chmod +x /usr/local/bin/whatweb
|
||||
|
||||
# Install Python-based tools
|
||||
RUN pip3 install --no-cache-dir schemathesis==4.13.0
|
||||
|
||||
# Install pnpm
|
||||
RUN npm install -g pnpm@10.12.1
|
||||
RUN npm install -g pnpm@10.33.0
|
||||
|
||||
# Build Node.js application in builder to avoid QEMU emulation failures in CI
|
||||
WORKDIR /app
|
||||
@@ -69,7 +37,8 @@ COPY . .
|
||||
# Build worker. CLI not needed in Docker
|
||||
RUN pnpm --filter @shannon/worker run build
|
||||
|
||||
RUN pnpm prune --prod
|
||||
# Production-only deps (pnpm recommends install --prod over prune in monorepos)
|
||||
RUN rm -rf node_modules apps/*/node_modules && pnpm install --frozen-lockfile --prod
|
||||
|
||||
# Runtime stage - Minimal production image
|
||||
FROM cgr.dev/chainguard/wolfi-base:latest AS runtime
|
||||
@@ -82,15 +51,11 @@ RUN apk update && apk add --no-cache \
|
||||
bash \
|
||||
curl \
|
||||
ca-certificates \
|
||||
# Network libraries (runtime)
|
||||
libpcap \
|
||||
# Security tools
|
||||
nmap \
|
||||
shadow \
|
||||
# Language runtimes (minimal)
|
||||
nodejs-22 \
|
||||
npm \
|
||||
python3 \
|
||||
ruby \
|
||||
# Chromium browser and dependencies for Playwright
|
||||
chromium \
|
||||
# Additional libraries Chromium needs
|
||||
@@ -108,20 +73,6 @@ RUN apk update && apk add --no-cache \
|
||||
# Font rendering
|
||||
fontconfig
|
||||
|
||||
# Copy Go binaries from builder
|
||||
COPY --from=builder /go/bin/subfinder /usr/local/bin/
|
||||
|
||||
# Copy WhatWeb from builder
|
||||
COPY --from=builder /opt/whatweb /opt/whatweb
|
||||
COPY --from=builder /usr/local/bin/whatweb /usr/local/bin/whatweb
|
||||
|
||||
# Install WhatWeb Ruby dependencies in runtime stage
|
||||
RUN gem install addressable -v 2.8.9
|
||||
|
||||
# Copy Python packages from builder
|
||||
COPY --from=builder /usr/lib/python3.*/site-packages /usr/lib/python3.12/site-packages
|
||||
COPY --from=builder /usr/bin/schemathesis /usr/bin/
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup -g 1001 pentest && \
|
||||
adduser -u 1001 -G pentest -s /bin/bash -D pentest
|
||||
@@ -153,7 +104,7 @@ RUN ln -s /app/apps/worker/dist/scripts/save-deliverable.js /usr/local/bin/save-
|
||||
chmod +x /app/apps/worker/dist/scripts/generate-totp.js
|
||||
|
||||
# Create directories for session data and ensure proper permissions
|
||||
RUN mkdir -p /app/sessions /app/deliverables /app/repos /app/workspaces && \
|
||||
RUN mkdir -p /app/sessions /app/repos /app/workspaces && \
|
||||
mkdir -p /tmp/.cache /tmp/.config /tmp/.npm && \
|
||||
chmod 777 /app && \
|
||||
chmod 777 /tmp/.cache && \
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
>[!NOTE]
|
||||
> **[📢 New: Shannon is now available via `npx @keygraph/shannon`. →](https://github.com/KeygraphHQ/shannon/discussions/249)**
|
||||
> **[📢 Sunsetting Router Mode (claude-code-router). →](https://github.com/KeygraphHQ/shannon/discussions/301)**
|
||||
|
||||
<div align="center">
|
||||
|
||||
@@ -44,7 +44,6 @@ Shannon identified 20+ vulnerabilities in OWASP Juice Shop, including authentica
|
||||
- **Reproducible Proof-of-Concept Exploits**: The final report contains only proven, exploitable findings with copy-and-paste PoCs. Vulnerabilities that cannot be exploited are not reported.
|
||||
- **OWASP Vulnerability Coverage**: Identifies and validates Injection, XSS, SSRF, and Broken Authentication/Authorization, with additional categories in development.
|
||||
- **Code-Aware Dynamic Testing**: Analyzes source code to guide attack strategy, then validates findings with live browser and CLI-based exploits against the running application.
|
||||
- **Integrated Security Tooling**: Leverages Nmap, Subfinder, WhatWeb, and Schemathesis during reconnaissance and discovery phases.
|
||||
- **Parallel Processing**: Vulnerability analysis and exploitation phases run concurrently across all attack categories.
|
||||
|
||||
## Product Line
|
||||
@@ -118,7 +117,6 @@ Shannon Pro supports a self-hosted runner model (similar to GitHub Actions self-
|
||||
- [AWS Bedrock](#aws-bedrock)
|
||||
- [Google Vertex AI](#google-vertex-ai)
|
||||
- [Custom Base URL](#custom-base-url)
|
||||
- [Router Mode](#experimental---unsupported-router-mode-alternative-providers)
|
||||
- [Platform-Specific Instructions](#platform-specific-instructions)
|
||||
- [Output and Results](#output-and-results)
|
||||
- [Sample Reports](#sample-reports)
|
||||
@@ -144,13 +142,15 @@ Shannon Pro supports a self-hosted runner model (similar to GitHub Actions self-
|
||||
- **Claude Code OAuth token**
|
||||
- **AWS Bedrock** - Route through Amazon Bedrock with AWS credentials (see [AWS Bedrock](#aws-bedrock))
|
||||
- **Google Vertex AI** - Route through Google Cloud Vertex AI (see [Google Vertex AI](#google-vertex-ai))
|
||||
- **[EXPERIMENTAL - UNSUPPORTED] Alternative providers via Router Mode** - OpenAI or Google Gemini via OpenRouter (see [Router Mode](#experimental---unsupported-router-mode-alternative-providers))
|
||||
|
||||
> [!NOTE]
|
||||
> Docker is still required to use the `npx` workflow. Under the hood, the CLI pulls and runs a prebuilt Shannon worker image from Docker Hub, which is approximately 1 GB and contains Shannon plus all required dependencies.
|
||||
> Docker is still required to use the `npx` workflow. Under the hood, the CLI pulls and runs a prebuilt Shannon worker image from Docker Hub, which is approximately 1 GB and contains Shannon plus all required dependencies. Shannon mounts the target repository as read-only inside the worker container to protect against accidental modifications during analysis. Run Shannon via `npx @keygraph/shannon` for the latest released version, or pull the latest `main` if building from source.
|
||||
|
||||
### Quick Start (Recommended: npx)
|
||||
|
||||
> [!WARNING]
|
||||
> **Please read the [Disclaimers](#disclaimers) before running Shannon.** Shannon is **not** a passive scanner — it actively executes exploits against the target. You must have **explicit, written authorization** from the system owner.
|
||||
|
||||
```bash
|
||||
# 1. Configure credentials (interactive wizard — one-time setup)
|
||||
npx @keygraph/shannon setup
|
||||
@@ -373,9 +373,21 @@ cp configs/example-config.yaml ./my-app-config.yaml
|
||||
##### Basic Configuration Structure
|
||||
|
||||
```yaml
|
||||
# Optional: describe your target environment (max 500 chars)
|
||||
# Describe your target environment (optional, max 500 chars)
|
||||
description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production."
|
||||
|
||||
# Limit which vulnerability classes run end-to-end (optional, default: all five)
|
||||
# vuln_classes: [injection, xss, auth, authz, ssrf]
|
||||
|
||||
# Skip the exploitation phase (optional, default: "true")
|
||||
# exploit: "false"
|
||||
|
||||
# Free-form rules of engagement (optional)
|
||||
# rules_of_engagement: |
|
||||
# - No password brute-force; cap login attempts at 5 per account.
|
||||
# - Throttle to under 5 requests per second per endpoint; back off 60s on any 429.
|
||||
# - Use placeholders like [order_id] in deliverables — no real data values.
|
||||
|
||||
authentication:
|
||||
login_type: form
|
||||
login_url: "https://your-app.com/login"
|
||||
@@ -394,15 +406,28 @@ authentication:
|
||||
value: "/dashboard"
|
||||
|
||||
rules:
|
||||
# Supported types: url_path, subdomain, domain, method, header, parameter, code_path
|
||||
avoid:
|
||||
- description: "AI should avoid testing logout functionality"
|
||||
type: path
|
||||
url_path: "/logout"
|
||||
type: url_path
|
||||
value: "/logout"
|
||||
|
||||
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "src/vendor/**").
|
||||
# - description: "Out-of-scope vendored libraries"
|
||||
# type: code_path
|
||||
# value: "src/vendor/**"
|
||||
|
||||
focus:
|
||||
- description: "AI should emphasize testing API endpoints"
|
||||
type: path
|
||||
url_path: "/api"
|
||||
type: url_path
|
||||
value: "/api"
|
||||
|
||||
# Filters applied by the report agent when assembling the final report (optional).
|
||||
# report:
|
||||
# min_severity: low # drop findings below this severity (low | medium | high | critical)
|
||||
# min_confidence: low # drop findings below this confidence (low | medium | high)
|
||||
# guidance: |
|
||||
# Drop findings about missing security headers and rate-limit gaps.
|
||||
```
|
||||
|
||||
Run with:
|
||||
@@ -424,6 +449,13 @@ npx @keygraph/shannon start -u https://example.com -r /path/to/repo -c ./my-app-
|
||||
|
||||
If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
|
||||
|
||||
#### Adaptive Thinking (Opus 4.6/4.7)
|
||||
|
||||
With adaptive thinking, Claude decides when and how deeply to reason on Opus 4.6 and 4.7. It is enabled by default whenever a model tier resolves to one of these models.
|
||||
|
||||
- **npx mode** — `npx @keygraph/shannon setup` prompts you for the adaptive thinking setting during the wizard.
|
||||
- **Local mode** — set `CLAUDE_ADAPTIVE_THINKING=false` in `.env` (or as an exported env var) to disable.
|
||||
|
||||
#### Subscription Plan Rate Limits
|
||||
|
||||
Anthropic subscription plans reset usage on a **rolling 5-hour window**. The default retry strategy (30-min max backoff) will exhaust retries before the window resets. Add this to your config:
|
||||
@@ -452,7 +484,7 @@ export AWS_REGION=us-east-1
|
||||
export AWS_BEARER_TOKEN_BEDROCK=your-bearer-token
|
||||
export ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
|
||||
export ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
|
||||
export ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
|
||||
export ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
|
||||
```
|
||||
|
||||
<details>
|
||||
@@ -464,12 +496,12 @@ AWS_REGION=us-east-1
|
||||
AWS_BEARER_TOKEN_BEDROCK=your-bearer-token
|
||||
ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
|
||||
ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
|
||||
ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
|
||||
ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
Shannon uses three model tiers: **small** (`claude-haiku-4-5-20251001`) for summarization, **medium** (`claude-sonnet-4-6`) for security analysis, and **large** (`claude-opus-4-6`) for deep reasoning. Set `ANTHROPIC_SMALL_MODEL`, `ANTHROPIC_MEDIUM_MODEL`, and `ANTHROPIC_LARGE_MODEL` to the Bedrock model IDs for your region.
|
||||
Shannon uses three model tiers: **small** (`claude-haiku-4-5-20251001`) for summarization, **medium** (`claude-sonnet-4-6`) for security analysis, and **large** (`claude-opus-4-7`) for deep reasoning. Set `ANTHROPIC_SMALL_MODEL`, `ANTHROPIC_MEDIUM_MODEL`, and `ANTHROPIC_LARGE_MODEL` to the Bedrock model IDs for your region.
|
||||
|
||||
### Google Vertex AI
|
||||
|
||||
@@ -490,7 +522,7 @@ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
|
||||
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-sa-key.json
|
||||
export ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
|
||||
export ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
|
||||
export ANTHROPIC_LARGE_MODEL=claude-opus-4-6
|
||||
export ANTHROPIC_LARGE_MODEL=claude-opus-4-7
|
||||
```
|
||||
|
||||
<details>
|
||||
@@ -503,7 +535,7 @@ ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
|
||||
GOOGLE_APPLICATION_CREDENTIALS=./credentials/google-sa-key.json
|
||||
ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
|
||||
ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
|
||||
ANTHROPIC_LARGE_MODEL=claude-opus-4-6
|
||||
ANTHROPIC_LARGE_MODEL=claude-opus-4-7
|
||||
```
|
||||
|
||||
</details>
|
||||
@@ -512,7 +544,12 @@ Set `CLOUD_ML_REGION=global` for global endpoints, or a specific region like `us
|
||||
|
||||
### Custom Base URL
|
||||
|
||||
Shannon supports pointing the SDK at any Anthropic-compatible endpoint (proxies, gateways, etc.) via `ANTHROPIC_BASE_URL`.
|
||||
Shannon supports pointing the SDK at any Anthropic-compatible endpoint via `ANTHROPIC_BASE_URL`. For users who need proxy-based routing, the supported path is to use an LLM proxy such as [LiteLLM](https://github.com/BerriAI/litellm) configured to expose an Anthropic-compatible endpoint.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> **Only Claude models are officially supported.** Shannon's evaluations, internal testing, and agent harness are all optimized for Claude. Smaller or alternative models — including non-Claude models routed through a proxy — may not reliably follow Shannon's instructions or tool-use constraints, and are not officially supported. Use them at your own risk; results may be incomplete, inaccurate, or unstable.
|
||||
>
|
||||
> The previously experimental `claude-code-router` integration is being removed in an upcoming release. If you currently rely on it, migrate to an Anthropic-compatible proxy such as LiteLLM before upgrading.
|
||||
|
||||
Run `npx @keygraph/shannon setup` and select **Custom Base URL**. The wizard will prompt for your endpoint URL, auth token, and optionally let you override the default model tiers.
|
||||
|
||||
@@ -525,7 +562,7 @@ export ANTHROPIC_AUTH_TOKEN=your-auth-token
|
||||
# Optionally override model tiers (defaults are used if not set)
|
||||
export ANTHROPIC_SMALL_MODEL=claude-haiku-4-5-20251001
|
||||
export ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
|
||||
export ANTHROPIC_LARGE_MODEL=claude-opus-4-6
|
||||
export ANTHROPIC_LARGE_MODEL=claude-opus-4-7
|
||||
```
|
||||
|
||||
<details>
|
||||
@@ -536,68 +573,16 @@ ANTHROPIC_BASE_URL=https://your-proxy.example.com
|
||||
ANTHROPIC_AUTH_TOKEN=your-auth-token
|
||||
ANTHROPIC_SMALL_MODEL=claude-haiku-4-5-20251001
|
||||
ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
|
||||
ANTHROPIC_LARGE_MODEL=claude-opus-4-6
|
||||
ANTHROPIC_LARGE_MODEL=claude-opus-4-7
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### [EXPERIMENTAL - UNSUPPORTED] Router Mode (Alternative Providers)
|
||||
|
||||
Shannon can experimentally route requests through alternative AI providers using claude-code-router. This mode is not officially supported and is intended primarily for:
|
||||
|
||||
- **Model experimentation** — try Shannon with GPT-5.2 or Gemini 3-family models
|
||||
|
||||
#### Quick Setup
|
||||
|
||||
Run `npx @keygraph/shannon setup` and select **Router**. The wizard will prompt you to choose a provider (OpenAI or OpenRouter), enter your API key, and select a default model.
|
||||
|
||||
Or export env vars directly:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY=sk-... # or OPENROUTER_API_KEY=sk-or-...
|
||||
export ROUTER_DEFAULT=openai,gpt-5.2 # provider,model format
|
||||
```
|
||||
|
||||
```bash
|
||||
npx @keygraph/shannon start -u https://example.com -r /path/to/repo --router
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Clone and Build: add to .env and run with --router</summary>
|
||||
|
||||
```bash
|
||||
OPENAI_API_KEY=sk-...
|
||||
# OR
|
||||
OPENROUTER_API_KEY=sk-or-...
|
||||
ROUTER_DEFAULT=openai,gpt-5.2
|
||||
```
|
||||
|
||||
```bash
|
||||
./shannon start -u https://example.com -r /path/to/repo --router
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
#### Experimental Models
|
||||
|
||||
| Provider | Models |
|
||||
|----------|--------|
|
||||
| OpenAI | gpt-5.2, gpt-5-mini |
|
||||
| OpenRouter | google/gemini-3-flash-preview |
|
||||
|
||||
#### Disclaimer
|
||||
|
||||
This feature is experimental and unsupported. Output quality depends heavily on the model. Shannon is built on top of the Anthropic Agent SDK and is optimized and primarily tested with Anthropic Claude models. Alternative providers may produce inconsistent results (including failing early phases like Recon) depending on the model and routing setup.
|
||||
|
||||
### Platform-Specific Instructions
|
||||
|
||||
**For Windows:**
|
||||
|
||||
*Native (Git Bash):*
|
||||
|
||||
Install [Git for Windows](https://git-scm.com/install/windows) and run Shannon from **Git Bash** with Docker Desktop installed. Both `npx @keygraph/shannon` and local clone mode are supported.
|
||||
|
||||
*WSL2 (Recommended):*
|
||||
Shannon on Windows is only supported via **WSL2**. Native Windows (including Git Bash) is not supported.
|
||||
|
||||
**Step 1: Ensure WSL 2**
|
||||
|
||||
@@ -758,8 +743,7 @@ Shannon uses a multi-agent architecture that combines white-box source code anal
|
||||
```
|
||||
┌──────────────────────┐
|
||||
│ Pre-Reconnaissance │
|
||||
│ (nmap, subfinder, │
|
||||
│ whatweb, code scan) │
|
||||
│ (source code scan) │
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
▼
|
||||
@@ -802,7 +786,7 @@ Each scan runs in its own ephemeral Docker container (`docker run --rm`) with a
|
||||
|
||||
#### **Phase 1: Pre-Reconnaissance**
|
||||
|
||||
External scanning using nmap, subfinder, and whatweb to fingerprint the target's infrastructure and tech stack. Simultaneously performs source code analysis to identify the application framework, entry points, and potential attack surface from the codebase.
|
||||
Performs source code analysis to identify the application framework, entry points, and potential attack surface from the codebase. Builds the foundational architectural intelligence that all subsequent agents depend on.
|
||||
|
||||
#### **Phase 2: Reconnaissance**
|
||||
|
||||
@@ -840,6 +824,7 @@ This is not a passive scanner. The exploitation agents are designed to **activel
|
||||
>
|
||||
> - It is intended exclusively for use on sandboxed, staging, or local development environments where data integrity is not a concern.
|
||||
> - Potential mutative effects include, but are not limited to: creating new users, modifying or deleting data, compromising test accounts, and triggering unintended side effects from injection attacks.
|
||||
> - **For maximum security and isolation, run Shannon inside a virtual machine (VM).** This confines any side effects from exploitation — including unexpected outbound traffic, file writes from agent tooling, or interactions with local services — to a disposable environment.
|
||||
|
||||
#### **2. Legal & Ethical Use**
|
||||
|
||||
@@ -853,6 +838,7 @@ Shannon is designed for legitimate security auditing purposes only.
|
||||
#### **3. LLM & Automation Caveats**
|
||||
|
||||
- **Verification is Required**: While significant engineering has gone into our "proof-by-exploitation" methodology to eliminate false positives, the underlying LLMs can still generate hallucinated or weakly supported content in the final report. **Human oversight is essential** to validate the legitimacy and severity of all reported findings.
|
||||
- **Model Support**: Shannon is officially supported only with **Claude models**. Our evaluations, internal testing, and agent harness are all optimized for Claude. Smaller or alternative models — including non-Claude models routed through a proxy — may not reliably follow Shannon's instructions or tool-use constraints, and are not officially supported.
|
||||
- **Comprehensiveness**: The analysis in Shannon Lite may not be exhaustive due to the inherent limitations of LLM context windows. For a more comprehensive, graph-based analysis of your entire codebase, **Shannon Pro** leverages its advanced data flow analysis engine to ensure deeper and more thorough coverage.
|
||||
|
||||
#### **4. Scope of Analysis**
|
||||
@@ -921,8 +907,8 @@ Issues are welcome for bug reports and feature requests.
|
||||
Shannon Pro is Keygraph's all-in-one AppSec platform. For organizations that need unified SAST, SCA, and autonomous pentesting with static-dynamic correlation, CI/CD integration, or self-hosted deployment, see the [Shannon Pro technical overview](./SHANNON-PRO.md).
|
||||
|
||||
<p align="center">
|
||||
<a href="https://docs.google.com/forms/d/e/1FAIpQLSf-cPZcWjlfBJ3TCT8AaWpf8ztsw3FaHzJE4urr55KdlQs6cQ/viewform?usp=header" target="_blank">
|
||||
<img src="https://img.shields.io/badge/Shannon%20Pro%20Inquiry-4285F4?style=for-the-badge&logo=google&logoColor=white" alt="Shannon Pro Inquiry">
|
||||
<a href="https://cal.com/team/keygraph/shannon-pro" target="_blank">
|
||||
<img src="./assets/Demo_Button.png" height="40" alt="Shannon Pro Inquiry">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
|
||||
+1
-2
@@ -147,7 +147,7 @@ This phase informs everything downstream. If the codebase uses an ORM with param
|
||||
|
||||
## Phase 2: Reconnaissance
|
||||
|
||||
Bridges static and dynamic analysis using browser automation. The recon agent correlates code findings with the live application, validating that endpoints actually exist, mapping authentication flows, inventorying input vectors (URL parameters, POST fields, headers, cookies), and documenting the real authorization architecture. This phase may also integrate with infrastructure discovery tools including Nmap, Subfinder, and WhatWeb for network perimeter mapping.
|
||||
Bridges static and dynamic analysis using browser automation. The recon agent correlates code findings with the live application, validating that endpoints actually exist, mapping authentication flows, inventorying input vectors (URL parameters, POST fields, headers, cookies), and documenting the real authorization architecture.
|
||||
|
||||
## Phase 3: Vulnerability Analysis
|
||||
|
||||
@@ -194,7 +194,6 @@ This correlation means that a data flow vulnerability identified in static analy
|
||||
- **Fully Autonomous Operation:** Shannon Pro handles complex workflows including 2FA/TOTP logins and SSO (e.g., Sign in with Google) without human intervention. TOTP is handled via a dedicated MCP server tool.
|
||||
- **White-Box Awareness:** Unlike black-box scanners, Shannon Pro reads the source code to intelligently guide its attack strategy, combining code-level insight with runtime validation.
|
||||
- **Parallel Processing:** Vulnerability analysis and exploitation phases run concurrently across attack domains, with pipelined parallelism minimizing total execution time.
|
||||
- **Tool Orchestration:** Shannon Pro orchestrates existing security tools (e.g., Schemathesis for API testing, Nmap for network discovery) while adding LLM reasoning to interpret results.
|
||||
- **Configurable Login Flows:** Authentication configuration specifies login procedures and credentials, which are interpolated into agent prompts for authenticated testing.
|
||||
|
||||
---
|
||||
|
||||
@@ -19,32 +19,5 @@ services:
|
||||
retries: 10
|
||||
start_period: 30s
|
||||
|
||||
router:
|
||||
image: node:20-slim
|
||||
container_name: shannon-router
|
||||
profiles: ["router"]
|
||||
command: >
|
||||
sh -c "apt-get update && apt-get install -y gettext-base &&
|
||||
npm install -g @musistudio/claude-code-router &&
|
||||
mkdir -p /root/.claude-code-router &&
|
||||
envsubst < /config/router-config.json > /root/.claude-code-router/config.json &&
|
||||
ccr start"
|
||||
ports:
|
||||
- "127.0.0.1:3456:3456"
|
||||
volumes:
|
||||
- ./router-config.json:/config/router-config.json:ro
|
||||
environment:
|
||||
- HOST=0.0.0.0
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
|
||||
- ROUTER_DEFAULT=${ROUTER_DEFAULT:-openai,gpt-4o}
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3456/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
|
||||
volumes:
|
||||
temporal-data:
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
{
|
||||
"HOST": "0.0.0.0",
|
||||
"APIKEY": "shannon-router-key",
|
||||
"LOG": true,
|
||||
"LOG_LEVEL": "info",
|
||||
"NON_INTERACTIVE_MODE": true,
|
||||
"API_TIMEOUT_MS": 600000,
|
||||
"Providers": [
|
||||
{
|
||||
"name": "openai",
|
||||
"api_base_url": "https://api.openai.com/v1/chat/completions",
|
||||
"api_key": "$OPENAI_API_KEY",
|
||||
"models": ["gpt-5.2", "gpt-5-mini"],
|
||||
"transformer": {
|
||||
"use": [["maxcompletiontokens", { "max_completion_tokens": 16384 }]]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "openrouter",
|
||||
"api_base_url": "https://openrouter.ai/api/v1/chat/completions",
|
||||
"api_key": "$OPENROUTER_API_KEY",
|
||||
"models": ["google/gemini-3-flash-preview"],
|
||||
"transformer": {
|
||||
"use": ["openrouter"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"Router": {
|
||||
"default": "$ROUTER_DEFAULT"
|
||||
}
|
||||
}
|
||||
@@ -13,7 +13,7 @@ import { type ShannonConfig, saveConfig } from '../config/writer.js';
|
||||
|
||||
const SHANNON_HOME = path.join(os.homedir(), '.shannon');
|
||||
|
||||
type Provider = 'anthropic' | 'custom_base_url' | 'bedrock' | 'vertex' | 'router';
|
||||
type Provider = 'anthropic' | 'custom_base_url' | 'bedrock' | 'vertex';
|
||||
|
||||
export async function setup(): Promise<void> {
|
||||
p.intro('Shannon Setup');
|
||||
@@ -26,14 +26,16 @@ export async function setup(): Promise<void> {
|
||||
{ value: 'custom_base_url' as const, label: 'Custom Base URL', hint: 'proxies, gateways' },
|
||||
{ value: 'bedrock' as const, label: 'Claude via AWS Bedrock' },
|
||||
{ value: 'vertex' as const, label: 'Claude via Google Vertex AI' },
|
||||
{ value: 'router' as const, label: 'Router', hint: 'experimental' },
|
||||
],
|
||||
});
|
||||
if (p.isCancel(provider)) return cancelAndExit();
|
||||
|
||||
const config = await setupProvider(provider as Provider);
|
||||
|
||||
// 2. Save config
|
||||
// 2. Adaptive thinking
|
||||
await maybePromptAdaptiveThinking(config);
|
||||
|
||||
// 3. Save config
|
||||
saveConfig(config);
|
||||
|
||||
const configPath = path.join(SHANNON_HOME, 'config.toml');
|
||||
@@ -51,8 +53,6 @@ async function setupProvider(provider: Provider): Promise<ShannonConfig> {
|
||||
return setupBedrock();
|
||||
case 'vertex':
|
||||
return setupVertex();
|
||||
case 'router':
|
||||
return setupRouter();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ async function setupAnthropic(): Promise<ShannonConfig> {
|
||||
'Do you want to change the default models?\n' +
|
||||
' Small - claude-haiku-4-5-20251001\n' +
|
||||
' Medium - claude-sonnet-4-6\n' +
|
||||
' Large - claude-opus-4-6',
|
||||
' Large - claude-opus-4-7',
|
||||
initialValue: false,
|
||||
});
|
||||
if (p.isCancel(customizeModels)) return cancelAndExit();
|
||||
@@ -105,7 +105,7 @@ async function setupAnthropic(): Promise<ShannonConfig> {
|
||||
|
||||
const large = await p.text({
|
||||
message: 'Large model ID',
|
||||
initialValue: 'claude-opus-4-6',
|
||||
initialValue: 'claude-opus-4-7',
|
||||
validate: required('Large model ID is required'),
|
||||
});
|
||||
if (p.isCancel(large)) return cancelAndExit();
|
||||
@@ -143,7 +143,7 @@ async function setupCustomBaseUrl(): Promise<ShannonConfig> {
|
||||
'Do you want to change the default models?\n' +
|
||||
' Small - claude-haiku-4-5-20251001\n' +
|
||||
' Medium - claude-sonnet-4-6\n' +
|
||||
' Large - claude-opus-4-6',
|
||||
' Large - claude-opus-4-7',
|
||||
initialValue: false,
|
||||
});
|
||||
if (p.isCancel(customizeModels)) return cancelAndExit();
|
||||
@@ -165,7 +165,7 @@ async function setupCustomBaseUrl(): Promise<ShannonConfig> {
|
||||
|
||||
const large = await p.text({
|
||||
message: 'Large model ID',
|
||||
initialValue: 'claude-opus-4-6',
|
||||
initialValue: 'claude-opus-4-7',
|
||||
validate: required('Large model ID is required'),
|
||||
});
|
||||
if (p.isCancel(large)) return cancelAndExit();
|
||||
@@ -202,7 +202,7 @@ async function setupBedrock(): Promise<ShannonConfig> {
|
||||
|
||||
const large = await p.text({
|
||||
message: 'Large model ID',
|
||||
placeholder: 'us.anthropic.claude-opus-4-6',
|
||||
placeholder: 'us.anthropic.claude-opus-4-7',
|
||||
validate: required('Large model ID is required'),
|
||||
});
|
||||
if (p.isCancel(large)) return cancelAndExit();
|
||||
@@ -265,7 +265,7 @@ async function setupVertex(): Promise<ShannonConfig> {
|
||||
large: () =>
|
||||
p.text({
|
||||
message: 'Large model ID',
|
||||
placeholder: 'claude-opus-4-6',
|
||||
placeholder: 'claude-opus-4-7',
|
||||
validate: required('Large model ID is required'),
|
||||
}),
|
||||
});
|
||||
@@ -282,52 +282,22 @@ async function setupVertex(): Promise<ShannonConfig> {
|
||||
};
|
||||
}
|
||||
|
||||
async function setupRouter(): Promise<ShannonConfig> {
|
||||
const routerProvider = await p.select({
|
||||
message: 'Router provider',
|
||||
options: [
|
||||
{ value: 'openai' as const, label: 'OpenAI' },
|
||||
{ value: 'openrouter' as const, label: 'OpenRouter' },
|
||||
],
|
||||
});
|
||||
if (p.isCancel(routerProvider)) return cancelAndExit();
|
||||
|
||||
const apiKey = await promptSecret(
|
||||
routerProvider === 'openai' ? 'Enter your OpenAI API key' : 'Enter your OpenRouter API key',
|
||||
);
|
||||
|
||||
let defaultModel: string;
|
||||
if (routerProvider === 'openai') {
|
||||
const model = await p.select({
|
||||
message: 'Default model',
|
||||
options: [
|
||||
{ value: 'gpt-5.2' as const, label: 'GPT-5.2' },
|
||||
{ value: 'gpt-5-mini' as const, label: 'GPT-5 Mini' },
|
||||
],
|
||||
});
|
||||
if (p.isCancel(model)) return cancelAndExit();
|
||||
defaultModel = `openai,${model}`;
|
||||
} else {
|
||||
const model = await p.select({
|
||||
message: 'Default model',
|
||||
options: [{ value: 'google/gemini-3-flash-preview' as const, label: 'Google Gemini 3 Flash Preview' }],
|
||||
});
|
||||
if (p.isCancel(model)) return cancelAndExit();
|
||||
defaultModel = `openrouter,${model}`;
|
||||
}
|
||||
|
||||
const router: ShannonConfig['router'] = { default: defaultModel };
|
||||
if (routerProvider === 'openai') {
|
||||
router.openai_key = apiKey;
|
||||
} else {
|
||||
router.openrouter_key = apiKey;
|
||||
}
|
||||
|
||||
return { router };
|
||||
}
|
||||
|
||||
// === Helpers ===
|
||||
|
||||
async function maybePromptAdaptiveThinking(config: ShannonConfig): Promise<void> {
|
||||
const m = config.models;
|
||||
const hasOpus47 = !m || [m.small, m.medium, m.large].some((v) => v && /opus-4-[67]/.test(v));
|
||||
if (!hasOpus47) return;
|
||||
|
||||
const enable = await p.confirm({
|
||||
message: 'Enable adaptive thinking on Opus 4.6/4.7? Claude decides when and how deeply to reason.',
|
||||
initialValue: true,
|
||||
});
|
||||
if (p.isCancel(enable)) return cancelAndExit();
|
||||
|
||||
config.core = { ...config.core, adaptive_thinking: enable };
|
||||
}
|
||||
|
||||
async function promptSecret(message: string): Promise<string> {
|
||||
const value = await p.password({
|
||||
message,
|
||||
|
||||
@@ -9,10 +9,10 @@ import { execFileSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { ensureImage, ensureInfra, randomSuffix, spawnWorker } from '../docker.js';
|
||||
import { buildEnvFlags, isRouterConfigured, loadEnv, validateCredentials } from '../env.js';
|
||||
import { buildEnvFlags, loadEnv, validateCredentials } from '../env.js';
|
||||
import { getCredentialsPath, getWorkspacesDir, initHome } from '../home.js';
|
||||
import { isLocal } from '../mode.js';
|
||||
import { ensureDeliverables, resolveConfig, resolveRepo } from '../paths.js';
|
||||
import { resolveConfig, resolveRepo } from '../paths.js';
|
||||
import { displaySplash } from '../splash.js';
|
||||
|
||||
export interface StartArgs {
|
||||
@@ -22,7 +22,7 @@ export interface StartArgs {
|
||||
workspace?: string;
|
||||
output?: string;
|
||||
pipelineTesting: boolean;
|
||||
router: boolean;
|
||||
debug: boolean;
|
||||
version: string;
|
||||
}
|
||||
|
||||
@@ -31,44 +31,52 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
initHome();
|
||||
loadEnv();
|
||||
|
||||
// 2. Validate credentials and auto-detect router mode
|
||||
// 2. Validate credentials
|
||||
const creds = validateCredentials();
|
||||
if (!creds.valid) {
|
||||
console.error(`ERROR: ${creds.error}`);
|
||||
process.exit(1);
|
||||
}
|
||||
const useRouter = args.router || isRouterConfigured();
|
||||
|
||||
// 3. Resolve paths
|
||||
const repo = resolveRepo(args.repo);
|
||||
const config = args.config ? resolveConfig(args.config) : undefined;
|
||||
ensureDeliverables(repo.hostPath);
|
||||
|
||||
// 4. Ensure workspaces dir is writable by container user (UID 1001)
|
||||
const workspacesDir = getWorkspacesDir();
|
||||
fs.mkdirSync(workspacesDir, { recursive: true });
|
||||
fs.chmodSync(workspacesDir, 0o777);
|
||||
|
||||
// 5. Handle router env
|
||||
if (useRouter) {
|
||||
process.env.ANTHROPIC_BASE_URL = 'http://shannon-router:3456';
|
||||
process.env.ANTHROPIC_AUTH_TOKEN = 'shannon-router-key';
|
||||
}
|
||||
|
||||
// 6. Ensure image (auto-build in dev, pull in npx) and start infra
|
||||
// 5. Ensure image (auto-build in dev, pull in npx) and start infra
|
||||
ensureImage(args.version);
|
||||
await ensureInfra(useRouter);
|
||||
await ensureInfra();
|
||||
|
||||
// 7. Generate unique task queue and container name
|
||||
// 6. Generate unique task queue and container name
|
||||
const suffix = randomSuffix();
|
||||
const taskQueue = `shannon-${suffix}`;
|
||||
const containerName = `shannon-worker-${suffix}`;
|
||||
|
||||
// 8. Generate workspace name if not provided
|
||||
// 7. Generate workspace name if not provided
|
||||
const workspace =
|
||||
args.workspace ?? `${new URL(args.url).hostname.replace(/[^a-zA-Z0-9-]/g, '-')}_shannon-${Date.now()}`;
|
||||
|
||||
// 9. Resolve credentials — mount single file to fixed container path
|
||||
// 8. Create writable overlay directories (mounted over :ro repo paths inside container)
|
||||
// Workspace dir must be 0o777 so the container user (UID 1001) can create audit subdirs
|
||||
const workspacePath = path.join(workspacesDir, workspace);
|
||||
fs.mkdirSync(workspacePath, { recursive: true });
|
||||
fs.chmodSync(workspacePath, 0o777);
|
||||
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
|
||||
const dirPath = path.join(workspacePath, dir);
|
||||
fs.mkdirSync(dirPath, { recursive: true });
|
||||
fs.chmodSync(dirPath, 0o777);
|
||||
}
|
||||
|
||||
// 9. Pre-create overlay mount points (:ro mounts can't auto-create them)
|
||||
const shannonDir = path.join(repo.hostPath, '.shannon');
|
||||
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
|
||||
fs.mkdirSync(path.join(shannonDir, dir), { recursive: true });
|
||||
}
|
||||
|
||||
const credentialsPath = getCredentialsPath();
|
||||
const hasCredentials = fs.existsSync(credentialsPath);
|
||||
|
||||
@@ -101,16 +109,24 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
...(hasCredentials && { credentials: credentialsPath }),
|
||||
...(promptsDir && { promptsDir }),
|
||||
...(outputDir && { outputDir }),
|
||||
...(workspace && { workspace }),
|
||||
workspace,
|
||||
...(args.pipelineTesting && { pipelineTesting: true }),
|
||||
...(args.debug && { debug: true }),
|
||||
});
|
||||
|
||||
// 14. Wait for workflow to register, then display info
|
||||
proc.on('error', (err) => {
|
||||
console.error(`Failed to start worker: ${err.message}`);
|
||||
process.exit(1);
|
||||
// 14. Bail if `docker run -d` itself fails (mount error, image missing, etc.)
|
||||
const dockerExitCode = await new Promise<number>((resolve) => {
|
||||
proc.once('exit', (code) => resolve(code ?? 1));
|
||||
proc.once('error', (err) => {
|
||||
console.error(`Failed to start worker: ${err.message}`);
|
||||
resolve(1);
|
||||
});
|
||||
});
|
||||
|
||||
if (dockerExitCode !== 0) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Detect whether this is a fresh workspace or a resume by checking session.json existence
|
||||
const sessionJson = path.join(workspacesDir, workspace, 'session.json');
|
||||
const isResume = fs.existsSync(sessionJson);
|
||||
@@ -154,7 +170,7 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
|
||||
// Clear waiting line and show info
|
||||
process.stdout.write('\r\x1b[K');
|
||||
printInfo(args, useRouter, workspace, workflowId, repo.hostPath, workspacesDir);
|
||||
printInfo(args, workspace, workflowId, repo.hostPath, workspacesDir);
|
||||
return;
|
||||
}
|
||||
} catch {
|
||||
@@ -175,6 +191,9 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
} catch {
|
||||
// Container may have already exited
|
||||
}
|
||||
if (args.debug) {
|
||||
printDebugHint(containerName);
|
||||
}
|
||||
};
|
||||
|
||||
process.on('SIGINT', () => {
|
||||
@@ -188,9 +207,16 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
process.on('exit', cleanup);
|
||||
}
|
||||
|
||||
function printDebugHint(containerName: string): void {
|
||||
console.log('');
|
||||
console.log(` Worker container preserved: ${containerName}`);
|
||||
console.log(` Inspect logs: docker logs ${containerName}`);
|
||||
console.log(` Remove: docker rm ${containerName}`);
|
||||
console.log('');
|
||||
}
|
||||
|
||||
function printInfo(
|
||||
args: StartArgs,
|
||||
routerActive: boolean,
|
||||
workspace: string,
|
||||
workflowId: string,
|
||||
repoPath: string,
|
||||
@@ -208,9 +234,6 @@ function printInfo(
|
||||
if (args.pipelineTesting) {
|
||||
console.log(' Mode: Pipeline Testing');
|
||||
}
|
||||
if (routerActive) {
|
||||
console.log(' Router: Enabled');
|
||||
}
|
||||
console.log('');
|
||||
console.log(' Monitor:');
|
||||
if (workflowId) {
|
||||
|
||||
@@ -18,12 +18,14 @@ interface ConfigMapping {
|
||||
readonly env: string;
|
||||
readonly toml: string;
|
||||
readonly type: TOMLType;
|
||||
readonly boolFormat?: 'numeric' | 'literal';
|
||||
}
|
||||
|
||||
/** Maps every supported env var to its TOML path (section.key) and expected type. */
|
||||
const CONFIG_MAP: readonly ConfigMapping[] = [
|
||||
// Core
|
||||
{ env: 'CLAUDE_CODE_MAX_OUTPUT_TOKENS', toml: 'core.max_tokens', type: 'number' },
|
||||
{ env: 'CLAUDE_ADAPTIVE_THINKING', toml: 'core.adaptive_thinking', type: 'boolean', boolFormat: 'literal' },
|
||||
|
||||
// Anthropic
|
||||
{ env: 'ANTHROPIC_API_KEY', toml: 'anthropic.api_key', type: 'string' },
|
||||
@@ -44,11 +46,6 @@ const CONFIG_MAP: readonly ConfigMapping[] = [
|
||||
{ env: 'ANTHROPIC_BASE_URL', toml: 'custom_base_url.base_url', type: 'string' },
|
||||
{ env: 'ANTHROPIC_AUTH_TOKEN', toml: 'custom_base_url.auth_token', type: 'string' },
|
||||
|
||||
// Router
|
||||
{ env: 'ROUTER_DEFAULT', toml: 'router.default', type: 'string' },
|
||||
{ env: 'OPENAI_API_KEY', toml: 'router.openai_key', type: 'string' },
|
||||
{ env: 'OPENROUTER_API_KEY', toml: 'router.openrouter_key', type: 'string' },
|
||||
|
||||
// Model tiers
|
||||
{ env: 'ANTHROPIC_SMALL_MODEL', toml: 'models.small', type: 'string' },
|
||||
{ env: 'ANTHROPIC_MEDIUM_MODEL', toml: 'models.medium', type: 'string' },
|
||||
@@ -61,9 +58,9 @@ type TOMLValue = string | number | boolean;
|
||||
type TOMLSection = Record<string, TOMLValue>;
|
||||
type TOMLConfig = Record<string, TOMLSection>;
|
||||
|
||||
/** Read a nested TOML value by dotted path (e.g. "anthropic.api_key"). */
|
||||
function getTomlValue(config: TOMLConfig, path: string): string | undefined {
|
||||
const [section, key] = path.split('.');
|
||||
/** Read a nested TOML value for a given mapping. */
|
||||
function getTomlValue(config: TOMLConfig, mapping: ConfigMapping): string | undefined {
|
||||
const [section, key] = mapping.toml.split('.');
|
||||
if (!section || !key) return undefined;
|
||||
|
||||
const sectionObj = config[section];
|
||||
@@ -72,8 +69,10 @@ function getTomlValue(config: TOMLConfig, path: string): string | undefined {
|
||||
const value = sectionObj[key];
|
||||
if (value === undefined || value === null) return undefined;
|
||||
|
||||
// NOTE: env.ts checks bedrock/vertex via `=== '1'`, so booleans must map to "1"/"0"
|
||||
if (typeof value === 'boolean') return value ? '1' : '0';
|
||||
if (typeof value === 'boolean') {
|
||||
if (mapping.boolFormat === 'literal') return value ? 'true' : 'false';
|
||||
return value ? '1' : '0';
|
||||
}
|
||||
|
||||
return String(value);
|
||||
}
|
||||
@@ -165,20 +164,6 @@ function validateProviderFields(config: TOMLConfig, provider: string, errors: st
|
||||
validateModelTiers(config, 'vertex', errors);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'router': {
|
||||
if (!keys.includes('default')) {
|
||||
errors.push('[router] missing required key: default');
|
||||
}
|
||||
if (!keys.includes('openai_key') && !keys.includes('openrouter_key')) {
|
||||
errors.push('[router] requires either openai_key or openrouter_key');
|
||||
}
|
||||
const models = config.models as Record<string, unknown> | undefined;
|
||||
if (models && typeof models === 'object' && Object.keys(models).length > 0) {
|
||||
errors.push('[models] is not supported with [router]');
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,7 +227,7 @@ function validateConfig(config: TOMLConfig): string[] {
|
||||
}
|
||||
|
||||
// 4. Only one provider section allowed (ignore empty sections)
|
||||
const PROVIDER_SECTIONS = ['anthropic', 'custom_base_url', 'bedrock', 'vertex', 'router'] as const;
|
||||
const PROVIDER_SECTIONS = ['anthropic', 'custom_base_url', 'bedrock', 'vertex'] as const;
|
||||
const present = PROVIDER_SECTIONS.filter((s) => {
|
||||
const section = config[s];
|
||||
return section && typeof section === 'object' && Object.keys(section).length > 0;
|
||||
@@ -292,7 +277,7 @@ export function resolveConfig(): void {
|
||||
for (const mapping of CONFIG_MAP) {
|
||||
if (process.env[mapping.env]) continue;
|
||||
|
||||
const value = getTomlValue(toml, mapping.toml);
|
||||
const value = getTomlValue(toml, mapping);
|
||||
if (value) {
|
||||
process.env[mapping.env] = value;
|
||||
}
|
||||
|
||||
@@ -8,12 +8,11 @@ import { getConfigFile } from '../home.js';
|
||||
// === Types ===
|
||||
|
||||
export interface ShannonConfig {
|
||||
core?: { max_tokens?: number };
|
||||
core?: { max_tokens?: number; adaptive_thinking?: boolean };
|
||||
anthropic?: { api_key?: string; oauth_token?: string };
|
||||
custom_base_url?: { base_url?: string; auth_token?: string };
|
||||
bedrock?: { use?: boolean; region?: string; token?: string };
|
||||
vertex?: { use?: boolean; region?: string; project_id?: string; key_path?: string };
|
||||
router?: { default?: string; openai_key?: string; openrouter_key?: string };
|
||||
models?: { small?: string; medium?: string; large?: string };
|
||||
}
|
||||
|
||||
|
||||
+34
-59
@@ -69,65 +69,28 @@ export function isTemporalReady(): boolean {
|
||||
return output.includes('SERVING');
|
||||
}
|
||||
|
||||
/** Check if the router container is running and healthy. */
|
||||
function isRouterReady(): boolean {
|
||||
const status = runOutput('docker', ['inspect', '--format', '{{.State.Health.Status}}', 'shannon-router']);
|
||||
return status === 'healthy';
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure Temporal (and optionally router) are running via compose.
|
||||
* If Temporal is already up but router is needed and missing, starts router only.
|
||||
* Ensure Temporal is running via compose.
|
||||
*/
|
||||
export async function ensureInfra(useRouter: boolean): Promise<void> {
|
||||
const temporalReady = isTemporalReady();
|
||||
const routerNeeded = useRouter && !isRouterReady();
|
||||
|
||||
if (temporalReady && !routerNeeded) {
|
||||
export async function ensureInfra(): Promise<void> {
|
||||
if (isTemporalReady()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const composeFile = getComposeFile();
|
||||
const composeArgs = ['compose', '-f', composeFile];
|
||||
if (useRouter) composeArgs.push('--profile', 'router');
|
||||
composeArgs.push('up', '-d');
|
||||
console.log('Starting Shannon infrastructure...');
|
||||
execFileSync('docker', ['compose', '-f', composeFile, 'up', '-d'], { stdio: 'inherit' });
|
||||
|
||||
if (temporalReady && routerNeeded) {
|
||||
console.log('Starting router...');
|
||||
} else {
|
||||
console.log('Starting Shannon infrastructure...');
|
||||
}
|
||||
execFileSync('docker', composeArgs, { stdio: 'inherit' });
|
||||
|
||||
// Wait for Temporal if it wasn't already running
|
||||
if (!temporalReady) {
|
||||
console.log('Waiting for Temporal to be ready...');
|
||||
for (let i = 0; i < 30; i++) {
|
||||
if (isTemporalReady()) {
|
||||
console.log('Temporal is ready!');
|
||||
break;
|
||||
}
|
||||
if (i === 29) {
|
||||
console.error('Timeout waiting for Temporal');
|
||||
process.exit(1);
|
||||
}
|
||||
await sleep(2000);
|
||||
console.log('Waiting for Temporal to be ready...');
|
||||
for (let i = 0; i < 30; i++) {
|
||||
if (isTemporalReady()) {
|
||||
console.log('Temporal is ready!');
|
||||
return;
|
||||
}
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
// Wait for router if needed
|
||||
if (routerNeeded) {
|
||||
console.log('Waiting for router to be ready...');
|
||||
for (let i = 0; i < 15; i++) {
|
||||
if (isRouterReady()) {
|
||||
console.log('Router is ready!');
|
||||
return;
|
||||
}
|
||||
await sleep(2000);
|
||||
}
|
||||
console.error('Timeout waiting for router');
|
||||
process.exit(1);
|
||||
}
|
||||
console.error('Timeout waiting for Temporal');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -194,15 +157,21 @@ export interface WorkerOptions {
|
||||
credentials?: string;
|
||||
promptsDir?: string;
|
||||
outputDir?: string;
|
||||
workspace?: string;
|
||||
workspace: string;
|
||||
pipelineTesting?: boolean;
|
||||
debug?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Spawn the worker container in detached mode and return the process.
|
||||
* When `opts.debug` is true, omits `--rm` so the container persists for log inspection.
|
||||
*/
|
||||
export function spawnWorker(opts: WorkerOptions): ChildProcess {
|
||||
const args = ['run', '-d', '--rm', '--name', opts.containerName, '--network', 'shannon-net'];
|
||||
const args = ['run', '-d'];
|
||||
if (!opts.debug) {
|
||||
args.push('--rm');
|
||||
}
|
||||
args.push('--name', opts.containerName, '--network', 'shannon-net');
|
||||
|
||||
// Add host flag for Linux
|
||||
args.push(...addHostFlag());
|
||||
@@ -214,7 +183,13 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
|
||||
|
||||
// Volume mounts
|
||||
args.push('-v', `${opts.workspacesDir}:/app/workspaces`);
|
||||
args.push('-v', `${opts.repo.hostPath}:${opts.repo.containerPath}`);
|
||||
args.push('-v', `${opts.repo.hostPath}:${opts.repo.containerPath}:ro`);
|
||||
|
||||
// Writable overlays: shadow .shannon/ inside the :ro repo with workspace-backed dirs
|
||||
const workspacePath = path.join(opts.workspacesDir, opts.workspace);
|
||||
args.push('-v', `${path.join(workspacePath, 'deliverables')}:${opts.repo.containerPath}/.shannon/deliverables`);
|
||||
args.push('-v', `${path.join(workspacePath, 'scratchpad')}:${opts.repo.containerPath}/.shannon/scratchpad`);
|
||||
args.push('-v', `${path.join(workspacePath, '.playwright-cli')}:${opts.repo.containerPath}/.shannon/.playwright-cli`);
|
||||
|
||||
// Local mode: mount prompts for live editing
|
||||
if (opts.promptsDir) {
|
||||
@@ -253,16 +228,16 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
|
||||
if (opts.outputDir) {
|
||||
args.push('--output', '/app/output');
|
||||
}
|
||||
if (opts.workspace) {
|
||||
args.push('--workspace', opts.workspace);
|
||||
}
|
||||
args.push('--workspace', opts.workspace);
|
||||
if (opts.pipelineTesting) {
|
||||
args.push('--pipeline-testing');
|
||||
}
|
||||
|
||||
// Prevent MSYS/Git Bash from converting Unix paths (e.g. /repos/my-repo) to Windows paths
|
||||
// Inherit stderr so `docker run` daemon errors surface to the user;
|
||||
// ignore stdin/stdout (the container ID is noise).
|
||||
return spawn('docker', args, {
|
||||
stdio: 'pipe',
|
||||
stdio: ['ignore', 'ignore', 'inherit'],
|
||||
// Prevent MSYS/Git Bash from converting Unix paths on Windows
|
||||
...(os.platform() === 'win32' && { env: { ...process.env, MSYS_NO_PATHCONV: '1' } }),
|
||||
});
|
||||
}
|
||||
@@ -284,7 +259,7 @@ export function stopWorkers(): void {
|
||||
*/
|
||||
export function stopInfra(clean: boolean): void {
|
||||
const composeFile = getComposeFile();
|
||||
const args = ['compose', '-f', composeFile, '--profile', 'router', 'down'];
|
||||
const args = ['compose', '-f', composeFile, 'down'];
|
||||
if (clean) args.push('-v');
|
||||
execFileSync('docker', args, { stdio: 'inherit' });
|
||||
}
|
||||
|
||||
+2
-17
@@ -14,7 +14,6 @@ const FORWARD_VARS = [
|
||||
'ANTHROPIC_API_KEY',
|
||||
'ANTHROPIC_BASE_URL',
|
||||
'ANTHROPIC_AUTH_TOKEN',
|
||||
'ROUTER_DEFAULT',
|
||||
'CLAUDE_CODE_OAUTH_TOKEN',
|
||||
'CLAUDE_CODE_USE_BEDROCK',
|
||||
'AWS_REGION',
|
||||
@@ -27,8 +26,7 @@ const FORWARD_VARS = [
|
||||
'ANTHROPIC_MEDIUM_MODEL',
|
||||
'ANTHROPIC_LARGE_MODEL',
|
||||
'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
|
||||
'OPENAI_API_KEY',
|
||||
'OPENROUTER_API_KEY',
|
||||
'CLAUDE_ADAPTIVE_THINKING',
|
||||
] as const;
|
||||
|
||||
/**
|
||||
@@ -64,12 +62,7 @@ export function buildEnvFlags(): string[] {
|
||||
interface CredentialValidation {
|
||||
valid: boolean;
|
||||
error?: string;
|
||||
mode: 'api-key' | 'oauth' | 'custom-base-url' | 'bedrock' | 'vertex' | 'router';
|
||||
}
|
||||
|
||||
/** Check if router credentials are present in the environment. */
|
||||
export function isRouterConfigured(): boolean {
|
||||
return !!(process.env.ROUTER_DEFAULT && (process.env.OPENAI_API_KEY || process.env.OPENROUTER_API_KEY));
|
||||
mode: 'api-key' | 'oauth' | 'custom-base-url' | 'bedrock' | 'vertex';
|
||||
}
|
||||
|
||||
/** Check if a custom Anthropic-compatible base URL is configured. */
|
||||
@@ -85,7 +78,6 @@ function detectProviders(): string[] {
|
||||
if (isCustomBaseUrlConfigured()) providers.push('Custom Base URL');
|
||||
if (process.env.CLAUDE_CODE_USE_BEDROCK === '1') providers.push('AWS Bedrock');
|
||||
if (process.env.CLAUDE_CODE_USE_VERTEX === '1') providers.push('Google Vertex');
|
||||
if (isRouterConfigured()) providers.push('Router');
|
||||
return providers;
|
||||
}
|
||||
|
||||
@@ -110,8 +102,6 @@ export function validateCredentials(): CredentialValidation {
|
||||
return { valid: true, mode: 'oauth' };
|
||||
}
|
||||
if (isCustomBaseUrlConfigured()) {
|
||||
// Set auth token as API key so the SDK can initialize
|
||||
process.env.ANTHROPIC_API_KEY = process.env.ANTHROPIC_AUTH_TOKEN;
|
||||
return { valid: true, mode: 'custom-base-url' };
|
||||
}
|
||||
if (process.env.CLAUDE_CODE_USE_BEDROCK === '1') {
|
||||
@@ -153,11 +143,6 @@ export function validateCredentials(): CredentialValidation {
|
||||
}
|
||||
return { valid: true, mode: 'vertex' };
|
||||
}
|
||||
if (isRouterConfigured()) {
|
||||
// Set a placeholder so the worker doesn't reject the missing key
|
||||
process.env.ANTHROPIC_API_KEY = 'router-mode';
|
||||
return { valid: true, mode: 'router' };
|
||||
}
|
||||
|
||||
const hint =
|
||||
getMode() === 'local'
|
||||
|
||||
+27
-6
@@ -25,6 +25,25 @@ import { displaySplash } from './splash.js';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
function blockSudo(): void {
|
||||
const isSudo = !!process.env.SUDO_USER;
|
||||
const isRoot = process.geteuid?.() === 0;
|
||||
if (!isSudo && !isRoot) return;
|
||||
|
||||
if (isSudo) {
|
||||
console.error('ERROR: Shannon must not be run with sudo.');
|
||||
console.error('Re-run this command as your normal user.');
|
||||
} else {
|
||||
console.error('ERROR: Shannon must not be run as the root user.');
|
||||
console.error('Switch to a regular user account and re-run this command.');
|
||||
}
|
||||
if (process.platform === 'linux') {
|
||||
console.error('Configure Docker to run without sudo first:');
|
||||
console.error('https://docs.docker.com/engine/install/linux-postinstall');
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
function getVersion(): string {
|
||||
try {
|
||||
const pkgPath = path.join(__dirname, '..', 'package.json');
|
||||
@@ -69,7 +88,7 @@ Options for 'start':
|
||||
-o, --output <path> Copy deliverables to this directory after run
|
||||
-w, --workspace <name> Named workspace (auto-resumes if exists)
|
||||
--pipeline-testing Use minimal prompts for fast testing
|
||||
--router Route requests through claude-code-router
|
||||
--debug Preserve worker container after exit for log inspection
|
||||
|
||||
Examples:
|
||||
${prefix} start -u https://example.com -r ${mode === 'local' ? 'my-repo' : './my-repo'}
|
||||
@@ -94,7 +113,7 @@ interface ParsedStartArgs {
|
||||
workspace?: string;
|
||||
output?: string;
|
||||
pipelineTesting: boolean;
|
||||
router: boolean;
|
||||
debug: boolean;
|
||||
}
|
||||
|
||||
function parseStartArgs(argv: string[]): ParsedStartArgs {
|
||||
@@ -104,7 +123,7 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
|
||||
let workspace: string | undefined;
|
||||
let output: string | undefined;
|
||||
let pipelineTesting = false;
|
||||
let router = false;
|
||||
let debug = false;
|
||||
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i];
|
||||
@@ -149,8 +168,8 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
|
||||
case '--pipeline-testing':
|
||||
pipelineTesting = true;
|
||||
break;
|
||||
case '--router':
|
||||
router = true;
|
||||
case '--debug':
|
||||
debug = true;
|
||||
break;
|
||||
default:
|
||||
console.error(`Unknown option: ${arg}`);
|
||||
@@ -169,7 +188,7 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
|
||||
url,
|
||||
repo,
|
||||
pipelineTesting,
|
||||
router,
|
||||
debug,
|
||||
...(config && { config }),
|
||||
...(workspace && { workspace }),
|
||||
...(output && { output }),
|
||||
@@ -178,6 +197,8 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
|
||||
|
||||
// === Main Dispatch ===
|
||||
|
||||
blockSudo();
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
const command = args[0];
|
||||
|
||||
|
||||
@@ -76,12 +76,3 @@ export function resolveConfig(configArg: string): MountPair {
|
||||
containerPath: `/app/configs/${basename}`,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the deliverables directory exists and is writable by the container user.
|
||||
*/
|
||||
export function ensureDeliverables(repoHostPath: string): void {
|
||||
const deliverables = path.join(repoHostPath, 'deliverables');
|
||||
fs.mkdirSync(deliverables, { recursive: true });
|
||||
fs.chmodSync(deliverables, 0o777);
|
||||
}
|
||||
|
||||
@@ -118,6 +118,51 @@
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"vuln_classes": {
|
||||
"type": "array",
|
||||
"description": "Vulnerability classes to test. When omitted, all five classes run. When set, only listed classes run; their vuln+exploit agents and report sections are included.",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["injection", "xss", "auth", "authz", "ssrf"]
|
||||
},
|
||||
"minItems": 1,
|
||||
"maxItems": 5,
|
||||
"uniqueItems": true
|
||||
},
|
||||
"exploit": {
|
||||
"type": "string",
|
||||
"enum": ["true", "false"],
|
||||
"description": "Whether to run the exploitation phase (default true). Set false to run only analysis."
|
||||
},
|
||||
"report": {
|
||||
"type": "object",
|
||||
"description": "Report filtering and guidance applied by the report agent.",
|
||||
"properties": {
|
||||
"min_severity": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high", "critical"],
|
||||
"description": "Minimum severity threshold; findings below are dropped by the report agent."
|
||||
},
|
||||
"min_confidence": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"],
|
||||
"description": "Minimum confidence threshold; findings below are dropped by the report agent."
|
||||
},
|
||||
"guidance": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 500,
|
||||
"description": "Free-text guidance to the report agent (e.g., 'Drop findings about missing security headers')."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"rules_of_engagement": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 1000,
|
||||
"description": "Free-text instructions to the agent that render into every prompt."
|
||||
},
|
||||
"login": {
|
||||
"type": "object",
|
||||
"description": "Deprecated: Use 'authentication' section instead",
|
||||
@@ -135,7 +180,11 @@
|
||||
{ "required": ["authentication"] },
|
||||
{ "required": ["rules"] },
|
||||
{ "required": ["authentication", "rules"] },
|
||||
{ "required": ["description"] }
|
||||
{ "required": ["description"] },
|
||||
{ "required": ["vuln_classes"] },
|
||||
{ "required": ["exploit"] },
|
||||
{ "required": ["report"] },
|
||||
{ "required": ["rules_of_engagement"] }
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
@@ -151,17 +200,17 @@
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": ["path", "subdomain", "domain", "method", "header", "parameter"],
|
||||
"description": "Type of rule (what aspect of requests to match against)"
|
||||
"enum": ["url_path", "subdomain", "domain", "method", "header", "parameter", "code_path"],
|
||||
"description": "Type of rule (what aspect of requests or source code to match against)"
|
||||
},
|
||||
"url_path": {
|
||||
"value": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 1000,
|
||||
"description": "URL path pattern or value to match"
|
||||
"description": "Value to match"
|
||||
}
|
||||
},
|
||||
"required": ["description", "type", "url_path"],
|
||||
"required": ["description", "type", "value"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,27 @@
|
||||
# Description of the target environment (optional, max 500 chars)
|
||||
description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production."
|
||||
|
||||
# Limit which vulnerability classes run end-to-end (optional, default: all five)
|
||||
# vuln_classes: [injection, xss, auth, authz, ssrf]
|
||||
|
||||
# Whether to run the exploitation phase (optional, default: "true"). Set to "false" to run analysis only.
|
||||
# exploit: "false"
|
||||
|
||||
# Free-form engagement rules applied to analysis and exploitation agents (optional).
|
||||
# Example below is illustrative; edit, remove, or add sections as needed.
|
||||
# rules_of_engagement: |
|
||||
# Forbidden techniques:
|
||||
# - No password brute-force or credential stuffing. Cap login attempts at 5 per account.
|
||||
# - ...
|
||||
#
|
||||
# Operational:
|
||||
# - Throttle to under 5 requests per second per endpoint. Back off 60 seconds on any 429 response.
|
||||
# - ...
|
||||
#
|
||||
# Data handling:
|
||||
# - Do not include actual values in deliverables — use placeholders like [order_id] or [user_email].
|
||||
# - ...
|
||||
|
||||
authentication:
|
||||
login_type: form # Options: 'form' or 'sso'
|
||||
login_url: "https://example.com/login"
|
||||
@@ -25,27 +46,55 @@ authentication:
|
||||
value: "/dashboard"
|
||||
|
||||
rules:
|
||||
# Supported types: url_path, subdomain, domain, method, header, parameter, code_path
|
||||
avoid:
|
||||
- description: "Do not test the marketing site subdomain"
|
||||
type: subdomain
|
||||
url_path: "www"
|
||||
value: "www"
|
||||
|
||||
- description: "Skip logout functionality"
|
||||
type: path
|
||||
url_path: "/logout"
|
||||
type: url_path
|
||||
value: "/logout"
|
||||
|
||||
- description: "No DELETE operations on user API"
|
||||
type: path
|
||||
url_path: "/api/v1/users/*"
|
||||
type: url_path
|
||||
value: "/api/v1/users/*"
|
||||
|
||||
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "test/**").
|
||||
# - description: "Test fixtures and specs (not production code)"
|
||||
# type: code_path
|
||||
# value: "test/**"
|
||||
#
|
||||
# - description: "Generated migrations"
|
||||
# type: code_path
|
||||
# value: "db/migrations/**"
|
||||
|
||||
focus:
|
||||
- description: "Prioritize beta admin panel subdomain"
|
||||
type: subdomain
|
||||
url_path: "beta-admin"
|
||||
value: "beta-admin"
|
||||
|
||||
- description: "Focus on user profile updates"
|
||||
type: path
|
||||
url_path: "/api/v2/user-profile"
|
||||
type: url_path
|
||||
value: "/api/v2/user-profile"
|
||||
|
||||
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "routes/*.ts").
|
||||
# - description: "Express route handlers"
|
||||
# type: code_path
|
||||
# value: "routes/*.ts"
|
||||
#
|
||||
# - description: "Sequelize ORM model definitions"
|
||||
# type: code_path
|
||||
# value: "models/*.ts"
|
||||
|
||||
# Report filters applied by the report agent when assembling the final report (optional).
|
||||
# Example below is illustrative; edit, remove, or add sections as needed.
|
||||
# report:
|
||||
# min_severity: low
|
||||
# min_confidence: low
|
||||
# guidance: |
|
||||
# Drop findings about missing security headers and rate-limit gaps.
|
||||
# ...
|
||||
|
||||
# Pipeline execution settings (optional)
|
||||
# pipeline:
|
||||
|
||||
@@ -3,6 +3,16 @@
|
||||
"version": "0.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"exports": {
|
||||
"./interfaces": "./dist/interfaces/index.js",
|
||||
"./types": "./dist/types/index.js",
|
||||
"./types/config": "./dist/types/config.js",
|
||||
"./types/agents": "./dist/types/agents.js",
|
||||
"./pipeline": "./dist/temporal/pipeline.js",
|
||||
"./activities": "./dist/temporal/activities.js",
|
||||
"./services": "./dist/services/index.js",
|
||||
"./config": "./dist/config-parser.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"check": "tsc --noEmit",
|
||||
@@ -18,6 +28,7 @@
|
||||
"ajv-formats": "^2.1.1",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-yaml": "^4.1.0",
|
||||
"zod": "^4.3.6",
|
||||
"zx": "^8.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -105,14 +105,16 @@ If you only confirmed a vulnerability without attempting to weaponize it, you ha
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `deliverables/auth_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/auth_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
**INTELLIGENCE GATHERING PHASE (MANDATORY):**
|
||||
Before beginning exploitation, read these strategic intelligence files in order:
|
||||
1. `deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and authentication code locations.
|
||||
2. `deliverables/recon_deliverable.md` - Complete API inventory, user roles, and data flow maps.
|
||||
3. `deliverables/auth_analysis_deliverable.md` - Strategic context from the Auth analysis specialist, including notes on session mechanisms, password policies, and flawed logic paths.
|
||||
1. `.shannon/deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and authentication code locations.
|
||||
2. `.shannon/deliverables/recon_deliverable.md` - Complete API inventory, user roles, and data flow maps.
|
||||
3. `.shannon/deliverables/auth_analysis_deliverable.md` - Strategic context from the Auth analysis specialist, including notes on session mechanisms, password policies, and flawed logic paths.
|
||||
|
||||
- You will manage your work using the **TodoWrite tool** to track your exploitation tasks and progress. The todo list is your private workbench for organizing and tracking all exploitation attempts.
|
||||
</starting_context>
|
||||
@@ -122,13 +124,13 @@ Before beginning exploitation, read these strategic intelligence files in order:
|
||||
|
||||
**Phase Sequence:** VULN ANALYSIS (Complete) → **AUTH EXPLOITATION (You)** → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `deliverables/auth_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `deliverables/auth_exploitation_evidence.md` (exploitation evidence)
|
||||
**Your Input:** `.shannon/deliverables/auth_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `.shannon/deliverables/auth_exploitation_evidence.md` (exploitation evidence)
|
||||
|
||||
**Supporting Intelligence:**
|
||||
- `deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `deliverables/recon_deliverable.md` (API inventory & user roles)
|
||||
- `deliverables/auth_analysis_deliverable.md` (strategic intel)
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `.shannon/deliverables/recon_deliverable.md` (API inventory & user roles)
|
||||
- `.shannon/deliverables/auth_analysis_deliverable.md` (strategic intel)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture and attack surfaces
|
||||
@@ -144,10 +146,10 @@ You are the **Identity Compromise Specialist** - proving tangible impact of brok
|
||||
<cli_tools>
|
||||
- **Browser Automation (playwright-cli skill):** Essential for interacting with multi-step authentication flows, injecting stolen session cookies, and verifying account takeover in a real browser context. Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Task Agent:** Mandatory coder-executor for brute force batches, credential stuffing, token replay automation, and any scripted workflow.
|
||||
- **TodoWrite tool:** Essential for managing your exploitation task list. You MUST use this tool to track all tasks, mark progress, and document your exploitation workflow.
|
||||
@@ -189,7 +191,7 @@ Your primary goal is to achieve a definitive, evidence-backed conclusion for **e
|
||||
Your entire exploitation process will be managed through the TodoWrite tool. You MUST adhere to this process.
|
||||
|
||||
**1. Initialization:**
|
||||
- Read `deliverables/auth_exploitation_queue.json`.
|
||||
- Read `.shannon/deliverables/auth_exploitation_queue.json`.
|
||||
- Use the TodoWrite tool to create tasks for each vulnerability in the queue.
|
||||
- Each task should include the vulnerability ID and the hypothesis to test.
|
||||
Example tasks:
|
||||
@@ -315,7 +317,7 @@ When your attack plan is fully explored (all checkboxes are marked `[x]`), your
|
||||
First, synthesize all evidence into your specialist section and save it using the save-deliverable CLI with --type AUTH_EVIDENCE. This section will be combined with other specialist reports to form the complete security assessment.
|
||||
|
||||
**Section Ordering & Format Requirements:**
|
||||
- Use the exact vulnerability IDs from `deliverables/auth_exploitation_queue.json`
|
||||
- Use the exact vulnerability IDs from `.shannon/deliverables/auth_exploitation_queue.json`
|
||||
- Within "Successfully Exploited" section: Order by severity of impact (most critical first)
|
||||
- Within "Confirmed Vulnerabilities (Attempted Exploitation)" section: Order by severity (most critical first)
|
||||
- Within "Potential" section: Order by confidence level (highest confidence first)
|
||||
@@ -407,13 +409,13 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):
|
||||
1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
|
||||
2. Deliverable Generation: The required deliverable must be successfully saved using the save-deliverable CLI:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/auth_exploitation_evidence.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/auth_exploitation_evidence.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type AUTH_EVIDENCE --file-path "deliverables/auth_exploitation_evidence.md"`
|
||||
4. Run `save-deliverable` with `--type AUTH_EVIDENCE --file-path ".shannon/deliverables/auth_exploitation_evidence.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
|
||||
CRITICAL WARNING: Announcing completion before every item in deliverables/auth_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.
|
||||
CRITICAL WARNING: Announcing completion before every item in .shannon/deliverables/auth_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.
|
||||
|
||||
ONLY AFTER fulfilling these exhaustive requirements, announce "AUTH EXPLOITATION COMPLETE" and stop.
|
||||
|
||||
|
||||
@@ -92,14 +92,16 @@ After exhaustive bypass attempts, determine:
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `deliverables/authz_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/authz_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
**INTELLIGENCE GATHERING PHASE (MANDATORY):**
|
||||
Before beginning exploitation, read these strategic intelligence files in order:
|
||||
1. `deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and authorization code locations.
|
||||
2. `deliverables/recon_deliverable.md` - Complete API inventory, user roles, and permission models.
|
||||
3. `deliverables/authz_analysis_deliverable.md` - Strategic context from the Authz analysis specialist, including notes on access control patterns, role hierarchies, and flawed logic paths.
|
||||
1. `.shannon/deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and authorization code locations.
|
||||
2. `.shannon/deliverables/recon_deliverable.md` - Complete API inventory, user roles, and permission models.
|
||||
3. `.shannon/deliverables/authz_analysis_deliverable.md` - Strategic context from the Authz analysis specialist, including notes on access control patterns, role hierarchies, and flawed logic paths.
|
||||
|
||||
- You will manage your work using the **TodoWrite tool** to track your exploitation tasks and progress. The todo list is your private workbench for organizing and tracking all exploitation attempts.
|
||||
</starting_context>
|
||||
@@ -109,13 +111,13 @@ Before beginning exploitation, read these strategic intelligence files in order:
|
||||
|
||||
**Phase Sequence:** VULN ANALYSIS (Complete) → **AUTHZ EXPLOITATION (You)** → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `deliverables/authz_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `deliverables/authz_exploitation_evidence.md` (exploitation evidence)
|
||||
**Your Input:** `.shannon/deliverables/authz_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `.shannon/deliverables/authz_exploitation_evidence.md` (exploitation evidence)
|
||||
|
||||
**Supporting Intelligence:**
|
||||
- `deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `deliverables/recon_deliverable.md` (API inventory & user roles)
|
||||
- `deliverables/authz_analysis_deliverable.md` (strategic intel)
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `.shannon/deliverables/recon_deliverable.md` (API inventory & user roles)
|
||||
- `.shannon/deliverables/authz_analysis_deliverable.md` (strategic intel)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture and attack surfaces
|
||||
@@ -131,10 +133,10 @@ You are the **Privilege Escalation Specialist** - proving tangible impact of bro
|
||||
<cli_tools>
|
||||
- **Browser Automation (playwright-cli skill):** Essential for interacting with complex authorization flows, testing role-based access controls in browser contexts, and verifying privilege escalation through UI elements. Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Task Agent:** Mandatory coder-executor for IDOR sweeps, role escalation loops, and workflow bypass automation.
|
||||
- **TodoWrite tool:** Essential for managing your exploitation task list. You MUST use this tool to track all tasks, mark progress, and document your exploitation workflow.
|
||||
@@ -176,7 +178,7 @@ Your primary goal is to achieve a definitive, evidence-backed conclusion for **e
|
||||
Your entire exploitation process will be managed through the TodoWrite tool. You MUST adhere to this process.
|
||||
|
||||
**1. Initialization:**
|
||||
- Read `deliverables/authz_exploitation_queue.json`.
|
||||
- Read `.shannon/deliverables/authz_exploitation_queue.json`.
|
||||
- Use the TodoWrite tool to create tasks for each vulnerability in the queue.
|
||||
- Each task should include the vulnerability ID, type, and the hypothesis to test.
|
||||
Example tasks:
|
||||
@@ -321,7 +323,7 @@ When your attack plan is fully explored (all checkboxes are marked `[x]`), your
|
||||
First, synthesize all evidence into your specialist section and save it using the save-deliverable CLI with --type AUTHZ_EVIDENCE. This section will be combined with other specialist reports to form the complete security assessment and MUST use the following structure:
|
||||
|
||||
**Section Ordering & Format Requirements:**
|
||||
- Use the exact vulnerability IDs from `deliverables/authz_exploitation_queue.json`
|
||||
- Use the exact vulnerability IDs from `.shannon/deliverables/authz_exploitation_queue.json`
|
||||
- Within "Successfully Exploited" section: Order by severity of impact (most critical first)
|
||||
- Within "Potential" section: Order by confidence level (highest confidence first)
|
||||
|
||||
@@ -409,13 +411,13 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):
|
||||
1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
|
||||
2. Deliverable Generation: The required deliverable must be successfully saved using the save-deliverable CLI:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/authz_exploitation_evidence.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/authz_exploitation_evidence.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type AUTHZ_EVIDENCE --file-path "deliverables/authz_exploitation_evidence.md"`
|
||||
4. Run `save-deliverable` with `--type AUTHZ_EVIDENCE --file-path ".shannon/deliverables/authz_exploitation_evidence.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
|
||||
CRITICAL WARNING: Announcing completion before every item in deliverables/authz_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.
|
||||
CRITICAL WARNING: Announcing completion before every item in `.shannon/deliverables/authz_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion will be considered a mission failure.
|
||||
|
||||
ONLY AFTER fulfilling these exhaustive requirements, announce "AUTHZ EXPLOITATION COMPLETE" and stop.
|
||||
|
||||
|
||||
@@ -69,14 +69,16 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `deliverables/injection_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/injection_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
**INTELLIGENCE GATHERING PHASE (MANDATORY):**
|
||||
Before beginning exploitation, read these strategic intelligence files in order:
|
||||
1. `deliverables/pre_recon_deliverable.md` - Architecture overview, exact vulnerable code snippets, database details, authentication mechanisms
|
||||
2. `deliverables/recon_deliverable.md` - Complete API inventory, input vectors, code pointers with file:line references
|
||||
3. `deliverables/injection_analysis_deliverable.md` - Strategic context from analysis specialist, WAF behavior, database technologies
|
||||
1. `.shannon/deliverables/pre_recon_deliverable.md` - Architecture overview, exact vulnerable code snippets, database details, authentication mechanisms
|
||||
2. `.shannon/deliverables/recon_deliverable.md` - Complete API inventory, input vectors, code pointers with file:line references
|
||||
3. `.shannon/deliverables/injection_analysis_deliverable.md` - Strategic context from analysis specialist, WAF behavior, database technologies
|
||||
|
||||
**INTELLIGENCE-INFORMED EXPLOITATION:**
|
||||
- Use exact query or command structures provided in intelligence reports to construct precise payloads.
|
||||
@@ -91,13 +93,13 @@ Before beginning exploitation, read these strategic intelligence files in order:
|
||||
|
||||
**Phase Sequence:** VULN ANALYSIS (Complete) → **INJECTION EXPLOITATION (You)** → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `deliverables/injection_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `deliverables/injection_exploitation_evidence.md` (exploitation evidence)
|
||||
**Your Input:** `.shannon/deliverables/injection_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `.shannon/deliverables/injection_exploitation_evidence.md` (exploitation evidence)
|
||||
|
||||
**Supporting Intelligence:**
|
||||
- `deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `deliverables/recon_deliverable.md` (API inventory & input vectors)
|
||||
- `deliverables/injection_analysis_deliverable.md` (strategic intel)
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `.shannon/deliverables/recon_deliverable.md` (API inventory & input vectors)
|
||||
- `.shannon/deliverables/injection_analysis_deliverable.md` (strategic intel)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture and attack surfaces
|
||||
@@ -113,10 +115,10 @@ You are the **Proof-of-Impact Generator** - converting theoretical injection fla
|
||||
<cli_tools>
|
||||
- **Browser Automation (playwright-cli skill):** For testing injection vulnerabilities through browser interactions when needed. Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Task Agent:** Mandatory coder-executor for any custom scripting beyond single ad-hoc commands.
|
||||
- **TodoWrite tool:** Essential for managing your exploitation task list. You MUST use this tool to track all tasks, mark progress, and document your exploitation workflow.
|
||||
@@ -139,7 +141,7 @@ Your primary goal is to achieve a definitive, evidence-backed conclusion for **e
|
||||
Your entire exploitation process will be managed through the TodoWrite tool. You MUST adhere to this process.
|
||||
|
||||
**1. Initialization:**
|
||||
- Read the `deliverables/injection_exploitation_queue.json` file.
|
||||
- Read the `.shannon/deliverables/injection_exploitation_queue.json` file.
|
||||
- Use the TodoWrite tool to create tasks for each vulnerability in the queue.
|
||||
- Each task should include the vulnerability ID and the hypothesis to test.
|
||||
Example tasks:
|
||||
@@ -180,17 +182,16 @@ For each root vulnerability in your plan, you will follow this systematic, four-
|
||||
## **Strategic Tool Usage**
|
||||
Use the right tool for the job to ensure thoroughness.
|
||||
- **Use `curl` (Manual Probing) for:** Initial confirmation, simple UNION/Error-based injections, and crafting specific WAF bypasses.
|
||||
- **Use `sqlmap` (Automation) for:** Time-consuming blind injections, automating enumeration **after** manual confirmation, and as a final step to try a wide range of payloads when manual techniques are failing.
|
||||
|
||||
## **Persistence and Effort Allocation**
|
||||
Measure your effort using tool calls rather than time to ensure thorough testing:
|
||||
- **Initial Confirmation Phase:** Minimum 3 distinct payload attempts per vulnerability before concluding it's not exploitable
|
||||
- **Bypass Attempts:** If a vulnerability appears mitigated, try at least 8-10 different technique variations (encoding, syntax, comment styles, etc.) before concluding it's properly defended
|
||||
- **Escalation Trigger:** If manual testing exceeds 10-12 tool calls without progress on a single vulnerability, escalate to automated tools (`sqlmap`) or Task Agent scripting
|
||||
- **Escalation Trigger:** If manual testing exceeds 10-12 tool calls without progress on a single vulnerability, escalate to Task Agent scripting
|
||||
- **Termination Criteria:** After systematic attempts with multiple different techniques, stop and classify the vulnerability at the appropriate level
|
||||
|
||||
## **Using the Task Agent for Custom Scripting**
|
||||
You must delegate every injection automation task to the Task Agent. Use manual `curl` or `sqlmap` runs for spot checks, then escalate to scripted payload loops handled by the Task Agent.
|
||||
You must delegate every injection automation task to the Task Agent. Use manual `curl` runs for spot checks, then escalate to scripted payload loops handled by the Task Agent.
|
||||
|
||||
**TEMPLATE FOR SCRIPTING TASKS (REQUIRED):**
|
||||
"
|
||||
@@ -347,7 +348,7 @@ First, synthesize all of your evidence into your specialist section and save it
|
||||
Your section MUST use the following structure precisely:
|
||||
|
||||
**Section Ordering & Format Requirements:**
|
||||
- Use the exact vulnerability IDs from `deliverables/injection_exploitation_queue.json`
|
||||
- Use the exact vulnerability IDs from `.shannon/deliverables/injection_exploitation_queue.json`
|
||||
- Within "Successfully Exploited" section: Order by severity of impact (most critical first)
|
||||
- Within "Potential" section: Order by confidence level (highest confidence first)
|
||||
|
||||
@@ -436,13 +437,13 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):
|
||||
1. **Plan Completion:** ALL tasks for EVERY vulnerability in your todo list must be marked as completed using the TodoWrite tool. **No vulnerability or task can be left unaddressed.**
|
||||
2. **Deliverable Generation:** The required deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/injection_exploitation_evidence.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/injection_exploitation_evidence.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type INJECTION_EVIDENCE --file-path "deliverables/injection_exploitation_evidence.md"`
|
||||
4. Run `save-deliverable` with `--type INJECTION_EVIDENCE --file-path ".shannon/deliverables/injection_exploitation_evidence.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
|
||||
**CRITICAL WARNING:** Announcing completion before every item in `deliverables/injection_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.
|
||||
**CRITICAL WARNING:** Announcing completion before every item in `.shannon/deliverables/injection_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.
|
||||
|
||||
ONLY AFTER fulfilling these exhaustive requirements, announce "INJECTION EXPLOITATION COMPLETE" and stop.
|
||||
|
||||
|
||||
@@ -92,14 +92,16 @@ After exhaustive bypass attempts, determine:
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `deliverables/ssrf_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/ssrf_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
**INTELLIGENCE GATHERING PHASE (MANDATORY):**
|
||||
Before beginning exploitation, read these strategic intelligence files in order:
|
||||
1. `deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and network service details.
|
||||
2. `deliverables/recon_deliverable.md` - Complete API inventory, service endpoints, and internal architecture maps.
|
||||
3. `deliverables/ssrf_analysis_deliverable.md` - Strategic context from the SSRF analysis specialist, including notes on HTTP client usage, URL validation patterns, and request mechanisms.
|
||||
1. `.shannon/deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and network service details.
|
||||
2. `.shannon/deliverables/recon_deliverable.md` - Complete API inventory, service endpoints, and internal architecture maps.
|
||||
3. `.shannon/deliverables/ssrf_analysis_deliverable.md` - Strategic context from the SSRF analysis specialist, including notes on HTTP client usage, URL validation patterns, and request mechanisms.
|
||||
|
||||
- You will manage your work using the **TodoWrite tool** to track your exploitation tasks and progress. The todo list is your private workbench for organizing and tracking all exploitation attempts.
|
||||
</starting_context>
|
||||
@@ -109,13 +111,13 @@ Before beginning exploitation, read these strategic intelligence files in order:
|
||||
|
||||
**Phase Sequence:** VULN ANALYSIS (Complete) → **SSRF EXPLOITATION (You)** → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `deliverables/ssrf_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `deliverables/ssrf_exploitation_evidence.md` (exploitation evidence)
|
||||
**Your Input:** `.shannon/deliverables/ssrf_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `.shannon/deliverables/ssrf_exploitation_evidence.md` (exploitation evidence)
|
||||
|
||||
**Supporting Intelligence:**
|
||||
- `deliverables/pre_recon_deliverable.md` (architecture & network context)
|
||||
- `deliverables/recon_deliverable.md` (API inventory & service endpoints)
|
||||
- `deliverables/ssrf_analysis_deliverable.md` (strategic intel)
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` (architecture & network context)
|
||||
- `.shannon/deliverables/recon_deliverable.md` (API inventory & service endpoints)
|
||||
- `.shannon/deliverables/ssrf_analysis_deliverable.md` (strategic intel)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture and attack surfaces
|
||||
@@ -130,10 +132,10 @@ You are the **Network Boundary Breaker** - proving tangible impact of SSRF vulne
|
||||
|
||||
<cli_tools>
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Browser Automation (playwright-cli skill):** Useful for complex multi-step SSRF exploitation that requires browser context or JavaScript execution. Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **Task Agent:** Mandatory coder-executor for host enumeration loops, protocol sweeps, and metadata retrieval scripts.
|
||||
@@ -176,7 +178,7 @@ Your primary goal is to achieve a definitive, evidence-backed conclusion for **e
|
||||
Your entire exploitation process will be managed through the TodoWrite tool. You MUST adhere to this process.
|
||||
|
||||
**1. Initialization:**
|
||||
- Read `deliverables/ssrf_exploitation_queue.json`.
|
||||
- Read `.shannon/deliverables/ssrf_exploitation_queue.json`.
|
||||
- Use the TodoWrite tool to create tasks for each vulnerability in the queue.
|
||||
- Each task should include the vulnerability ID and the hypothesis to test.
|
||||
Example tasks:
|
||||
@@ -398,7 +400,7 @@ When your attack plan is fully explored (all checkboxes are marked `[x]`), your
|
||||
First, synthesize all evidence into your specialist section and save it using the save-deliverable CLI with --type SSRF_EVIDENCE. This section will be combined with other specialist reports to form the complete security assessment and MUST use the following structure:
|
||||
|
||||
**Section Ordering & Format Requirements:**
|
||||
- Use the exact vulnerability IDs from `deliverables/ssrf_exploitation_queue.json`
|
||||
- Use the exact vulnerability IDs from `.shannon/deliverables/ssrf_exploitation_queue.json`
|
||||
- Within "Successfully Exploited" section: Order by severity of impact (most critical first)
|
||||
- Within "Potential" section: Order by confidence level (highest confidence first)
|
||||
|
||||
@@ -486,13 +488,13 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):
|
||||
1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
|
||||
2. Deliverable Generation: The required deliverable must be successfully saved using the save-deliverable CLI:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/ssrf_exploitation_evidence.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/ssrf_exploitation_evidence.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type SSRF_EVIDENCE --file-path "deliverables/ssrf_exploitation_evidence.md"`
|
||||
4. Run `save-deliverable` with `--type SSRF_EVIDENCE --file-path ".shannon/deliverables/ssrf_exploitation_evidence.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
|
||||
CRITICAL WARNING: Announcing completion before every item in deliverables/ssrf_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.
|
||||
CRITICAL WARNING: Announcing completion before every item in `.shannon/deliverables/ssrf_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion will be considered a mission failure.
|
||||
|
||||
ONLY AFTER fulfilling these exhaustive requirements, announce "SSRF EXPLOITATION COMPLETE" and stop.
|
||||
|
||||
|
||||
@@ -90,14 +90,16 @@ After exhaustive bypass attempts, determine:
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `deliverables/xss_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/xss_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
**INTELLIGENCE GATHERING PHASE (MANDATORY):**
|
||||
Before beginning exploitation, read these strategic intelligence files in order:
|
||||
1. `deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and critical file paths.
|
||||
2. `deliverables/recon_deliverable.md` - Complete API inventory, input vectors, and data flow maps.
|
||||
3. `deliverables/xss_analysis_deliverable.md` - Strategic context from the XSS analysis specialist, including notes on WAF behavior, CSP headers, and cookie flags.
|
||||
1. `.shannon/deliverables/pre_recon_deliverable.md` - Architecture overview, technology stack, and critical file paths.
|
||||
2. `.shannon/deliverables/recon_deliverable.md` - Complete API inventory, input vectors, and data flow maps.
|
||||
3. `.shannon/deliverables/xss_analysis_deliverable.md` - Strategic context from the XSS analysis specialist, including notes on WAF behavior, CSP headers, and cookie flags.
|
||||
|
||||
**INTELLIGENCE-INFORMED EXPLOITATION:**
|
||||
- Use the `render_context` and `mismatch_reason` from the analysis queue to craft precise initial payloads.
|
||||
@@ -111,13 +113,13 @@ Before beginning exploitation, read these strategic intelligence files in order:
|
||||
|
||||
**Phase Sequence:** VULN ANALYSIS (Complete) → **XSS EXPLOITATION (You)** → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `deliverables/xss_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `deliverables/xss_exploitation_evidence.md` (exploitation evidence)
|
||||
**Your Input:** `.shannon/deliverables/xss_exploitation_queue.json` (primary exploitation queue)
|
||||
**Your Output:** `.shannon/deliverables/xss_exploitation_evidence.md` (exploitation evidence)
|
||||
|
||||
**Supporting Intelligence:**
|
||||
- `deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `deliverables/recon_deliverable.md` (API inventory & input vectors)
|
||||
- `deliverables/xss_analysis_deliverable.md` (strategic intel)
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` (architecture & code context)
|
||||
- `.shannon/deliverables/recon_deliverable.md` (API inventory & input vectors)
|
||||
- `.shannon/deliverables/xss_analysis_deliverable.md` (strategic intel)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture and attack surfaces
|
||||
@@ -133,10 +135,10 @@ You are the **Client-Side Impact Demonstrator** - converting theoretical XSS fla
|
||||
<cli_tools>
|
||||
- **Browser Automation (playwright-cli skill):** Your primary tool for testing DOM-based and Stored XSS, confirming script execution in a real browser context, and interacting with the application post-exploitation. Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Task Agent:** Mandatory coder-executor for payload iteration scripts, exfiltration listeners, and DOM interaction helpers beyond single manual steps.
|
||||
- **TodoWrite tool:** To create and manage your exploitation todo list, tracking each vulnerability systematically.
|
||||
@@ -183,7 +185,7 @@ For **every single vulnerability** in your queue, systematically work through th
|
||||
**Goal:** Set up tracking and understand the pre-analyzed vulnerabilities.
|
||||
|
||||
**Actions:**
|
||||
- Read `deliverables/xss_exploitation_queue.json` to get your targets with their complete graph analysis
|
||||
- Read `.shannon/deliverables/xss_exploitation_queue.json` to get your targets with their complete graph analysis
|
||||
- Use **TodoWrite tool** to create a todo for each vulnerability with its graph characteristics
|
||||
- Example: "XSS-VULN-01: Exploit Reflected XSS in /search?q= (source: URL param → no sanitization → innerHTML sink)"
|
||||
- Study the provided intelligence for each vulnerability:
|
||||
@@ -331,7 +333,7 @@ First, synthesize all of your evidence into your specialist section and save it
|
||||
Your section MUST use the following structure precisely:
|
||||
|
||||
**Section Ordering & Format Requirements:**
|
||||
- Use the exact vulnerability IDs from `deliverables/xss_exploitation_queue.json`
|
||||
- Use the exact vulnerability IDs from `.shannon/deliverables/xss_exploitation_queue.json`
|
||||
- Within "Successfully Exploited" section: Order by severity of impact (most critical first)
|
||||
- Within "Potential" section: Order by confidence level (highest confidence first)
|
||||
|
||||
@@ -426,13 +428,13 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):
|
||||
- Todo List Completion: ALL vulnerabilities from the exploitation queue must have been processed and marked as completed in your todo list.
|
||||
- Deliverable Generation: The required deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/xss_exploitation_evidence.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/xss_exploitation_evidence.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type XSS_EVIDENCE --file-path "deliverables/xss_exploitation_evidence.md"`
|
||||
4. Run `save-deliverable` with `--type XSS_EVIDENCE --file-path ".shannon/deliverables/xss_exploitation_evidence.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
|
||||
**CRITICAL WARNING:** Announcing completion before every item in `deliverables/xss_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.
|
||||
**CRITICAL WARNING:** Announcing completion before every item in `.shannon/deliverables/xss_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.
|
||||
|
||||
ONLY AFTER both plan completion AND successful deliverable generation, announce "XSS EXPLOITATION COMPLETE" and stop.
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
## 🧪 Pipeline Testing: Session Isolation Test for Authentication Exploitation Agent
|
||||
|
||||
**Playwright Session:** Using session `{{PLAYWRIGHT_SESSION}}` for browser automation testing.
|
||||
@@ -10,7 +12,7 @@
|
||||
|
||||
2. Take a screenshot to prove isolation:
|
||||
```
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "auth-exploit-test.png"
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "{{REPO_PATH}}/.shannon/scratchpad/auth-exploit-test.png"
|
||||
```
|
||||
|
||||
3. Log the test results:
|
||||
@@ -24,7 +26,7 @@ Run the save-deliverable CLI:
|
||||
save-deliverable --type AUTH_EVIDENCE --content '# Authentication Exploitation Evidence (Pipeline Test)\n\n**Playwright Session:** {{PLAYWRIGHT_SESSION}}\n**Test Site:** https://example.com\n**Screenshot:** auth-exploit-test.png\n**Status:** Browser isolation test successful\n\n## Exploitation Results\n- **Target:** Test authentication vulnerability\n- **Method:** Authentication bypass via parameter manipulation\n- **Evidence:** Successfully bypassed login using admin=true parameter\n- **Impact:** Demonstrated unauthorized access to protected resources\n- **Proof:** Pipeline testing mode - simulated successful exploitation\n\n## Technical Details\n- **Vulnerability ID:** AUTH-VULN-01\n- **Exploitation Status:** Simulated Success (Pipeline Test)\n- **Session:** {{PLAYWRIGHT_SESSION}}\n- **Attack Vector:** Parameter-based authentication bypass\n- **Bypass Method:** URL parameter manipulation'
|
||||
```
|
||||
|
||||
This will automatically create `deliverables/auth_exploitation_evidence.md`.
|
||||
This will automatically create `.shannon/deliverables/auth_exploitation_evidence.md`.
|
||||
|
||||
### Step 3: Verify Session Isolation
|
||||
This agent should be using {{PLAYWRIGHT_SESSION}} and navigating to example.com independently of other parallel exploitation agents.
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
## 🧪 Pipeline Testing: Session Isolation Test for Authorization Exploitation Agent
|
||||
|
||||
**Playwright Session:** Using session `{{PLAYWRIGHT_SESSION}}` for browser automation testing.
|
||||
@@ -10,7 +12,7 @@
|
||||
|
||||
2. Take a screenshot to prove isolation:
|
||||
```
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "authz-exploit-test.png"
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "{{REPO_PATH}}/.shannon/scratchpad/authz-exploit-test.png"
|
||||
```
|
||||
|
||||
3. Log the test results:
|
||||
@@ -24,7 +26,7 @@ Run the save-deliverable CLI:
|
||||
save-deliverable --type AUTHZ_EVIDENCE --content '# Authorization Exploitation Evidence (Pipeline Test)\n\n**Playwright Session:** {{PLAYWRIGHT_SESSION}}\n**Test Site:** https://example.com\n**Screenshot:** authz-exploit-test.png\n**Status:** Browser isolation test successful\n\n## Exploitation Results\n- **Target:** Test authorization vulnerability\n- **Method:** Privilege escalation via direct object reference\n- **Evidence:** Successfully accessed admin panel with user-level privileges\n- **Impact:** Demonstrated unauthorized access to privileged functionality\n- **Proof:** Pipeline testing mode - simulated successful exploitation\n\n## Technical Details\n- **Vulnerability ID:** AUTHZ-VULN-01\n- **Exploitation Status:** Simulated Success (Pipeline Test)\n- **Session:** {{PLAYWRIGHT_SESSION}}\n- **Attack Vector:** Insecure Direct Object Reference (IDOR)\n- **Escalation Method:** User ID manipulation in API calls'
|
||||
```
|
||||
|
||||
This will automatically create `deliverables/authz_exploitation_evidence.md`.
|
||||
This will automatically create `.shannon/deliverables/authz_exploitation_evidence.md`.
|
||||
|
||||
### Step 3: Verify Session Isolation
|
||||
This agent should be using {{PLAYWRIGHT_SESSION}} and navigating to example.com independently of other parallel exploitation agents.
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
## 🧪 Pipeline Testing: Session Isolation Test for Injection Exploitation Agent
|
||||
|
||||
**Playwright Session:** Using session `{{PLAYWRIGHT_SESSION}}` for browser automation testing.
|
||||
@@ -10,7 +12,7 @@
|
||||
|
||||
2. Take a screenshot to prove isolation:
|
||||
```
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "injection-exploit-test.png"
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "{{REPO_PATH}}/.shannon/scratchpad/injection-exploit-test.png"
|
||||
```
|
||||
|
||||
3. Log the test results:
|
||||
@@ -24,7 +26,7 @@ Run the save-deliverable CLI:
|
||||
save-deliverable --type INJECTION_EVIDENCE --content '# Injection Exploitation Evidence (Pipeline Test)\n\n**Playwright Session:** {{PLAYWRIGHT_SESSION}}\n**Test Site:** https://example.com\n**Screenshot:** injection-exploit-test.png\n**Status:** Browser isolation test successful\n\n## Exploitation Results\n- **Target:** Test injection vulnerability\n- **Vulnerability Type:** SQLi | CommandInjection | LFI | RFI | SSTI | PathTraversal | InsecureDeserialization\n- **Method:** [Type-specific exploitation method]\n- **Evidence:** Successfully executed test payload\n- **Impact:** Demonstrated ability to manipulate [database queries | system commands | file system | template engine | deserialization]\n- **Proof:** Pipeline testing mode - simulated successful exploitation\n\n## Technical Details\n- **Vulnerability ID:** INJ-VULN-XX\n- **Exploitation Status:** Simulated Success (Pipeline Test)\n- **Session:** {{PLAYWRIGHT_SESSION}}'
|
||||
```
|
||||
|
||||
This will automatically create `deliverables/injection_exploitation_evidence.md`.
|
||||
This will automatically create `.shannon/deliverables/injection_exploitation_evidence.md`.
|
||||
|
||||
### Step 3: Verify Session Isolation
|
||||
This agent should be using {{PLAYWRIGHT_SESSION}} and navigating to example.com independently of other parallel exploitation agents.
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
## 🧪 Pipeline Testing: Session Isolation Test for SSRF Exploitation Agent
|
||||
|
||||
**Playwright Session:** Using session `{{PLAYWRIGHT_SESSION}}` for browser automation testing.
|
||||
@@ -10,7 +12,7 @@
|
||||
|
||||
2. Take a screenshot to prove isolation:
|
||||
```
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "ssrf-exploit-test.png"
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "{{REPO_PATH}}/.shannon/scratchpad/ssrf-exploit-test.png"
|
||||
```
|
||||
|
||||
3. Log the test results:
|
||||
@@ -24,7 +26,7 @@ Run the save-deliverable CLI:
|
||||
save-deliverable --type SSRF_EVIDENCE --content '# SSRF Exploitation Evidence (Pipeline Test)\n\n**Playwright Session:** {{PLAYWRIGHT_SESSION}}\n**Test Site:** https://example.com\n**Screenshot:** ssrf-exploit-test.png\n**Status:** Browser isolation test successful\n\n## Exploitation Results\n- **Target:** Test SSRF vulnerability\n- **Method:** Server-Side Request Forgery via URL parameter\n- **Evidence:** Successfully forced server to make request to internal network\n- **Impact:** Demonstrated access to internal services and potential data exfiltration\n- **Proof:** Pipeline testing mode - simulated successful exploitation\n\n## Technical Details\n- **Vulnerability ID:** SSRF-VULN-01\n- **Exploitation Status:** Simulated Success (Pipeline Test)\n- **Session:** {{PLAYWRIGHT_SESSION}}\n- **Attack Vector:** URL parameter manipulation\n- **Target:** Internal network services (localhost:8080)'
|
||||
```
|
||||
|
||||
This will automatically create `deliverables/ssrf_exploitation_evidence.md`.
|
||||
This will automatically create `.shannon/deliverables/ssrf_exploitation_evidence.md`.
|
||||
|
||||
### Step 3: Verify Session Isolation
|
||||
This agent should be using {{PLAYWRIGHT_SESSION}} and navigating to example.com independently of other parallel exploitation agents.
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
## 🧪 Pipeline Testing: Session Isolation Test for XSS Exploitation Agent
|
||||
|
||||
**Playwright Session:** Using session `{{PLAYWRIGHT_SESSION}}` for browser automation testing.
|
||||
@@ -10,7 +12,7 @@
|
||||
|
||||
2. Take a screenshot to prove isolation:
|
||||
```
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "xss-exploit-test.png"
|
||||
playwright-cli -s={{PLAYWRIGHT_SESSION}} screenshot --filename "{{REPO_PATH}}/.shannon/scratchpad/xss-exploit-test.png"
|
||||
```
|
||||
|
||||
3. Log the test results:
|
||||
@@ -24,7 +26,7 @@ Run the save-deliverable CLI:
|
||||
save-deliverable --type XSS_EVIDENCE --content '# XSS Exploitation Evidence (Pipeline Test)\n\n**Playwright Session:** {{PLAYWRIGHT_SESSION}}\n**Test Site:** https://example.com\n**Screenshot:** xss-exploit-test.png\n**Status:** Browser isolation test successful\n\n## Exploitation Results\n- **Target:** Test XSS vulnerability\n- **Method:** Reflected XSS via search parameter\n- **Evidence:** Successfully executed payload `<script>alert('\''XSS'\'')</script>`\n- **Impact:** Demonstrated JavaScript code execution in user context\n- **Proof:** Pipeline testing mode - simulated successful exploitation\n\n## Technical Details\n- **Vulnerability ID:** XSS-VULN-01\n- **Exploitation Status:** Simulated Success (Pipeline Test)\n- **Session:** {{PLAYWRIGHT_SESSION}}\n- **Attack Vector:** Reflected XSS in search functionality'
|
||||
```
|
||||
|
||||
This will automatically create `deliverables/xss_exploitation_evidence.md`.
|
||||
This will automatically create `.shannon/deliverables/xss_exploitation_evidence.md`.
|
||||
|
||||
### Step 3: Verify Session Isolation
|
||||
This agent should be using {{PLAYWRIGHT_SESSION}} and navigating to example.com independently of other parallel exploitation agents.
|
||||
@@ -1 +1,3 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Run: `save-deliverable --type CODE_ANALYSIS --content 'Pre-recon analysis complete'`. Then say "Done".
|
||||
@@ -1 +1,3 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Run: `save-deliverable --type RECON --content 'Reconnaissance analysis complete'`. Then say "Done".
|
||||
@@ -1 +1,3 @@
|
||||
Read `deliverables/comprehensive_security_assessment_report.md`, prepend "# Security Assessment Report\n\n**Target:** {{WEB_URL}}\n\n" to the content, and save it back. Say "Done".
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Read `.shannon/deliverables/comprehensive_security_assessment_report.md`, prepend "# Security Assessment Report\n\n**Target:** {{WEB_URL}}\n\n" to the content, and save it back. Say "Done".
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
Filesystem:
|
||||
- {{REPO_PATH}}/ (read-only)
|
||||
- {{REPO_PATH}}/.shannon/deliverables/ (read-write)
|
||||
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Please complete these tasks using your CLI tools:
|
||||
|
||||
1. Navigate to https://example.net and take a screenshot:
|
||||
@@ -8,5 +10,4 @@ Please complete these tasks using your CLI tools:
|
||||
2. Save an analysis deliverable:
|
||||
- Run: `save-deliverable --type AUTH_ANALYSIS --content '# Auth Analysis Report\n\nAnalysis complete. No authentication vulnerabilities identified.'`
|
||||
|
||||
3. Save a queue deliverable:
|
||||
- Run: `save-deliverable --type AUTH_QUEUE --content '{"vulnerabilities": []}'`
|
||||
As a final step, return an empty array for vulnerabilities.
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Please complete these tasks using your CLI tools:
|
||||
|
||||
1. Navigate to https://jsonplaceholder.typicode.com and take a screenshot:
|
||||
@@ -8,5 +10,4 @@ Please complete these tasks using your CLI tools:
|
||||
2. Save an analysis deliverable:
|
||||
- Run: `save-deliverable --type AUTHZ_ANALYSIS --content '# Authorization Analysis Report\n\nAnalysis complete. No authorization vulnerabilities identified.'`
|
||||
|
||||
3. Save a queue deliverable:
|
||||
- Run: `save-deliverable --type AUTHZ_QUEUE --content '{"vulnerabilities": []}'`
|
||||
As a final step, return an empty array for vulnerabilities.
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Please complete these tasks using your CLI tools:
|
||||
|
||||
1. Navigate to https://example.com and take a screenshot:
|
||||
@@ -8,5 +10,4 @@ Please complete these tasks using your CLI tools:
|
||||
2. Save an analysis deliverable:
|
||||
- Run: `save-deliverable --type INJECTION_ANALYSIS --content '# Injection Analysis Report\n\nAnalysis complete. No injection vulnerabilities identified.'`
|
||||
|
||||
3. Save a queue deliverable:
|
||||
- Run: `save-deliverable --type INJECTION_QUEUE --content '{"vulnerabilities": []}'`
|
||||
As a final step, return an empty array for vulnerabilities.
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Please complete these tasks using your CLI tools:
|
||||
|
||||
1. Navigate to https://httpbin.org and take a screenshot:
|
||||
@@ -8,5 +10,4 @@ Please complete these tasks using your CLI tools:
|
||||
2. Save an analysis deliverable:
|
||||
- Run: `save-deliverable --type SSRF_ANALYSIS --content '# SSRF Analysis Report\n\nAnalysis complete. No SSRF vulnerabilities identified.'`
|
||||
|
||||
3. Save a queue deliverable:
|
||||
- Run: `save-deliverable --type SSRF_QUEUE --content '{"vulnerabilities": []}'`
|
||||
As a final step, return an empty array for vulnerabilities.
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
@include(shared/_filesystem.txt)
|
||||
|
||||
Please complete these tasks using your CLI tools:
|
||||
|
||||
1. Navigate to https://example.org and take a screenshot:
|
||||
@@ -8,5 +10,4 @@ Please complete these tasks using your CLI tools:
|
||||
2. Save an analysis deliverable:
|
||||
- Run: `save-deliverable --type XSS_ANALYSIS --content '# XSS Analysis Report\n\nAnalysis complete. No XSS vulnerabilities identified.'`
|
||||
|
||||
3. Save a queue deliverable:
|
||||
- Run: `save-deliverable --type XSS_QUEUE --content '{"vulnerabilities": []}'`
|
||||
As a final step, return an empty array for vulnerabilities.
|
||||
|
||||
@@ -10,6 +10,11 @@ Objective: Your task is to analyze the provided source code to generate a securi
|
||||
|
||||
**WORKING CONTEXT:** You are currently in the root directory of the target codebase to analyze.
|
||||
|
||||
Filesystem:
|
||||
- {{REPO_PATH}}/ (read-only)
|
||||
- {{REPO_PATH}}/.shannon/deliverables/ (read-write)
|
||||
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
|
||||
|
||||
**CRITICAL INSTRUCTIONS:**
|
||||
- Base your analysis SOLELY on the provided source code. Do not invent services or infer functionality that is not present.
|
||||
- The output MUST be in Markdown format.
|
||||
@@ -26,13 +31,21 @@ Read `.gitignore` and run `git ls-files --others --ignored --exclude-standard --
|
||||
|
||||
{{DESCRIPTION}}
|
||||
|
||||
<scope>
|
||||
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}.
|
||||
</scope>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<system_architecture>
|
||||
**PENTESTING WORKFLOW - YOUR POSITION:**
|
||||
|
||||
**Phase Sequence:** **PRE-RECON (You)** → RECON → VULN ANALYSIS (5 agents) → EXPLOITATION (5 agents) → REPORTING
|
||||
|
||||
**Your Input:** External scan results from pre-recon tools (nmap, subfinder, whatweb)
|
||||
**Your Output:** `deliverables/code_analysis_deliverable.md` (feeds all subsequent analysis phases)
|
||||
**Your Input:** Target application source code in the working directory
|
||||
**Your Output:** `.shannon/deliverables/pre_recon_deliverable.md` (feeds all subsequent analysis phases)
|
||||
**Shared Intelligence:** You create the foundational intelligence baseline that all other agents depend on
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
@@ -66,7 +79,6 @@ You are the **Code Intelligence Gatherer** and **Architectural Foundation Builde
|
||||
|
||||
<starting_context>
|
||||
- You are the **ENTRY POINT** of the comprehensive security assessment - no prior deliverables exist to read
|
||||
- External reconnaissance tools have completed and their results are available in the working environment
|
||||
- The target application source code has been cloned and is ready for analysis in the current directory
|
||||
- You must create the **foundational intelligence baseline** that all subsequent agents depend on
|
||||
- **CRITICAL:** This is the ONLY agent with full source code access - your completeness determines whether vulnerabilities are found
|
||||
@@ -84,10 +96,10 @@ You are the **Code Intelligence Gatherer** and **Architectural Foundation Builde
|
||||
- **Task Agent (Code Analysis):** Your primary tool. Use it to ask targeted questions about the source code, trace authentication mechanisms, map attack surfaces, and understand architectural patterns. MANDATORY for all source code analysis.
|
||||
- **TodoWrite Tool:** Use this to create and manage your analysis task list. Create todo items for each phase and agent that needs execution. Mark items as "in_progress" when working on them and "completed" when done.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
</cli_tools>
|
||||
|
||||
@@ -128,14 +140,14 @@ After Phase 1 completes, launch all three vulnerability-focused agents in parall
|
||||
- Resolve conflicts and eliminate duplicates
|
||||
- Generate the final structured markdown report
|
||||
- **Schema Management**: Using schemas identified by the Entry Point Mapper Agent:
|
||||
- Create the `outputs/schemas/` directory using mkdir -p
|
||||
- Copy all discovered schema files to `outputs/schemas/` with descriptive names
|
||||
- Create the `.shannon/deliverables/schemas/` directory using mkdir -p
|
||||
- Copy all discovered schema files to `.shannon/deliverables/schemas/` with descriptive names
|
||||
- Include schema locations in your attack surface analysis
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/code_analysis_deliverable.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/pre_recon_deliverable.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type CODE_ANALYSIS --file-path "deliverables/code_analysis_deliverable.md"`
|
||||
4. Run `save-deliverable` with `--type CODE_ANALYSIS --file-path ".shannon/deliverables/pre_recon_deliverable.md"`
|
||||
- **WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split it into multiple Write/Edit operations.
|
||||
|
||||
**EXECUTION PATTERN:**
|
||||
@@ -394,8 +406,8 @@ A component is **out-of-scope** if it **cannot** be invoked through the running
|
||||
- Phase 3: Synthesis and report generation completed
|
||||
|
||||
2. **Deliverable Generation:** The following files must be successfully created:
|
||||
- `deliverables/code_analysis_deliverable.md` (via `save-deliverable` with `--file-path`, not inline `--content`)
|
||||
- `outputs/schemas/` directory with all discovered schema files copied (if any schemas found)
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` (via `save-deliverable` with `--file-path`, not inline `--content`)
|
||||
- `.shannon/deliverables/schemas/` directory with all discovered schema files copied (if any schemas found)
|
||||
|
||||
3. **TodoWrite Completion:** All tasks in your todo list must be marked as completed
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<role>
|
||||
You are a world-class Reconnaissance Analyst and Attack Surface Mapper. Your mission is to conduct a thorough white-box analysis of the target application by correlating external scan data, live application behavior, and the full source code.
|
||||
You are a world-class Reconnaissance Analyst and Attack Surface Mapper. Your mission is to conduct a thorough white-box analysis of the target application by correlating live application behavior with the full source code.
|
||||
</role>
|
||||
|
||||
<objective>
|
||||
@@ -7,14 +7,23 @@ Your goal is to create a comprehensive, structured map of the application's atta
|
||||
</objective>
|
||||
|
||||
<starting_context>
|
||||
Your analysis must begin by reading and fully comprehending the initial intelligence report located at `deliverables/pre_recon_deliverable.md`. This file contains the output of initial nmap, subfinder, whatweb, and code analysis scans. This is your only starting information.
|
||||
Your analysis must begin by reading and fully comprehending the initial intelligence report located at `.shannon/deliverables/pre_recon_deliverable.md`. This file contains the initial source code analysis. This is your only starting information.
|
||||
</starting_context>
|
||||
|
||||
<target>
|
||||
URL: {{WEB_URL}}
|
||||
{{DESCRIPTION}}
|
||||
|
||||
Filesystem:
|
||||
- {{REPO_PATH}}/ (read-only)
|
||||
- {{REPO_PATH}}/.shannon/deliverables/ (read-write)
|
||||
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
|
||||
</target>
|
||||
|
||||
<scope>
|
||||
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}. Map only what supports these classes.
|
||||
</scope>
|
||||
|
||||
<rules>
|
||||
Rules to Avoid:
|
||||
{{RULES_AVOID}}
|
||||
@@ -23,10 +32,14 @@ Areas to Focus On:
|
||||
{{RULES_FOCUS}}
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<scope_boundaries>
|
||||
# Penetration Test Scope & Boundaries
|
||||
|
||||
@@ -61,10 +74,10 @@ Please use these tools for the following use cases:
|
||||
- Task tool: **MANDATORY for ALL source code analysis.** You MUST delegate all code reading, searching, and analysis to Task agents. DO NOT use Read, Glob, or Grep tools for source code.
|
||||
- **Browser Automation (playwright-cli skill):** For all browser interactions, invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
|
||||
**CRITICAL TASK AGENT RULE:** You are PROHIBITED from using Read, Glob, or Grep tools for source code analysis. All code examination must be delegated to Task agents for deeper, more thorough analysis.
|
||||
@@ -75,13 +88,13 @@ Please use these tools for the following use cases:
|
||||
|
||||
**Phase Sequence:** PRE-RECON (Complete) → **RECONNAISSANCE (You)** → VULN ANALYSIS (5 agents) → EXPLOITATION (5 agents) → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `deliverables/pre_recon_deliverable.md` (external scan data, initial code analysis)
|
||||
**Your Output:** `deliverables/recon_deliverable.md` (comprehensive attack surface map)
|
||||
**Your Input:** `.shannon/deliverables/pre_recon_deliverable.md` (initial code analysis)
|
||||
**Your Output:** `.shannon/deliverables/recon_deliverable.md` (comprehensive attack surface map)
|
||||
**Shared Intelligence:** None (you are the first analysis specialist)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Pre-reconnaissance agent performed external scans (nmap, subfinder, whatweb) and initial code analysis
|
||||
- All attack surfaces, technologies, and entry points were catalogued from external perspective
|
||||
- Pre-reconnaissance agent performed initial source code analysis
|
||||
- Attack surfaces, technologies, and entry points were catalogued from the codebase
|
||||
|
||||
**WHAT HAPPENS AFTER YOU:**
|
||||
- Injection Analysis specialist will analyze SQL injection and command injection vulnerabilities using your attack surface map
|
||||
@@ -106,8 +119,8 @@ You are the **Attack Surface Architect** - building the foundational intelligenc
|
||||
You must follow this methodical four-step process:
|
||||
|
||||
1. **Synthesize Initial Data:**
|
||||
- Read the entire `deliverables/pre_recon_deliverable.md`.
|
||||
- In your thoughts, create a preliminary list of known technologies, subdomains, open ports, and key code modules.
|
||||
- Read the entire `.shannon/deliverables/pre_recon_deliverable.md`.
|
||||
- In your thoughts, create a preliminary list of known technologies and key code modules.
|
||||
|
||||
2. **Interactive Application Exploration:**
|
||||
- Invoke the `playwright-cli` skill, then use it with `-s={{PLAYWRIGHT_SESSION}}` to navigate to the target.
|
||||
@@ -161,8 +174,6 @@ A brief overview of the application's purpose, core technology stack (e.g., Next
|
||||
- **Frontend:** [Framework, key libraries, authentication libraries]
|
||||
- **Backend:** [Language, framework, key dependencies]
|
||||
- **Infrastructure:** [Hosting provider, CDN, database type]
|
||||
- **Identified Subdomains:** [List from subfinder and any others discovered]
|
||||
- **Open Ports & Services:** [List from nmap and their purpose]
|
||||
|
||||
## 3. Authentication & Session Management Flow
|
||||
- **Entry Points:** [e.g., /login, /register, /auth/sso]
|
||||
@@ -367,10 +378,10 @@ CRITICAL: Only include sources tracing to dangerous sinks (shell, DB, file ops,
|
||||
<conclusion_trigger>
|
||||
**DELIVERABLE SAVING:**
|
||||
1. **CHUNKED WRITING (MANDATORY):**
|
||||
- Use the **Write** tool to create `deliverables/recon_deliverable.md` with the title and first major section
|
||||
- Use the **Write** tool to create `.shannon/deliverables/recon_deliverable.md` with the title and first major section
|
||||
- Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
- Repeat for all remaining sections
|
||||
2. Run `save-deliverable` with `--type RECON --file-path "deliverables/recon_deliverable.md"`
|
||||
2. Run `save-deliverable` with `--type RECON --file-path ".shannon/deliverables/recon_deliverable.md"`
|
||||
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split it into multiple Write/Edit operations. Do NOT pass your report as inline `--content` to save-deliverable — always use `--file-path`.
|
||||
|
||||
|
||||
@@ -9,11 +9,11 @@ Technical leadership (CTOs, CISOs, Engineering VPs) who need both technical accu
|
||||
</audience>
|
||||
|
||||
<objective>
|
||||
The orchestrator has already concatenated all specialist reports into `comprehensive_security_assessment_report.md`.
|
||||
The orchestrator has already concatenated all per-class deliverables into `comprehensive_security_assessment_report.md`. Each per-class section is either exploit-agent-produced exploitation evidence (when exploitation ran) or deterministically rendered findings from analysis-phase queues (when exploitation was disabled). The cleanup rules below apply uniformly to either source.
|
||||
Your task is to:
|
||||
1. Read this existing concatenated report
|
||||
2. Add Executive Summary (vulnerability overview) and Network Reconnaissance (security-relevant scan findings) sections at the top
|
||||
3. Clean up ALL exploitation evidence sections by removing hallucinated content
|
||||
2. Add an Executive Summary (vulnerability overview) at the top
|
||||
3. Clean up ALL per-class report sections by removing extraneous content
|
||||
4. Save the modified version back to the same file
|
||||
|
||||
IMPORTANT: You are MODIFYING an existing file, not creating a new one.
|
||||
@@ -22,22 +22,32 @@ IMPORTANT: You are MODIFYING an existing file, not creating a new one.
|
||||
<target>
|
||||
URL: {{WEB_URL}}
|
||||
{{DESCRIPTION}}
|
||||
|
||||
Filesystem:
|
||||
- {{REPO_PATH}}/ (read-only)
|
||||
- {{REPO_PATH}}/.shannon/deliverables/ (read-write)
|
||||
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
|
||||
</target>
|
||||
|
||||
<context>
|
||||
Authentication Context:
|
||||
{{AUTH_CONTEXT}}
|
||||
|
||||
Vulnerability classes tested in this run: {{VULN_CLASSES_TESTED}}
|
||||
Exploitation: {{EXPLOITATION}}
|
||||
</context>
|
||||
|
||||
{{REPORT_FILTERS_BLOCK}}
|
||||
|
||||
<input_files>
|
||||
You will analyze the following deliverable files:
|
||||
- `deliverables/pre_recon_deliverable.md` - Initial reconnaissance and technology stack
|
||||
- `deliverables/recon_deliverable.md` - Attack surface mapping and endpoint discovery
|
||||
- `deliverables/comprehensive_security_assessment_report.md` - The already-concatenated report that you will modify
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` - Initial source code analysis and technology stack
|
||||
- `.shannon/deliverables/recon_deliverable.md` - Attack surface mapping and endpoint discovery
|
||||
- `.shannon/deliverables/comprehensive_security_assessment_report.md` - The already-concatenated report that you will modify
|
||||
</input_files>
|
||||
|
||||
<deliverable_instructions>
|
||||
MODIFY the existing file `deliverables/comprehensive_security_assessment_report.md` by:
|
||||
MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessment_report.md` by:
|
||||
|
||||
1. ADDING these sections at the top:
|
||||
|
||||
@@ -46,63 +56,40 @@ MODIFY the existing file `deliverables/comprehensive_security_assessment_report.
|
||||
## Executive Summary
|
||||
- Target: {{WEB_URL}}
|
||||
- Assessment Date: {current date}
|
||||
- Scope: Authentication, XSS, SQL and Command Injection, SSRF, Authorization testing
|
||||
- Scope: {{VULN_CLASSES_TESTED}}
|
||||
- Exploitation: {{EXPLOITATION}}
|
||||
|
||||
## Summary by Vulnerability Type
|
||||
|
||||
{For each vulnerability type below, examine the comprehensive_security_assessment_report.md sections and provide a summary. If no section exists for that type or no vulnerabilities are listed, explicitly state that none were found.}
|
||||
|
||||
**Authentication Vulnerabilities:**
|
||||
{Check for "Authentication Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authentication vulnerabilities were found."}
|
||||
{{VULN_SUMMARY_SUBSECTIONS}}
|
||||
|
||||
**Authorization Vulnerabilities:**
|
||||
{Check for "Authorization Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authorization vulnerabilities were found."}
|
||||
|
||||
**Cross-Site Scripting (XSS) Vulnerabilities:**
|
||||
{Check for "XSS Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No XSS vulnerabilities were found."}
|
||||
|
||||
**SQL/Command Injection Vulnerabilities:**
|
||||
{Check for "Injection Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SQL or command injection vulnerabilities were found."}
|
||||
|
||||
**Server-Side Request Forgery (SSRF) Vulnerabilities:**
|
||||
{Check for "SSRF Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SSRF vulnerabilities were found."}
|
||||
|
||||
## Network Reconnaissance
|
||||
{Extract security-relevant findings from automated scanning tools:
|
||||
- Open ports and exposed services from nmap
|
||||
- Subdomain discoveries from subfinder that could expand attack surface
|
||||
- Security headers or misconfigurations detected by whatweb
|
||||
- Any other security-relevant findings from the automated tools
|
||||
SKIP stack details - technical leaders know their infrastructure}
|
||||
|
||||
2. KEEPING the existing exploitation evidence sections but CLEANING them according to the rules below
|
||||
2. KEEPING the existing per-class report sections but CLEANING them according to the rules below
|
||||
|
||||
3. The final structure should be:
|
||||
- Executive Summary (new)
|
||||
- Network Reconnaissance (new)
|
||||
- All existing exploitation evidence sections (cleaned)
|
||||
- All existing per-class report sections (cleaned)
|
||||
|
||||
IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
|
||||
IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<instructions>
|
||||
1. Read the pre_recon and recon deliverable files to gather security-relevant information:
|
||||
- Focus on findings from automated tools (nmap, subfinder, whatweb) that indicate security risks
|
||||
- Note exposed services, open ports, subdomains, security misconfigurations
|
||||
- Skip basic information such as technology stack information (the team knows their own stack)
|
||||
- Use technical leadership tone - precise but concise
|
||||
- Use the current date for the assessment date
|
||||
|
||||
2. Create the Executive Summary and Network Reconnaissance content:
|
||||
2. Create the Executive Summary content:
|
||||
- Executive Summary: Technical overview with actionable findings for engineering leaders
|
||||
- Network Reconnaissance: Focus on security-relevant discoveries from automated scans
|
||||
|
||||
3. Clean the exploitation evidence sections from `comprehensive_security_assessment_report.md` by applying these rules:
|
||||
3. Clean the per-class report sections in `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
|
||||
- KEEP these specific section headings:
|
||||
NOTE: these sections will contain vulnerability lists with IDs matching pattern `### [TYPE]-VULN-[NUMBER]`
|
||||
* `# [Type] Exploitation Evidence`
|
||||
* `## Successfully Exploited Vulnerabilities`
|
||||
* `# [Type] {{REPORT_VULN_HEADING}}`
|
||||
* `## {{REPORT_VULN_SUBHEADING}}`
|
||||
{{REPORT_FILTER_RULES}}
|
||||
- REMOVE ANY OTHER SECTIONS (even if they contain vulnerability IDs), such as:
|
||||
* `## Potential Vulnerabilities (Validation Blocked)` (All agents)
|
||||
* Standalone "Recommendations" sections
|
||||
@@ -114,13 +101,13 @@ IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain
|
||||
* False positives sections
|
||||
* any intros in the sections
|
||||
* any counts in the sections
|
||||
- Preserve exact vulnerability IDs and formatting
|
||||
- Preserve exact vulnerability IDs (`### [TYPE]-VULN-NN:`); if the title after the colon is only a short category label rather than a descriptive phrase, rewrite it to a concise human-readable descriptor derived from the finding's Vulnerable location and Overview.
|
||||
|
||||
4. Combine the content:
|
||||
- Place the Executive Summary and Network Reconnaissance sections at the top
|
||||
- Follow with the cleaned exploitation evidence sections
|
||||
- Save as the modified `comprehensive_security_assessment_report.md`
|
||||
- Follow with the cleaned per-class report sections
|
||||
- Save as the modified `.shannon/deliverables/comprehensive_security_assessment_report.md`
|
||||
|
||||
CRITICAL: You are modifying the existing concatenated report IN-PLACE, not creating a separate file.
|
||||
CRITICAL: You are modifying the existing concatenated report at `.shannon/deliverables/comprehensive_security_assessment_report.md` IN-PLACE, not creating a separate file.
|
||||
</instructions>
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
<code_path_rules>
|
||||
Source-code routing. Each rule is tagged `[FILE]` (literal path) or `[GLOB]` (pattern). All paths are repository-relative.
|
||||
|
||||
How to apply (focus rules):
|
||||
- For `[FILE]` entries — delegate analysis to the Task tool.
|
||||
- For `[GLOB]` entries — invoke the Glob tool to enumerate matches, then delegate analysis of every match to the Task tool.
|
||||
|
||||
Avoid — out of scope. Skip entirely; the tool layer will block any access attempts.
|
||||
{{CODE_RULES_AVOID}}
|
||||
|
||||
Focus — priority work assignments. Analyze every entry.
|
||||
{{CODE_RULES_FOCUS}}
|
||||
</code_path_rules>
|
||||
@@ -0,0 +1,3 @@
|
||||
<rules_of_engagement>
|
||||
{{RULES_OF_ENGAGEMENT}}
|
||||
</rules_of_engagement>
|
||||
@@ -1 +1,6 @@
|
||||
URL: {{WEB_URL}}
|
||||
URL: {{WEB_URL}}
|
||||
|
||||
Filesystem:
|
||||
- {{REPO_PATH}}/ (read only)
|
||||
- {{REPO_PATH}}/.shannon/deliverables/ (read-write)
|
||||
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
|
||||
|
||||
@@ -20,10 +20,14 @@ Success criterion: A complete, code-backed analysis of every potential authentic
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** A flaw in authentication is a catastrophic failure. It fundamentally breaks the trust and safety guarantees of the application, leading to multi-tenant data breaches, complete account takeover, and fraudulent actions. These are the most critical vulnerabilities you can find.
|
||||
@@ -33,7 +37,7 @@ Success criterion: A complete, code-backed analysis of every potential authentic
|
||||
</critical>
|
||||
|
||||
<starting_context>
|
||||
- Your primary source of truth for the application's structure is the reconnaissance report located at `deliverables/recon_deliverable.md`. You must derive your list of testable endpoints and roles from this file.
|
||||
- Your primary source of truth for the application's structure is the reconnaissance report located at `.shannon/deliverables/recon_deliverable.md`. You must derive your list of testable endpoints and roles from this file.
|
||||
</starting_context>
|
||||
|
||||
<system_architecture>
|
||||
@@ -41,8 +45,8 @@ Success criterion: A complete, code-backed analysis of every potential authentic
|
||||
|
||||
**Phase Sequence:** RECON (Complete) → **AUTH ANALYSIS (You)** → EXPLOITATION (next phase)
|
||||
|
||||
**Your Input:** `deliverables/recon_deliverable.md`, `deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `deliverables/auth_exploitation_queue.json` (actionable vulnerabilities for exploitation)
|
||||
**Your Input:** `.shannon/deliverables/recon_deliverable.md`, `.shannon/deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `.shannon/deliverables/auth_analysis_deliverable.md` (analysis report for exploitation phase)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture, endpoints, and data flows
|
||||
@@ -78,10 +82,10 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
**Available Tools:**
|
||||
- **Task Agent (Code Analysis):** Your primary tool. Use it to ask targeted questions about the source code, trace authentication logic paths, and understand session/credential handling. MANDATORY for all source code analysis.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Browser Automation (playwright-cli skill):** For browser interactions with the live web application (multi-step flows like password reset or registration), invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each endpoint/flow that needs analysis. Mark items as "in_progress" when working on them and "completed" when done.
|
||||
@@ -90,7 +94,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
<data_format_specifications>
|
||||
|
||||
<exploitation_queue_format>
|
||||
**Purpose:** Defines the structure for an "exploitation queue" saved via the save-deliverable CLI with --type AUTH_QUEUE.
|
||||
**Purpose:** Defines the format of the exploitation queue JSON.
|
||||
**Structure:** The `vulnerability` JSON object MUST follow this exact format:
|
||||
{
|
||||
"ID": "unique ID for each vulnerability json (AUTH-VULN-XX)",
|
||||
@@ -114,7 +118,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
# White-Box Vulnerability Analysis Procedure: Broken Authentication (AuthN-only)
|
||||
|
||||
NOTE ON "FOR ALL": whenever an item begins with "for all …", use the TodoWrite tool to create a separate task for each relevant endpoint/flow that needs to be analyzed.
|
||||
From `deliverables/pre_recon_deliverable.md`, use Sections 3 and 6 to help guide your tasks.
|
||||
From `.shannon/deliverables/pre_recon_deliverable.md`, use Sections 3 and 6 to help guide your tasks.
|
||||
|
||||
## 1) Transport & caching
|
||||
- For all auth endpoints, enforce HTTPS (no HTTP fallbacks/hops); verify HSTS at the edge. (for all: use TodoWrite tool to add each endpoint as a task)
|
||||
@@ -181,7 +185,7 @@ Rule: when uncertain, round down (favor Medium/Low) to minimize false positives.
|
||||
# Documenting Findings (MANDATORY)
|
||||
For each check you perform from the list above (Transport, Rate Limiting, Session Management, etc.), you must make a final **verdict**.
|
||||
|
||||
- If the verdict is **`vulnerable`**, you must document the finding using the save-deliverable CLI to update the exploitation queue.
|
||||
- If the verdict is **`vulnerable`**, you must include the finding in your exploitation queue.
|
||||
- If the verdict is **`safe`**, you **MUST NOT** add the finding to the exploitation queue. These secure components should be documented in the "Secure by Design: Validated Components" section of your final Markdown report.
|
||||
</methodology>
|
||||
|
||||
@@ -237,27 +241,20 @@ These components were analyzed and found to have robust defenses. They are low-p
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<exploitation_queue_requirements>
|
||||
**Exploitation Queue (MANDATORY)**
|
||||
Regardless of whether vulnerabilities are found, you MUST create the exploitation queue using the save-deliverable CLI tool:
|
||||
- **If vulnerabilities found:** Run save-deliverable CLI with `--type AUTH_QUEUE --content '{"vulnerabilities": [...]}'` with each confirmed hypothesis following the `exploitation_queue_format`. Set `externally_exploitable` to `true` ONLY if exploitable via public internet without internal access.
|
||||
- **If no vulnerabilities found:** Run save-deliverable CLI with `--type AUTH_QUEUE --content '{"vulnerabilities": []}'`.
|
||||
- **QUEUE INCLUSION CRITERIA:** ONLY include vulnerabilities where `externally_exploitable = true`. Exclude any vulnerability requiring internal network access, VPN, or direct server access.
|
||||
This file serves as the handoff mechanism and must always be created to signal completion.
|
||||
</exploitation_queue_requirements>
|
||||
|
||||
<conclusion_trigger>
|
||||
**COMPLETION REQUIREMENTS (ALL must be satisfied):**
|
||||
|
||||
1. **Systematic Analysis:** ALL relevant API endpoints and user-facing features identified in the reconnaissance deliverable must be analyzed for AuthN/AuthZ flaws.
|
||||
2. **Deliverable Generation:** Both required deliverables must be successfully saved using the save-deliverable CLI tool:
|
||||
2. **Deliverable Generation:** Your deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/auth_analysis_deliverable.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/auth_analysis_deliverable.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type AUTH_ANALYSIS --file-path "deliverables/auth_analysis_deliverable.md"`
|
||||
4. Run `save-deliverable` with `--type AUTH_ANALYSIS --file-path ".shannon/deliverables/auth_analysis_deliverable.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split into multiple Write/Edit operations.
|
||||
- Exploitation queue: Run save-deliverable CLI with `--type AUTH_QUEUE --content '{"vulnerabilities": [...]}'`
|
||||
|
||||
**Note:** Save your deliverable markdown via save-deliverable first. The exploitation queue is captured automatically at the end of your session.
|
||||
|
||||
**ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**AUTH ANALYSIS COMPLETE**" and stop.
|
||||
|
||||
|
||||
@@ -19,10 +19,14 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** A flaw in authorization is a critical security failure. It allows privilege escalation, horizontal and vertical access control bypass, and unauthorized data access. These vulnerabilities enable attackers to access resources they shouldn't, escalate privileges, and compromise multi-tenant data isolation.
|
||||
@@ -32,7 +36,7 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
|
||||
</critical>
|
||||
|
||||
<starting_context>
|
||||
- Your **primary source of truth** for authorization test targets is the reconnaissance report at `deliverables/recon_deliverable.md`. Look specifically for:
|
||||
- Your **primary source of truth** for authorization test targets is the reconnaissance report at `.shannon/deliverables/recon_deliverable.md`. Look specifically for:
|
||||
- **"Horizontal" section:** Endpoints where users access resources by ID that might belong to other users
|
||||
- **"Vertical" section:** Admin/privileged endpoints that regular users shouldn't access
|
||||
- **"Context" section:** Multi-step workflows where order/state matters
|
||||
@@ -44,8 +48,8 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
|
||||
|
||||
**Phase Sequence:** RECON (Complete) → **AUTHZ ANALYSIS (You)** → EXPLOITATION (next phase)
|
||||
|
||||
**Your Input:** `deliverables/recon_deliverable.md`, `deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `deliverables/authz_exploitation_queue.json` (actionable vulnerabilities for exploitation)
|
||||
**Your Input:** `.shannon/deliverables/recon_deliverable.md`, `.shannon/deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `.shannon/deliverables/authz_analysis_deliverable.md` (analysis report for exploitation phase)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture, endpoints, and data flows
|
||||
@@ -81,10 +85,10 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
**Available Tools:**
|
||||
- **Task Agent (Code Analysis):** Your primary tool. Use it to ask targeted questions about the source code, trace authorization logic paths, and understand permission models. MANDATORY for all source code analysis.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Browser Automation (playwright-cli skill):** For browser interactions with the live web application (multi-step flows and role-based access controls), invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each endpoint that needs authorization analysis. Mark items as "in_progress" when working on them and "completed" when done.
|
||||
@@ -93,7 +97,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
<data_format_specifications>
|
||||
|
||||
<exploitation_queue_format>
|
||||
**Purpose:** Defines the structure for an "exploitation queue" saved via the save-deliverable CLI with --type AUTHZ_QUEUE.
|
||||
**Purpose:** Defines the format of the exploitation queue JSON.
|
||||
**Structure:** The `vulnerability` JSON object MUST follow this exact format:
|
||||
{
|
||||
"ID": "unique ID for each vulnerability (e.g., AUTHZ-VULN-01)",
|
||||
@@ -125,7 +129,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
### 1) Horizontal Authorization Analysis
|
||||
|
||||
- **Create To Dos:**
|
||||
For each item listed under **`deliverables/recon_deliverable.md` → section 8 "Horizontal"**, use the TodoWrite tool to create a task entry.
|
||||
For each item listed under **`.shannon/deliverables/recon_deliverable.md` → section 8 "Horizontal"**, use the TodoWrite tool to create a task entry.
|
||||
|
||||
- **Process:**
|
||||
- Start at the identified endpoint.
|
||||
@@ -157,7 +161,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
### 2) Vertical Authorization Analysis
|
||||
|
||||
- **Create To Dos:**
|
||||
For each item listed under **`deliverables/recon_deliverable.md` → section 8 "Vertical"**, use the TodoWrite tool to create a task entry.
|
||||
For each item listed under **`.shannon/deliverables/recon_deliverable.md` → section 8 "Vertical"**, use the TodoWrite tool to create a task entry.
|
||||
|
||||
- **Process:**
|
||||
- Start at the identified endpoint.
|
||||
@@ -183,7 +187,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
### 3) Context / Workflow Authorization Analysis
|
||||
|
||||
- **Create To Dos:**
|
||||
For each item listed under **`deliverables/recon_deliverable.md` → section 8 "Context"**, use the TodoWrite tool to create a task entry.
|
||||
For each item listed under **`.shannon/deliverables/recon_deliverable.md` → section 8 "Context"**, use the TodoWrite tool to create a task entry.
|
||||
|
||||
- **Process:**
|
||||
- Start at the endpoint that represents a step in a workflow.
|
||||
@@ -238,7 +242,7 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
|
||||
|
||||
For each analysis you perform from the lists above, you must make a final **verdict**:
|
||||
|
||||
- If the verdict is **`vulnerable`**, you must document the finding using the save-deliverable CLI to update the exploitation queue.
|
||||
- If the verdict is **`vulnerable`**, you must include the finding in your exploitation queue.
|
||||
- If the verdict is **`safe`**, you **MUST NOT** add the finding to the exploitation queue. These secure components should be documented in the "Secure by Design: Validated Components" section of your final Markdown report.
|
||||
</methodology>
|
||||
|
||||
@@ -340,27 +344,20 @@ examples:
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<exploitation_queue_requirements>
|
||||
**Exploitation Queue (MANDATORY)**
|
||||
Regardless of whether vulnerabilities are found, you MUST create the exploitation queue using the save-deliverable CLI tool:
|
||||
- **If vulnerabilities found:** Run save-deliverable CLI with `--type AUTHZ_QUEUE --content '{"vulnerabilities": [...]}'` with each confirmed hypothesis following the `exploitation_queue_format`. Set `externally_exploitable` to `true` ONLY if exploitable via public internet without internal access.
|
||||
- **If no vulnerabilities found:** Run save-deliverable CLI with `--type AUTHZ_QUEUE --content '{"vulnerabilities": []}'`.
|
||||
- **QUEUE INCLUSION CRITERIA:** ONLY include vulnerabilities where `externally_exploitable = true`. Exclude any vulnerability requiring internal network access, VPN, or direct server access.
|
||||
This file serves as the handoff mechanism and must always be created to signal completion.
|
||||
</exploitation_queue_requirements>
|
||||
|
||||
<conclusion_trigger>
|
||||
**COMPLETION REQUIREMENTS (ALL must be satisfied):**
|
||||
|
||||
1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
|
||||
2. **Deliverable Generation:** Both required deliverables must be successfully saved using the save-deliverable CLI tool:
|
||||
2. **Deliverable Generation:** Your deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/authz_analysis_deliverable.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/authz_analysis_deliverable.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type AUTHZ_ANALYSIS --file-path "deliverables/authz_analysis_deliverable.md"`
|
||||
4. Run `save-deliverable` with `--type AUTHZ_ANALYSIS --file-path ".shannon/deliverables/authz_analysis_deliverable.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split into multiple Write/Edit operations.
|
||||
- Exploitation queue: Run save-deliverable CLI with `--type AUTHZ_QUEUE --content '{"vulnerabilities": [...]}'`
|
||||
|
||||
**Note:** Save your deliverable markdown via save-deliverable first. The exploitation queue is captured automatically at the end of your session.
|
||||
|
||||
**ONLY AFTER** both todo completion AND successful deliverable generation, announce "**AUTHORIZATION ANALYSIS COMPLETE**" and stop.
|
||||
|
||||
|
||||
@@ -20,10 +20,14 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** A structural flaw in a backend command, whether an SQL query or a shell command, is one of the most severe classes of vulnerability in a SaaS application. It undermines the foundational trust of the system by creating the *potential* for data exposure (SQLi) or direct server compromise (Command Injection).
|
||||
@@ -33,7 +37,7 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
|
||||
</critical>
|
||||
|
||||
<starting_context>
|
||||
- Your **single source of truth** for the application's structure is the reconnaissance report located at `deliverables/recon_deliverable.md`. You must derive your list of testable targets from this file.
|
||||
- Your **single source of truth** for the application's structure is the reconnaissance report located at `.shannon/deliverables/recon_deliverable.md`. You must derive your list of testable targets from this file.
|
||||
|
||||
</starting_context>
|
||||
|
||||
@@ -42,8 +46,8 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
|
||||
|
||||
**Phase Sequence:** RECON (Complete) → **INJECTION ANALYSIS (You)** → EXPLOITATION (next phase)
|
||||
|
||||
**Your Input:** `deliverables/recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `deliverables/injection_exploitation_queue.json` (actionable vulnerabilities for exploitation)
|
||||
**Your Input:** `.shannon/deliverables/recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `.shannon/deliverables/injection_analysis_deliverable.md` (analysis report for exploitation phase)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture, attack surfaces, endpoints, input vectors
|
||||
@@ -81,10 +85,10 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
|
||||
**Available Tools:**
|
||||
- **Task Agent (Code Analysis):** Your primary tool. Use it to ask targeted questions about the source code, map query/command construction paths, and verify sanitization coverage. MANDATORY for all source code analysis.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Browser Automation (playwright-cli skill):** For browser interactions with the live web application (multi-step flows like password reset or registration), invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each injection source that needs analysis. Mark items as "in_progress" when working on them and "completed" when done.
|
||||
@@ -93,7 +97,7 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
|
||||
<data_format_specifications>
|
||||
|
||||
<exploitation_queue_format>
|
||||
**Purpose:** Defines the structure for an "exploitation queue" saved via the save-deliverable CLI with --type INJECTION_QUEUE.
|
||||
**Purpose:** Defines the format of the exploitation queue JSON.
|
||||
|
||||
**Structure:** The `vulnerability` JSON object MUST follow this exact format:
|
||||
{
|
||||
@@ -124,7 +128,7 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
|
||||
|
||||
- **Goal:** Prove whether untrusted input can influence the **structure** of a backend command (SQL or Shell) or reach sensitive **slots** without the correct defense. No live exploitation in this phase.
|
||||
- **1) Create a To Do for each Injection Source found in the Pre-Recon Deliverable**
|
||||
- inside of deliverables/pre_recon_deliverable.md under the section "7. Injection Sources (Command Injection and SQL Injection)" use the TodoWrite tool to create a task for each discovered Injection Source.
|
||||
- Inside .shannon/deliverables/pre_recon_deliverable.md, under the section "7. Injection Sources (Command Injection and SQL Injection)", use the TodoWrite tool to create a task for each discovered Injection Source.
|
||||
- Note: All sources are marked as tainted until they hit a sanitization that matches the sink context. Normalizers (lowercasing, trimming, JSON parse, schema decode) are not sanitizers — data that has only passed through them is still **tainted**.
|
||||
- **2) Trace Data Flow Paths from Source to Sink**
|
||||
- For each source, your goal is to identify every unique "Data Flow Path" to a database sink. A path is a distinct route the data takes through the code.
|
||||
@@ -147,8 +151,8 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
|
||||
- Include a short rationale (e.g., "context mismatch: regex escape on ORDER BY keyword slot").
|
||||
- If concat occurred **after** sanitization, treat that sanitization as **non-effective** for this path.
|
||||
- **6) Append to findings list (consistent fields)**
|
||||
- **If the verdict is `vulnerable`:** Use the Write tool to log the finding in `deliverables/injection_exploitation_queue.json`. Set `externally_exploitable` to `true` ONLY if exploitable via public internet without internal access. Ensure all fields in the `exploitation_queue_format`, including a minimal `witness_payload`, are populated.
|
||||
- **If the verdict is `safe`:** DO NOT add the finding to the exploitation queue. These secure vectors must be documented later in the "Vectors Analyzed and Confirmed Secure" section of your final Markdown report (`deliverables/injection_analysis_deliverable.md`).
|
||||
- **If the verdict is `vulnerable`:** Include the finding in your exploitation queue. Set `externally_exploitable` to `true` ONLY if exploitable via public internet without internal access. Ensure all fields in the `exploitation_queue_format`, including a minimal `witness_payload`, are populated.
|
||||
- **If the verdict is `safe`:** DO NOT add the finding to the exploitation queue. These secure vectors must be documented later in the "Vectors Analyzed and Confirmed Secure" section of your final Markdown report (`.shannon/deliverables/injection_analysis_deliverable.md`).
|
||||
- **If a single source is found to be vulnerable via multiple, distinct paths to different sinks, you must create a separate vulnerability entry in the exploitation queue for each unique vulnerable path.**
|
||||
- **QUEUE INCLUSION CRITERIA:** ONLY include vulnerabilities where `externally_exploitable = true`. Exclude any vulnerability requiring internal network access, VPN, or direct server access.
|
||||
|
||||
@@ -287,7 +291,7 @@ When you have systematically analyzed all input vectors, you MUST generate two f
|
||||
|
||||
**1. Your Specialist Deliverable**
|
||||
|
||||
First, synthesize all of your findings into a single, detailed Markdown report located at `deliverables/injection_analysis_deliverable.md`. This report is the official record of your work.
|
||||
First, synthesize all of your findings into a single, detailed Markdown report located at `.shannon/deliverables/injection_analysis_deliverable.md`. This report is the official record of your work.
|
||||
|
||||
Your report MUST use the following structure precisely:
|
||||
|
||||
@@ -297,7 +301,7 @@ Your report MUST use the following structure precisely:
|
||||
## 1. Executive Summary
|
||||
|
||||
- **Analysis Status:** Complete
|
||||
- **Key Outcome:** Several high-confidence SQL injection injection vulnerabilities (both SQLi and Command Injection) were identified. All findings have been passed to the exploitation phase via the machine-readable queue at `deliverables/injection_exploitation_queue.json`.
|
||||
- **Key Outcome:** Several high-confidence injection vulnerabilities (both SQLi and Command Injection) were identified. All findings have been passed to the exploitation phase.
|
||||
- **Purpose of this Document:** This report provides the strategic context, dominant patterns, and environmental intelligence necessary to effectively exploit the vulnerabilities listed in the queue. It is intended to be read alongside the JSON deliverable.
|
||||
|
||||
|
||||
@@ -345,29 +349,20 @@ These input vectors were traced and confirmed to have robust, context-appropriat
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<exploitation_queue_requirements>
|
||||
**Exploitation Queue (MANDATORY)**
|
||||
|
||||
Regardless of whether vulnerabilities are found, you MUST create the exploitation queue using the save-deliverable CLI tool:
|
||||
|
||||
- **If vulnerabilities found:** Run save-deliverable CLI with `--type INJECTION_QUEUE --content '{"vulnerabilities": [...]}'` with each exploitable injection vulnerability (verdict: "vulnerable") following the exploitation_queue_format
|
||||
- **If no vulnerabilities found:** Run save-deliverable CLI with `--type INJECTION_QUEUE --content '{"vulnerabilities": []}'`
|
||||
|
||||
This file serves as the handoff mechanism to the Exploitation phase and must always be created to signal completion of your analysis.
|
||||
</exploitation_queue_requirements>
|
||||
|
||||
<conclusion_trigger>
|
||||
**COMPLETION REQUIREMENTS (ALL must be satisfied):**
|
||||
|
||||
1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
|
||||
2. **Deliverable Generation:** Both required deliverables must be successfully saved using the save-deliverable CLI tool:
|
||||
2. **Deliverable Generation:** Your deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/injection_analysis_deliverable.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/injection_analysis_deliverable.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type INJECTION_ANALYSIS --file-path "deliverables/injection_analysis_deliverable.md"`
|
||||
4. Run `save-deliverable` with `--type INJECTION_ANALYSIS --file-path ".shannon/deliverables/injection_analysis_deliverable.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
- Exploitation queue: Run save-deliverable CLI with `--type INJECTION_QUEUE --content '{"vulnerabilities": [...]}'`
|
||||
|
||||
**Note:** Save your deliverable markdown via save-deliverable first. The exploitation queue is captured automatically at the end of your session.
|
||||
|
||||
**ONLY AFTER** both todo completion AND successful deliverable generation, announce "**INJECTION ANALYSIS COMPLETE**" and stop.
|
||||
|
||||
|
||||
@@ -19,10 +19,14 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** An SSRF flaw is a breach of network segmentation and trust boundaries. It allows attackers to leverage the application server as a proxy to access internal services, cloud metadata endpoints, or perform reconnaissance of internal networks. These vulnerabilities can lead to data exposure, privilege escalation, and complete network compromise.
|
||||
@@ -32,7 +36,7 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
|
||||
</critical>
|
||||
|
||||
<starting_context>
|
||||
- Your primary source of truth for the application's structure is the reconnaissance report located at `deliverables/recon_deliverable.md`. You must derive your list of testable endpoints from this file.
|
||||
- Your primary source of truth for the application's structure is the reconnaissance report located at `.shannon/deliverables/recon_deliverable.md`. You must derive your list of testable endpoints from this file.
|
||||
</starting_context>
|
||||
|
||||
<system_architecture>
|
||||
@@ -40,8 +44,8 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
|
||||
|
||||
**Phase Sequence:** RECON (Complete) → **SSRF ANALYSIS (You)** → EXPLOITATION (next phase)
|
||||
|
||||
**Your Input:** `deliverables/recon_deliverable.md`, `deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `deliverables/ssrf_exploitation_queue.json` (actionable vulnerabilities for exploitation)
|
||||
**Your Input:** `.shannon/deliverables/recon_deliverable.md`, `.shannon/deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `.shannon/deliverables/ssrf_analysis_deliverable.md` (analysis report for exploitation phase)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture, endpoints, and data flows
|
||||
@@ -77,10 +81,10 @@ An **exploitable vulnerability** is a data flow where user-controlled input infl
|
||||
**Available Tools:**
|
||||
- **Task Agent (Code Analysis):** Your primary tool. Use it to ask targeted questions about the source code, trace data flows, and understand HTTP client usage. MANDATORY for all source code analysis.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
- **Browser Automation (playwright-cli skill):** For browser interactions with the live web application (multi-step flows involving URL redirection or proxy functionality), invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each SSRF sink that needs analysis. Mark items as "in_progress" when working on them and "completed" when done.
|
||||
@@ -89,7 +93,7 @@ An **exploitable vulnerability** is a data flow where user-controlled input infl
|
||||
<data_format_specifications>
|
||||
|
||||
<exploitation_queue_format>
|
||||
**Purpose:** Defines the structure for a "exploitation queue" saved via the save-deliverable CLI with --type SSRF_QUEUE.
|
||||
**Purpose:** Defines the format of the exploitation queue JSON.
|
||||
**Structure:** The `vulnerability` JSON object MUST follow this exact format:
|
||||
{
|
||||
"ID": "unique ID for each vulnerability json (SSRF-VULN-XX)",
|
||||
@@ -114,7 +118,7 @@ An **exploitable vulnerability** is a data flow where user-controlled input infl
|
||||
# White-Box Vulnerability Analysis Procedure: Server-Side Request Forgery (SSRF)
|
||||
|
||||
NOTE ON "FOR ALL": whenever an item begins with "for all …", use the TodoWrite tool to create a separate task for each relevant endpoint/flow that needs to be analyzed.
|
||||
From `deliverables/pre_recon_deliverable.md`, use Section 10 (SSRF Sinks) to guide your tasks.
|
||||
From `.shannon/deliverables/pre_recon_deliverable.md`, use Section 10 (SSRF Sinks) to guide your tasks.
|
||||
|
||||
## 1) Identify HTTP Client Usage Patterns
|
||||
- For all endpoints that accept URL parameters, callback URLs, webhook URLs, or file paths, trace how these inputs are processed.
|
||||
@@ -166,7 +170,7 @@ From `deliverables/pre_recon_deliverable.md`, use Section 10 (SSRF Sinks) to gui
|
||||
|
||||
### **1) Create a To-Do Item for Each SSRF Sink**
|
||||
|
||||
Inside `deliverables/pre_recon_deliverable.md` under section `##10. SSRF Sinks##`.
|
||||
Inside `.shannon/deliverables/pre_recon_deliverable.md` under section `##10. SSRF Sinks##`.
|
||||
|
||||
Use the TodoWrite tool to create a task for each discovered sink (any server-side request composed even partially from user input).
|
||||
|
||||
@@ -228,7 +232,7 @@ Rule: when uncertain, round down (favor Medium/Low) to minimize false positives.
|
||||
# Documenting Findings (MANDATORY)
|
||||
For each check you perform from the list above, you must make a final **verdict**.
|
||||
|
||||
- If the verdict is **`vulnerable`**, you must document the finding using the save-deliverable CLI to update the exploitation queue.
|
||||
- If the verdict is **`vulnerable`**, you must include the finding in your exploitation queue.
|
||||
- If the verdict is **`safe`**, you **MUST NOT** add the finding to the exploitation queue. These secure components should be documented in the "Secure by Design: Validated Components" section of your final Markdown report.
|
||||
</methodology>
|
||||
|
||||
@@ -284,27 +288,20 @@ These components were analyzed and found to have robust defenses. They are low-p
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<exploitation_queue_requirements>
|
||||
**Exploitation Queue (MANDATORY)**
|
||||
Regardless of whether vulnerabilities are found, you MUST create the exploitation queue using the save-deliverable CLI tool:
|
||||
- **If vulnerabilities found:** Run save-deliverable CLI with `--type SSRF_QUEUE --content '{"vulnerabilities": [...]}'` with each confirmed hypothesis following the `exploitation_queue_format`. Set `externally_exploitable` to `true` ONLY if exploitable via public internet without internal access.
|
||||
- **If no vulnerabilities found:** Run save-deliverable CLI with `--type SSRF_QUEUE --content '{"vulnerabilities": []}'`.
|
||||
- **QUEUE INCLUSION CRITERIA:** ONLY include vulnerabilities where `externally_exploitable = true`. Exclude any vulnerability requiring internal network access, VPN, or direct server access.
|
||||
This file serves as the handoff mechanism and must always be created to signal completion.
|
||||
</exploitation_queue_requirements>
|
||||
|
||||
<conclusion_trigger>
|
||||
**COMPLETION REQUIREMENTS (ALL must be satisfied):**
|
||||
|
||||
1. **Systematic Analysis:** ALL relevant API endpoints and request-making features identified in the reconnaissance deliverable must be analyzed for SSRF vulnerabilities.
|
||||
2. **Deliverable Generation:** Both required deliverables must be successfully saved using the save-deliverable CLI tool:
|
||||
2. **Deliverable Generation:** Your deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/ssrf_analysis_deliverable.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/ssrf_analysis_deliverable.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type SSRF_ANALYSIS --file-path "deliverables/ssrf_analysis_deliverable.md"`
|
||||
4. Run `save-deliverable` with `--type SSRF_ANALYSIS --file-path ".shannon/deliverables/ssrf_analysis_deliverable.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
- Exploitation queue: Run save-deliverable CLI with `--type SSRF_QUEUE --content '{"vulnerabilities": [...]}'`
|
||||
|
||||
**Note:** Save your deliverable markdown via save-deliverable first. The exploitation queue is captured automatically at the end of your session.
|
||||
|
||||
**ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**SSRF ANALYSIS COMPLETE**" and stop.
|
||||
|
||||
|
||||
@@ -19,10 +19,14 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- Severity Context: An XSS flaw is a breach of trust between the user and the application. It grants an attacker the ability to execute arbitrary code within a victim's browser, leading to session hijacking, credential theft, and complete compromise of the user's account and data within the application.
|
||||
@@ -32,7 +36,7 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
|
||||
</critical>
|
||||
|
||||
<starting_context>
|
||||
- Your primary source of truth for the application's structure is the reconnaissance report located at deliverables/recon_deliverable.md. You must derive your list of testable targets from this file.
|
||||
- Your primary source of truth for the application's structure is the reconnaissance report located at .shannon/deliverables/recon_deliverable.md. You must derive your list of testable targets from this file.
|
||||
</starting_context>
|
||||
|
||||
<system_architecture>
|
||||
@@ -40,8 +44,8 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
|
||||
|
||||
**Phase Sequence:** RECON (Complete) → **XSS ANALYSIS (You)** → EXPLOITATION (next phase)
|
||||
|
||||
**Your Input:** `deliverables/recon_deliverable.md`, `deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `deliverables/xss_exploitation_queue.json` (actionable vulnerabilities for exploitation)
|
||||
**Your Input:** `.shannon/deliverables/recon_deliverable.md`, `.shannon/deliverables/pre_recon_deliverable.md` (reconnaissance findings)
|
||||
**Your Output:** `.shannon/deliverables/xss_analysis_deliverable.md` (analysis report for exploitation phase)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Reconnaissance agent mapped application architecture
|
||||
@@ -72,7 +76,7 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
|
||||
|
||||
**CRITICAL TOOL USAGE RESTRICTIONS:**
|
||||
- NEVER use the Read tool for application source code analysis - ALWAYS delegate to Task agents for examining .js, .ts, .py, .php files and application logic. You MAY use Read
|
||||
tool directly for these files: `deliverables/pre_recon_deliverable.md`, `deliverables/recon_deliverable.md`
|
||||
tool directly for these files: `.shannon/deliverables/pre_recon_deliverable.md`, `.shannon/deliverables/recon_deliverable.md`
|
||||
- Direct the Task Agent to trace render contexts, sanitization coverage, and template/component boundaries before deciding on exploitability.
|
||||
- **ALWAYS delegate code analysis to Task agents**
|
||||
|
||||
@@ -82,17 +86,17 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
|
||||
- **Browser Automation (playwright-cli skill):** MANDATORY for testing DOM-based XSS and form submission vectors. Invoke the `playwright-cli` skill to learn available commands. Use for stored XSS testing and client-side payload execution verification. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each sink you need to analyze.
|
||||
- **save-deliverable (CLI Tool):** Saves your deliverable files with automatic validation.
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<json>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"...","validated":true}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **Usage:** `save-deliverable --type <TYPE> --file-path <path>` or `--content '<text>'`
|
||||
- **Returns:** JSON to stdout: `{"status":"success","filepath":"..."}` or `{"status":"error","message":"...","retryable":true}`
|
||||
- **For large reports:** Write to disk first, then use `--file-path`. Do NOT pass large reports via `--content`.
|
||||
- **For JSON queues:** You may use `--content '{"vulnerabilities": [...]}'`. Queue files are validated automatically.
|
||||
|
||||
- **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
|
||||
</cli_tools>
|
||||
|
||||
<data_format_specifications>
|
||||
|
||||
<exploitation_queue_format>
|
||||
Purpose: Defines the structure for a "exploitation queue" saved via the save-deliverable CLI with --type XSS_QUEUE.
|
||||
Purpose: Defines the structure of the agent's final structured response.
|
||||
Structure: The vulnerability JSON object MUST follow this exact format:
|
||||
{
|
||||
"ID": "unique ID for each vulnerability json (XSS-VULN-XX)",
|
||||
@@ -124,7 +128,7 @@ Structure: The vulnerability JSON object MUST follow this exact format:
|
||||
- **Core Principle:** Data is assumed to be tainted until a context-appropriate output encoder (sanitization) is encountered on its path to the sink.
|
||||
|
||||
### **1) Create a todo item for each XSS sink using the TodoWrite tool**
|
||||
Read deliverables/pre_recon_deliverable.md section ##9. XSS Sinks and Render Contexts## and use the **TodoWrite tool** to create a todo item for each discovered sink-context pair that needs analysis.
|
||||
Read .shannon/deliverables/pre_recon_deliverable.md section ##9. XSS Sinks and Render Contexts## and use the **TodoWrite tool** to create a todo item for each discovered sink-context pair that needs analysis.
|
||||
|
||||
### **2) Trace Each Sink Backward (Backward Taint Analysis)**
|
||||
For each pending item in your todo list (managed via TodoWrite tool), trace the origin of the data variable backward from the sink through the application logic. Your goal is to find either a valid sanitizer or an untrusted source. Mark each todo item as completed after you've fully analyzed that sink.
|
||||
@@ -177,7 +181,7 @@ This rulebook is used for the **Early Termination** check in Step 2.
|
||||
- Include both safe and vulnerable paths to demonstrate **full coverage**.
|
||||
- Craft a minimal `witness_payload` that proves control over the render context.
|
||||
- For every path analyzed, you must document the outcome. The location of the documentation depends on the verdict:
|
||||
- If the verdict is 'vulnerable', you MUST use the save-deliverable CLI to save the finding to the exploitation queue, including complete source-to-sink information.
|
||||
- If the verdict is 'vulnerable', you MUST include the finding in your final structured response's exploitation queue, including complete source-to-sink information.
|
||||
- If the verdict is 'safe', you MUST NOT add it to the exploitation queue. Instead, you will document these secure paths in the "Vectors Analyzed and Confirmed Secure" table of your final analysis report.
|
||||
- For vulnerable findings, craft a minimal witness_payload that proves control over the render context.
|
||||
|
||||
@@ -211,7 +215,7 @@ When you have systematically analyzed all input vectors, you MUST generate two f
|
||||
## 1. Your Specialist Deliverable
|
||||
|
||||
- First, synthesize all of your findings into a single, detailed Markdown report located at
|
||||
`deliverables/xss_analysis_deliverable.md`.
|
||||
`.shannon/deliverables/xss_analysis_deliverable.md`.
|
||||
|
||||
- Your report MUST use the following structure precisely:
|
||||
|
||||
@@ -220,7 +224,7 @@ When you have systematically analyzed all input vectors, you MUST generate two f
|
||||
|
||||
## 1. Executive Summary
|
||||
- **Analysis Status:** Complete
|
||||
- **Key Outcome:** Several high-confidence XSS vulnerabilities were identified. All findings have been passed to the exploitation phase via `deliverables/xss_exploitation_queue.json`.
|
||||
- **Key Outcome:** Several high-confidence XSS vulnerabilities were identified. All findings have been passed to the exploitation phase.
|
||||
- **Purpose of this Document:** This report provides the strategic context, dominant patterns, and environmental intelligence necessary to effectively exploit the vulnerabilities.
|
||||
|
||||
## 2. Dominant Vulnerability Patterns
|
||||
@@ -265,35 +269,20 @@ These input vectors were traced and confirmed to have robust, context-appropriat
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<exploitation_queue_requirements>
|
||||
|
||||
## Exploitation Queue (MANDATORY)
|
||||
|
||||
Regardless of whether vulnerabilities are found, you MUST create the exploitation queue using the save-deliverable CLI tool.
|
||||
|
||||
- **If exploitable vulnerabilities found:**
|
||||
Run save-deliverable CLI with `--type XSS_QUEUE --content '{"vulnerabilities": [...]}'` with each exploitable XSS vulnerability (verdict: "vulnerable") following the `exploitation_queue_format`. Set `externally_exploitable` to `true` ONLY if exploitable via public internet without internal access.
|
||||
|
||||
- **If no exploitable vulnerabilities found:**
|
||||
Run save-deliverable CLI with `--type XSS_QUEUE --content '{"vulnerabilities": []}'`
|
||||
|
||||
- **QUEUE INCLUSION CRITERIA:** ONLY include vulnerabilities where `externally_exploitable = true`. Exclude any vulnerability requiring internal network access, VPN, or direct server access.
|
||||
|
||||
This file is the mandatory handoff to the Exploitation phase.
|
||||
</exploitation_queue_requirements>
|
||||
|
||||
<conclusion_trigger>
|
||||
COMPLETION REQUIREMENTS (ALL must be satisfied):
|
||||
|
||||
1. Systematic Analysis: ALL input vectors identified from the reconnaissance deliverable must be analyzed.
|
||||
2. Deliverable Generation: Both required deliverables must be successfully saved using the save-deliverable CLI tool:
|
||||
2. Deliverable Generation: Your deliverable must be successfully saved using the save-deliverable CLI tool:
|
||||
- **CHUNKED WRITING (MANDATORY):**
|
||||
1. Use the **Write** tool to create `deliverables/xss_analysis_deliverable.md` with the title and first major section
|
||||
1. Use the **Write** tool to create `.shannon/deliverables/xss_analysis_deliverable.md` with the title and first major section
|
||||
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
|
||||
3. Repeat step 2 for all remaining sections
|
||||
4. Run `save-deliverable` with `--type XSS_ANALYSIS --file-path "deliverables/xss_analysis_deliverable.md"`
|
||||
4. Run `save-deliverable` with `--type XSS_ANALYSIS --file-path ".shannon/deliverables/xss_analysis_deliverable.md"`
|
||||
**WARNING:** Do NOT write the entire report in a single tool call — doing so exceeds the 32K output token limit. Split the report into multiple Write/Edit operations.
|
||||
- Exploitation queue: Run save-deliverable CLI with `--type XSS_QUEUE --content '{"vulnerabilities": [...]}'`
|
||||
|
||||
**Note:** Save your deliverable markdown via save-deliverable first. The exploitation queue is captured automatically at the end of your session.
|
||||
|
||||
ONLY AFTER both systematic analysis AND successful deliverable generation, announce "XSS ANALYSIS COMPLETE" and stop.
|
||||
|
||||
|
||||
@@ -6,9 +6,10 @@
|
||||
|
||||
// Production Claude agent execution with retry, git checkpoints, and audit logging
|
||||
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { type JsonSchemaOutputFormat, query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { fs, path } from 'zx';
|
||||
import type { AuditSession } from '../audit/index.js';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import { isRetryableError, PentestError } from '../services/error-handling.js';
|
||||
import { AGENT_VALIDATORS } from '../session-manager.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
@@ -17,10 +18,9 @@ import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { Timer } from '../utils/metrics.js';
|
||||
import { createAuditLogger } from './audit-logger.js';
|
||||
import { dispatchMessage } from './message-handlers.js';
|
||||
import { type ModelTier, resolveModel } from './models.js';
|
||||
import { type ModelTier, resolveModel, supportsAdaptiveThinking } from './models.js';
|
||||
import { detectExecutionContext, formatCompletionMessage, formatErrorOutput } from './output-formatters.js';
|
||||
import { createProgressManager } from './progress-manager.js';
|
||||
import { getActualModelName } from './router-utils.js';
|
||||
|
||||
declare global {
|
||||
var SHANNON_DISABLE_LOADER: boolean | undefined;
|
||||
@@ -39,6 +39,7 @@ export interface ClaudePromptResult {
|
||||
errorType?: string | undefined;
|
||||
prompt?: string | undefined;
|
||||
retryable?: boolean | undefined;
|
||||
structuredOutput?: unknown;
|
||||
}
|
||||
|
||||
function outputLines(lines: string[]): void {
|
||||
@@ -71,7 +72,7 @@ async function writeErrorLog(
|
||||
},
|
||||
duration,
|
||||
};
|
||||
const logPath = path.join(sourceDir, 'error.log');
|
||||
const logPath = path.join(deliverablesDir(sourceDir), 'error.log');
|
||||
await fs.appendFile(logPath, `${JSON.stringify(errorLog)}\n`);
|
||||
} catch {
|
||||
// Best-effort error log writing - don't propagate failures
|
||||
@@ -87,8 +88,8 @@ export async function validateAgentOutput(
|
||||
logger.info(`Validating ${agentName} agent output`);
|
||||
|
||||
try {
|
||||
// Check if agent completed successfully
|
||||
if (!result.success || !result.result) {
|
||||
// Check if agent completed successfully (text result OR structured output)
|
||||
if (!result.success || (!result.result && result.structuredOutput === undefined)) {
|
||||
logger.error('Validation failed: Agent execution was unsuccessful');
|
||||
return false;
|
||||
}
|
||||
@@ -132,6 +133,10 @@ export async function runClaudePrompt(
|
||||
auditSession: AuditSession | null = null,
|
||||
logger: ActivityLogger,
|
||||
modelTier: ModelTier = 'medium',
|
||||
outputFormat?: JsonSchemaOutputFormat,
|
||||
apiKey?: string,
|
||||
deliverablesSubdir?: string,
|
||||
providerConfig?: import('../types/config.js').ProviderConfig,
|
||||
): Promise<ClaudePromptResult> {
|
||||
// 1. Initialize timing and prompt
|
||||
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
|
||||
@@ -150,22 +155,54 @@ export async function runClaudePrompt(
|
||||
// 3. Build env vars to pass to SDK subprocesses
|
||||
const sdkEnv: Record<string, string> = {
|
||||
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
|
||||
PLAYWRIGHT_MCP_OUTPUT_DIR: deliverablesSubdir
|
||||
? path.join(sourceDir, path.dirname(deliverablesSubdir), '.playwright-cli')
|
||||
: path.join(sourceDir, '.shannon', '.playwright-cli'),
|
||||
// apiKey from ContainerConfig takes precedence over process.env
|
||||
...(apiKey && { ANTHROPIC_API_KEY: apiKey }),
|
||||
// Deliverables subdir for save-deliverable CLI tool
|
||||
...(deliverablesSubdir && { SHANNON_DELIVERABLES_SUBDIR: deliverablesSubdir }),
|
||||
};
|
||||
|
||||
// 3a. Apply structured provider config directly to sdkEnv (no process.env mutation)
|
||||
if (providerConfig) {
|
||||
switch (providerConfig.providerType) {
|
||||
case 'bedrock':
|
||||
sdkEnv.CLAUDE_CODE_USE_BEDROCK = '1';
|
||||
if (providerConfig.awsRegion) sdkEnv.AWS_REGION = providerConfig.awsRegion;
|
||||
if (providerConfig.awsAccessKeyId) sdkEnv.AWS_ACCESS_KEY_ID = providerConfig.awsAccessKeyId;
|
||||
if (providerConfig.awsSecretAccessKey) sdkEnv.AWS_SECRET_ACCESS_KEY = providerConfig.awsSecretAccessKey;
|
||||
break;
|
||||
case 'vertex':
|
||||
sdkEnv.CLAUDE_CODE_USE_VERTEX = '1';
|
||||
if (providerConfig.gcpRegion) sdkEnv.CLOUD_ML_REGION = providerConfig.gcpRegion;
|
||||
if (providerConfig.gcpProjectId) sdkEnv.ANTHROPIC_VERTEX_PROJECT_ID = providerConfig.gcpProjectId;
|
||||
if (providerConfig.gcpCredentialsPath) sdkEnv.GOOGLE_APPLICATION_CREDENTIALS = providerConfig.gcpCredentialsPath;
|
||||
break;
|
||||
case 'litellm_router':
|
||||
if (providerConfig.baseUrl) sdkEnv.ANTHROPIC_BASE_URL = providerConfig.baseUrl;
|
||||
if (providerConfig.authToken) sdkEnv.ANTHROPIC_AUTH_TOKEN = providerConfig.authToken;
|
||||
break;
|
||||
default:
|
||||
// 'anthropic_api' or unset — apiKey already handled above
|
||||
if (providerConfig.apiKey && !apiKey) sdkEnv.ANTHROPIC_API_KEY = providerConfig.apiKey;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 3b. Passthrough env vars not already set by providerConfig or apiKey
|
||||
const passthroughVars = [
|
||||
'ANTHROPIC_API_KEY',
|
||||
...(!sdkEnv.ANTHROPIC_API_KEY ? ['ANTHROPIC_API_KEY'] : []),
|
||||
'CLAUDE_CODE_OAUTH_TOKEN',
|
||||
'ANTHROPIC_BASE_URL',
|
||||
'ANTHROPIC_AUTH_TOKEN',
|
||||
'CLAUDE_CODE_USE_BEDROCK',
|
||||
'AWS_REGION',
|
||||
...(!sdkEnv.ANTHROPIC_BASE_URL ? ['ANTHROPIC_BASE_URL'] : []),
|
||||
...(!sdkEnv.ANTHROPIC_AUTH_TOKEN ? ['ANTHROPIC_AUTH_TOKEN'] : []),
|
||||
...(!sdkEnv.CLAUDE_CODE_USE_BEDROCK ? ['CLAUDE_CODE_USE_BEDROCK'] : []),
|
||||
...(!sdkEnv.AWS_REGION ? ['AWS_REGION'] : []),
|
||||
'AWS_BEARER_TOKEN_BEDROCK',
|
||||
'CLAUDE_CODE_USE_VERTEX',
|
||||
'CLOUD_ML_REGION',
|
||||
'ANTHROPIC_VERTEX_PROJECT_ID',
|
||||
'GOOGLE_APPLICATION_CREDENTIALS',
|
||||
'ANTHROPIC_SMALL_MODEL',
|
||||
'ANTHROPIC_MEDIUM_MODEL',
|
||||
'ANTHROPIC_LARGE_MODEL',
|
||||
...(!sdkEnv.CLAUDE_CODE_USE_VERTEX ? ['CLAUDE_CODE_USE_VERTEX'] : []),
|
||||
...(!sdkEnv.CLOUD_ML_REGION ? ['CLOUD_ML_REGION'] : []),
|
||||
...(!sdkEnv.ANTHROPIC_VERTEX_PROJECT_ID ? ['ANTHROPIC_VERTEX_PROJECT_ID'] : []),
|
||||
...(!sdkEnv.GOOGLE_APPLICATION_CREDENTIALS ? ['GOOGLE_APPLICATION_CREDENTIALS'] : []),
|
||||
'HOME',
|
||||
'PATH',
|
||||
'PLAYWRIGHT_MCP_EXECUTABLE_PATH',
|
||||
@@ -178,14 +215,19 @@ export async function runClaudePrompt(
|
||||
}
|
||||
|
||||
// 4. Configure SDK options
|
||||
// Model override from providerConfig takes precedence over env-based resolveModel
|
||||
const model = providerConfig?.modelOverrides?.[modelTier] ?? resolveModel(modelTier);
|
||||
const adaptiveThinking = supportsAdaptiveThinking(model) && process.env.CLAUDE_ADAPTIVE_THINKING !== 'false';
|
||||
const options = {
|
||||
model: resolveModel(modelTier),
|
||||
model,
|
||||
maxTurns: 10_000,
|
||||
cwd: sourceDir,
|
||||
permissionMode: 'bypassPermissions' as const,
|
||||
allowDangerouslySkipPermissions: true,
|
||||
settingSources: ['user'] as ('user' | 'project' | 'local')[],
|
||||
env: sdkEnv,
|
||||
...(adaptiveThinking && { thinking: { type: 'adaptive' as const } }),
|
||||
...(outputFormat && { outputFormat }),
|
||||
};
|
||||
|
||||
if (!execContext.useCleanOutput) {
|
||||
@@ -243,6 +285,9 @@ export async function runClaudePrompt(
|
||||
model,
|
||||
partialCost: totalCost,
|
||||
apiErrorDetected,
|
||||
...(messageLoopResult.structuredOutput !== undefined && {
|
||||
structuredOutput: messageLoopResult.structuredOutput,
|
||||
}),
|
||||
};
|
||||
} catch (error) {
|
||||
// 9. Handle errors — log, write error file, return failure
|
||||
@@ -273,6 +318,7 @@ interface MessageLoopResult {
|
||||
apiErrorDetected: boolean;
|
||||
cost: number;
|
||||
model?: string | undefined;
|
||||
structuredOutput?: unknown;
|
||||
}
|
||||
|
||||
interface MessageLoopDeps {
|
||||
@@ -297,6 +343,7 @@ async function processMessageStream(
|
||||
let apiErrorDetected = false;
|
||||
let cost = 0;
|
||||
let model: string | undefined;
|
||||
let structuredOutput: unknown | undefined;
|
||||
let lastHeartbeat = Date.now();
|
||||
|
||||
for await (const message of query({ prompt: fullPrompt, options })) {
|
||||
@@ -327,6 +374,9 @@ async function processMessageStream(
|
||||
if (dispatchResult.type === 'complete') {
|
||||
result = dispatchResult.result;
|
||||
cost = dispatchResult.cost;
|
||||
if (dispatchResult.structuredOutput !== undefined) {
|
||||
structuredOutput = dispatchResult.structuredOutput;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -334,12 +384,18 @@ async function processMessageStream(
|
||||
if (dispatchResult.apiErrorDetected) {
|
||||
apiErrorDetected = true;
|
||||
}
|
||||
// Capture model from SystemInitMessage, but override with router model if applicable
|
||||
if (dispatchResult.model) {
|
||||
model = getActualModelName(dispatchResult.model);
|
||||
model = dispatchResult.model;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { turnCount, result, apiErrorDetected, cost, model };
|
||||
return {
|
||||
turnCount,
|
||||
result,
|
||||
apiErrorDetected,
|
||||
cost,
|
||||
model,
|
||||
...(structuredOutput !== undefined && { structuredOutput }),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ import {
|
||||
formatToolUseOutput,
|
||||
} from './output-formatters.js';
|
||||
import type { ProgressManager } from './progress-manager.js';
|
||||
import { getActualModelName } from './router-utils.js';
|
||||
import type {
|
||||
ApiErrorDetection,
|
||||
AssistantMessage,
|
||||
@@ -40,7 +39,10 @@ function extractMessageContent(message: AssistantMessage): string {
|
||||
const messageContent = message.message;
|
||||
|
||||
if (Array.isArray(messageContent.content)) {
|
||||
return messageContent.content.map((c: ContentBlock) => c.text || JSON.stringify(c)).join('\n');
|
||||
return messageContent.content
|
||||
.filter((c: ContentBlock) => c.type !== 'thinking' && c.type !== 'redacted_thinking')
|
||||
.map((c: ContentBlock) => c.text || JSON.stringify(c))
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
return String(messageContent.content);
|
||||
@@ -223,6 +225,10 @@ function handleResultMessage(message: ResultMessage): ResultData {
|
||||
}
|
||||
}
|
||||
|
||||
if (message.structured_output !== undefined) {
|
||||
result.structuredOutput = message.structured_output;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -259,7 +265,7 @@ function outputLines(lines: string[]): void {
|
||||
|
||||
export type MessageDispatchAction =
|
||||
| { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
|
||||
| { type: 'complete'; result: string | null; cost: number }
|
||||
| { type: 'complete'; result: string | null; cost: number; structuredOutput?: unknown }
|
||||
| { type: 'throw'; error: Error };
|
||||
|
||||
export interface MessageDispatchDeps {
|
||||
@@ -305,12 +311,10 @@ export async function dispatchMessage(
|
||||
case 'system': {
|
||||
if (message.subtype === 'init') {
|
||||
const initMsg = message as SystemInitMessage;
|
||||
const actualModel = getActualModelName(initMsg.model);
|
||||
if (!execContext.useCleanOutput) {
|
||||
logger.info(`Model: ${actualModel}, Permission: ${initMsg.permissionMode}`);
|
||||
logger.info(`Model: ${initMsg.model}, Permission: ${initMsg.permissionMode}`);
|
||||
}
|
||||
// Return actual model for tracking in audit logs
|
||||
return { type: 'continue', model: actualModel };
|
||||
return { type: 'continue', model: initMsg.model };
|
||||
}
|
||||
return { type: 'continue' };
|
||||
}
|
||||
@@ -338,7 +342,26 @@ export async function dispatchMessage(
|
||||
case 'result': {
|
||||
const resultData = handleResultMessage(message as ResultMessage);
|
||||
outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
|
||||
return { type: 'complete', result: resultData.result, cost: resultData.cost };
|
||||
|
||||
if (resultData.subtype === 'error_max_structured_output_retries') {
|
||||
return {
|
||||
type: 'throw',
|
||||
error: new PentestError(
|
||||
'Structured output validation failed after max retries',
|
||||
'validation',
|
||||
true,
|
||||
{},
|
||||
ErrorCode.OUTPUT_VALIDATION_FAILED,
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'complete' as const,
|
||||
result: resultData.result,
|
||||
cost: resultData.cost,
|
||||
...(resultData.structuredOutput !== undefined && { structuredOutput: resultData.structuredOutput }),
|
||||
};
|
||||
}
|
||||
|
||||
default:
|
||||
|
||||
@@ -21,7 +21,7 @@ export type ModelTier = 'small' | 'medium' | 'large';
|
||||
const DEFAULT_MODELS: Readonly<Record<ModelTier, string>> = {
|
||||
small: 'claude-haiku-4-5-20251001',
|
||||
medium: 'claude-sonnet-4-6',
|
||||
large: 'claude-opus-4-6',
|
||||
large: 'claude-opus-4-7',
|
||||
};
|
||||
|
||||
/** Resolve a model tier to a concrete model ID. */
|
||||
@@ -35,3 +35,8 @@ export function resolveModel(tier: ModelTier = 'medium'): string {
|
||||
return process.env.ANTHROPIC_MEDIUM_MODEL || DEFAULT_MODELS.medium;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Report whether `model` is eligible for adaptive thinking.
 *
 * Only model IDs containing `opus-4-6` or `opus-4-7` qualify.
 */
export function supportsAdaptiveThinking(model: string): boolean {
  const adaptiveCapable = /opus-4-[67]/;
  return model.match(adaptiveCapable) !== null;
}
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Zod schema definitions for vulnerability exploitation queue structured outputs.
|
||||
*
|
||||
* Each vuln agent returns a structured JSON response matching its schema.
|
||||
* The SDK validates the output against the JSON Schema generated from these Zod definitions.
|
||||
*/
|
||||
|
||||
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
|
||||
// === Common Fields ===
|
||||
|
||||
const ANALYSIS_NOTES_DESCRIPTION =
|
||||
'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.';
|
||||
|
||||
/**
 * Build the schema for the optional `notes` string field.
 *
 * In exploit mode the field carries no description; otherwise it is annotated
 * with ANALYSIS_NOTES_DESCRIPTION.
 */
function notesField(exploit: boolean) {
  const optionalText = z.string().optional();
  if (exploit) {
    return optionalText;
  }
  return optionalText.describe(ANALYSIS_NOTES_DESCRIPTION);
}
|
||||
|
||||
/**
 * Build the object schema holding the fields shared by every vulnerability entry.
 *
 * @param exploit - forwarded to `notesField` to pick the `notes` variant.
 */
function makeBase(exploit: boolean) {
  const notes = notesField(exploit);
  const commonFields = {
    ID: z.string(),
    vulnerability_type: z.string(),
    externally_exploitable: z.boolean(),
    confidence: z.string(),
    notes,
  };
  return z.object(commonFields);
}
|
||||
|
||||
// === Per-Vuln-Type Schemas (used for type inference; notes description is mode-agnostic for types) ===
|
||||
|
||||
const baseVulnerability = makeBase(true);
|
||||
|
||||
const InjectionVulnerability = baseVulnerability.extend({
|
||||
source: z.string().optional(),
|
||||
combined_sources: z.string().optional(),
|
||||
path: z.string().optional(),
|
||||
sink_call: z.string().optional(),
|
||||
slot_type: z.string().optional(),
|
||||
sanitization_observed: z.string().optional(),
|
||||
concat_occurrences: z.string().optional(),
|
||||
verdict: z.string().optional(),
|
||||
mismatch_reason: z.string().optional(),
|
||||
witness_payload: z.string().optional(),
|
||||
});
|
||||
|
||||
const XssVulnerability = baseVulnerability.extend({
|
||||
source: z.string().optional(),
|
||||
source_detail: z.string().optional(),
|
||||
path: z.string().optional(),
|
||||
sink_function: z.string().optional(),
|
||||
render_context: z.string().optional(),
|
||||
encoding_observed: z.string().optional(),
|
||||
verdict: z.string().optional(),
|
||||
mismatch_reason: z.string().optional(),
|
||||
witness_payload: z.string().optional(),
|
||||
});
|
||||
|
||||
const AuthVulnerability = baseVulnerability.extend({
|
||||
source_endpoint: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
missing_defense: z.string().optional(),
|
||||
exploitation_hypothesis: z.string().optional(),
|
||||
suggested_exploit_technique: z.string().optional(),
|
||||
});
|
||||
|
||||
const SsrfVulnerability = baseVulnerability.extend({
|
||||
source_endpoint: z.string().optional(),
|
||||
vulnerable_parameter: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
missing_defense: z.string().optional(),
|
||||
exploitation_hypothesis: z.string().optional(),
|
||||
suggested_exploit_technique: z.string().optional(),
|
||||
});
|
||||
|
||||
const AuthzVulnerability = baseVulnerability.extend({
|
||||
endpoint: z.string().optional(),
|
||||
vulnerable_code_location: z.string().optional(),
|
||||
role_context: z.string().optional(),
|
||||
guard_evidence: z.string().optional(),
|
||||
side_effect: z.string().optional(),
|
||||
reason: z.string().optional(),
|
||||
minimal_witness: z.string().optional(),
|
||||
});
|
||||
|
||||
// === Inferred Entry Types (consumed by renderer) ===
|
||||
|
||||
export type InjectionFinding = z.infer<typeof InjectionVulnerability>;
|
||||
export type XssFinding = z.infer<typeof XssVulnerability>;
|
||||
export type AuthFinding = z.infer<typeof AuthVulnerability>;
|
||||
export type SsrfFinding = z.infer<typeof SsrfVulnerability>;
|
||||
export type AuthzFinding = z.infer<typeof AuthzVulnerability>;
|
||||
|
||||
// === Convert to JSON Schema for SDK ===
|
||||
|
||||
// NOTE: The SDK's AJV validator expects draft-07. Zod defaults to draft-2020-12 which
|
||||
// causes the SDK to silently skip structured output.
|
||||
/** Convert a Zod schema into a `json_schema` output format, targeting JSON Schema draft-07. */
function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat {
  const schema = z.toJSONSchema(zodSchema, { target: 'draft-07' }) as Record<string, unknown>;
  return { type: 'json_schema', schema };
}
|
||||
|
||||
// === Per-Mode Output Format Builders ===
|
||||
// Two maps cached at module load; the only per-mode difference is the
|
||||
// description on the `notes` field, which steers the LLM's writing.
|
||||
|
||||
/**
 * Build the per-agent structured output formats for one mode.
 *
 * Every format has the shape `{ vulnerabilities: Entry[] }`, where `Entry` is the
 * shared base (from `makeBase(exploit)`) extended with the agent-specific fields.
 */
function buildOutputFormats(exploit: boolean): Partial<Record<AgentName, JsonSchemaOutputFormat>> {
  const base = makeBase(exploit);

  // Wraps an entry schema in the `{ vulnerabilities: [...] }` envelope and converts it.
  const asQueue = (entry: z.ZodType): JsonSchemaOutputFormat =>
    toOutputFormat(z.object({ vulnerabilities: z.array(entry) }));

  const injectionEntry = base.extend({
    source: z.string().optional(),
    combined_sources: z.string().optional(),
    path: z.string().optional(),
    sink_call: z.string().optional(),
    slot_type: z.string().optional(),
    sanitization_observed: z.string().optional(),
    concat_occurrences: z.string().optional(),
    verdict: z.string().optional(),
    mismatch_reason: z.string().optional(),
    witness_payload: z.string().optional(),
  });

  const xssEntry = base.extend({
    source: z.string().optional(),
    source_detail: z.string().optional(),
    path: z.string().optional(),
    sink_function: z.string().optional(),
    render_context: z.string().optional(),
    encoding_observed: z.string().optional(),
    verdict: z.string().optional(),
    mismatch_reason: z.string().optional(),
    witness_payload: z.string().optional(),
  });

  const authEntry = base.extend({
    source_endpoint: z.string().optional(),
    vulnerable_code_location: z.string().optional(),
    missing_defense: z.string().optional(),
    exploitation_hypothesis: z.string().optional(),
    suggested_exploit_technique: z.string().optional(),
  });

  const ssrfEntry = base.extend({
    source_endpoint: z.string().optional(),
    vulnerable_parameter: z.string().optional(),
    vulnerable_code_location: z.string().optional(),
    missing_defense: z.string().optional(),
    exploitation_hypothesis: z.string().optional(),
    suggested_exploit_technique: z.string().optional(),
  });

  const authzEntry = base.extend({
    endpoint: z.string().optional(),
    vulnerable_code_location: z.string().optional(),
    role_context: z.string().optional(),
    guard_evidence: z.string().optional(),
    side_effect: z.string().optional(),
    reason: z.string().optional(),
    minimal_witness: z.string().optional(),
  });

  return {
    'injection-vuln': asQueue(injectionEntry),
    'xss-vuln': asQueue(xssEntry),
    'auth-vuln': asQueue(authEntry),
    'ssrf-vuln': asQueue(ssrfEntry),
    'authz-vuln': asQueue(authzEntry),
  };
}
|
||||
|
||||
const OUTPUT_FORMATS_EXPLOIT = buildOutputFormats(true);
|
||||
const OUTPUT_FORMATS_ANALYSIS = buildOutputFormats(false);
|
||||
|
||||
const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
|
||||
'injection-vuln': 'injection_exploitation_queue.json',
|
||||
'xss-vuln': 'xss_exploitation_queue.json',
|
||||
'auth-vuln': 'auth_exploitation_queue.json',
|
||||
'ssrf-vuln': 'ssrf_exploitation_queue.json',
|
||||
'authz-vuln': 'authz_exploitation_queue.json',
|
||||
};
|
||||
|
||||
/**
 * Look up the structured output format for `agentName`.
 *
 * Uses the exploit-mode map when `exploit` is true (the default) and the
 * analysis-mode map otherwise. Agents without an entry yield `undefined`.
 */
export function getOutputFormat(agentName: AgentName, exploit = true): JsonSchemaOutputFormat | undefined {
  const formats = exploit ? OUTPUT_FORMATS_EXPLOIT : OUTPUT_FORMATS_ANALYSIS;
  return formats[agentName];
}
|
||||
|
||||
/** Look up the exploitation-queue filename for `agentName`; `undefined` when the agent has no entry. */
export function getQueueFilename(agentName: AgentName): string | undefined {
  const filename = VULN_AGENT_QUEUE_FILENAMES[agentName];
  return filename;
}
|
||||
@@ -1,27 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
 * Resolve the model name to report for a run.
 *
 * When both ANTHROPIC_BASE_URL and ROUTER_DEFAULT are set and ROUTER_DEFAULT has
 * the form "provider,model" (e.g. "gemini,gemini-2.5-pro"), everything after the
 * first comma is returned, so model names that themselves contain commas survive.
 * In every other case the SDK-reported model is returned unchanged.
 */
export function getActualModelName(sdkReportedModel?: string): string | undefined {
  const { ANTHROPIC_BASE_URL: baseUrl, ROUTER_DEFAULT: routerSpec } = process.env;

  // Router mode requires both variables to be non-empty.
  if (!baseUrl || !routerSpec) {
    return sdkReportedModel;
  }

  const separatorAt = routerSpec.indexOf(',');
  if (separatorAt === -1) {
    // No "provider,model" pair to read from; keep the SDK's value.
    return sdkReportedModel;
  }

  return routerSpec.slice(separatorAt + 1);
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Writes ~/.claude/settings.json with permissions.deny rules derived from
|
||||
* `code_path` avoid patterns. The SDK reads this via `settingSources: ['user']`;
|
||||
* deny rules fire even in `bypassPermissions` mode.
|
||||
*/
|
||||
|
||||
import os from 'node:os';
|
||||
import { fs, path } from 'zx';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
|
||||
// Tools for which a deny rule is emitted per avoided code path.
const FILE_TOOLS = ['Read', 'Edit'] as const;

/**
 * Build the `permissions.deny` entries for one `code_path` avoid pattern.
 *
 * The pattern is normalized to a `./`-prefixed relative path and one
 * `Tool(./pattern)` entry is produced for each tool in FILE_TOOLS.
 *
 * @param pattern - avoid pattern as written in the config (may start with `/`, `./` or `../`).
 * @returns one deny rule string per file tool, in FILE_TOOLS order.
 */
function denyEntriesFor(pattern: string): string[] {
  // Strip only whole leading path prefixes (`/`, `./`, `../`). A character-class
  // strip such as /^[./]+/ would also eat the leading dot of hidden files and
  // directories, turning `.env` into `env` so the deny rule never matches.
  const relative = pattern.replace(/^(?:\.{1,2}\/|\/)+/, '');
  const arg = `./${relative}`;
  return FILE_TOOLS.map((tool) => `${tool}(${arg})`);
}
|
||||
|
||||
/**
 * Sync `~/.claude/settings.json` with the `code_path` avoid rules in `config`.
 *
 * With no such rules (or a null config) the settings file is removed. Otherwise
 * its parent directory is ensured and the file is rewritten to contain only a
 * `permissions.deny` list built from `denyEntriesFor`.
 */
export async function writeUserSettingsForCodePathAvoids(config: DistributedConfig | null): Promise<void> {
  const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');

  const avoidRules = config?.avoid ?? [];
  const codePaths = avoidRules.flatMap((rule) => (rule.type === 'code_path' ? [rule.value] : []));

  if (codePaths.length > 0) {
    const deny = codePaths.flatMap((codePath) => denyEntriesFor(codePath));
    await fs.ensureDir(path.dirname(settingsPath));
    await fs.writeJson(settingsPath, { permissions: { deny } }, { spaces: 2 });
    return;
  }

  // Nothing to deny: drop the settings file.
  await fs.remove(settingsPath);
}
|
||||
@@ -34,6 +34,7 @@ export interface ResultData {
|
||||
subtype?: string;
|
||||
stop_reason?: string | null;
|
||||
permissionDenials: number;
|
||||
structuredOutput?: unknown;
|
||||
}
|
||||
|
||||
export interface ToolUseData {
|
||||
@@ -51,6 +52,8 @@ export interface ToolResultData {
|
||||
export interface ContentBlock {
|
||||
type?: string;
|
||||
text?: string;
|
||||
thinking?: string;
|
||||
data?: string;
|
||||
}
|
||||
|
||||
export interface AssistantMessage {
|
||||
@@ -69,6 +72,7 @@ export interface ResultMessage {
|
||||
subtype?: string;
|
||||
stop_reason?: string | null;
|
||||
permission_denials?: unknown[];
|
||||
structured_output?: unknown;
|
||||
}
|
||||
|
||||
export interface ToolUseMessage {
|
||||
|
||||
@@ -202,7 +202,7 @@ export class AuditSession {
|
||||
/**
|
||||
* Update session status
|
||||
*/
|
||||
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed'): Promise<void> {
|
||||
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed' | 'cancelled'): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const unlock = await sessionMutex.lock(this.sessionId);
|
||||
|
||||
@@ -57,7 +57,7 @@ interface SessionData {
|
||||
id: string;
|
||||
webUrl: string;
|
||||
repoPath?: string;
|
||||
status: 'in-progress' | 'completed' | 'failed';
|
||||
status: 'in-progress' | 'completed' | 'failed' | 'cancelled';
|
||||
createdAt: string;
|
||||
completedAt?: string;
|
||||
originalWorkflowId?: string; // First workflow that created this workspace
|
||||
@@ -232,12 +232,12 @@ export class MetricsTracker {
|
||||
/**
|
||||
* Update session status
|
||||
*/
|
||||
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed'): Promise<void> {
|
||||
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed' | 'cancelled'): Promise<void> {
|
||||
if (!this.data) return;
|
||||
|
||||
this.data.session.status = status;
|
||||
|
||||
if (status === 'completed' || status === 'failed') {
|
||||
if (status === 'completed' || status === 'failed' || status === 'cancelled') {
|
||||
this.data.session.completedAt = formatTimestamp();
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
* All functions are pure and crash-safe.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { WORKSPACES_DIR } from '../paths.js';
|
||||
import { ensureDirectory } from '../utils/file-io.js';
|
||||
@@ -98,33 +97,3 @@ export async function initializeAuditStructure(sessionMetadata: SessionMetadata)
|
||||
await ensureDirectory(promptsPath);
|
||||
await ensureDirectory(deliverablesPath);
|
||||
}
|
||||
|
||||
/**
 * Mirror the top-level files of `<repoPath>/deliverables` into the audit
 * directory's `deliverables` folder.
 *
 * Returns without doing anything when the source directory cannot be read.
 * Only regular files are copied; subdirectories are skipped.
 */
export async function copyDeliverablesToAudit(sessionMetadata: SessionMetadata, repoPath: string): Promise<void> {
  const fromDir = path.join(repoPath, 'deliverables');
  const toDir = path.join(generateAuditPath(sessionMetadata), 'deliverables');

  // A failed listing means there is nothing to copy yet.
  const names = await fs.readdir(fromDir).catch(() => null);
  if (names === null) {
    return;
  }

  await ensureDirectory(toDir);

  for (const name of names) {
    const from = path.join(fromDir, name);
    const info = await fs.stat(from);
    if (!info.isFile()) {
      continue;
    }
    await fs.copyFile(from, path.join(toDir, name));
  }
}
|
||||
|
||||
@@ -30,7 +30,7 @@ export interface AgentMetricsSummary {
|
||||
}
|
||||
|
||||
export interface WorkflowSummary {
|
||||
status: 'completed' | 'failed';
|
||||
status: 'completed' | 'failed' | 'cancelled';
|
||||
totalDurationMs: number;
|
||||
totalCostUsd: number;
|
||||
completedAgents: string[];
|
||||
|
||||
@@ -10,7 +10,13 @@ import type { FormatsPlugin } from 'ajv-formats';
|
||||
import yaml from 'js-yaml';
|
||||
import { fs } from 'zx';
|
||||
import { PentestError } from './services/error-handling.js';
|
||||
import type { Authentication, Config, DistributedConfig, Rule } from './types/config.js';
|
||||
import {
|
||||
ALL_VULN_CLASSES,
|
||||
type Authentication,
|
||||
type Config,
|
||||
type DistributedConfig,
|
||||
type Rule,
|
||||
} from './types/config.js';
|
||||
import { ErrorCode } from './types/errors.js';
|
||||
|
||||
// Handle ESM/CJS interop for ajv-formats using require
|
||||
@@ -258,6 +264,87 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Parse an inline YAML string into a validated Config.
 *
 * Runs the same `validateConfig` step as the file-based parser, but takes the
 * YAML text directly (e.g. when a parent workflow passes the config inline).
 *
 * @throws PentestError with CONFIG_VALIDATION_FAILED when the string is blank,
 *   and with CONFIG_PARSE_ERROR when YAML parsing fails or yields null/undefined.
 *   Errors raised by `validateConfig` propagate unchanged.
 */
export const parseConfigYAML = (yamlContent: string): Config => {
  const isBlank = yamlContent.trim().length === 0;
  if (isBlank) {
    throw new PentestError(
      'Configuration YAML string is empty',
      'config',
      false,
      {},
      ErrorCode.CONFIG_VALIDATION_FAILED,
    );
  }

  let parsed: unknown;
  try {
    // FAILSAFE_SCHEMA restricts the loader to plain strings, sequences and mappings.
    parsed = yaml.load(yamlContent, { schema: yaml.FAILSAFE_SCHEMA, json: false });
  } catch (yamlError) {
    const reason = yamlError instanceof Error ? yamlError.message : String(yamlError);
    throw new PentestError(
      `YAML parsing failed: ${reason}`,
      'config',
      false,
      { originalError: reason },
      ErrorCode.CONFIG_PARSE_ERROR,
    );
  }

  if (parsed == null) {
    throw new PentestError(
      'Configuration YAML resulted in null/undefined after parsing',
      'config',
      false,
      {},
      ErrorCode.CONFIG_PARSE_ERROR,
    );
  }

  const validated = parsed as Config;
  validateConfig(validated);
  return validated;
};
|
||||
|
||||
/**
 * Reject configs that still use renamed rule fields.
 *
 * Scans `rules.avoid` and `rules.focus` for entries with `type: 'path'` (now
 * `url_path`) or with a `url_path` key and no `value` key (now `value`).
 *
 * @throws PentestError (CONFIG_VALIDATION_FAILED) listing every deprecated usage found.
 */
function checkDeprecatedFields(config: Config): void {
  const problems: string[] = [];

  function scan(candidate: unknown, section: string): void {
    if (!Array.isArray(candidate)) {
      return;
    }
    for (const [position, entry] of candidate.entries()) {
      if (entry === null || typeof entry !== 'object') {
        continue;
      }
      const fields = entry as Record<string, unknown>;
      if (fields.type === 'path') {
        problems.push(`rules.${section}[${position}].type: 'path' has been renamed to 'url_path'.`);
      }
      const usesLegacyKey = 'url_path' in fields && !('value' in fields);
      if (usesLegacyKey) {
        problems.push(`rules.${section}[${position}]: the rule field 'url_path' has been renamed to 'value'.`);
      }
    }
  }

  // Read `rules` through an untyped view so legacy shapes can be inspected.
  const raw = config as Record<string, unknown>;
  const rules = raw.rules as { avoid?: unknown; focus?: unknown } | undefined;
  scan(rules?.avoid, 'avoid');
  scan(rules?.focus, 'focus');

  if (problems.length === 0) {
    return;
  }

  throw new PentestError(
    `Configuration uses deprecated fields. Please update:\n - ${problems.join('\n - ')}`,
    'config',
    false,
    { deprecatedFields: problems },
    ErrorCode.CONFIG_VALIDATION_FAILED,
  );
}
|
||||
|
||||
const validateConfig = (config: Config): void => {
|
||||
if (!config || typeof config !== 'object') {
|
||||
throw new PentestError(
|
||||
@@ -279,6 +366,8 @@ const validateConfig = (config: Config): void => {
|
||||
);
|
||||
}
|
||||
|
||||
checkDeprecatedFields(config);
|
||||
|
||||
const isValid = validateSchema(config);
|
||||
if (!isValid) {
|
||||
const errors = validateSchema.errors || [];
|
||||
@@ -294,10 +383,16 @@ const validateConfig = (config: Config): void => {
|
||||
|
||||
performSecurityValidation(config);
|
||||
|
||||
if (!config.rules && !config.authentication && !config.description) {
|
||||
console.warn(
|
||||
'⚠️ Configuration file contains no rules, authentication, or description. The pentest will run without any scoping restrictions or login capabilities.',
|
||||
);
|
||||
const hasAnySteering =
|
||||
!!config.rules ||
|
||||
!!config.authentication ||
|
||||
!!config.description ||
|
||||
!!config.vuln_classes ||
|
||||
config.exploit !== undefined ||
|
||||
!!config.report ||
|
||||
!!config.rules_of_engagement;
|
||||
if (!hasAnySteering) {
|
||||
console.warn('⚠️ Configuration file contains no steering fields. The pentest will run with all defaults.');
|
||||
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
|
||||
console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
|
||||
}
|
||||
@@ -384,6 +479,34 @@ const performSecurityValidation = (config: Config): void => {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (config.rules_of_engagement) {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(config.rules_of_engagement)) {
|
||||
throw new PentestError(
|
||||
`rules_of_engagement contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'rules_of_engagement', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (config.report?.guidance) {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(config.report.guidance)) {
|
||||
throw new PentestError(
|
||||
`report.guidance contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'report.guidance', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
|
||||
@@ -391,12 +514,12 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
|
||||
|
||||
rules.forEach((rule, index) => {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(rule.url_path)) {
|
||||
if (pattern.test(rule.value)) {
|
||||
throw new PentestError(
|
||||
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
|
||||
`rules.${ruleType}[${index}].value contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
|
||||
{ field: `rules.${ruleType}[${index}].value`, pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
@@ -416,13 +539,25 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
|
||||
};
|
||||
|
||||
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
|
||||
const field = `rules.${ruleType}[${index}].url_path`;
|
||||
const field = `rules.${ruleType}[${index}].value`;
|
||||
|
||||
switch (rule.type) {
|
||||
case 'path':
|
||||
if (!rule.url_path.startsWith('/')) {
|
||||
case 'url_path':
|
||||
if (!rule.value.startsWith('/')) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'path' must start with '/'`,
|
||||
`${field} for type 'url_path' must start with '/'`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'code_path':
|
||||
if (rule.value.includes('://')) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'code_path' must not contain a URL protocol (got '${rule.value}')`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
@@ -434,7 +569,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
case 'subdomain':
|
||||
case 'domain':
|
||||
// Basic domain validation - no slashes allowed
|
||||
if (rule.url_path.includes('/')) {
|
||||
if (rule.value.includes('/')) {
|
||||
throw new PentestError(
|
||||
`${field} for type '${rule.type}' cannot contain '/' characters`,
|
||||
'config',
|
||||
@@ -444,7 +579,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
);
|
||||
}
|
||||
// Must contain at least one dot for domains
|
||||
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
|
||||
if (rule.type === 'domain' && !rule.value.includes('.')) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'domain' must be a valid domain name`,
|
||||
'config',
|
||||
@@ -457,7 +592,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
|
||||
case 'method': {
|
||||
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
|
||||
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
|
||||
if (!allowedMethods.includes(rule.value.toUpperCase())) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
|
||||
'config',
|
||||
@@ -470,7 +605,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
}
|
||||
|
||||
case 'header':
|
||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
|
||||
'config',
|
||||
@@ -482,7 +617,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
break;
|
||||
|
||||
case 'parameter':
|
||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
|
||||
'config',
|
||||
@@ -498,13 +633,13 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
|
||||
const seen = new Set<string>();
|
||||
rules.forEach((rule, index) => {
|
||||
const key = `${rule.type}:${rule.url_path}`;
|
||||
const key = `${rule.type}:${rule.value}`;
|
||||
if (seen.has(key)) {
|
||||
throw new PentestError(
|
||||
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
|
||||
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.value}'`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
|
||||
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, value: rule.value },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
@@ -513,16 +648,16 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
|
||||
};
|
||||
|
||||
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
|
||||
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
|
||||
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.value}`));
|
||||
|
||||
focusRules.forEach((rule, index) => {
|
||||
const key = `${rule.type}:${rule.url_path}`;
|
||||
const key = `${rule.type}:${rule.value}`;
|
||||
if (avoidSet.has(key)) {
|
||||
throw new PentestError(
|
||||
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
|
||||
`Conflicting rule found: rules.focus[${index}] '${rule.value}' also exists in rules.avoid`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
|
||||
{ field: `rules.focus[${index}]`, value: rule.value },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
@@ -533,7 +668,7 @@ const sanitizeRule = (rule: Rule): Rule => {
|
||||
return {
|
||||
description: rule.description.trim(),
|
||||
type: rule.type.toLowerCase().trim() as Rule['type'],
|
||||
url_path: rule.url_path.trim(),
|
||||
value: rule.value.trim(),
|
||||
};
|
||||
};
|
||||
|
||||
@@ -543,11 +678,28 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
|
||||
const authentication = config?.authentication || null;
|
||||
const description = config?.description?.trim() || '';
|
||||
|
||||
const vuln_classes =
|
||||
config?.vuln_classes && config.vuln_classes.length > 0 ? [...config.vuln_classes] : [...ALL_VULN_CLASSES];
|
||||
|
||||
const exploit = config?.exploit !== undefined ? config.exploit === 'true' : true;
|
||||
|
||||
const report = {
|
||||
...(config?.report?.min_severity && { min_severity: config.report.min_severity }),
|
||||
...(config?.report?.min_confidence && { min_confidence: config.report.min_confidence }),
|
||||
...(config?.report?.guidance && { guidance: config.report.guidance.trim() }),
|
||||
};
|
||||
|
||||
const rules_of_engagement = config?.rules_of_engagement?.trim() ?? '';
|
||||
|
||||
return {
|
||||
avoid: avoid.map(sanitizeRule),
|
||||
focus: focus.map(sanitizeRule),
|
||||
authentication: authentication ? sanitizeAuthentication(authentication) : null,
|
||||
description,
|
||||
vuln_classes,
|
||||
exploit,
|
||||
report,
|
||||
rules_of_engagement,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
/**
|
||||
* CheckpointProvider — injectable interface for external state persistence.
|
||||
*
|
||||
* Called before and after each agent to support skip-guard (resume) and
|
||||
* post-agent artifact persistence. During the concurrent vulnerability-exploitation
|
||||
* phase, 5 pipelines run in parallel — methods fire per-agent for granular control.
|
||||
*
|
||||
* Default: no-op (skip nothing, persist nothing).
|
||||
*/
|
||||
|
||||
import type { AgentMetrics, PipelineState } from '../temporal/shared.js';
|
||||
|
||||
/**
 * Outcome of the pre-agent skip check returned by
 * `CheckpointProvider.shouldSkipAgent`.
 */
export interface SkipDecision {
  /** `true` to skip the agent, `false` to let it run normally. */
  readonly skip: boolean;
  /** Metrics reported for the skipped agent. Required when `skip` is `true`. */
  readonly metrics?: AgentMetrics;
}
|
||||
|
||||
/**
 * File-system context handed to `CheckpointProvider.onAgentComplete` so a
 * provider can persist artifacts once an agent has finished.
 */
export interface CheckpointContext {
  readonly repoPath: string;
  readonly sessionId: string;
  readonly deliverablesSubdir: string;
  /** Optional; not every caller supplies an output location. */
  readonly outputPath?: string;
}
|
||||
|
||||
/**
 * Hooks invoked around each agent activity: one before execution (skip
 * decision) and one after a successful run (persistence).
 */
export interface CheckpointProvider {
  /**
   * Pre-execution hook.
   *
   * Resolve with `{ skip: true, metrics }` to bypass the agent (for example
   * when its output files already exist), or `{ skip: false }` to run it.
   */
  shouldSkipAgent(agentName: string, repoPath: string, deliverablesSubdir: string): Promise<SkipDecision>;

  /**
   * Post-success hook.
   *
   * Receives the pipeline state and, optionally, file context that can be
   * used to persist artifacts.
   */
  onAgentComplete(agentName: string, phase: string, state: PipelineState, context?: CheckpointContext): Promise<void>;
}
|
||||
|
||||
/** Fallback provider: never skips an agent and persists nothing. */
export class NoOpCheckpointProvider implements CheckpointProvider {
  /** Always resolves with `{ skip: false }`. */
  shouldSkipAgent(): Promise<SkipDecision> {
    const decision: SkipDecision = { skip: false };
    return Promise.resolve(decision);
  }

  /** Intentionally does nothing. */
  onAgentComplete(): Promise<void> {
    return Promise.resolve();
  }
}
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* FindingsProvider — injectable interface for external findings integration.
|
||||
*
|
||||
* Allows external security data from consumer-supplied sources to be merged
|
||||
* into the exploitation pipeline between vulnerability analysis and exploitation.
|
||||
*
|
||||
* Default: no-op returning { mergedCount: 0 }.
|
||||
*/
|
||||
|
||||
import type { ActivityInput } from '../temporal/activities.js';
|
||||
import type { VulnType } from '../types/agents.js';
|
||||
|
||||
/** Contract for merging externally supplied findings into the exploitation queue. */
export interface FindingsProvider {
  /**
   * Merge external findings for one vulnerability type.
   *
   * @returns an object whose `mergedCount` is the number of findings merged
   */
  mergeFindingsIntoQueue(repoPath: string, vulnType: VulnType, input: ActivityInput): Promise<{ mergedCount: number }>;
}
|
||||
|
||||
/** Fallback provider used when there are no external findings to merge. */
export class NoOpFindingsProvider implements FindingsProvider {
  /** Always resolves with `{ mergedCount: 0 }`. */
  mergeFindingsIntoQueue(): Promise<{ mergedCount: number }> {
    const nothingMerged = { mergedCount: 0 };
    return Promise.resolve(nothingMerged);
  }
}
|
||||
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* Injectable interfaces for extending the pentest pipeline.
|
||||
*
|
||||
* All interfaces have default no-op implementations.
|
||||
* Consumers can provide alternate implementations via the DI container.
|
||||
*/
|
||||
|
||||
export type { CheckpointProvider, CheckpointContext, SkipDecision } from './checkpoint-provider.js';
|
||||
export { NoOpCheckpointProvider } from './checkpoint-provider.js';
|
||||
export type { FindingsProvider } from './findings-provider.js';
|
||||
export { NoOpFindingsProvider } from './findings-provider.js';
|
||||
export type { ReportOutputProvider } from './report-output-provider.js';
|
||||
export { NoOpReportOutputProvider } from './report-output-provider.js';
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* ReportOutputProvider — injectable interface for emitting an optional
|
||||
* additional artifact alongside the assembled markdown report.
|
||||
*
|
||||
* Runs after the report agent has finalized
|
||||
* `comprehensive_security_assessment_report.md`. Consumers can override to
|
||||
* produce derived outputs; the default no-op produces nothing.
|
||||
*/
|
||||
|
||||
import type { ActivityInput } from '../temporal/activities.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
/** Contract for emitting an optional extra artifact next to the markdown report. */
export interface ReportOutputProvider {
  /**
   * Produce the additional output.
   *
   * @returns an object with `outputPath` set when an artifact was written
   */
  generate(
    input: ActivityInput,
    logger: ActivityLogger,
  ): Promise<{ outputPath?: string }>;
}
|
||||
|
||||
/** Fallback provider that produces no additional output. */
export class NoOpReportOutputProvider implements ReportOutputProvider {
  /** Always resolves with an empty object (no `outputPath`). */
  generate(): Promise<{ outputPath?: string }> {
    return Promise.resolve({});
  }
}
|
||||
@@ -9,6 +9,21 @@ const WORKER_ROOT = path.resolve(import.meta.dirname, '..');
|
||||
export const PROMPTS_DIR = path.join(WORKER_ROOT, 'prompts');
|
||||
export const CONFIGS_DIR = path.join(WORKER_ROOT, 'configs');
|
||||
|
||||
/** Deliverables location used by default, expressed relative to repoPath. */
export const DEFAULT_DELIVERABLES_SUBDIR = '.shannon/deliverables';

/** Audit log directory used by default. */
export const DEFAULT_AUDIT_DIR = './workspaces';

/**
 * Build the deliverables directory path for a repository.
 *
 * The subdirectory is written with '/' separators; it is split into segments
 * and re-joined through `path.join` onto `repoPath`.
 *
 * @param repoPath - Absolute path to the target repository
 * @param subdir - Subdirectory relative to repoPath (default: '.shannon/deliverables')
 * @returns The joined deliverables directory path
 */
export function deliverablesDir(repoPath: string, subdir: string = DEFAULT_DELIVERABLES_SUBDIR): string {
  const segments = subdir.split('/');
  return path.join(repoPath, ...segments);
}
|
||||
|
||||
/**
|
||||
* Repository root — walk up from WORKER_ROOT looking for pnpm-workspace.yaml.
|
||||
* Falls back to two levels up (apps/worker/ → repo root) if not found.
|
||||
|
||||
@@ -82,6 +82,26 @@ function generateTOTP(secret: string, timeStep: number = 30, digits: number = 6)
|
||||
return generateHOTP(secret, counter, digits);
|
||||
}
|
||||
|
||||
// === Help ===
|
||||
|
||||
/**
 * Print the generate-totp usage text (usage, options, output format) to
 * stdout with a single console.log call.
 */
function printHelp(): void {
|
||||
console.log(
|
||||
`generate-totp - emit a current 6-digit TOTP code for a base32-encoded secret.
|
||||
|
||||
Usage:
|
||||
generate-totp --secret <BASE32>
|
||||
generate-totp --help
|
||||
|
||||
Options:
|
||||
--secret Base32-encoded TOTP shared secret (characters A-Z, 2-7).
|
||||
-h, --help Show this help and exit.
|
||||
|
||||
Output:
|
||||
JSON to stdout. On success: {"status":"success","totpCode":"123456","expiresIn":<sec>}.
|
||||
On error: {"status":"error","message":"...","retryable":false} (exit 1).`,
|
||||
);
|
||||
}
|
||||
|
||||
// === Argument Parsing ===
|
||||
|
||||
function parseSecret(argv: string[]): string {
|
||||
@@ -97,6 +117,11 @@ function parseSecret(argv: string[]): string {
|
||||
// === Main ===
|
||||
|
||||
function main(): void {
|
||||
if (process.argv.includes('--help') || process.argv.includes('-h')) {
|
||||
printHelp();
|
||||
return;
|
||||
}
|
||||
|
||||
const secret = parseSecret(process.argv);
|
||||
|
||||
if (!secret) {
|
||||
|
||||
@@ -9,17 +9,40 @@
|
||||
/**
|
||||
* save-deliverable CLI
|
||||
*
|
||||
* Standalone script to save deliverable files with validation.
|
||||
* Replaces the MCP save_deliverable tool.
|
||||
* Standalone script to save deliverable files.
|
||||
*
|
||||
* Usage:
|
||||
* node save-deliverable.js --type INJECTION_QUEUE --content '{"vulnerabilities": [...]}'
|
||||
* node save-deliverable.js --type INJECTION_ANALYSIS --file-path deliverables/injection_analysis_deliverable.md
|
||||
*/
|
||||
|
||||
import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join, resolve } from 'node:path';
|
||||
import { DELIVERABLE_FILENAMES, type DeliverableType, isQueueType } from '../types/deliverables.js';
|
||||
import { DELIVERABLE_FILENAMES, type DeliverableType } from '../types/deliverables.js';
|
||||
|
||||
// === Help ===
|
||||
|
||||
/**
 * Print the save-deliverable usage text (usage, options, output format) to
 * stdout with a single console.log call.
 */
function printHelp(): void {
|
||||
// The list of accepted --type values is derived from the keys of DELIVERABLE_FILENAMES.
const types = Object.keys(DELIVERABLE_FILENAMES).join(', ');
|
||||
console.log(
|
||||
`save-deliverable - save a Shannon pentest deliverable under its canonical filename.
|
||||
|
||||
Usage:
|
||||
save-deliverable --type <TYPE> --file-path <path>
|
||||
save-deliverable --type <TYPE> --content '<text>'
|
||||
save-deliverable --help
|
||||
|
||||
Options:
|
||||
--type Deliverable type (required). One of:
|
||||
${types}
|
||||
--file-path Path of a file whose contents to save (preferred for large content).
|
||||
--content Inline content string to save.
|
||||
-h, --help Show this help and exit.
|
||||
|
||||
Output:
|
||||
JSON to stdout. On success: {"status":"success","filepath":"..."}.
|
||||
On error: {"status":"error","message":"...","retryable":true|false} (exit 1).`,
|
||||
);
|
||||
}
|
||||
|
||||
// === Argument Parsing ===
|
||||
|
||||
@@ -51,53 +74,11 @@ function parseArgs(argv: string[]): ParsedArgs {
|
||||
return args;
|
||||
}
|
||||
|
||||
// === Queue Validation ===
|
||||
|
||||
interface ValidationResult {
|
||||
valid: boolean;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
function validateQueueJson(content: string): ValidationResult {
|
||||
try {
|
||||
const parsed = JSON.parse(content) as unknown;
|
||||
|
||||
if (typeof parsed !== 'object' || parsed === null) {
|
||||
return {
|
||||
valid: false,
|
||||
message: `Invalid queue structure: Expected an object. Got: ${typeof parsed}`,
|
||||
};
|
||||
}
|
||||
|
||||
const obj = parsed as Record<string, unknown>;
|
||||
|
||||
if (!('vulnerabilities' in obj)) {
|
||||
return {
|
||||
valid: false,
|
||||
message: `Invalid queue structure: Missing 'vulnerabilities' property. Expected: {"vulnerabilities": [...]}`,
|
||||
};
|
||||
}
|
||||
|
||||
if (!Array.isArray(obj.vulnerabilities)) {
|
||||
return {
|
||||
valid: false,
|
||||
message: `Invalid queue structure: 'vulnerabilities' must be an array. Expected: {"vulnerabilities": [...]}`,
|
||||
};
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
} catch (error) {
|
||||
return {
|
||||
valid: false,
|
||||
message: `Invalid JSON: ${error instanceof Error ? error.message : String(error)}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// === File Operations ===
|
||||
|
||||
function saveDeliverableFile(targetDir: string, filename: string, content: string): string {
|
||||
const deliverablesDir = join(targetDir, 'deliverables');
|
||||
const subdir = process.env.SHANNON_DELIVERABLES_SUBDIR || '.shannon/deliverables';
|
||||
const deliverablesDir = join(targetDir, ...subdir.split('/'));
|
||||
const filepath = join(deliverablesDir, filename);
|
||||
|
||||
try {
|
||||
@@ -113,6 +94,11 @@ function saveDeliverableFile(targetDir: string, filename: string, content: strin
|
||||
// === Main ===
|
||||
|
||||
function main(): void {
|
||||
if (process.argv.includes('--help') || process.argv.includes('-h')) {
|
||||
printHelp();
|
||||
return;
|
||||
}
|
||||
|
||||
const args = parseArgs(process.argv);
|
||||
|
||||
// 1. Validate --type
|
||||
@@ -165,22 +151,11 @@ function main(): void {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// 3. Validate queue types
|
||||
let validated = false;
|
||||
if (isQueueType(args.type)) {
|
||||
const validation = validateQueueJson(content);
|
||||
if (!validation.valid) {
|
||||
console.log(JSON.stringify({ status: 'error', message: validation.message, retryable: true }));
|
||||
process.exit(1);
|
||||
}
|
||||
validated = true;
|
||||
}
|
||||
|
||||
// 4. Save the file
|
||||
// 3. Save the file
|
||||
try {
|
||||
const targetDir = process.cwd();
|
||||
const filepath = saveDeliverableFile(targetDir, filename, content);
|
||||
console.log(JSON.stringify({ status: 'success', filepath, validated }));
|
||||
console.log(JSON.stringify({ status: 'success', filepath }));
|
||||
} catch (error) {
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
console.log(JSON.stringify({ status: 'error', message: `Failed to save: ${msg}`, retryable: true }));
|
||||
|
||||
@@ -21,7 +21,9 @@
|
||||
* No Temporal dependencies - pure domain logic.
|
||||
*/
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js';
|
||||
import { getOutputFormat, getQueueFilename } from '../ai/queue-schemas.js';
|
||||
import type { AuditSession } from '../audit/index.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
@@ -42,9 +44,15 @@ import { loadPrompt } from './prompt-manager.js';
|
||||
export interface AgentExecutionInput {
  // --- Target ---------------------------------------------------------------
  /** Passed to the prompt loader together with `repoPath`. */
  webUrl: string;
  /** Passed to the prompt loader and to the Claude prompt runner. */
  repoPath: string;
  /** Directory used for git checkpoint/rollback/commit and for the queue file written after a run. */
  deliverablesPath: string;

  // --- Configuration (precedence: configData, then configYAML, then configPath) ---
  /** Path to a config file; consulted last. */
  configPath?: string | undefined;
  /** Pre-parsed config; consulted first. */
  configData?: import('../types/config.js').DistributedConfig | undefined;
  /** Raw YAML config text; consulted when `configData` is absent. */
  configYAML?: string | undefined;

  // --- Execution options ------------------------------------------------------
  /** Treated as `false` when omitted. */
  pipelineTestingMode?: boolean | undefined;
  /** Attempt counter forwarded to the git checkpoint and the audit session. */
  attemptNumber: number;
  /** Forwarded to the Claude prompt runner. */
  apiKey?: string | undefined;
  /** Forwarded to the prompt loader. */
  promptDir?: string | undefined;
  /** Forwarded to the Claude prompt runner. */
  providerConfig?: import('../types/config.js').ProviderConfig | undefined;
}
|
||||
|
||||
interface FailAgentOpts {
|
||||
@@ -87,10 +95,10 @@ export class AgentExecutionService {
|
||||
auditSession: AuditSession,
|
||||
logger: ActivityLogger,
|
||||
): Promise<Result<AgentEndResult, PentestError>> {
|
||||
const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
|
||||
const { webUrl, repoPath, deliverablesPath, configPath, configData, configYAML, pipelineTestingMode = false, attemptNumber, apiKey, promptDir, providerConfig } = input;
|
||||
|
||||
// 1. Load config (if provided)
|
||||
const configResult = await this.configLoader.loadOptional(configPath);
|
||||
// 1. Load config (pre-parsed configData → raw YAML → file path)
|
||||
const configResult = await this.configLoader.loadOptional(configPath, configData, configYAML);
|
||||
if (isErr(configResult)) {
|
||||
return configResult;
|
||||
}
|
||||
@@ -100,7 +108,7 @@ export class AgentExecutionService {
|
||||
const promptTemplate = AGENTS[agentName].promptTemplate;
|
||||
let prompt: string;
|
||||
try {
|
||||
prompt = await loadPrompt(promptTemplate, { webUrl, repoPath }, distributedConfig, pipelineTestingMode, logger);
|
||||
prompt = await loadPrompt(promptTemplate, { webUrl, repoPath }, distributedConfig, pipelineTestingMode, logger, promptDir);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
return err(
|
||||
@@ -116,7 +124,7 @@ export class AgentExecutionService {
|
||||
|
||||
// 3. Create git checkpoint before execution
|
||||
try {
|
||||
await createGitCheckpoint(repoPath, agentName, attemptNumber, logger);
|
||||
await createGitCheckpoint(deliverablesPath, agentName, attemptNumber, logger);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
return err(
|
||||
@@ -124,7 +132,7 @@ export class AgentExecutionService {
|
||||
`Failed to create git checkpoint for ${agentName}: ${errorMessage}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ agentName, repoPath, originalError: errorMessage },
|
||||
{ agentName, deliverablesPath, originalError: errorMessage },
|
||||
ErrorCode.GIT_CHECKPOINT_FAILED,
|
||||
),
|
||||
);
|
||||
@@ -134,6 +142,7 @@ export class AgentExecutionService {
|
||||
await auditSession.startAgent(agentName, prompt, attemptNumber);
|
||||
|
||||
// 5. Execute agent
|
||||
const outputFormat = getOutputFormat(agentName, distributedConfig?.exploit ?? true);
|
||||
const result: ClaudePromptResult = await runClaudePrompt(
|
||||
prompt,
|
||||
repoPath,
|
||||
@@ -143,13 +152,17 @@ export class AgentExecutionService {
|
||||
auditSession,
|
||||
logger,
|
||||
AGENTS[agentName].modelTier,
|
||||
outputFormat,
|
||||
apiKey,
|
||||
path.relative(repoPath, deliverablesPath),
|
||||
providerConfig,
|
||||
);
|
||||
|
||||
// 6. Spending cap check - defense-in-depth
|
||||
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
|
||||
const resultText = result.result || '';
|
||||
if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
|
||||
return this.failAgent(agentName, repoPath, auditSession, logger, {
|
||||
return this.failAgent(agentName, deliverablesPath, auditSession, logger, {
|
||||
attemptNumber,
|
||||
result,
|
||||
rollbackReason: 'spending cap detected',
|
||||
@@ -164,7 +177,7 @@ export class AgentExecutionService {
|
||||
|
||||
// 7. Handle execution failure
|
||||
if (!result.success) {
|
||||
return this.failAgent(agentName, repoPath, auditSession, logger, {
|
||||
return this.failAgent(agentName, deliverablesPath, auditSession, logger, {
|
||||
attemptNumber,
|
||||
result,
|
||||
rollbackReason: 'execution failure',
|
||||
@@ -176,10 +189,19 @@ export class AgentExecutionService {
|
||||
});
|
||||
}
|
||||
|
||||
// 8. Validate output
|
||||
const validationPassed = await validateAgentOutput(result, agentName, repoPath, logger);
|
||||
// 8. Write structured output to disk (vuln agents only)
|
||||
const queueFilename = getQueueFilename(agentName);
|
||||
if (result.structuredOutput !== undefined && queueFilename) {
|
||||
await fs.ensureDir(deliverablesPath);
|
||||
const queuePath = path.join(deliverablesPath, queueFilename);
|
||||
await fs.writeFile(queuePath, JSON.stringify(result.structuredOutput, null, 2), 'utf8');
|
||||
logger.info(`Wrote structured output queue to ${queueFilename}`);
|
||||
}
|
||||
|
||||
// 9. Validate output
|
||||
const validationPassed = await validateAgentOutput(result, agentName, deliverablesPath, logger);
|
||||
if (!validationPassed) {
|
||||
return this.failAgent(agentName, repoPath, auditSession, logger, {
|
||||
return this.failAgent(agentName, deliverablesPath, auditSession, logger, {
|
||||
attemptNumber,
|
||||
result,
|
||||
rollbackReason: 'validation failure',
|
||||
@@ -191,9 +213,9 @@ export class AgentExecutionService {
|
||||
});
|
||||
}
|
||||
|
||||
// 9. Success - commit deliverables, then capture checkpoint hash
|
||||
await commitGitSuccess(repoPath, agentName, logger);
|
||||
const commitHash = await getGitCommitHash(repoPath);
|
||||
// 10. Success - commit deliverables, then capture checkpoint hash
|
||||
await commitGitSuccess(deliverablesPath, agentName, logger);
|
||||
const commitHash = await getGitCommitHash(deliverablesPath);
|
||||
|
||||
const endResult: AgentEndResult = {
|
||||
attemptNumber,
|
||||
@@ -210,12 +232,12 @@ export class AgentExecutionService {
|
||||
|
||||
private async failAgent(
|
||||
agentName: AgentName,
|
||||
repoPath: string,
|
||||
deliverablesPath: string,
|
||||
auditSession: AuditSession,
|
||||
logger: ActivityLogger,
|
||||
opts: FailAgentOpts,
|
||||
): Promise<Result<AgentEndResult, PentestError>> {
|
||||
await rollbackGitWorkspace(repoPath, opts.rollbackReason, logger);
|
||||
await rollbackGitWorkspace(deliverablesPath, opts.rollbackReason, logger);
|
||||
|
||||
const endResult: AgentEndResult = {
|
||||
attemptNumber: opts.attemptNumber,
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
* Pure service with no Temporal dependencies.
|
||||
*/
|
||||
|
||||
import { distributeConfig, parseConfig } from '../config-parser.js';
|
||||
import { distributeConfig, parseConfig, parseConfigYAML } from '../config-parser.js';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { err, ok, type Result } from '../types/result.js';
|
||||
@@ -60,11 +60,31 @@ export class ConfigLoaderService {
|
||||
|
||||
/**
|
||||
* Load config if path is provided, otherwise return null config.
|
||||
* If configData is provided (pre-parsed), returns it directly without file I/O.
|
||||
*
|
||||
* @param configPath - Optional path to the YAML configuration file
|
||||
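* @param configData - Optional pre-parsed config (bypasses file loading)
* @param configYAML - Optional raw YAML string; parsed when configData is absent, and takes precedence over configPath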
|
||||
* @returns Result containing DistributedConfig (or null) on success, PentestError on failure
|
||||
*/
|
||||
async loadOptional(configPath: string | undefined): Promise<Result<DistributedConfig | null, PentestError>> {
|
||||
async loadOptional(
|
||||
configPath: string | undefined,
|
||||
configData?: DistributedConfig,
|
||||
configYAML?: string,
|
||||
): Promise<Result<DistributedConfig | null, PentestError>> {
|
||||
if (configData) {
|
||||
return ok(configData);
|
||||
}
|
||||
if (configYAML) {
|
||||
try {
|
||||
const config = parseConfigYAML(configYAML);
|
||||
return ok(distributeConfig(config));
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
return err(
|
||||
new PentestError(`Failed to parse config YAML: ${errorMessage}`, 'config', false, { originalError: errorMessage }, ErrorCode.CONFIG_PARSE_ERROR),
|
||||
);
|
||||
}
|
||||
}
|
||||
if (!configPath) {
|
||||
return ok(null);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,13 @@
|
||||
*/
|
||||
|
||||
import type { SessionMetadata } from '../audit/utils.js';
|
||||
import type { CheckpointProvider } from '../interfaces/checkpoint-provider.js';
|
||||
import { NoOpCheckpointProvider } from '../interfaces/checkpoint-provider.js';
|
||||
import type { FindingsProvider } from '../interfaces/findings-provider.js';
|
||||
import { NoOpFindingsProvider } from '../interfaces/findings-provider.js';
|
||||
import type { ReportOutputProvider } from '../interfaces/report-output-provider.js';
|
||||
import { NoOpReportOutputProvider } from '../interfaces/report-output-provider.js';
|
||||
import type { ContainerConfig } from '../types/config.js';
|
||||
import { AgentExecutionService } from './agent-execution.js';
|
||||
import { ConfigLoaderService } from './config-loader.js';
|
||||
import { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
@@ -32,6 +39,10 @@ import { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
*/
|
||||
export interface ContainerDependencies {
  // Required inputs
  readonly sessionMetadata: SessionMetadata;
  readonly config: ContainerConfig;

  // Optional providers; Container substitutes its NoOp implementation for any that is omitted
  readonly findingsProvider?: FindingsProvider;
  readonly checkpointProvider?: CheckpointProvider;
  readonly reportOutputProvider?: ReportOutputProvider;
}
|
||||
|
||||
/**
|
||||
@@ -45,17 +56,27 @@ export interface ContainerDependencies {
|
||||
*/
|
||||
export class Container {
  // Per-workflow inputs
  readonly sessionMetadata: SessionMetadata;
  readonly config: ContainerConfig;

  // Services
  readonly agentExecution: AgentExecutionService;
  readonly configLoader: ConfigLoaderService;
  readonly exploitationChecker: ExploitationCheckerService;

  // Pluggable providers
  readonly findingsProvider: FindingsProvider;
  readonly checkpointProvider: CheckpointProvider;
  readonly reportOutputProvider: ReportOutputProvider;

  constructor(deps: ContainerDependencies) {
    const { sessionMetadata, config, findingsProvider, checkpointProvider, reportOutputProvider } = deps;

    this.sessionMetadata = sessionMetadata;
    this.config = config;

    // Services are wired by explicit constructor injection: the agent
    // execution service shares the container's config loader instance.
    const configLoader = new ConfigLoaderService();
    this.configLoader = configLoader;
    this.exploitationChecker = new ExploitationCheckerService();
    this.agentExecution = new AgentExecutionService(configLoader);

    // Any provider the caller did not supply falls back to its no-op default.
    this.findingsProvider = findingsProvider ?? new NoOpFindingsProvider();
    this.checkpointProvider = checkpointProvider ?? new NoOpCheckpointProvider();
    this.reportOutputProvider = reportOutputProvider ?? new NoOpReportOutputProvider();
  }
}
|
||||
|
||||
@@ -65,6 +86,38 @@ export class Container {
|
||||
*/
|
||||
const containers = new Map<string, Container>();
|
||||
|
||||
/** OSS standalone defaults for the container config. */
const DEFAULT_CONFIG: ContainerConfig = { deliverablesSubdir: '.shannon/deliverables', auditDir: './workspaces' };

/**
 * Signature of the function that builds a Container.
 *
 * The built-in factory creates a plain Container with NoOp providers.
 * Consumers can call setContainerFactory() at worker startup to inject
 * custom provider implementations into every container.
 */
type ContainerFactory = (workflowId: string, sessionMetadata: SessionMetadata, config: ContainerConfig) => Container;

// Built-in factory: ignores the workflow id and passes no providers.
let containerFactory: ContainerFactory = (_workflowId, sessionMetadata, config) => {
  return new Container({ sessionMetadata, config });
};

/**
 * Replace the factory used to build containers.
 *
 * Intended to be called once at worker startup so that every container
 * created during the worker's lifetime receives the injected providers.
 */
export function setContainerFactory(factory: ContainerFactory): void {
  containerFactory = factory;
}
|
||||
|
||||
/**
|
||||
* Get or create a Container for a workflow.
|
||||
*
|
||||
@@ -73,13 +126,18 @@ const containers = new Map<string, Container>();
|
||||
*
|
||||
* @param workflowId - Unique workflow identifier
|
||||
* @param sessionMetadata - Session metadata for audit paths
|
||||
* @param config - Runtime configuration (defaults to OSS standalone config)
|
||||
* @returns Container instance for the workflow
|
||||
*/
|
||||
export function getOrCreateContainer(workflowId: string, sessionMetadata: SessionMetadata): Container {
|
||||
export function getOrCreateContainer(
|
||||
workflowId: string,
|
||||
sessionMetadata: SessionMetadata,
|
||||
config: ContainerConfig = DEFAULT_CONFIG,
|
||||
): Container {
|
||||
let container = containers.get(workflowId);
|
||||
|
||||
if (!container) {
|
||||
container = new Container({ sessionMetadata });
|
||||
container = containerFactory(workflowId, sessionMetadata, config);
|
||||
containers.set(workflowId, container);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,251 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Deterministic queue-JSON to findings-MD renderer.
|
||||
*
|
||||
* Used when exploit=false: the exploit agents didn't run, so there is no
|
||||
* `*_exploitation_evidence.md` to concatenate into the report. This module
|
||||
* reads each `*_exploitation_queue.json` (already SDK-validated against the
|
||||
* schemas in ../ai/queue-schemas.ts) and writes a `*_findings.md` per class
|
||||
* in the canonical body shape that report-executive.txt's cleanup expects.
|
||||
*
|
||||
* No LLM in the loop — every field maps directly from a JSON key.
|
||||
*/
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import type {
|
||||
AuthFinding,
|
||||
AuthzFinding,
|
||||
InjectionFinding,
|
||||
SsrfFinding,
|
||||
XssFinding,
|
||||
} from '../ai/queue-schemas.js';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { VulnClass } from '../types/config.js';
|
||||
|
||||
// Blockquote inserted near the top of every rendered findings file.
const DISCLAIMER =
  '> Exploitation phase was not run for this assessment. Each entry documents a\n' +
  '> vulnerability identified through static analysis; live exploitation steps and\n' +
  '> proof of impact are not included.';
|
||||
|
||||
/** Per-vulnerability-class rendering settings. */
interface ClassConfig<T> {
  /** Class name used in the file's top-level heading and in log lines. */
  readonly heading: string;
  /** Class label used in the "No ... vulnerabilities were identified." sentence. */
  readonly noneFoundLabel: string;
  /** File name of the exploitation queue JSON inside the deliverables directory. */
  readonly queueFile: string;
  /** File name of the findings markdown written to the deliverables directory. */
  readonly findingsFile: string;
  /** Renders a single queue entry as a markdown section. */
  readonly renderEntry: (entry: T) => string;
}
|
||||
|
||||
/** Shape read from a queue JSON file; only the `vulnerabilities` key is used. */
interface QueueDocument<T> {
  vulnerabilities?: T[];
}
|
||||
|
||||
// === Common Render Helpers ===
|
||||
|
||||
/**
 * Format one bullet of the summary list.
 *
 * @param label - Bold label placed before the value
 * @param value - Value to print; booleans are stringified
 * @returns The markdown bullet, or null when the value is undefined, null,
 *          or a string containing only whitespace
 */
function summaryRow(label: string, value: string | undefined | null | boolean): string | null {
  const isMissing = value === undefined || value === null;
  const isBlankText = typeof value === 'string' && value.trim() === '';
  return isMissing || isBlankText ? null : `- **${label}:** ${value}`;
}
|
||||
|
||||
/**
 * Combine an endpoint and a code location into one display string.
 *
 * Blank values (empty or whitespace-only) are treated the same as missing
 * ones, so a blank endpoint no longer hides a usable code location.
 *
 * @param endpoint - Endpoint of the finding, if any
 * @param codeLocation - Source location of the vulnerable code, if any
 * @returns `endpoint (codeLocation)` when both are present, whichever one is
 *          present otherwise, or an empty string when neither is
 */
function formatLocation(endpoint: string | undefined, codeLocation: string | undefined): string {
  const where = endpoint?.trim();
  const code = codeLocation?.trim();
  if (where && code) return `${where} (${code})`;
  // `||` rather than `??`: an empty string must fall through to the other value.
  return where || code || '';
}
|
||||
|
||||
/**
 * Assemble the markdown section for a single finding.
 *
 * @param id - Finding identifier, placed in the `###` heading
 * @param title - Finding title, placed after the identifier
 * @param summaryRows - Pre-formatted summary bullets; null entries are skipped
 * @param notes - Optional notes; omitted when missing or whitespace-only
 * @returns The section text with trailing whitespace removed
 */
function buildEntry(
  id: string,
  title: string,
  summaryRows: ReadonlyArray<string | null>,
  notes: string | undefined,
): string {
  const presentRows = summaryRows.filter((row): row is string => row !== null);
  const section: string[] = [`### ${id}: ${title}`, '', '**Summary:**', ...presentRows, ''];

  const hasNotes = Boolean(notes) && notes?.trim() !== '';
  if (hasNotes && notes) {
    section.push(`**Notes:** ${notes.trim()}`);
  }

  return section.join('\n').trimEnd();
}
|
||||
|
||||
// === Per-Class Renderers ===
|
||||
|
||||
/** Render one authentication finding as a markdown section. */
function renderAuthEntry(e: AuthFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
|
||||
|
||||
/** Render one SSRF finding as a markdown section. */
function renderSsrfEntry(e: SsrfFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
|
||||
|
||||
/** Render one authorization finding as a markdown section. */
function renderAuthzEntry(e: AuthzFinding): string {
  const location = formatLocation(e.endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.guard_evidence),
    summaryRow('Impact', e.side_effect),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
|
||||
|
||||
/**
 * Render one injection finding as a markdown section.
 *
 * The location is `sink_call (path: path)` when both are present, otherwise
 * whichever one is non-empty.
 */
function renderInjectionEntry(e: InjectionFinding): string {
  // `||` rather than `??`: an empty-string sink_call must fall back to the
  // path instead of producing a blank row that summaryRow then drops.
  const location = e.path && e.sink_call ? `${e.sink_call} (path: ${e.path})` : e.sink_call || e.path;
  return buildEntry(
    e.ID,
    e.vulnerability_type,
    [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)],
    e.notes,
  );
}
|
||||
|
||||
/**
 * Render one XSS finding as a markdown section.
 *
 * The location is `sink_function (path: path)` when both are present,
 * otherwise whichever one is non-empty.
 */
function renderXssEntry(e: XssFinding): string {
  // `||` rather than `??`: an empty-string sink_function must fall back to the
  // path instead of producing a blank row that summaryRow then drops.
  const location =
    e.path && e.sink_function ? `${e.sink_function} (path: ${e.path})` : e.sink_function || e.path;
  return buildEntry(
    e.ID,
    e.vulnerability_type,
    [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)],
    e.notes,
  );
}
|
||||
|
||||
// === Class Registry ===
|
||||
|
||||
/**
 * Adapt a typed entry renderer to the `unknown`-typed slot of the registry.
 * The cast is unchecked, exactly like an inline `entry as T`.
 */
function untypedRenderer<T>(render: (entry: T) => string): (entry: unknown) => string {
  return (entry) => render(entry as T);
}

// Registry of per-class settings. Key order is the order in which
// Object.values() visits the classes.
const CLASSES: Record<VulnClass, ClassConfig<unknown>> = {
  auth: {
    heading: 'Authentication',
    noneFoundLabel: 'authentication',
    queueFile: 'auth_exploitation_queue.json',
    findingsFile: 'auth_findings.md',
    renderEntry: untypedRenderer<AuthFinding>(renderAuthEntry),
  },
  authz: {
    heading: 'Authorization',
    noneFoundLabel: 'authorization',
    queueFile: 'authz_exploitation_queue.json',
    findingsFile: 'authz_findings.md',
    renderEntry: untypedRenderer<AuthzFinding>(renderAuthzEntry),
  },
  injection: {
    heading: 'Injection',
    noneFoundLabel: 'injection',
    queueFile: 'injection_exploitation_queue.json',
    findingsFile: 'injection_findings.md',
    renderEntry: untypedRenderer<InjectionFinding>(renderInjectionEntry),
  },
  xss: {
    heading: 'XSS',
    noneFoundLabel: 'XSS',
    queueFile: 'xss_exploitation_queue.json',
    findingsFile: 'xss_findings.md',
    renderEntry: untypedRenderer<XssFinding>(renderXssEntry),
  },
  ssrf: {
    heading: 'SSRF',
    noneFoundLabel: 'SSRF',
    queueFile: 'ssrf_exploitation_queue.json',
    findingsFile: 'ssrf_findings.md',
    renderEntry: untypedRenderer<SsrfFinding>(renderSsrfEntry),
  },
};
|
||||
|
||||
// === Class File Assembly ===
|
||||
|
||||
/**
 * Build the full markdown text of one class's findings file.
 *
 * @param config - Settings for the class being rendered
 * @param entries - Queue entries; an empty array yields the "none identified" sentence
 * @returns Markdown ending in exactly one newline
 */
function renderClassFile(config: ClassConfig<unknown>, entries: readonly unknown[]): string {
  const header: string[] = [`# ${config.heading} Findings`, '', DISCLAIMER, '', '## Identified Vulnerabilities', ''];

  // Every entry (or the placeholder sentence) is followed by a blank line.
  const body: string[] =
    entries.length === 0
      ? [`No ${config.noneFoundLabel} vulnerabilities were identified.`, '']
      : entries.flatMap((entry) => [config.renderEntry(entry), '']);

  const text = [...header, ...body].join('\n').trimEnd();
  return `${text}\n`;
}
|
||||
|
||||
// === Public Entry Point ===
|
||||
|
||||
/**
 * Render `*_findings.md` per class from each `*_exploitation_queue.json`.
 *
 * Idempotent: skips classes whose findings file already exists, or whose queue
 * is missing (class out of scope this run). Per-class failures are logged as
 * warnings and other classes still proceed.
 *
 * A queue document that is not a JSON object, or whose `vulnerabilities` value
 * is present but not an array, is a per-class failure: no findings file is
 * written for it. A missing `vulnerabilities` key is treated as "no findings".
 *
 * @param sourceDir - Source directory passed to deliverablesDir()
 * @param deliverablesSubdir - Optional deliverables subdirectory passed to deliverablesDir()
 * @param logger - Receives one info or warn line per class
 */
export async function renderFindingsFromQueues(
  sourceDir: string,
  deliverablesSubdir: string | undefined,
  logger: ActivityLogger,
): Promise<void> {
  const dir = deliverablesDir(sourceDir, deliverablesSubdir);

  for (const config of Object.values(CLASSES)) {
    const queuePath = path.join(dir, config.queueFile);
    const findingsPath = path.join(dir, config.findingsFile);

    if (await fs.pathExists(findingsPath)) {
      logger.info(`${config.heading}: ${config.findingsFile} already exists, skipping`);
      continue;
    }
    if (!(await fs.pathExists(queuePath))) {
      logger.info(`${config.heading}: no queue file (class out of scope), skipping`);
      continue;
    }

    try {
      const doc = (await fs.readJson(queuePath)) as QueueDocument<unknown> | null;
      if (doc === null || typeof doc !== 'object') {
        throw new TypeError('queue document is not a JSON object');
      }

      const entries: unknown = doc.vulnerabilities ?? [];
      // Guard against iterating a string or other non-array value, which would
      // otherwise render meaningless entries instead of failing.
      if (!Array.isArray(entries)) {
        throw new TypeError('"vulnerabilities" is not an array');
      }

      const markdown = renderClassFile(config, entries);
      await fs.writeFile(findingsPath, markdown);
      logger.info(`${config.heading}: rendered ${entries.length} finding(s) to ${config.findingsFile}`);
    } catch (error) {
      // Anything can be thrown; only Error instances carry a `message`.
      const message = error instanceof Error ? error.message : String(error);
      logger.warn(`${config.heading}: failed to render findings from ${config.queueFile}: ${message}`);
    }
  }
}
|
||||
@@ -16,7 +16,9 @@ export { AgentExecutionService } from './agent-execution.js';
|
||||
|
||||
export { ConfigLoaderService } from './config-loader.js';
|
||||
export type { ContainerDependencies } from './container.js';
|
||||
export { Container, getOrCreateContainer, removeContainer } from './container.js';
|
||||
export { Container, getContainer, getOrCreateContainer, removeContainer, setContainerFactory } from './container.js';
|
||||
export { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
export { loadPrompt } from './prompt-manager.js';
|
||||
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
|
||||
export type { ClaudePromptResult } from '../ai/claude-executor.js';
|
||||
export { runClaudePrompt } from '../ai/claude-executor.js';
|
||||
|
||||
@@ -14,8 +14,9 @@
|
||||
* Checks run sequentially, cheapest first:
|
||||
* 1. Repository path exists and contains .git
|
||||
* 2. Config file parses and validates (if provided)
|
||||
* 3. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, Vertex AI, or router mode)
|
||||
* 4. Target URL is reachable from the container (DNS + HTTP)
|
||||
* 3. code_path rules match real entries in the repo (filesystem only)
|
||||
* 4. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 5. Target URL is reachable from the container (DNS + HTTP)
|
||||
*/
|
||||
|
||||
import { lookup } from 'node:dns/promises';
|
||||
@@ -24,9 +25,11 @@ import http from 'node:http';
|
||||
import https from 'node:https';
|
||||
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { glob } from 'zx';
|
||||
import { resolveModel } from '../ai/models.js';
|
||||
import { parseConfig } from '../config-parser.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { Config, Rule } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { err, ok, type Result } from '../types/result.js';
|
||||
import { isRetryableError, PentestError } from './error-handling.js';
|
||||
@@ -39,7 +42,7 @@ function isLoopbackAddress(address: string): boolean {
|
||||
|
||||
// === Repository Validation ===
|
||||
|
||||
async function validateRepo(repoPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
|
||||
async function validateRepo(repoPath: string, logger: ActivityLogger, skipGitCheck?: boolean): Promise<Result<void, PentestError>> {
|
||||
logger.info('Checking repository path...', { repoPath });
|
||||
|
||||
// 1. Check repo directory exists
|
||||
@@ -68,10 +71,22 @@ async function validateRepo(repoPath: string, logger: ActivityLogger): Promise<R
|
||||
);
|
||||
}
|
||||
|
||||
// 2. Check .git directory exists
|
||||
try {
|
||||
const gitStats = await fs.stat(`${repoPath}/.git`);
|
||||
if (!gitStats.isDirectory()) {
|
||||
// 2. Check .git directory exists (skipped when consumer removes .git after clone)
|
||||
if (!skipGitCheck) {
|
||||
try {
|
||||
const gitStats = await fs.stat(`${repoPath}/.git`);
|
||||
if (!gitStats.isDirectory()) {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Not a git repository (no .git directory): ${repoPath}`,
|
||||
'config',
|
||||
false,
|
||||
{ repoPath },
|
||||
ErrorCode.REPO_NOT_FOUND,
|
||||
),
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Not a git repository (no .git directory): ${repoPath}`,
|
||||
@@ -82,16 +97,8 @@ async function validateRepo(repoPath: string, logger: ActivityLogger): Promise<R
|
||||
),
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
return err(
|
||||
new PentestError(
|
||||
`Not a git repository (no .git directory): ${repoPath}`,
|
||||
'config',
|
||||
false,
|
||||
{ repoPath },
|
||||
ErrorCode.REPO_NOT_FOUND,
|
||||
),
|
||||
);
|
||||
} else {
|
||||
logger.info('Skipping .git check (skipGitCheck enabled)');
|
||||
}
|
||||
|
||||
logger.info('Repository path OK');
|
||||
@@ -100,13 +107,13 @@ async function validateRepo(repoPath: string, logger: ActivityLogger): Promise<R
|
||||
|
||||
// === Config Validation ===
|
||||
|
||||
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
|
||||
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<Config, PentestError>> {
|
||||
logger.info('Validating configuration file...', { configPath });
|
||||
|
||||
try {
|
||||
await parseConfig(configPath);
|
||||
const config = await parseConfig(configPath);
|
||||
logger.info('Configuration file OK');
|
||||
return ok(undefined);
|
||||
return ok(config);
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
return err(error);
|
||||
@@ -124,6 +131,73 @@ async function validateConfig(configPath: string, logger: ActivityLogger): Promi
|
||||
}
|
||||
}
|
||||
|
||||
// === code_path Existence Validation ===
|
||||
|
||||
// Directories excluded from code_path matching.
const CODE_PATH_IGNORE = ['.git/**', '.shannon/**'];

/**
 * Report whether a glob pattern matches at least one entry under the repo.
 *
 * Dotfiles are included, directories count as matches, and symbolic links are
 * not followed. Iteration stops at the first match.
 *
 * @param repoPath - Directory the pattern is resolved against
 * @param pattern - Glob pattern to test
 * @returns true as soon as one entry matches, false when the stream ends empty
 */
async function patternMatchesAny(repoPath: string, pattern: string): Promise<boolean> {
  const matches = glob.globbyStream(pattern, {
    cwd: repoPath,
    dot: true,
    onlyFiles: false,
    followSymbolicLinks: false,
    ignore: CODE_PATH_IGNORE,
  });

  let found = false;
  for await (const _entry of matches) {
    found = true;
    break;
  }
  return found;
}
|
||||
|
||||
/** Which rule list a code_path rule came from. */
type RuleKind = 'avoid' | 'focus';

/** A code_path rule whose pattern matched nothing in the repo. */
interface MissingCodePath {
  kind: RuleKind;
  /** The rule's pattern. */
  value: string;
  /** The rule's description, echoed in the error message. */
  description: string;
}
|
||||
|
||||
/**
 * Check that every `code_path` rule in the config matches something in the repo.
 *
 * Avoid rules are checked before focus rules, one at a time. All unmatched
 * rules are collected and reported together in a single error.
 *
 * @param config - Parsed configuration whose `rules.avoid` / `rules.focus` are inspected
 * @param repoPath - Repository root the patterns are resolved against
 * @param logger - Receives progress messages
 * @returns ok when there are no code_path rules or all of them match;
 *          otherwise a non-retryable `config` PentestError listing the unmatched rules
 */
async function validateCodePathsExist(
  config: Config,
  repoPath: string,
  logger: ActivityLogger,
): Promise<Result<void, PentestError>> {
  const codePathRules: Array<{ kind: RuleKind; rule: Rule }> = [];
  for (const kind of ['avoid', 'focus'] as const) {
    for (const rule of config.rules?.[kind] ?? []) {
      if (rule.type === 'code_path') {
        codePathRules.push({ kind, rule });
      }
    }
  }

  if (codePathRules.length === 0) {
    return ok(undefined);
  }

  logger.info(`Validating ${codePathRules.length} code_path rule(s) against repo...`);

  // ≥1 match is the only property enforced — malformed globs simply match nothing.
  const missing: MissingCodePath[] = [];
  for (const { kind, rule } of codePathRules) {
    const matched = await patternMatchesAny(repoPath, rule.value);
    if (!matched) {
      missing.push({ kind, value: rule.value, description: rule.description });
    }
  }

  if (missing.length === 0) {
    logger.info('All code_path rules matched');
    return ok(undefined);
  }

  const lines = missing.map((m) => `[${m.kind}] '${m.value}' — ${m.description}`);
  return err(
    new PentestError(
      `code_path rules don't match any file or directory in the repo:\n - ${lines.join('\n - ')}\n` +
        `Fix the patterns or remove the rules.`,
      'config',
      false,
      { missing },
      ErrorCode.CONFIG_VALIDATION_FAILED,
    ),
  );
}
|
||||
|
||||
// === Credential Validation ===
|
||||
|
||||
/** Map SDK error type to a human-readable preflight PentestError. */
|
||||
@@ -180,11 +254,23 @@ function classifySdkError(sdkError: SDKAssistantMessageError, authType: string):
|
||||
}
|
||||
|
||||
/** Validate credentials via a minimal Claude Agent SDK query. */
|
||||
async function validateCredentials(logger: ActivityLogger): Promise<Result<void, PentestError>> {
|
||||
async function validateCredentials(logger: ActivityLogger, apiKey?: string, providerConfig?: import('../types/config.js').ProviderConfig): Promise<Result<void, PentestError>> {
|
||||
// 0. If providerConfig is present, credentials are managed by the caller.
|
||||
// The executor will map providerConfig directly to sdkEnv — no process.env needed.
|
||||
if (providerConfig) {
|
||||
logger.info(`Provider config present (type: ${providerConfig.providerType || 'anthropic_api'}) — skipping env-based credential validation`);
|
||||
return ok(undefined);
|
||||
}
|
||||
|
||||
// 0b. If apiKey provided via config, set it in env for SDK validation
|
||||
// This avoids requiring process.env.ANTHROPIC_API_KEY when key is threaded via input
|
||||
if (apiKey) {
|
||||
process.env.ANTHROPIC_API_KEY = apiKey;
|
||||
}
|
||||
// 1. Custom base URL — validate endpoint is reachable via SDK query
|
||||
if (process.env.ANTHROPIC_BASE_URL) {
|
||||
if (process.env.ANTHROPIC_BASE_URL && process.env.ANTHROPIC_AUTH_TOKEN) {
|
||||
const baseUrl = process.env.ANTHROPIC_BASE_URL;
|
||||
logger.info(`Validating custom base URL: ${baseUrl}`);
|
||||
logger.info('Validating custom base URL');
|
||||
|
||||
try {
|
||||
for await (const message of query({ prompt: 'hi', options: { model: resolveModel('small'), maxTurns: 1 } })) {
|
||||
@@ -289,7 +375,7 @@ async function validateCredentials(logger: ActivityLogger): Promise<Result<void,
|
||||
}
|
||||
|
||||
// 4. Check that at least one credential is present
|
||||
if (!process.env.ANTHROPIC_API_KEY && !process.env.CLAUDE_CODE_OAUTH_TOKEN) {
|
||||
if (!process.env.ANTHROPIC_API_KEY && !process.env.CLAUDE_CODE_OAUTH_TOKEN && !process.env.ANTHROPIC_AUTH_TOKEN) {
|
||||
return err(
|
||||
new PentestError(
|
||||
'No API credentials found. Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env (or use CLAUDE_CODE_USE_BEDROCK=1 for AWS Bedrock, or CLAUDE_CODE_USE_VERTEX=1 for Google Vertex AI)',
|
||||
@@ -368,7 +454,7 @@ function httpHead(url: string, timeoutMs: number): Promise<number> {
|
||||
|
||||
/** Check that the target URL is reachable from inside the container. */
|
||||
async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
|
||||
logger.info('Checking target URL reachability...', { targetUrl });
|
||||
logger.info('Checking target URL reachability...');
|
||||
|
||||
// 1. Parse URL
|
||||
let parsed: URL;
|
||||
@@ -447,8 +533,9 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
|
||||
*
|
||||
* 1. Repository path exists and contains .git
|
||||
* 2. Config file parses and validates (if configPath provided)
|
||||
* 3. Credentials validate (API key, OAuth, or router mode)
|
||||
* 4. Target URL is reachable from the container
|
||||
* 3. code_path rules match at least one entry in the repo (skipped without config)
|
||||
* 4. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 5. Target URL is reachable from the container
|
||||
*
|
||||
* Returns on first failure.
|
||||
*/
|
||||
@@ -457,28 +544,42 @@ export async function runPreflightChecks(
|
||||
repoPath: string,
|
||||
configPath: string | undefined,
|
||||
logger: ActivityLogger,
|
||||
skipGitCheck?: boolean,
|
||||
apiKey?: string,
|
||||
providerConfig?: import('../types/config.js').ProviderConfig,
|
||||
): Promise<Result<void, PentestError>> {
|
||||
// 1. Repository check (free — filesystem only)
|
||||
const repoResult = await validateRepo(repoPath, logger);
|
||||
const repoResult = await validateRepo(repoPath, logger, skipGitCheck);
|
||||
if (!repoResult.ok) {
|
||||
return repoResult;
|
||||
}
|
||||
|
||||
// 2. Config check (free — filesystem + CPU)
|
||||
let parsedConfig: Config | null = null;
|
||||
if (configPath) {
|
||||
const configResult = await validateConfig(configPath, logger);
|
||||
if (!configResult.ok) {
|
||||
return configResult;
|
||||
}
|
||||
parsedConfig = configResult.value;
|
||||
}
|
||||
|
||||
// 3. Credential check (cheap — 1 SDK round-trip)
|
||||
const credResult = await validateCredentials(logger);
|
||||
// 3. code_path rules must match real entries in the repo (filesystem only).
|
||||
// Runs after both repo and config are valid, before any network round-trip.
|
||||
if (parsedConfig) {
|
||||
const codePathResult = await validateCodePathsExist(parsedConfig, repoPath, logger);
|
||||
if (!codePathResult.ok) {
|
||||
return codePathResult;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
|
||||
const credResult = await validateCredentials(logger, apiKey, providerConfig);
|
||||
if (!credResult.ok) {
|
||||
return credResult;
|
||||
}
|
||||
|
||||
// 4. Target URL reachability check (cheap — 1 HTTP round-trip)
|
||||
// 5. Target URL reachability check (cheap — 1 HTTP round-trip)
|
||||
const urlResult = await validateTargetUrl(targetUrl, logger);
|
||||
if (!urlResult.ok) {
|
||||
return urlResult;
|
||||
|
||||
@@ -8,9 +8,113 @@ import { fs, path } from 'zx';
|
||||
import { PROMPTS_DIR } from '../paths.js';
|
||||
import { PLAYWRIGHT_SESSION_MAPPING } from '../session-manager.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { Authentication, DistributedConfig } from '../types/config.js';
|
||||
import type { Authentication, DistributedConfig, ReportConfig, Rule, VulnClass } from '../types/config.js';
|
||||
import { isGlobPattern } from '../utils/glob.js';
|
||||
import { handlePromptError, PentestError } from './error-handling.js';
|
||||
|
||||
/**
 * Render the code_path rules of a rule list as a markdown bullet list.
 *
 * Each bullet carries a `[GLOB]` or `[FILE]` tag according to isGlobPattern().
 *
 * @param rules - Rule list of any types; non-code_path rules are ignored
 * @returns One bullet per code_path rule, or `None` when there are none
 */
function renderCodePathRules(rules: Rule[]): string {
  const bullets: string[] = [];
  for (const rule of rules) {
    if (rule.type !== 'code_path') continue;
    const tag = isGlobPattern(rule.value) ? '[GLOB]' : '[FILE]';
    bullets.push(`- ${rule.value} ${tag} — ${rule.description}`);
  }
  return bullets.length === 0 ? 'None' : bullets.join('\n');
}
|
||||
|
||||
/** Text fragments used to build one vulnerability-class summary subsection. */
interface VulnSummarySpec {
  /** Bold heading of the subsection. */
  readonly heading: string;
  /** Name of the evidence section the prompt tells the agent to look for. */
  readonly evidenceSection: string;
  /** Class label used in the "No ... vulnerabilities were found." sentence. */
  readonly noneFoundLabel: string;
}
|
||||
|
||||
// Summary-subsection wording per vulnerability class. Key order is the order
// Object.keys() yields when no classes are explicitly selected.
const VULN_SUMMARY_SPECS: Record<VulnClass, VulnSummarySpec> = {
  auth: {
    heading: 'Authentication Vulnerabilities',
    evidenceSection: 'Authentication Exploitation Evidence',
    noneFoundLabel: 'authentication',
  },
  authz: {
    heading: 'Authorization Vulnerabilities',
    evidenceSection: 'Authorization Exploitation Evidence',
    noneFoundLabel: 'authorization',
  },
  xss: {
    heading: 'Cross-Site Scripting (XSS) Vulnerabilities',
    evidenceSection: 'XSS Exploitation Evidence',
    noneFoundLabel: 'XSS',
  },
  injection: {
    heading: 'SQL/Command Injection Vulnerabilities',
    evidenceSection: 'Injection Exploitation Evidence',
    noneFoundLabel: 'SQL or command injection',
  },
  ssrf: {
    heading: 'Server-Side Request Forgery (SSRF) Vulnerabilities',
    evidenceSection: 'SSRF Exploitation Evidence',
    noneFoundLabel: 'SSRF',
  },
};
|
||||
|
||||
/**
 * Build the per-class summary subsections substituted into the report prompt.
 *
 * @param selected - Classes to include; an empty list means every class in
 *                   VULN_SUMMARY_SPECS, in its declaration order
 * @returns The subsections separated by blank lines
 */
function renderVulnSummarySubsections(selected: readonly VulnClass[]): string {
  const classes = selected.length > 0 ? selected : (Object.keys(VULN_SUMMARY_SPECS) as VulnClass[]);

  const subsections: string[] = [];
  for (const cls of classes) {
    const { heading, evidenceSection, noneFoundLabel } = VULN_SUMMARY_SPECS[cls];
    subsections.push(
      `**${heading}:**\n{Check for "${evidenceSection}" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No ${noneFoundLabel} vulnerabilities were found."}`,
    );
  }
  return subsections.join('\n\n');
}
|
||||
|
||||
/**
 * Build the top-level <report_filters> block for the report prompt.
 *
 * Returns an empty string when no report config is given or none of
 * min_severity, min_confidence, or non-blank guidance is set. Otherwise only
 * the filters that are actually set are listed, so the agent never sees
 * placeholders for filters that do not apply.
 */
function renderReportFiltersBlock(report: ReportConfig | undefined): string {
  const minSeverity = report?.min_severity;
  const minConfidence = report?.min_confidence;
  const guidance = report?.guidance?.trim();
  if (!minSeverity && !minConfidence && !guidance) return '';

  const thresholds: string[] = [];
  if (minSeverity) {
    thresholds.push(
      `- Minimum severity: ${minSeverity} — keep only findings rated this severity or higher (scale: low < medium < high < critical).`,
    );
  }
  if (minConfidence) {
    thresholds.push(
      `- Minimum confidence: ${minConfidence} — keep only findings rated this confidence or higher (scale: low < medium < high).`,
    );
  }

  // Guidance, when present, is preceded by a blank separator line.
  const guidanceLines: string[] = guidance
    ? ['', 'User guidance — apply throughout the report as binding directives for finding selection:', guidance]
    : [];

  return [
    '<report_filters>',
    'The filters below are user-supplied and binding for this assessment. Honor each strictly when assembling the final report.',
    '',
    ...thresholds,
    ...guidanceLines,
    '</report_filters>',
  ].join('\n');
}
|
||||
|
||||
/**
 * Build the per-finding DROP rule used inside the cleanup step.
 *
 * Severity and confidence appear as concrete thresholds. Guidance is only
 * referred to, not repeated, so its text lives solely in <report_filters>.
 * Returns an empty string when no filter is set.
 */
function renderReportFilterRules(report: ReportConfig | undefined): string {
  const conditions = [
    report?.min_severity ? `* severity is below ${report.min_severity}` : null,
    report?.min_confidence ? `* confidence is below ${report.min_confidence}` : null,
    report?.guidance?.trim() ? '* topic matches an exclusion in the user guidance' : null,
  ].filter((condition): condition is string => condition !== null);

  if (conditions.length === 0) return '';

  const header = ' - DROP any `### [TYPE]-VULN-[NUMBER]` finding whose:';
  return [header, ...conditions.map((condition) => ` ${condition}`)].join('\n');
}
|
||||
|
||||
interface PromptVariables {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
@@ -23,10 +127,14 @@ interface IncludeReplacement {
|
||||
}
|
||||
|
||||
// Pure function: Build complete login instructions from config
|
||||
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
|
||||
async function buildLoginInstructions(
|
||||
authentication: Authentication,
|
||||
logger: ActivityLogger,
|
||||
promptsBaseDir: string = PROMPTS_DIR,
|
||||
): Promise<string> {
|
||||
try {
|
||||
// 1. Load the login instructions template
|
||||
const loginInstructionsPath = path.join(PROMPTS_DIR, 'shared', 'login-instructions.txt');
|
||||
const loginInstructionsPath = path.join(promptsBaseDir, 'shared', 'login-instructions.txt');
|
||||
|
||||
if (!(await fs.pathExists(loginInstructionsPath))) {
|
||||
throw new PentestError('Login instructions template not found', 'filesystem', false, { loginInstructionsPath });
|
||||
@@ -148,6 +256,7 @@ async function interpolateVariables(
|
||||
variables: PromptVariables,
|
||||
config: DistributedConfig | null = null,
|
||||
logger: ActivityLogger,
|
||||
promptsBaseDir: string = PROMPTS_DIR,
|
||||
): Promise<string> {
|
||||
try {
|
||||
if (!template || typeof template !== 'string') {
|
||||
@@ -170,36 +279,63 @@ async function interpolateVariables(
|
||||
.replace(/{{AUTH_CONTEXT}}/g, buildAuthContext(config))
|
||||
.replace(/{{DESCRIPTION}}/g, config?.description ? `Description: ${config.description}` : '');
|
||||
|
||||
if (config) {
|
||||
// Handle rules section - if both are empty, use cleaner messaging
|
||||
const hasAvoidRules = config.avoid && config.avoid.length > 0;
|
||||
const hasFocusRules = config.focus && config.focus.length > 0;
|
||||
|
||||
if (!hasAvoidRules && !hasFocusRules) {
|
||||
// Replace the entire rules section with a clean message
|
||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||
} else {
|
||||
const avoidRules = hasAvoidRules ? config.avoid?.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
const focusRules = hasFocusRules ? config.focus?.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
|
||||
result = result.replace(/{{RULES_AVOID}}/g, avoidRules).replace(/{{RULES_FOCUS}}/g, focusRules);
|
||||
}
|
||||
|
||||
// Extract and inject login instructions from config
|
||||
if (config.authentication?.login_flow) {
|
||||
const loginInstructions = await buildLoginInstructions(config.authentication, logger);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
|
||||
} else {
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
|
||||
}
|
||||
const avoidUrlRules = config?.avoid?.filter((r) => r.type !== 'code_path') ?? [];
|
||||
const focusUrlRules = config?.focus?.filter((r) => r.type !== 'code_path') ?? [];
|
||||
if (avoidUrlRules.length === 0 && focusUrlRules.length === 0) {
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>\s*/g, '');
|
||||
} else {
|
||||
const avoidStr = avoidUrlRules.length > 0 ? avoidUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
const focusStr = focusUrlRules.length > 0 ? focusUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
result = result.replace(/{{RULES_AVOID}}/g, avoidStr).replace(/{{RULES_FOCUS}}/g, focusStr);
|
||||
}
|
||||
|
||||
const avoidCodeRules = (config?.avoid ?? []).filter((r) => r.type === 'code_path');
|
||||
const focusCodeRules = (config?.focus ?? []).filter((r) => r.type === 'code_path');
|
||||
if (avoidCodeRules.length === 0 && focusCodeRules.length === 0) {
|
||||
result = result.replace(/<code_path_rules>[\s\S]*?<\/code_path_rules>\s*/g, '');
|
||||
} else {
|
||||
result = result
|
||||
.replace(/{{CODE_RULES_AVOID}}/g, renderCodePathRules(config?.avoid ?? []))
|
||||
.replace(/{{CODE_RULES_FOCUS}}/g, renderCodePathRules(config?.focus ?? []));
|
||||
}
|
||||
|
||||
const roe = config?.rules_of_engagement?.trim() ?? '';
|
||||
if (roe) {
|
||||
result = result.replace(/{{RULES_OF_ENGAGEMENT}}/g, roe);
|
||||
} else {
|
||||
result = result.replace(/<rules_of_engagement>[\s\S]*?<\/rules_of_engagement>\s*/g, '');
|
||||
}
|
||||
|
||||
if (config?.authentication?.login_flow) {
|
||||
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
|
||||
} else {
|
||||
// Replace the entire rules section with a clean message when no config provided
|
||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
|
||||
}
|
||||
|
||||
const vulnClasses = config?.vuln_classes ?? [];
|
||||
result = result.replace(
|
||||
/{{VULN_CLASSES_TESTED}}/g,
|
||||
vulnClasses.length > 0 ? vulnClasses.join(', ') : 'injection, xss, auth, authz, ssrf',
|
||||
);
|
||||
result = result.replace(/{{VULN_SUMMARY_SUBSECTIONS}}/g, renderVulnSummarySubsections(vulnClasses));
|
||||
|
||||
const exploitEnabled = config?.exploit ?? true;
|
||||
result = result
|
||||
.replace(/{{EXPLOITATION}}/g, exploitEnabled ? 'enabled' : 'disabled')
|
||||
.replace(/{{REPORT_VULN_HEADING}}/g, exploitEnabled ? 'Exploitation Evidence' : 'Findings')
|
||||
.replace(
|
||||
/{{REPORT_VULN_SUBHEADING}}/g,
|
||||
exploitEnabled ? 'Successfully Exploited Vulnerabilities' : 'Identified Vulnerabilities',
|
||||
);
|
||||
|
||||
result = result
|
||||
.replace(/{{REPORT_FILTERS_BLOCK}}/g, renderReportFiltersBlock(config?.report))
|
||||
.replace(/{{REPORT_FILTER_RULES}}/g, renderReportFilterRules(config?.report));
|
||||
|
||||
// Collapse runs of 3+ newlines (left behind by tag-strip and empty-fragment substitutions).
|
||||
result = result.replace(/\n{3,}/g, '\n\n');
|
||||
|
||||
// Validate that all placeholders have been replaced (excluding instructional text)
|
||||
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
|
||||
if (remainingPlaceholders) {
|
||||
@@ -223,10 +359,12 @@ export async function loadPrompt(
|
||||
config: DistributedConfig | null = null,
|
||||
pipelineTestingMode: boolean = false,
|
||||
logger: ActivityLogger,
|
||||
promptDir?: string,
|
||||
): Promise<string> {
|
||||
try {
|
||||
// 1. Resolve prompt file path
|
||||
const promptsDir = pipelineTestingMode ? path.join(PROMPTS_DIR, 'pipeline-testing') : PROMPTS_DIR;
|
||||
// 1. Resolve prompt file path (promptDir override → default PROMPTS_DIR)
|
||||
const basePromptsDir = promptDir ?? PROMPTS_DIR;
|
||||
const promptsDir = pipelineTestingMode ? path.join(basePromptsDir, 'pipeline-testing') : basePromptsDir;
|
||||
const promptPath = path.join(promptsDir, `${promptName}.txt`);
|
||||
|
||||
if (pipelineTestingMode) {
|
||||
@@ -256,7 +394,7 @@ export async function loadPrompt(
|
||||
template = await processIncludes(template, promptsDir);
|
||||
|
||||
// 5. Interpolate variables and return final prompt
|
||||
return await interpolateVariables(template, enhancedVariables, config, logger);
|
||||
return await interpolateVariables(template, enhancedVariables, config, logger, basePromptsDir);
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
throw error;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
|
||||
import type { ExploitationDecision, VulnType } from '../types/agents.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { err, ok, type Result } from '../types/result.js';
|
||||
@@ -114,12 +115,12 @@ function getExistenceErrorMessage(existence: FileExistence): string {
|
||||
const { deliverableExists, queueExists } = existence;
|
||||
|
||||
if (!deliverableExists && !queueExists) {
|
||||
return 'Analysis failed: Neither deliverable nor queue file exists. Analysis agent must create both files.';
|
||||
return 'Analysis failed: Neither deliverable nor queue file exists. Both are required.';
|
||||
}
|
||||
if (!queueExists) {
|
||||
return 'Analysis incomplete: Deliverable exists but queue file missing. Analysis agent must create both files.';
|
||||
return 'Analysis incomplete: Deliverable exists but queue file missing. Both are required.';
|
||||
}
|
||||
return 'Analysis incomplete: Queue exists but deliverable file missing. Analysis agent must create both files.';
|
||||
return 'Analysis incomplete: Queue exists but deliverable file missing. Both are required.';
|
||||
}
|
||||
|
||||
// Pure function to create file paths
|
||||
@@ -133,8 +134,8 @@ const createPaths = (vulnType: VulnType, sourceDir: string): PathsBase | PathsWi
|
||||
|
||||
return Object.freeze({
|
||||
vulnType,
|
||||
deliverable: path.join(sourceDir, 'deliverables', config.deliverable),
|
||||
queue: path.join(sourceDir, 'deliverables', config.queue),
|
||||
deliverable: path.join(sourceDir, config.deliverable),
|
||||
queue: path.join(sourceDir, config.queue),
|
||||
sourceDir,
|
||||
});
|
||||
};
|
||||
|
||||
@@ -5,62 +5,73 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
|
||||
interface DeliverableFile {
|
||||
name: string;
|
||||
path: string;
|
||||
/** Candidate filenames in priority order. First one that exists wins. */
|
||||
paths: readonly string[];
|
||||
required: boolean;
|
||||
}
|
||||
|
||||
// Pure function: Assemble final report from specialist deliverables
|
||||
export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise<string> {
|
||||
const deliverableFiles: DeliverableFile[] = [
|
||||
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
|
||||
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
|
||||
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
|
||||
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
|
||||
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false },
|
||||
// Pure function: Assemble final report from specialist deliverables.
|
||||
// Per class, prefer the exploit-agent's evidence file; fall back to renderer-produced findings.
|
||||
// Both never coexist for a workspace because scope (exploit flag) is locked.
|
||||
export async function assembleFinalReport(
|
||||
sourceDir: string,
|
||||
deliverablesSubdir: string | undefined,
|
||||
logger: ActivityLogger,
|
||||
): Promise<string> {
|
||||
const deliverableFiles: readonly DeliverableFile[] = [
|
||||
{ name: 'Injection', paths: ['injection_exploitation_evidence.md', 'injection_findings.md'], required: false },
|
||||
{ name: 'XSS', paths: ['xss_exploitation_evidence.md', 'xss_findings.md'], required: false },
|
||||
{ name: 'Authentication', paths: ['auth_exploitation_evidence.md', 'auth_findings.md'], required: false },
|
||||
{ name: 'SSRF', paths: ['ssrf_exploitation_evidence.md', 'ssrf_findings.md'], required: false },
|
||||
{ name: 'Authorization', paths: ['authz_exploitation_evidence.md', 'authz_findings.md'], required: false },
|
||||
];
|
||||
|
||||
const dir = deliverablesDir(sourceDir, deliverablesSubdir);
|
||||
const sections: string[] = [];
|
||||
|
||||
for (const file of deliverableFiles) {
|
||||
const filePath = path.join(sourceDir, 'deliverables', file.path);
|
||||
try {
|
||||
if (await fs.pathExists(filePath)) {
|
||||
const content = await fs.readFile(filePath, 'utf8');
|
||||
sections.push(content);
|
||||
logger.info(`Added ${file.name} findings`);
|
||||
} else if (file.required) {
|
||||
let added = false;
|
||||
for (const candidate of file.paths) {
|
||||
const filePath = path.join(dir, candidate);
|
||||
try {
|
||||
if (await fs.pathExists(filePath)) {
|
||||
const content = await fs.readFile(filePath, 'utf8');
|
||||
sections.push(content);
|
||||
logger.info(`Added ${file.name} section from ${candidate}`);
|
||||
added = true;
|
||||
break;
|
||||
}
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
logger.warn(`Could not read ${candidate}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
if (!added) {
|
||||
if (file.required) {
|
||||
throw new PentestError(
|
||||
`Required deliverable file not found: ${file.path}`,
|
||||
`Required deliverable file not found: ${file.paths.join(' or ')}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ deliverableFile: file.path, sourceDir },
|
||||
{ deliverableFile: file.paths, sourceDir },
|
||||
ErrorCode.DELIVERABLE_NOT_FOUND,
|
||||
);
|
||||
} else {
|
||||
logger.info(`No ${file.name} deliverable found`);
|
||||
}
|
||||
} catch (error) {
|
||||
if (file.required) {
|
||||
throw error;
|
||||
}
|
||||
const err = error as Error;
|
||||
logger.warn(`Could not read ${file.path}: ${err.message}`);
|
||||
logger.info(`No ${file.name} deliverable found`);
|
||||
}
|
||||
}
|
||||
|
||||
const finalContent = sections.join('\n\n');
|
||||
const deliverablesDir = path.join(sourceDir, 'deliverables');
|
||||
const finalReportPath = path.join(deliverablesDir, 'comprehensive_security_assessment_report.md');
|
||||
const finalReportPath = path.join(dir, 'comprehensive_security_assessment_report.md');
|
||||
|
||||
try {
|
||||
// Ensure deliverables directory exists
|
||||
await fs.ensureDir(deliverablesDir);
|
||||
await fs.ensureDir(dir);
|
||||
await fs.writeFile(finalReportPath, finalContent);
|
||||
logger.info(`Final report assembled at ${finalReportPath}`);
|
||||
} catch (error) {
|
||||
@@ -81,6 +92,7 @@ export async function assembleFinalReport(sourceDir: string, logger: ActivityLog
|
||||
*/
|
||||
export async function injectModelIntoReport(
|
||||
repoPath: string,
|
||||
deliverablesSubdir: string | undefined,
|
||||
outputPath: string,
|
||||
logger: ActivityLogger,
|
||||
): Promise<void> {
|
||||
@@ -117,7 +129,7 @@ export async function injectModelIntoReport(
|
||||
logger.info(`Injecting model info into report: ${modelStr}`);
|
||||
|
||||
// 3. Read the final report
|
||||
const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
|
||||
const reportPath = path.join(deliverablesDir(repoPath, deliverablesSubdir), 'comprehensive_security_assessment_report.md');
|
||||
|
||||
if (!(await fs.pathExists(reportPath))) {
|
||||
logger.warn('Final report not found, skipping model injection');
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
|
||||
import { validateQueueAndDeliverable } from './services/queue-validation.js';
|
||||
import type { ActivityLogger } from './types/activity-logger.js';
|
||||
import type { AgentDefinition, AgentName, AgentValidator, PlaywrightSession, VulnType } from './types/index.js';
|
||||
@@ -16,7 +17,7 @@ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freez
|
||||
displayName: 'Pre-recon agent',
|
||||
prerequisites: [],
|
||||
promptTemplate: 'pre-recon-code',
|
||||
deliverableFilename: 'code_analysis_deliverable.md',
|
||||
deliverableFilename: 'pre_recon_deliverable.md',
|
||||
modelTier: 'large',
|
||||
},
|
||||
recon: {
|
||||
@@ -102,7 +103,6 @@ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freez
|
||||
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
|
||||
promptTemplate: 'report-executive',
|
||||
deliverableFilename: 'comprehensive_security_assessment_report.md',
|
||||
modelTier: 'small',
|
||||
},
|
||||
});
|
||||
|
||||
@@ -143,7 +143,7 @@ function createVulnValidator(vulnType: VulnType): AgentValidator {
|
||||
// Factory function for exploit deliverable validators
|
||||
function createExploitValidator(vulnType: VulnType): AgentValidator {
|
||||
return async (sourceDir: string): Promise<boolean> => {
|
||||
const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`);
|
||||
const evidenceFile = path.join(sourceDir, `${vulnType}_exploitation_evidence.md`);
|
||||
return await fs.pathExists(evidenceFile);
|
||||
};
|
||||
}
|
||||
@@ -179,13 +179,13 @@ export const PLAYWRIGHT_SESSION_MAPPING: Record<string, PlaywrightSession> = Obj
|
||||
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
|
||||
// Pre-reconnaissance agent - validates the code analysis deliverable created by the agent
|
||||
'pre-recon': async (sourceDir: string): Promise<boolean> => {
|
||||
const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
|
||||
const codeAnalysisFile = path.join(sourceDir, 'pre_recon_deliverable.md');
|
||||
return await fs.pathExists(codeAnalysisFile);
|
||||
},
|
||||
|
||||
// Reconnaissance agent
|
||||
recon: async (sourceDir: string): Promise<boolean> => {
|
||||
const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md');
|
||||
const reconFile = path.join(sourceDir, 'recon_deliverable.md');
|
||||
return await fs.pathExists(reconFile);
|
||||
},
|
||||
|
||||
@@ -205,7 +205,7 @@ export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze
|
||||
|
||||
// Executive report agent
|
||||
report: async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
|
||||
const reportFile = path.join(sourceDir, 'deliverables', 'comprehensive_security_assessment_report.md');
|
||||
const reportFile = path.join(sourceDir, 'comprehensive_security_assessment_report.md');
|
||||
|
||||
const reportExists = await fs.pathExists(reportFile);
|
||||
|
||||
|
||||
@@ -18,25 +18,30 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
|
||||
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
|
||||
import { generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
import type { CheckpointContext } from '../interfaces/checkpoint-provider.js';
|
||||
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
|
||||
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
|
||||
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
|
||||
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
|
||||
import { executeGitCommandWithRetry } from '../services/git-manager.js';
|
||||
import { runPreflightChecks } from '../services/preflight.js';
|
||||
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
|
||||
import { renderFindingsFromQueues } from '../services/findings-renderer.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import type { ContainerConfig, ProviderConfig, VulnClass } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { isErr } from '../types/result.js';
|
||||
import { fileExists, readJson } from '../utils/file-io.js';
|
||||
import { atomicWrite, fileExists, readJson } from '../utils/file-io.js';
|
||||
import { createActivityLogger } from './activity-logger.js';
|
||||
import type { AgentMetrics, ResumeState } from './shared.js';
|
||||
import type { AgentMetrics, PipelineState, ResumeState } from './shared.js';
|
||||
|
||||
// Max lengths to prevent Temporal protobuf buffer overflow
|
||||
const MAX_ERROR_MESSAGE_LENGTH = 2000;
|
||||
@@ -49,6 +54,9 @@ const HEARTBEAT_INTERVAL_MS = 2000;
|
||||
|
||||
/**
|
||||
* Input for all agent activities.
|
||||
*
|
||||
* Config fields are optional with sensible defaults. When provided, they
|
||||
* flow through to getOrCreateContainer() for path and credential configuration.
|
||||
*/
|
||||
export interface ActivityInput {
|
||||
webUrl: string;
|
||||
@@ -58,6 +66,16 @@ export interface ActivityInput {
|
||||
pipelineTestingMode?: boolean;
|
||||
workflowId: string;
|
||||
sessionId: string;
|
||||
|
||||
// Config fields — serializable, read by getOrCreateContainer()
|
||||
configYAML?: string;
|
||||
apiKey?: string;
|
||||
deliverablesSubdir?: string;
|
||||
auditDir?: string;
|
||||
promptDir?: string;
|
||||
sastSarifPath?: string;
|
||||
skipGitCheck?: boolean;
|
||||
providerConfig?: ProviderConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -92,6 +110,19 @@ function buildSessionMetadata(input: ActivityInput): SessionMetadata {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Derive the ContainerConfig for a workflow from its ActivityInput.
 *
 * `deliverablesSubdir` and `auditDir` always receive a value: the one supplied
 * on the input, or DEFAULT_DELIVERABLES_SUBDIR / './workspaces' when absent.
 * `apiKey`, `promptDir` and `providerConfig` are copied only when the input
 * defines them, so the result never carries explicit `undefined` properties.
 */
function buildContainerConfig(input: ActivityInput): ContainerConfig {
  const { apiKey, promptDir, providerConfig } = input;

  // Optional keys: spread an empty object when the field is undefined so the key is omitted entirely.
  const apiKeyPart = apiKey === undefined ? {} : { apiKey };
  const promptDirPart = promptDir === undefined ? {} : { promptDir };
  const providerPart = providerConfig === undefined ? {} : { providerConfig };

  return {
    deliverablesSubdir: input.deliverablesSubdir ?? DEFAULT_DELIVERABLES_SUBDIR,
    auditDir: input.auditDir ?? './workspaces',
    ...apiKeyPart,
    ...promptDirPart,
    ...providerPart,
  };
}
|
||||
|
||||
/**
|
||||
* Core activity implementation using services.
|
||||
*
|
||||
@@ -103,6 +134,21 @@ function buildSessionMetadata(input: ActivityInput): SessionMetadata {
|
||||
*/
|
||||
async function runAgentActivity(agentName: AgentName, input: ActivityInput): Promise<AgentMetrics> {
|
||||
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
|
||||
|
||||
// Skip guard: the checkpoint provider decides whether to run the agent.
|
||||
// The default NoOp provider always returns { skip: false }.
|
||||
const skipContainer =
|
||||
getContainer(workflowId) ??
|
||||
getOrCreateContainer(workflowId, buildSessionMetadata(input), buildContainerConfig(input));
|
||||
const decision = await skipContainer.checkpointProvider.shouldSkipAgent(
|
||||
agentName,
|
||||
repoPath,
|
||||
input.deliverablesSubdir ?? DEFAULT_DELIVERABLES_SUBDIR,
|
||||
);
|
||||
if (decision.skip && decision.metrics) {
|
||||
return decision.metrics;
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
const attemptNumber = Context.current().info.attempt;
|
||||
|
||||
@@ -117,7 +163,7 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
|
||||
|
||||
// 1. Build session metadata and get/create container
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
const container = getOrCreateContainer(workflowId, sessionMetadata);
|
||||
const container = getOrCreateContainer(workflowId, sessionMetadata, buildContainerConfig(input));
|
||||
|
||||
// 2. Create audit session for THIS agent execution
|
||||
// NOTE: Each agent needs its own AuditSession because AuditSession uses
|
||||
@@ -126,14 +172,24 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
|
||||
await auditSession.initialize(workflowId);
|
||||
|
||||
// 3. Execute agent via service (throws PentestError on failure)
|
||||
const deliverablesPath = deliverablesDir(repoPath, container.config.deliverablesSubdir);
|
||||
const endResult = await container.agentExecution.executeOrThrow(
|
||||
agentName,
|
||||
{
|
||||
webUrl,
|
||||
repoPath,
|
||||
deliverablesPath,
|
||||
configPath,
|
||||
pipelineTestingMode,
|
||||
attemptNumber,
|
||||
...(input.apiKey !== undefined && { apiKey: input.apiKey }),
|
||||
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
|
||||
...(input.promptDir !== undefined && {
|
||||
promptDir: path.isAbsolute(input.promptDir)
|
||||
? input.promptDir
|
||||
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
|
||||
}),
|
||||
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
|
||||
},
|
||||
auditSession,
|
||||
logger,
|
||||
@@ -250,7 +306,7 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
|
||||
* Runs cheap checks before any agent execution:
|
||||
* 1. Repository path exists with .git
|
||||
* 2. Config file validates (if provided)
|
||||
* 3. Credential validation (API key, OAuth, or router mode)
|
||||
* 3. Credential validation (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 4. Target URL reachable from the container
|
||||
*
|
||||
* NOT using runAgentActivity — preflight doesn't run an agent via the SDK.
|
||||
@@ -268,7 +324,15 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
|
||||
const logger = createActivityLogger();
|
||||
logger.info('Running preflight validation...', { attempt: attemptNumber });
|
||||
|
||||
const result = await runPreflightChecks(input.webUrl, input.repoPath, input.configPath, logger);
|
||||
const result = await runPreflightChecks(
|
||||
input.webUrl,
|
||||
input.repoPath,
|
||||
input.configPath,
|
||||
logger,
|
||||
input.skipGitCheck,
|
||||
input.apiKey,
|
||||
input.providerConfig,
|
||||
);
|
||||
|
||||
if (isErr(result)) {
|
||||
const classified = classifyErrorForTemporal(result.error);
|
||||
@@ -312,14 +376,78 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
|
||||
}
|
||||
|
||||
/**
|
||||
* Assemble the final report by concatenating exploitation evidence files.
|
||||
* Initialize a private git repository inside the workspace deliverables directory.
|
||||
* Idempotent — skips if .git already exists (resume case).
|
||||
*/
|
||||
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
|
||||
const { repoPath } = input;
|
||||
/**
 * Create the deliverables directory (if needed) and turn it into its own git
 * repository with an empty initial commit. Returns early, without running any
 * git command, when a `.git` entry is already present in that directory.
 */
export async function initDeliverableGit(input: ActivityInput): Promise<void> {
  const repoRoot = deliverablesDir(input.repoPath, input.deliverablesSubdir);
  await fs.mkdir(repoRoot, { recursive: true });

  // Probe for `.git` directly under the deliverables directory (not the parent
  // repo's `.git`). Any stat failure is treated as "not initialised yet".
  const alreadyInitialised = await fs.stat(path.join(repoRoot, '.git')).then(
    () => true,
    () => false,
  );
  if (alreadyInitialised) {
    return;
  }

  await executeGitCommandWithRetry(['git', 'init'], repoRoot, 'init deliverables repo');

  // --allow-empty: the directory may hold no files yet, but a first commit is still created.
  const checkpointCommit = ['git', 'commit', '--allow-empty', '-m', '📍 Initial deliverables checkpoint'];
  await executeGitCommandWithRetry(checkpointCommit, repoRoot, 'initial checkpoint');
}
|
||||
|
||||
/**
 * Push the config's `code_path` avoid rules into Claude's user-scope
 * settings.json, so the SDK enforces them at the tool layer for every agent
 * in this run.
 *
 * Intended to run once per workflow, before any agent starts: the config does
 * not change during a workflow, and a single write avoids parallel agents
 * racing on the global ~/.claude/settings.json file.
 *
 * If the config cannot be loaded, a warning is logged and nothing is written.
 */
export async function syncCodePathDenyRules(input: ActivityInput): Promise<void> {
  const logger = createActivityLogger();
  const sessionMetadata = buildSessionMetadata(input);
  const container = getOrCreateContainer(input.workflowId, sessionMetadata, buildContainerConfig(input));

  const loaded = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML);
  if (isErr(loaded)) {
    logger.warn(`syncCodePathDenyRules: skipping (config load failed: ${loaded.error.message})`);
    return;
  }

  const config = loaded.value;
  // Count is taken before the write and used only for the log line below.
  const codePathAvoids = (config?.avoid ?? []).filter((rule) => rule.type === 'code_path');
  const denyCount = codePathAvoids.length;

  await writeUserSettingsForCodePathAvoids(config);
  logger.info(`Synced code_path deny rules to user settings (${denyCount} entries)`);
}
|
||||
|
||||
/**
|
||||
* Assemble the final report by concatenating per-class deliverables.
|
||||
*
|
||||
* Under exploit=true, each exploit agent has produced `*_exploitation_evidence.md`
|
||||
* directly. Under exploit=false, exploit agents didn't run; we deterministically
|
||||
* render `*_findings.md` from each `*_exploitation_queue.json` first, then assemble.
|
||||
*/
|
||||
export async function assembleReportActivity(input: ActivityInput, exploit: boolean): Promise<void> {
|
||||
const { repoPath, deliverablesSubdir } = input;
|
||||
const logger = createActivityLogger();
|
||||
|
||||
if (!exploit) {
|
||||
logger.info('Rendering per-class findings from analysis queues...');
|
||||
try {
|
||||
await renderFindingsFromQueues(repoPath, deliverablesSubdir, logger);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
logger.warn(`Error rendering findings from queues: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('Assembling deliverables from specialist agents...');
|
||||
try {
|
||||
await assembleFinalReport(repoPath, logger);
|
||||
await assembleFinalReport(repoPath, deliverablesSubdir, logger);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
logger.warn(`Error assembling final report: ${err.message}`);
|
||||
@@ -330,11 +458,11 @@ export async function assembleReportActivity(input: ActivityInput): Promise<void
|
||||
* Inject model metadata into the final report.
|
||||
*/
|
||||
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
|
||||
const { repoPath, sessionId, outputPath } = input;
|
||||
const { repoPath, sessionId, outputPath, deliverablesSubdir } = input;
|
||||
const logger = createActivityLogger();
|
||||
const effectiveOutputPath = outputPath ? path.join(outputPath, sessionId) : path.join('./workspaces', sessionId);
|
||||
try {
|
||||
await injectModelIntoReport(repoPath, effectiveOutputPath, logger);
|
||||
await injectModelIntoReport(repoPath, deliverablesSubdir, effectiveOutputPath, logger);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
logger.warn(`Error injecting model into report: ${err.message}`);
|
||||
@@ -355,7 +483,14 @@ export async function checkExploitationQueue(input: ActivityInput, vulnType: Vul
|
||||
const existingContainer = getContainer(workflowId);
|
||||
const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();
|
||||
|
||||
return checker.checkQueue(vulnType, repoPath, logger);
|
||||
// Pass deliverablesPath (not repoPath) — validators expect the deliverables directory
|
||||
const delivPath = deliverablesDir(repoPath, input.deliverablesSubdir);
|
||||
return checker.checkQueue(vulnType, delivPath, logger);
|
||||
}
|
||||
|
||||
/** Scope of a run as stored under `session.scope` in session.json. */
interface RunScope {
  /** Vulnerability classes selected for the run. */
  vulnClasses: Array<VulnClass>;
  /** Value of the run's exploit flag. */
  exploit: boolean;
}
|
||||
|
||||
interface SessionJson {
|
||||
@@ -365,6 +500,7 @@ interface SessionJson {
|
||||
repoPath?: string;
|
||||
originalWorkflowId?: string;
|
||||
resumeAttempts?: ResumeAttempt[];
|
||||
scope?: RunScope;
|
||||
};
|
||||
metrics: {
|
||||
agents: Record<
|
||||
@@ -384,6 +520,7 @@ export async function loadResumeState(
|
||||
workspaceName: string,
|
||||
expectedUrl: string,
|
||||
expectedRepoPath: string,
|
||||
deliverablesSubdir?: string,
|
||||
): Promise<ResumeState> {
|
||||
// 1. Validate workspace exists
|
||||
const sessionPath = path.join('./workspaces', workspaceName, 'session.json');
|
||||
@@ -426,7 +563,7 @@ export async function loadResumeState(
|
||||
}
|
||||
|
||||
const deliverableFilename = AGENTS[agentName].deliverableFilename;
|
||||
const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
|
||||
const deliverablePath = path.join(deliverablesDir(expectedRepoPath, deliverablesSubdir), deliverableFilename);
|
||||
const deliverableExists = await fileExists(deliverablePath);
|
||||
|
||||
if (!deliverableExists) {
|
||||
@@ -460,7 +597,8 @@ export async function loadResumeState(
|
||||
}
|
||||
|
||||
// 5. Find the most recent checkpoint commit
|
||||
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
|
||||
const deliverablesPath = deliverablesDir(expectedRepoPath, deliverablesSubdir);
|
||||
const checkpointHash = await findLatestCommit(deliverablesPath, checkpoints);
|
||||
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
|
||||
|
||||
// 6. Log summary and return resume state
|
||||
@@ -480,7 +618,43 @@ export async function loadResumeState(
|
||||
};
|
||||
}
|
||||
|
||||
async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
|
||||
/**
 * Record the run scope in session.json, or verify it against the recorded one.
 *
 * - No scope stored yet: writes `{ vulnClasses, exploit }` into session.json.
 * - Scope already stored: compares it with the provided values and throws a
 *   non-retryable `ScopeMismatchError` ApplicationFailure when either the set
 *   of vuln classes or the exploit flag differs. A matching scope is a no-op.
 */
export async function persistOrValidateRunScope(
  input: ActivityInput,
  vulnClasses: VulnClass[],
  exploit: boolean,
): Promise<void> {
  const sessionMetadata = buildSessionMetadata(input);

  // The audit session is initialized before session.json is read.
  // NOTE(review): presumably initialize() guarantees the file exists — confirm.
  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize(input.workflowId);

  const sessionPath = generateSessionJsonPath(sessionMetadata);
  const session = await readJson<SessionJson>(sessionPath);
  const recorded = session.session.scope;

  // First run: nothing recorded yet, so persist a copy of the requested scope.
  if (!recorded) {
    session.session.scope = { vulnClasses: [...vulnClasses], exploit };
    await atomicWrite(sessionPath, session);
    return;
  }

  // Order-insensitive comparison: equal length plus containment in both directions.
  const containsAll = (haystack: VulnClass[], needles: VulnClass[]): boolean =>
    needles.every((needle) => haystack.includes(needle));
  const sameClasses =
    recorded.vulnClasses.length === vulnClasses.length &&
    containsAll(vulnClasses, recorded.vulnClasses) &&
    containsAll(recorded.vulnClasses, vulnClasses);

  if (sameClasses && recorded.exploit === exploit) {
    return;
  }

  const mismatchMessage =
    `Resume scope mismatch for workspace ${input.sessionId}.\n` +
    `  Original: vuln_classes=[${recorded.vulnClasses.join(', ')}], exploit=${recorded.exploit}\n` +
    `  Provided: vuln_classes=[${vulnClasses.join(', ')}], exploit=${exploit}\n` +
    `Resume requires the same scope as the original run. Start a new workspace if you want different scope.`;
  throw ApplicationFailure.nonRetryable(mismatchMessage, 'ScopeMismatchError');
}
|
||||
|
||||
async function findLatestCommit(gitDir: string, commitHashes: string[]): Promise<string> {
|
||||
if (commitHashes.length === 1) {
|
||||
const hash = commitHashes[0];
|
||||
if (!hash) {
|
||||
@@ -497,7 +671,7 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
|
||||
|
||||
const result = await executeGitCommandWithRetry(
|
||||
['git', 'rev-list', '--max-count=1', ...commitHashes],
|
||||
repoPath,
|
||||
gitDir,
|
||||
'find latest commit',
|
||||
);
|
||||
|
||||
@@ -505,26 +679,42 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore git workspace to a checkpoint and clean up partial deliverables.
|
||||
* Restore deliverables git to a checkpoint.
|
||||
* Operates on the private git inside workspace deliverables, not the user's repo.
|
||||
*/
|
||||
export async function restoreGitCheckpoint(
|
||||
repoPath: string,
|
||||
checkpointHash: string,
|
||||
incompleteAgents: AgentName[],
|
||||
deliverablesSubdir?: string,
|
||||
): Promise<void> {
|
||||
const deliverablesPath = deliverablesDir(repoPath, deliverablesSubdir);
|
||||
const logger = createActivityLogger();
|
||||
logger.info(`Restoring git workspace to ${checkpointHash}...`);
|
||||
logger.info(`Restoring deliverables to ${checkpointHash}...`);
|
||||
|
||||
// Validate hash exists in this clone before attempting reset
|
||||
try {
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'rev-parse', '--verify', checkpointHash],
|
||||
repoPath,
|
||||
'verify checkpoint hash exists',
|
||||
);
|
||||
} catch {
|
||||
logger.info(`Checkpoint hash not found in clone, skipping git reset: ${checkpointHash}`);
|
||||
return;
|
||||
}
|
||||
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'reset', '--hard', checkpointHash],
|
||||
repoPath,
|
||||
'reset to checkpoint for resume',
|
||||
deliverablesPath,
|
||||
'reset deliverables to checkpoint',
|
||||
);
|
||||
await executeGitCommandWithRetry(['git', 'clean', '-fd'], repoPath, 'clean untracked files for resume');
|
||||
await executeGitCommandWithRetry(['git', 'clean', '-fd'], deliverablesPath, 'clean untracked deliverables');
|
||||
|
||||
// Explicitly delete partial deliverables for incomplete agents
|
||||
for (const agentName of incompleteAgents) {
|
||||
const deliverableFilename = AGENTS[agentName].deliverableFilename;
|
||||
const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
|
||||
const deliverablePath = path.join(deliverablesPath, deliverableFilename);
|
||||
try {
|
||||
const exists = await fileExists(deliverablePath);
|
||||
if (exists) {
|
||||
@@ -536,7 +726,7 @@ export async function restoreGitCheckpoint(
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('Workspace restored to clean state');
|
||||
logger.info('Deliverables restored to clean state');
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -589,7 +779,7 @@ export async function logPhaseTransition(
|
||||
* Cleans up container when done.
|
||||
*/
|
||||
export async function logWorkflowComplete(input: ActivityInput, summary: WorkflowSummary): Promise<void> {
|
||||
const { repoPath, workflowId } = input;
|
||||
const { workflowId } = input;
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
|
||||
// 1. Initialize audit session and mark final status
|
||||
@@ -631,16 +821,76 @@ export async function logWorkflowComplete(input: ActivityInput, summary: Workflo
|
||||
// 5. Write completion entry to workflow.log
|
||||
await auditSession.logWorkflowComplete(cumulativeSummary);
|
||||
|
||||
// 6. Copy deliverables to workspaces
|
||||
try {
|
||||
await copyDeliverablesToAudit(sessionMetadata, repoPath);
|
||||
} catch (copyErr) {
|
||||
const logger = createActivityLogger();
|
||||
logger.error('Failed to copy deliverables to workspaces', {
|
||||
error: copyErr instanceof Error ? copyErr.message : String(copyErr),
|
||||
});
|
||||
}
|
||||
|
||||
// 7. Clean up container
|
||||
// 6. Clean up container
|
||||
removeContainer(workflowId);
|
||||
}
|
||||
|
||||
/**
 * Merge externally supplied findings into the exploitation queue for one vulnerability type.
 *
 * Looks up the DI container for this workflow and delegates to its FindingsProvider.
 * When no container or no provider is registered the activity is a no-op and
 * resolves with `{ mergedCount: 0 }`. Consumers can override this activity at the
 * worker level with a custom findings integration.
 *
 * @param input - Activity input; `workflowId` selects the container, while `repoPath`
 *   and the input object itself are forwarded to the provider.
 * @param vulnType - Vulnerability type whose queue should receive the findings.
 * @returns The provider's result, or `{ mergedCount: 0 }` when no provider exists.
 */
export async function mergeFindingsIntoQueue(
  input: ActivityInput,
  vulnType: VulnType,
): Promise<{ mergedCount: number }> {
  const provider = getContainer(input.workflowId)?.findingsProvider;
  if (provider) {
    return provider.mergeFindingsIntoQueue(input.repoPath, vulnType, input);
  }
  return { mergedCount: 0 };
}
|
||||
|
||||
/**
 * Persist pipeline state once an agent has finished.
 *
 * Delegates to the CheckpointProvider registered in the DI container for this
 * workflow. Without a container or provider this is a no-op. Consumers can
 * override this activity at the worker level with custom persistence.
 *
 * @param input - Activity input supplying workflowId, repoPath, sessionId and the
 *   optional deliverablesSubdir / outputPath overrides.
 * @param agentName - Name of the agent that just completed.
 * @param phase - Pipeline phase the agent belongs to.
 * @param state - Pipeline state handed to the provider.
 */
export async function saveCheckpoint(
  input: ActivityInput,
  agentName: string,
  phase: string,
  state: PipelineState,
): Promise<void> {
  const provider = getContainer(input.workflowId)?.checkpointProvider;
  if (!provider) {
    return;
  }

  const { repoPath, sessionId, outputPath } = input;
  const deliverablesSubdir = input.deliverablesSubdir ?? DEFAULT_DELIVERABLES_SUBDIR;

  // outputPath is only included when it was supplied, so the key is absent
  // (not present-but-undefined) in the default case.
  const context: CheckpointContext =
    outputPath === undefined
      ? { repoPath, sessionId, deliverablesSubdir }
      : { repoPath, sessionId, deliverablesSubdir, outputPath };

  return provider.onAgentComplete(agentName, phase, state, context);
}
|
||||
|
||||
/**
 * Produce an optional extra output next to the assembled markdown report.
 *
 * Delegates to the ReportOutputProvider registered in the DI container. With no
 * container or provider the activity does nothing. Consumers can override this
 * activity at the worker level to emit outputs derived from the final report.
 *
 * @param input - Activity input; a relative `promptDir` is made absolute on a
 *   copy of the input before it is handed to the provider.
 */
export async function generateReportOutputActivity(input: ActivityInput): Promise<void> {
  const provider = getContainer(input.workflowId)?.reportOutputProvider;
  if (!provider) {
    return;
  }

  const logger = createActivityLogger();

  // Anchor a relative promptDir at the worker root (SHANNON_WORKER_ROOT, else the
  // current working directory) so providers are cwd-independent.
  const { promptDir } = input;
  let resolvedInput: ActivityInput;
  if (promptDir === undefined) {
    resolvedInput = { ...input };
  } else if (path.isAbsolute(promptDir)) {
    resolvedInput = { ...input, promptDir };
  } else {
    const workerRoot = process.env.SHANNON_WORKER_ROOT ?? process.cwd();
    resolvedInput = { ...input, promptDir: path.resolve(workerRoot, promptDir) };
  }

  const { outputPath } = await provider.generate(resolvedInput, logger);
  if (outputPath) {
    logger.info(`Report output written to ${outputPath}`);
  }
}
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* Pipeline entry point — re-exports the extracted pipeline function and shared types.
|
||||
*
|
||||
* Consumers import from this module to call the pipeline as a library function
|
||||
* within their own workflow context.
|
||||
*/
|
||||
|
||||
export { pentestPipeline } from './workflows.js';
|
||||
export type {
|
||||
AgentMetrics,
|
||||
PipelineInput,
|
||||
PipelineState,
|
||||
PipelineSummary,
|
||||
ResumeState,
|
||||
VulnExploitPipelineResult,
|
||||
} from './shared.js';
|
||||
export type { ActivityInput } from './activities.js';
|
||||
@@ -2,7 +2,8 @@ import { defineQuery } from '@temporalio/workflow';
|
||||
|
||||
export type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
import type { PipelineConfig } from '../types/config.js';
|
||||
import type { DistributedConfig, PipelineConfig, ProviderConfig, VulnClass } from '../types/config.js';
|
||||
import type { ErrorCode } from '../types/errors.js';
|
||||
import type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
export interface PipelineInput {
|
||||
@@ -16,6 +17,20 @@ export interface PipelineInput {
|
||||
sessionId?: string; // Workspace directory name (distinct from workflowId for named workspaces)
|
||||
resumeFromWorkspace?: string; // Workspace name to resume from
|
||||
terminatedWorkflows?: string[]; // Workflows terminated during resume
|
||||
|
||||
// Config fields — serializable, flow through to ActivityInput → getOrCreateContainer()
|
||||
configYAML?: string; // Raw YAML string (parsed in activity, not workflow — workflow sandbox can't use Node.js)
|
||||
configData?: DistributedConfig; // Pre-parsed config (bypasses file loading)
|
||||
apiKey?: string; // API key override (avoids process.env mutation)
|
||||
deliverablesSubdir?: string; // Override deliverables path (default: '.shannon/deliverables')
|
||||
auditDir?: string; // Override audit log directory (default: './workspaces')
|
||||
promptDir?: string; // Override prompt template directory
|
||||
sastSarifPath?: string; // Optional path for consumer-supplied findings input
|
||||
checkpointsEnabled?: boolean; // Enable checkpoint activities (default: false)
|
||||
skipGitCheck?: boolean; // Skip .git directory validation in preflight (e.g. when .git is removed after clone)
|
||||
providerConfig?: ProviderConfig; // LLM provider configuration (Bedrock, Vertex, etc.)
|
||||
vulnClasses?: VulnClass[]; // omitted = all five
|
||||
exploit?: boolean; // false skips the exploitation phase
|
||||
}
|
||||
|
||||
export interface ResumeState {
|
||||
@@ -34,12 +49,13 @@ export interface PipelineSummary {
|
||||
}
|
||||
|
||||
export interface PipelineState {
|
||||
status: 'running' | 'completed' | 'failed';
|
||||
status: 'running' | 'completed' | 'failed' | 'cancelled';
|
||||
currentPhase: string | null;
|
||||
currentAgent: string | null;
|
||||
completedAgents: string[];
|
||||
failedAgent: string | null;
|
||||
error: string | null;
|
||||
errorCode?: ErrorCode;
|
||||
startTime: number;
|
||||
agentMetrics: Record<string, AgentMetrics>;
|
||||
summary: PipelineSummary | null;
|
||||
|
||||
@@ -19,7 +19,7 @@ import type { PipelineState } from './shared.js';
|
||||
* safely imported into Temporal workflows. The caller must ensure
|
||||
* state.summary is set before calling (via computeSummary).
|
||||
*/
|
||||
export function toWorkflowSummary(state: PipelineState, status: 'completed' | 'failed'): WorkflowSummary {
|
||||
export function toWorkflowSummary(state: PipelineState, status: 'completed' | 'failed' | 'cancelled'): WorkflowSummary {
|
||||
// state.summary must be computed before calling this mapper
|
||||
const summary = state.summary;
|
||||
if (!summary) {
|
||||
|
||||
@@ -35,7 +35,8 @@ import { bundleWorkflowCode, NativeConnection, Worker } from '@temporalio/worker
|
||||
import dotenv from 'dotenv';
|
||||
import { sanitizeHostname } from '../audit/utils.js';
|
||||
import { parseConfig } from '../config-parser.js';
|
||||
import type { PipelineConfig } from '../types/config.js';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import type { PipelineConfig, VulnClass } from '../types/config.js';
|
||||
import { fileExists, readJson } from '../utils/file-io.js';
|
||||
import * as activities from './activities.js';
|
||||
import type { PipelineInput, PipelineProgress, PipelineState } from './shared.js';
|
||||
@@ -274,30 +275,39 @@ async function resolveWorkspace(client: Client, args: CliArgs): Promise<Workspac
|
||||
|
||||
// === Pipeline Input Construction ===
|
||||
|
||||
async function loadPipelineConfig(configPath: string | undefined): Promise<PipelineConfig> {
|
||||
if (!configPath) return {};
|
||||
interface OrchestrationConfig {
|
||||
pipelineConfig: PipelineConfig;
|
||||
vulnClasses?: VulnClass[];
|
||||
exploit?: boolean;
|
||||
}
|
||||
|
||||
async function loadOrchestrationConfig(configPath: string | undefined): Promise<OrchestrationConfig> {
|
||||
if (!configPath) return { pipelineConfig: {} };
|
||||
try {
|
||||
const config = await parseConfig(configPath);
|
||||
const raw = config.pipeline;
|
||||
if (!raw) return {};
|
||||
|
||||
const result: PipelineConfig = {};
|
||||
if (raw.retry_preset !== undefined) {
|
||||
result.retry_preset = raw.retry_preset;
|
||||
const pipelineConfig: PipelineConfig = {};
|
||||
if (config.pipeline?.retry_preset !== undefined) {
|
||||
pipelineConfig.retry_preset = config.pipeline.retry_preset;
|
||||
}
|
||||
if (raw.max_concurrent_pipelines !== undefined) {
|
||||
result.max_concurrent_pipelines = Number(raw.max_concurrent_pipelines);
|
||||
if (config.pipeline?.max_concurrent_pipelines !== undefined) {
|
||||
pipelineConfig.max_concurrent_pipelines = Number(config.pipeline.max_concurrent_pipelines);
|
||||
}
|
||||
return result;
|
||||
|
||||
return {
|
||||
pipelineConfig,
|
||||
...(config.vuln_classes && config.vuln_classes.length > 0 && { vulnClasses: [...config.vuln_classes] }),
|
||||
...(config.exploit !== undefined && { exploit: config.exploit === 'true' }),
|
||||
};
|
||||
} catch {
|
||||
return {};
|
||||
return { pipelineConfig: {} };
|
||||
}
|
||||
}
|
||||
|
||||
function buildPipelineInput(
|
||||
args: CliArgs,
|
||||
workspace: WorkspaceResolution,
|
||||
pipelineConfig: PipelineConfig,
|
||||
orchestration: OrchestrationConfig,
|
||||
): PipelineInput {
|
||||
return {
|
||||
webUrl: args.webUrl,
|
||||
@@ -308,7 +318,9 @@ function buildPipelineInput(
|
||||
...(args.pipelineTestingMode && { pipelineTestingMode: args.pipelineTestingMode }),
|
||||
...(workspace.isResume && args.resumeFromWorkspace && { resumeFromWorkspace: args.resumeFromWorkspace }),
|
||||
...(workspace.terminatedWorkflows.length > 0 && { terminatedWorkflows: workspace.terminatedWorkflows }),
|
||||
...(Object.keys(pipelineConfig).length > 0 && { pipelineConfig }),
|
||||
...(Object.keys(orchestration.pipelineConfig).length > 0 && { pipelineConfig: orchestration.pipelineConfig }),
|
||||
...(orchestration.vulnClasses && { vulnClasses: orchestration.vulnClasses }),
|
||||
...(orchestration.exploit !== undefined && { exploit: orchestration.exploit }),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -360,13 +372,13 @@ async function waitForWorkflowResult(
|
||||
// === Deliverables Copy ===
|
||||
|
||||
function copyDeliverables(repoPath: string, outputPath: string): void {
|
||||
const deliverablesDir = path.join(repoPath, 'deliverables');
|
||||
if (!fs.existsSync(deliverablesDir)) {
|
||||
const outputDir = deliverablesDir(repoPath);
|
||||
if (!fs.existsSync(outputDir)) {
|
||||
console.log('No deliverables directory found, skipping copy');
|
||||
return;
|
||||
}
|
||||
|
||||
const files = fs.readdirSync(deliverablesDir);
|
||||
const files = fs.readdirSync(outputDir);
|
||||
if (files.length === 0) {
|
||||
console.log('No deliverables to copy');
|
||||
return;
|
||||
@@ -375,7 +387,8 @@ function copyDeliverables(repoPath: string, outputPath: string): void {
|
||||
fs.mkdirSync(outputPath, { recursive: true });
|
||||
|
||||
for (const file of files) {
|
||||
const src = path.join(deliverablesDir, file);
|
||||
if (file === '.git') continue;
|
||||
const src = path.join(outputDir, file);
|
||||
const dest = path.join(outputPath, file);
|
||||
fs.cpSync(src, dest, { recursive: true });
|
||||
}
|
||||
@@ -415,8 +428,8 @@ async function run(): Promise<void> {
|
||||
|
||||
// 4. Resolve workspace and build pipeline input
|
||||
const workspace = await resolveWorkspace(client, args);
|
||||
const pipelineConfig = await loadPipelineConfig(args.configPath);
|
||||
const input = buildPipelineInput(args, workspace, pipelineConfig);
|
||||
const orchestration = await loadOrchestrationConfig(args.configPath);
|
||||
const input = buildPipelineInput(args, workspace, orchestration);
|
||||
|
||||
// 5. Start worker polling in the background
|
||||
const workerDone = worker.run();
|
||||
|
||||
@@ -9,6 +9,39 @@
|
||||
* Pure functions with no side effects — safe for Temporal workflow sandbox.
|
||||
*/
|
||||
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
|
||||
/**
 * Maps an ApplicationFailure type string to a structured ErrorCode.
 *
 * Activities classify errors via classifyErrorForTemporal() and throw
 * ApplicationFailure with a type string. This table maps those strings
 * to stable ErrorCode values so consumers can switch on codes instead of
 * string-matching error messages.
 */
const ERROR_TYPE_TO_CODE: Record<string, ErrorCode> = {
  AuthenticationError: ErrorCode.AUTH_FAILED,
  BillingError: ErrorCode.BILLING_ERROR,
  RateLimitError: ErrorCode.API_RATE_LIMITED,
  ConfigurationError: ErrorCode.CONFIG_VALIDATION_FAILED,
  OutputValidationError: ErrorCode.OUTPUT_VALIDATION_FAILED,
  AgentExecutionError: ErrorCode.AGENT_EXECUTION_FAILED,
  GitError: ErrorCode.GIT_CHECKPOINT_FAILED,
  InvalidTargetError: ErrorCode.TARGET_UNREACHABLE,
};

/**
 * Walk an error and its `cause` chain and return the first mapped ErrorCode.
 *
 * Each link that is an Error with a string `type` property is looked up in
 * ERROR_TYPE_TO_CODE. The walk stops at the first non-Error value, at the end
 * of the chain, or when a link is seen a second time.
 *
 * @param error - Any thrown value.
 * @returns The matching ErrorCode, or undefined when no link in the chain maps to one.
 */
export function classifyErrorCode(error: unknown): ErrorCode | undefined {
  // Guards against a cyclic cause chain, which would otherwise never terminate.
  const visited = new Set<unknown>();
  let current: unknown = error;
  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);
    if ('type' in current && typeof (current as { type: unknown }).type === 'string') {
      const type = (current as { type: string }).type;
      // Own-property check: keys such as "constructor" or "toString" would otherwise
      // resolve through Object.prototype and be returned as if they were error codes.
      if (Object.prototype.hasOwnProperty.call(ERROR_TYPE_TO_CODE, type)) {
        const code = ERROR_TYPE_TO_CODE[type];
        if (code) return code;
      }
    }
    current = (current as { cause?: unknown }).cause;
  }
  return undefined;
}
|
||||
|
||||
/** Maps Temporal error type strings to actionable remediation hints. */
|
||||
const REMEDIATION_HINTS: Record<string, string> = {
|
||||
AuthenticationError: 'Verify ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env is valid and not expired.',
|
||||
|
||||
@@ -23,9 +23,17 @@
|
||||
* - Graceful failure handling: pipelines continue if one fails
|
||||
*/
|
||||
|
||||
import { log, proxyActivities, setHandler, workflowInfo } from '@temporalio/workflow';
|
||||
import {
|
||||
ApplicationFailure,
|
||||
isCancellation,
|
||||
log,
|
||||
proxyActivities,
|
||||
setHandler,
|
||||
workflowInfo,
|
||||
} from '@temporalio/workflow';
|
||||
import type { AgentName, VulnType } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import { ALL_VULN_CLASSES, type VulnClass } from '../types/config.js';
|
||||
import type * as activities from './activities.js';
|
||||
import type { ActivityInput } from './activities.js';
|
||||
import {
|
||||
@@ -39,7 +47,20 @@ import {
|
||||
type VulnExploitPipelineResult,
|
||||
} from './shared.js';
|
||||
import { toWorkflowSummary } from './summary-mapper.js';
|
||||
import { formatWorkflowError } from './workflow-errors.js';
|
||||
import { classifyErrorCode, formatWorkflowError } from './workflow-errors.js';
|
||||
|
||||
/**
 * Agents this run is expected to produce — drives the resume short-circuit.
 *
 * The list always starts with 'pre-recon' and 'recon' and ends with 'report'.
 * In between, each selected class contributes `<class>-vuln`, followed by
 * `<class>-exploit` when exploitation is enabled, in the order the classes are given.
 */
function computeExpectedAgents(vulnClasses: readonly VulnClass[], exploit: boolean): string[] {
  const perClassAgents = vulnClasses.flatMap((vulnClass) =>
    exploit ? [`${vulnClass}-vuln`, `${vulnClass}-exploit`] : [`${vulnClass}-vuln`],
  );
  return ['pre-recon', 'recon', ...perClassAgents, 'report'];
}
|
||||
|
||||
// Retry configuration for production (long intervals for billing recovery)
|
||||
const PRODUCTION_RETRY = {
|
||||
@@ -127,7 +148,28 @@ function computeSummary(state: PipelineState): PipelineSummary {
|
||||
};
|
||||
}
|
||||
|
||||
export async function pentestPipelineWorkflow(input: PipelineInput): Promise<PipelineState> {
|
||||
/**
|
||||
* Core pipeline orchestration. Coordinates the pentest pipeline stages.
|
||||
*
|
||||
* IMPORTANT: This function uses Temporal workflow APIs internally (proxyActivities,
|
||||
* queries). It can ONLY be called from within a Temporal workflow execution.
|
||||
* Do not call from standalone scripts or activity code.
|
||||
*/
|
||||
export async function pentestPipeline(input: PipelineInput): Promise<PipelineState> {
|
||||
// Validate repoPath: reject traversal attempts and require absolute path
|
||||
if (!input.repoPath || input.repoPath.includes('..')) {
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
`Invalid repoPath: path traversal not allowed (received: ${input.repoPath ?? '<empty>'})`,
|
||||
'ConfigurationError',
|
||||
);
|
||||
}
|
||||
if (!input.repoPath.startsWith('/')) {
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
`Invalid repoPath: absolute path required (received: ${input.repoPath})`,
|
||||
'ConfigurationError',
|
||||
);
|
||||
}
|
||||
|
||||
const { workflowId } = workflowInfo();
|
||||
|
||||
// Select activity proxy based on mode: testing (fast), subscription (extended), or default
|
||||
@@ -176,24 +218,53 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
...(input.pipelineTestingMode !== undefined && {
|
||||
pipelineTestingMode: input.pipelineTestingMode,
|
||||
}),
|
||||
// Config fields — flow through to getOrCreateContainer()
|
||||
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
|
||||
...(input.apiKey !== undefined && { apiKey: input.apiKey }),
|
||||
...(input.deliverablesSubdir !== undefined && { deliverablesSubdir: input.deliverablesSubdir }),
|
||||
...(input.auditDir !== undefined && { auditDir: input.auditDir }),
|
||||
...(input.promptDir !== undefined && { promptDir: input.promptDir }),
|
||||
...(input.sastSarifPath !== undefined && { sastSarifPath: input.sastSarifPath }),
|
||||
...(input.skipGitCheck !== undefined && { skipGitCheck: input.skipGitCheck }),
|
||||
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
|
||||
};
|
||||
|
||||
const selectedVulnClasses: readonly VulnClass[] =
|
||||
input.vulnClasses && input.vulnClasses.length > 0 ? input.vulnClasses : ALL_VULN_CLASSES;
|
||||
const selectedClassSet = new Set<VulnClass>(selectedVulnClasses);
|
||||
const exploit: boolean = input.exploit ?? true;
|
||||
const expectedAgents = computeExpectedAgents(selectedVulnClasses, exploit);
|
||||
|
||||
await a.persistOrValidateRunScope(activityInput, [...selectedVulnClasses], exploit);
|
||||
|
||||
let resumeState: ResumeState | null = null;
|
||||
|
||||
if (input.resumeFromWorkspace) {
|
||||
// 1. Load resume state (validates workspace, cross-checks deliverables)
|
||||
resumeState = await a.loadResumeState(input.resumeFromWorkspace, input.webUrl, input.repoPath);
|
||||
resumeState = await a.loadResumeState(
|
||||
input.resumeFromWorkspace,
|
||||
input.webUrl,
|
||||
input.repoPath,
|
||||
input.deliverablesSubdir,
|
||||
);
|
||||
|
||||
// 2. Restore git workspace and clean up incomplete deliverables
|
||||
const incompleteAgents = ALL_AGENTS.filter(
|
||||
(agentName) => !resumeState?.completedAgents.includes(agentName),
|
||||
) as AgentName[];
|
||||
|
||||
await a.restoreGitCheckpoint(input.repoPath, resumeState.checkpointHash, incompleteAgents);
|
||||
await a.restoreGitCheckpoint(
|
||||
input.repoPath,
|
||||
resumeState.checkpointHash,
|
||||
incompleteAgents,
|
||||
input.deliverablesSubdir,
|
||||
);
|
||||
|
||||
// 3. Short-circuit if all agents already completed
|
||||
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
|
||||
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
|
||||
// 3. Short-circuit when every agent expected by this run is done.
|
||||
// Uses dynamic expectedAgents (not ALL_AGENTS) so a class-scoped run completes sooner.
|
||||
const allExpectedDone = expectedAgents.every((a) => resumeState?.completedAgents.includes(a));
|
||||
if (allExpectedDone) {
|
||||
log.info(`All ${expectedAgents.length} expected agents already completed. Nothing to resume.`);
|
||||
state.status = 'completed';
|
||||
state.completedAgents = [...resumeState.completedAgents];
|
||||
state.summary = computeSummary(state);
|
||||
@@ -228,6 +299,9 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
await a.logPhaseTransition(activityInput, phaseName, 'start');
|
||||
state.agentMetrics[agentName] = await runAgent(activityInput);
|
||||
state.completedAgents.push(agentName);
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, agentName, phaseName, state);
|
||||
}
|
||||
await a.logPhaseTransition(activityInput, phaseName, 'complete');
|
||||
} else {
|
||||
log.info(`Skipping ${agentName} (already complete)`);
|
||||
@@ -282,30 +356,14 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
];
|
||||
}
|
||||
|
||||
// Aggregate results from settled pipeline promises into workflow state
|
||||
// Aggregate errors from settled pipeline promises.
|
||||
// Metrics and completedAgents are updated incrementally inside runVulnExploitPipeline
|
||||
// so that getProgress queries reflect real-time status during execution.
|
||||
function aggregatePipelineResults(results: PromiseSettledResult<VulnExploitPipelineResult>[]): void {
|
||||
const failedPipelines: string[] = [];
|
||||
|
||||
for (const result of results) {
|
||||
if (result.status === 'fulfilled') {
|
||||
const { vulnType, vulnMetrics, exploitMetrics } = result.value;
|
||||
|
||||
const vulnAgentName = `${vulnType}-vuln`;
|
||||
if (vulnMetrics) {
|
||||
state.agentMetrics[vulnAgentName] = vulnMetrics;
|
||||
state.completedAgents.push(vulnAgentName);
|
||||
} else if (shouldSkip(vulnAgentName)) {
|
||||
state.completedAgents.push(vulnAgentName);
|
||||
}
|
||||
|
||||
const exploitAgentName = `${vulnType}-exploit`;
|
||||
if (exploitMetrics) {
|
||||
state.agentMetrics[exploitAgentName] = exploitMetrics;
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
} else if (shouldSkip(exploitAgentName)) {
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
}
|
||||
} else {
|
||||
if (result.status === 'rejected') {
|
||||
const errorMsg = result.reason instanceof Error ? result.reason.message : String(result.reason);
|
||||
failedPipelines.push(errorMsg);
|
||||
}
|
||||
@@ -362,6 +420,14 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
await preflightActs.runPreflightValidation(activityInput);
|
||||
log.info('Preflight validation passed');
|
||||
|
||||
// === Initialize Deliverables Git ===
|
||||
await a.initDeliverableGit(activityInput);
|
||||
|
||||
// === Sync SDK deny rules ===
|
||||
await a.syncCodePathDenyRules(activityInput);
|
||||
|
||||
log.info(`Run scope: vuln_classes=[${selectedVulnClasses.join(', ')}] exploit=${exploit}`);
|
||||
|
||||
// === Phase 1: Pre-Reconnaissance ===
|
||||
await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
|
||||
|
||||
@@ -389,20 +455,33 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
let vulnMetrics: AgentMetrics | null = null;
|
||||
if (!shouldSkip(vulnAgentName)) {
|
||||
vulnMetrics = await runVulnAgent();
|
||||
state.agentMetrics[vulnAgentName] = vulnMetrics;
|
||||
state.completedAgents.push(vulnAgentName);
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, vulnAgentName, 'vulnerability-analysis', state);
|
||||
}
|
||||
} else {
|
||||
log.info(`Skipping ${vulnAgentName} (already complete)`);
|
||||
state.completedAgents.push(vulnAgentName);
|
||||
}
|
||||
|
||||
// 1.5. Merge external findings from consumer provider into exploitation queue
|
||||
await a.mergeFindingsIntoQueue(activityInput, vulnType);
|
||||
|
||||
// 2. Check exploitation queue for actionable findings
|
||||
const decision = await a.checkExploitationQueue(activityInput, vulnType);
|
||||
|
||||
// 3. Conditionally run exploitation agent
|
||||
// 3. Previously-completed exploits are preserved regardless of mode; new exploits gated by mode.
|
||||
let exploitMetrics: AgentMetrics | null = null;
|
||||
if (decision.shouldExploit) {
|
||||
if (!shouldSkip(exploitAgentName)) {
|
||||
exploitMetrics = await runExploitAgent();
|
||||
} else {
|
||||
log.info(`Skipping ${exploitAgentName} (already complete)`);
|
||||
if (shouldSkip(exploitAgentName)) {
|
||||
log.info(`Skipping ${exploitAgentName} (already complete)`);
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
} else if (decision.shouldExploit && exploit) {
|
||||
exploitMetrics = await runExploitAgent();
|
||||
state.agentMetrics[exploitAgentName] = exploitMetrics;
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -424,6 +503,11 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
const pipelineThunks: Array<() => Promise<VulnExploitPipelineResult>> = [];
|
||||
|
||||
for (const config of pipelineConfigs) {
|
||||
// Excluded classes drop entirely; any prior deliverables stay on disk but don't count this run.
|
||||
if (!selectedClassSet.has(config.vulnType)) {
|
||||
log.info(`Skipping ${config.vulnType} pipeline (class not selected this run)`);
|
||||
continue;
|
||||
}
|
||||
if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
|
||||
pipelineThunks.push(() => runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit));
|
||||
} else {
|
||||
@@ -445,12 +529,15 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
state.currentAgent = 'report';
|
||||
await a.logPhaseTransition(activityInput, 'reporting', 'start');
|
||||
|
||||
// First, assemble the concatenated report from exploitation evidence files
|
||||
await a.assembleReportActivity(activityInput);
|
||||
// First, assemble the concatenated report from per-class deliverables
|
||||
await a.assembleReportActivity(activityInput, exploit);
|
||||
|
||||
// Then run the report agent to add executive summary and clean up
|
||||
state.agentMetrics.report = await a.runReportAgent(activityInput);
|
||||
state.completedAgents.push('report');
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, 'report', 'reporting', state);
|
||||
}
|
||||
|
||||
// Inject model metadata into the final report
|
||||
await a.injectReportMetadataActivity(activityInput);
|
||||
@@ -461,6 +548,13 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
state.completedAgents.push('report');
|
||||
}
|
||||
|
||||
// Runs after the skip gate so consumer providers still execute on resume.
|
||||
await a.generateReportOutputActivity(activityInput);
|
||||
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, 'report-output', 'reporting', state);
|
||||
}
|
||||
|
||||
state.status = 'completed';
|
||||
state.currentPhase = null;
|
||||
state.currentAgent = null;
|
||||
@@ -471,9 +565,22 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
|
||||
return state;
|
||||
} catch (error) {
|
||||
// Cancellation: return structured state instead of throwing
|
||||
if (isCancellation(error)) {
|
||||
state.status = 'cancelled';
|
||||
state.error = `Cancelled during phase: ${state.currentPhase ?? 'unknown'}`;
|
||||
state.summary = computeSummary(state);
|
||||
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'cancelled'));
|
||||
return state;
|
||||
}
|
||||
|
||||
state.status = 'failed';
|
||||
state.failedAgent = state.currentAgent;
|
||||
state.error = formatWorkflowError(error, state.currentPhase, state.currentAgent);
|
||||
const errorCode = classifyErrorCode(error);
|
||||
if (errorCode) {
|
||||
state.errorCode = errorCode;
|
||||
}
|
||||
state.summary = computeSummary(state);
|
||||
|
||||
// Log workflow failure summary
|
||||
@@ -482,3 +589,8 @@ export async function pentestPipelineWorkflow(input: PipelineInput): Promise<Pip
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * OSS workflow entry point.
 *
 * A thin shell that forwards its input unchanged to the extracted pentestPipeline
 * function and resolves with the PipelineState that function produces.
 */
export async function pentestPipelineWorkflow(input: PipelineInput): Promise<PipelineState> {
  const finalState = await pentestPipeline(input);
  return finalState;
}
|
||||
|
||||
@@ -8,12 +8,12 @@
|
||||
* Configuration type definitions
|
||||
*/
|
||||
|
||||
export type RuleType = 'path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter';
|
||||
export type RuleType = 'url_path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter' | 'code_path';
|
||||
|
||||
export interface Rule {
|
||||
description: string;
|
||||
type: RuleType;
|
||||
url_path: string;
|
||||
value: string;
|
||||
}
|
||||
|
||||
export interface Rules {
|
||||
@@ -21,6 +21,19 @@ export interface Rules {
|
||||
focus?: Rule[];
|
||||
}
|
||||
|
||||
export type VulnClass = 'injection' | 'xss' | 'auth' | 'authz' | 'ssrf';
|
||||
|
||||
export const ALL_VULN_CLASSES: readonly VulnClass[] = ['injection', 'xss', 'auth', 'authz', 'ssrf'];
|
||||
|
||||
export type Severity = 'low' | 'medium' | 'high' | 'critical';
|
||||
export type Confidence = 'low' | 'medium' | 'high';
|
||||
|
||||
export interface ReportConfig {
|
||||
min_severity?: Severity;
|
||||
min_confidence?: Confidence;
|
||||
guidance?: string;
|
||||
}
|
||||
|
||||
export type LoginType = 'form' | 'sso' | 'api' | 'basic';
|
||||
|
||||
export interface SuccessCondition {
|
||||
@@ -47,6 +60,10 @@ export interface Config {
|
||||
authentication?: Authentication;
|
||||
pipeline?: PipelineConfig;
|
||||
description?: string;
|
||||
vuln_classes?: VulnClass[];
|
||||
exploit?: 'true' | 'false';
|
||||
report?: ReportConfig;
|
||||
rules_of_engagement?: string;
|
||||
}
|
||||
|
||||
export type RetryPreset = 'default' | 'subscription';
|
||||
@@ -61,4 +78,48 @@ export interface DistributedConfig {
|
||||
focus: Rule[];
|
||||
authentication: Authentication | null;
|
||||
description: string;
|
||||
vuln_classes: VulnClass[];
|
||||
exploit: boolean;
|
||||
report: ReportConfig;
|
||||
rules_of_engagement: string;
|
||||
}
|
||||
|
||||
/**
 * LLM provider configuration for multi-provider support.
 *
 * The fields are mapped to SDK environment variables at execution time.
 * If `providerType` is omitted or set to 'anthropic_api', the executor
 * falls back to `apiKey` + ANTHROPIC_API_KEY.
 *
 * All fields are optional and read-only.
 */
export interface ProviderConfig {
  // Provider selector; 'anthropic_api' (or leaving it unset) picks the fallback path.
  readonly providerType?: string;
  readonly apiKey?: string;

  // AWS-related settings. NOTE(review): presumably for a Bedrock-style provider — confirm.
  readonly awsRegion?: string;
  readonly awsAccessKeyId?: string;
  readonly awsSecretAccessKey?: string;

  // GCP-related settings. NOTE(review): presumably for a Vertex-style provider — confirm.
  readonly gcpRegion?: string;
  readonly gcpProjectId?: string;
  readonly gcpCredentialsPath?: string;

  // Endpoint and token settings. NOTE(review): likely for custom/proxy endpoints — confirm.
  readonly baseUrl?: string;
  readonly authToken?: string;

  // String-to-string model name mapping; key/value semantics are not visible here.
  readonly modelOverrides?: Record<string, string>;
  readonly supportsStructuredOutput?: boolean;
}
|
||||
|
||||
/**
 * Runtime configuration for the DI container.
 *
 * Abstracts path conventions and credential threading, so consumers can
 * override the OSS defaults without modifying source files.
 */
export interface ContainerConfig {
  /**
   * Subdirectory for deliverables, relative to repoPath.
   * Default: '.shannon/deliverables'
   */
  readonly deliverablesSubdir: string;

  /**
   * Directory for audit logs.
   * Default: './workspaces'
   */
  readonly auditDir: string;

  /**
   * API key override. When set, the executor reads the key from config
   * instead of process.env.
   */
  readonly apiKey?: string;

  /**
   * Prompt directory override. When set, the prompt manager loads from
   * this path.
   */
  readonly promptDir?: string;

  /**
   * LLM provider configuration. When set, the executor maps it to SDK
   * env vars directly.
   */
  readonly providerConfig?: ProviderConfig;
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
/**
|
||||
* Deliverable Type Definitions
|
||||
*
|
||||
* Maps deliverable types to their filenames and defines validation requirements.
|
||||
* Maps deliverable types to their filenames for the save-deliverable CLI.
|
||||
*/
|
||||
|
||||
export enum DeliverableType {
|
||||
@@ -19,19 +19,10 @@ export enum DeliverableType {
|
||||
|
||||
// Vulnerability analysis agents
|
||||
INJECTION_ANALYSIS = 'INJECTION_ANALYSIS',
|
||||
INJECTION_QUEUE = 'INJECTION_QUEUE',
|
||||
|
||||
XSS_ANALYSIS = 'XSS_ANALYSIS',
|
||||
XSS_QUEUE = 'XSS_QUEUE',
|
||||
|
||||
AUTH_ANALYSIS = 'AUTH_ANALYSIS',
|
||||
AUTH_QUEUE = 'AUTH_QUEUE',
|
||||
|
||||
AUTHZ_ANALYSIS = 'AUTHZ_ANALYSIS',
|
||||
AUTHZ_QUEUE = 'AUTHZ_QUEUE',
|
||||
|
||||
SSRF_ANALYSIS = 'SSRF_ANALYSIS',
|
||||
SSRF_QUEUE = 'SSRF_QUEUE',
|
||||
|
||||
// Exploitation agents
|
||||
INJECTION_EVIDENCE = 'INJECTION_EVIDENCE',
|
||||
@@ -45,50 +36,16 @@ export enum DeliverableType {
|
||||
* Hard-coded filename mappings from agent prompts
|
||||
*/
|
||||
export const DELIVERABLE_FILENAMES: Record<DeliverableType, string> = {
|
||||
[DeliverableType.CODE_ANALYSIS]: 'code_analysis_deliverable.md',
|
||||
[DeliverableType.CODE_ANALYSIS]: 'pre_recon_deliverable.md',
|
||||
[DeliverableType.RECON]: 'recon_deliverable.md',
|
||||
[DeliverableType.INJECTION_ANALYSIS]: 'injection_analysis_deliverable.md',
|
||||
[DeliverableType.INJECTION_QUEUE]: 'injection_exploitation_queue.json',
|
||||
[DeliverableType.XSS_ANALYSIS]: 'xss_analysis_deliverable.md',
|
||||
[DeliverableType.XSS_QUEUE]: 'xss_exploitation_queue.json',
|
||||
[DeliverableType.AUTH_ANALYSIS]: 'auth_analysis_deliverable.md',
|
||||
[DeliverableType.AUTH_QUEUE]: 'auth_exploitation_queue.json',
|
||||
[DeliverableType.AUTHZ_ANALYSIS]: 'authz_analysis_deliverable.md',
|
||||
[DeliverableType.AUTHZ_QUEUE]: 'authz_exploitation_queue.json',
|
||||
[DeliverableType.SSRF_ANALYSIS]: 'ssrf_analysis_deliverable.md',
|
||||
[DeliverableType.SSRF_QUEUE]: 'ssrf_exploitation_queue.json',
|
||||
[DeliverableType.INJECTION_EVIDENCE]: 'injection_exploitation_evidence.md',
|
||||
[DeliverableType.XSS_EVIDENCE]: 'xss_exploitation_evidence.md',
|
||||
[DeliverableType.AUTH_EVIDENCE]: 'auth_exploitation_evidence.md',
|
||||
[DeliverableType.AUTHZ_EVIDENCE]: 'authz_exploitation_evidence.md',
|
||||
[DeliverableType.SSRF_EVIDENCE]: 'ssrf_exploitation_evidence.md',
|
||||
};
|
||||
|
||||
/**
|
||||
* Queue types that require JSON validation
|
||||
*/
|
||||
export const QUEUE_TYPES: DeliverableType[] = [
|
||||
DeliverableType.INJECTION_QUEUE,
|
||||
DeliverableType.XSS_QUEUE,
|
||||
DeliverableType.AUTH_QUEUE,
|
||||
DeliverableType.AUTHZ_QUEUE,
|
||||
DeliverableType.SSRF_QUEUE,
|
||||
];
|
||||
|
||||
/**
|
||||
* Type guard to check if a deliverable type is a queue
|
||||
*/
|
||||
export function isQueueType(type: string): boolean {
|
||||
return QUEUE_TYPES.includes(type as DeliverableType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Vulnerability queue structure
|
||||
*/
|
||||
export interface VulnerabilityQueue {
|
||||
vulnerabilities: VulnerabilityItem[];
|
||||
}
|
||||
|
||||
export interface VulnerabilityItem {
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
/**
|
||||
* Specific error codes for reliable classification.
|
||||
*
|
||||
* ErrorCode provides precision within the coarse 8-category PentestErrorType.
|
||||
* ErrorCode provides precision within the coarse 7-category PentestErrorType.
|
||||
* Used by classifyErrorForTemporal for code-based classification (preferred)
|
||||
* with string matching as fallback for external errors.
|
||||
*/
|
||||
@@ -47,15 +47,7 @@ export enum ErrorCode {
|
||||
BILLING_ERROR = 'BILLING_ERROR',
|
||||
}
|
||||
|
||||
export type PentestErrorType =
|
||||
| 'config'
|
||||
| 'network'
|
||||
| 'tool'
|
||||
| 'prompt'
|
||||
| 'filesystem'
|
||||
| 'validation'
|
||||
| 'billing'
|
||||
| 'unknown';
|
||||
export type PentestErrorType = 'config' | 'network' | 'prompt' | 'filesystem' | 'validation' | 'billing' | 'unknown';
|
||||
|
||||
export interface PentestErrorContext {
|
||||
[key: string]: unknown;
|
||||
|
||||
@@ -26,7 +26,6 @@ export const BILLING_TEXT_PATTERNS = [
|
||||
'cap reached',
|
||||
'budget exceeded',
|
||||
'usage limit',
|
||||
'resets',
|
||||
] as const;
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { glob } from 'zx';
|
||||
|
||||
/**
 * Reports whether `value` is a dynamic glob pattern.
 *
 * Delegates entirely to `glob.isDynamicPattern` from zx; no extra
 * normalization or validation is applied to the input.
 *
 * @param value - Candidate string to inspect.
 * @returns The result of `glob.isDynamicPattern(value)`.
 */
export function isGlobPattern(value: string): boolean {
  const hasGlobSyntax = glob.isDynamicPattern(value);
  return hasGlobSyntax;
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 51 KiB |
@@ -19,34 +19,5 @@ services:
|
||||
retries: 10
|
||||
start_period: 30s
|
||||
|
||||
# Optional: claude-code-router for multi-model support
|
||||
# Start with: ROUTER=true ./shannon start ...
|
||||
router:
|
||||
image: node:20-slim
|
||||
container_name: shannon-router
|
||||
profiles: ["router"] # Only starts when explicitly requested
|
||||
command: >
|
||||
sh -c "apt-get update && apt-get install -y gettext-base &&
|
||||
npm install -g @musistudio/claude-code-router &&
|
||||
mkdir -p /root/.claude-code-router &&
|
||||
envsubst < /config/router-config.json > /root/.claude-code-router/config.json &&
|
||||
ccr start"
|
||||
ports:
|
||||
- "127.0.0.1:3456:3456"
|
||||
volumes:
|
||||
- ./apps/cli/infra/router-config.json:/config/router-config.json:ro
|
||||
environment:
|
||||
- HOST=0.0.0.0
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
|
||||
- ROUTER_DEFAULT=${ROUTER_DEFAULT:-openai,gpt-4o}
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3456/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
|
||||
volumes:
|
||||
temporal-data:
|
||||
|
||||
+5
-5
@@ -6,13 +6,13 @@ TARGET_GID="${SHANNON_HOST_GID:-}"
|
||||
CURRENT_UID=$(id -u pentest 2>/dev/null || echo "")
|
||||
|
||||
if [ -n "$TARGET_UID" ] && [ "$TARGET_UID" != "$CURRENT_UID" ]; then
|
||||
deluser pentest 2>/dev/null || true
|
||||
delgroup pentest 2>/dev/null || true
|
||||
userdel pentest 2>/dev/null || true
|
||||
groupdel pentest 2>/dev/null || true
|
||||
|
||||
addgroup -g "$TARGET_GID" pentest
|
||||
adduser -u "$TARGET_UID" -G pentest -s /bin/bash -D pentest
|
||||
groupadd -g "$TARGET_GID" pentest
|
||||
useradd -u "$TARGET_UID" -g pentest -s /bin/bash -M pentest
|
||||
|
||||
chown -R pentest:pentest /app/sessions /app/deliverables /app/workspaces /tmp/.claude
|
||||
chown -R pentest:pentest /app/sessions /app/workspaces /tmp/.claude
|
||||
fi
|
||||
|
||||
exec su -m pentest -c "exec $*"
|
||||
|
||||
+1
-1
@@ -3,7 +3,7 @@
|
||||
"version": "0.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"packageManager": "pnpm@10.12.1",
|
||||
"packageManager": "pnpm@10.33.0",
|
||||
"scripts": {
|
||||
"build": "turbo run build",
|
||||
"check": "turbo run check",
|
||||
|
||||
Generated
+860
-171
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user