refactor: migrate to Turborepo + pnpm + Biome monorepo

Restructure into apps/worker, apps/cli, packages/mcp-server with
Turborepo task orchestration, pnpm workspaces, Biome linting/formatting,
and tsdown CLI bundling.

Key changes:
- src/ -> apps/worker/src/, cli/ -> apps/cli/, mcp-server/ -> packages/mcp-server/
- prompts/ and configs/ moved into apps/worker/
- npm replaced with pnpm, package-lock.json replaced with pnpm-lock.yaml
- Dockerfile updated for pnpm-based builds
- CLI logs command rewritten with chokidar for cross-platform reliability
- Router health checking added for auto-detected router mode
- Centralized path resolution via apps/worker/src/paths.ts
This commit is contained in:
ezl-keygraph
2026-03-15 00:45:36 +05:30
parent 9b1abd9ec0
commit 181f24cfcc
141 changed files with 3717 additions and 3997 deletions
+6 -5
View File
@@ -47,11 +47,12 @@ ehthumbs.db
Thumbs.db
# CLI package (runs on host, not in container)
# Keep cli/package.json so npm workspaces resolve and npm ci works with the root lockfile
cli/src/
cli/dist/
cli/infra/
cli/tsconfig.json
# Keep apps/cli/package.json so pnpm workspaces resolve
apps/cli/src/
apps/cli/dist/
apps/cli/infra/
apps/cli/tsconfig.json
apps/cli/tsdown.config.ts
# Docker files (avoid recursive copying)
Dockerfile*
+21 -15
View File
@@ -26,16 +26,17 @@ jobs:
with:
fetch-depth: 0
- name: Install pnpm
uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: 24
cache: 'pnpm'
- name: Install dependencies
run: npm clean-install
- name: Verify dependency signatures
run: npm audit signatures
run: pnpm install --frozen-lockfile
- name: Probe semantic-release
id: probe
@@ -108,34 +109,35 @@ jobs:
--certificate-identity https://github.com/${{ github.repository }}/.github/workflows/release.yml@${{ github.ref }} \
keygraph/shannon@${{ steps.build.outputs.digest }}
- name: Install pnpm
uses: pnpm/action-setup@v4
- name: Configure npm registry
uses: actions/setup-node@v6
with:
node-version: 24
registry-url: https://registry.npmjs.org
cache: 'pnpm'
- name: Install dependencies
run: npm clean-install
- name: Verify dependency signatures
run: npm audit signatures
run: pnpm install --frozen-lockfile
- name: Set CLI package version
run: npm version "${{ needs.preflight.outputs.version }}" --workspace cli --no-git-tag-version --allow-same-version
run: cd apps/cli && npm version "${{ needs.preflight.outputs.version }}" --no-git-tag-version --allow-same-version
- name: Sync lockfile with bumped versions
run: npm i --package-lock-only
- name: Sync lockfile with bumped version
run: pnpm install --lockfile-only
- name: Build CLI
run: npm run build:cli
run: pnpm --filter @keygraph/shannon run build
- name: Publish npm package
working-directory: cli
working-directory: apps/cli
run: |
if npm view "@keygraph/shannon@${{ needs.preflight.outputs.version }}" version 2>/dev/null; then
echo "Version already published, skipping"
else
npm publish --access public
pnpm publish --access public --no-git-checks
fi
release:
@@ -151,13 +153,17 @@ jobs:
with:
fetch-depth: 0
- name: Install pnpm
uses: pnpm/action-setup@v4
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: 24
cache: 'pnpm'
- name: Install dependencies
run: npm clean-install
run: pnpm install --frozen-lockfile
- name: Create GitHub release
env:
+1
View File
@@ -4,3 +4,4 @@ workspaces/
credentials/
dist/
repos/
.turbo/
+2
View File
@@ -0,0 +1,2 @@
auto-install-peers=true
strict-peer-dependencies=false
+14 -8
View File
@@ -3,13 +3,19 @@
"plugins": [
"@semantic-release/commit-analyzer",
"@semantic-release/release-notes-generator",
["@semantic-release/npm", {
"npmPublish": false
}],
["@semantic-release/github", {
"successCommentCondition": false,
"failCommentCondition": false,
"releasedLabels": false
}]
[
"@semantic-release/npm",
{
"npmPublish": false
}
],
[
"@semantic-release/github",
{
"successCommentCondition": false,
"failCommentCondition": false,
"releasedLabels": false
}
]
]
}
+67 -51
View File
@@ -13,13 +13,13 @@ Shannon supports two CLI modes, auto-detected based on the current working direc
| | **npx** (`npx @keygraph/shannon`) | **Local** (`./shannon`) |
|---|---|---|
| **Install** | Zero-install via npm | Clone the repo |
| **Image** | Pulled from Docker Hub (`keygraph/shannon:<version>`) | Built locally (`shannon-worker`) |
| **Image** | Pulled from Docker Hub (`keygraph/shannon:latest`) | Built locally (`shannon-worker`) |
| **State** | `~/.shannon/` | Project directory |
| **Credentials** | `~/.shannon/config.toml` (via `shn setup`) or env vars | `./.env` |
| **Config** | `~/.shannon/config.toml` (via `shn setup`) | N/A |
| **Prompts** | Bundled in Docker image | Mounted from `./prompts/` (live-editable) |
| **Prompts** | Bundled in Docker image | Mounted from `./apps/worker/prompts/` (live-editable) |
Mode detection: `./shannon` sets `SHANNON_LOCAL=1` env var; `cli/src/mode.ts` checks this to select local vs npx mode.
Mode auto-detection: local mode activates when env var `SHANNON_LOCAL=1` is set by the `./shannon` entry point (`apps/cli/src/mode.ts`). Otherwise npx mode.
### npx Quick Start
@@ -45,7 +45,7 @@ echo "ANTHROPIC_API_KEY=your-key" > .env
# Run
./shannon start -u <url> -r my-repo
./shannon start -u <url> -r my-repo -c ./configs/my-config.yaml
./shannon start -u <url> -r my-repo -c ./apps/worker/configs/my-config.yaml
./shannon start -u <url> -r /any/path/to/repo
```
@@ -75,55 +75,68 @@ npx @keygraph/shannon update # npx mode: pull latest image
npx @keygraph/shannon uninstall # npx mode: remove ~/.shannon/ (confirms first)
# Build TypeScript (development)
npm run build:all # Build core + CLI + MCP server
pnpm run build # Build all packages via Turborepo
pnpm run check # Type-check all packages
pnpm biome # Biome lint + format + import sorting check
pnpm biome:fix # Auto-fix lint, format, and import sorting
```
TypeScript compiler options are shared via `tsconfig.base.json` at the root. All three packages extend it, overriding only `rootDir` and `outDir`. Shared devDependencies (`typescript`, `@types/node`) are hoisted to the root workspace.
**Monorepo tooling:** pnpm workspaces, Turborepo for task orchestration, Biome for linting/formatting. TypeScript compiler options shared via `tsconfig.base.json` at the root. All packages extend it, overriding only `rootDir` and `outDir`. Shared devDependencies (`typescript`, `@types/node`, `turbo`, `@biomejs/biome`) are hoisted to the root workspace.
**Options:** `-c <file>` (YAML config), `-o <path>` (output directory), `-w <name>` (named workspace; auto-resumes if exists), `--pipeline-testing` (minimal prompts, 10s retries), `--router` (multi-model routing via [claude-code-router](https://github.com/musistudio/claude-code-router))
## Architecture
### CLI Package (`cli/`)
Published as `@keygraph/shannon` on npm. Contains only Docker orchestration logic — no Temporal SDK, business logic, or prompts.
### Monorepo Layout
- `cli/src/index.ts` — CLI dispatcher (`setup`, `start`, `stop`, `logs`, `workspaces`, `status`, `build`, `update`, `uninstall`, `info`)
- `cli/src/mode.ts` — Auto-detection: local mode if `Dockerfile` + `docker-compose.yml` + `prompts/` exist in CWD
- `cli/src/docker.ts` — Compose lifecycle, image pull/build, ephemeral `docker run` worker spawning
- `cli/src/home.ts` — State directory management (`~/.shannon/` for npx, `./` for local)
- `cli/src/env.ts``.env` loading, TOML fallback (npx only) via `cli/src/config/resolver.ts`, credential validation, env flag building
- `cli/src/config/resolver.ts` — Cascading config (npx only): env vars → `~/.shannon/config.toml` (parsed with `smol-toml`)
- `cli/src/config/writer.ts` — TOML serialization and secure file persistence (0o600)
- `cli/src/commands/setup.ts` — Interactive TUI wizard (`@clack/prompts`) for provider credential setup (npx only)
- `cli/src/paths.ts` — Repo/config path resolution (bare name → `./repos/<name>`, or any absolute/relative path)
- `cli/src/commands/` — Command handlers
- `cli/infra/compose.yml` — Bundled Temporal + router compose file for npx mode
- `shannon` — Node.js entry point (`#!/usr/bin/env node`) that delegates to `cli/dist/index.js`
```
apps/cli/ — @keygraph/shannon (published to npm, bundled with tsdown)
apps/worker/ — @shannon/worker (private, Temporal worker + pipeline logic)
packages/mcp-server/ — @shannon/mcp-server (private, MCP tool server)
```
### CLI Package (`apps/cli/`)
Published as `@keygraph/shannon` on npm. Contains only Docker orchestration logic — no Temporal SDK, business logic, or prompts. Bundled with tsdown for single-file ESM output.
- `apps/cli/src/index.ts` — CLI dispatcher (`setup`, `start`, `stop`, `logs`, `workspaces`, `status`, `build`, `update`, `uninstall`, `info`)
- `apps/cli/src/mode.ts` — Auto-detection: local mode if `SHANNON_LOCAL=1` env var is set
- `apps/cli/src/docker.ts` — Compose lifecycle, image pull/build, ephemeral `docker run` worker spawning
- `apps/cli/src/home.ts` — State directory management (`~/.shannon/` for npx, `./` for local)
- `apps/cli/src/env.ts``.env` loading, TOML fallback (npx only) via `apps/cli/src/config/resolver.ts`, credential validation, env flag building
- `apps/cli/src/config/resolver.ts` — Cascading config (npx only): env vars → `~/.shannon/config.toml` (parsed with `smol-toml`)
- `apps/cli/src/config/writer.ts` — TOML serialization and secure file persistence (0o600)
- `apps/cli/src/commands/setup.ts` — Interactive TUI wizard (`@clack/prompts`) for provider credential setup (npx only)
- `apps/cli/src/paths.ts` — Repo/config path resolution (bare name → `./repos/<name>`, or any absolute/relative path)
- `apps/cli/src/commands/` — Command handlers
- `apps/cli/infra/compose.yml` — Bundled Temporal + router compose file for npx mode
- `apps/cli/tsdown.config.ts` — tsdown bundler config
- `shannon` — Node.js entry point (`#!/usr/bin/env node`) that delegates to `apps/cli/dist/index.mjs`
### Docker Architecture
Infra (Temporal + router) runs via `docker-compose.yml`. Workers are ephemeral `docker run --rm` containers, one per scan, each with a unique task queue and isolated volume mounts.
- `docker-compose.yml` — Infra only: `shannon-temporal` (port 7233/8233) and `shannon-router` (port 3456, optional via profile). Network: `shannon-net`
- `Dockerfile` — 2-stage build (builder + Chainguard Wolfi runtime). Entrypoint: `CMD ["node", "dist/temporal/worker.js"]`
- `Dockerfile` — 2-stage build (builder + Chainguard Wolfi runtime). Uses pnpm. Entrypoint: `CMD ["node", "apps/worker/dist/temporal/worker.js"]`
- No `docker-compose.docker.yml` — host gateway handled via `--add-host` flag in CLI
### Core Modules
- `src/session-manager.ts`Agent definitions (`AGENTS` record). Agent types in `src/types/agents.ts`
- `src/config-parser.ts` — YAML config parsing with JSON Schema validation
- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic
- `src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts`
- `src/types/` — Consolidated types: `Result<T,E>`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc.
- `src/utils/` — Shared utilities (file I/O, formatting, concurrency)
### Worker Package (`apps/worker/`)
- `apps/worker/src/paths.ts`Centralized path constants (`PROMPTS_DIR`, `CONFIGS_DIR`, `WORKSPACES_DIR`)
- `apps/worker/src/session-manager.ts` — Agent definitions (`AGENTS` record). Agent types in `apps/worker/src/types/agents.ts`
- `apps/worker/src/config-parser.ts` — YAML config parsing with JSON Schema validation
- `apps/worker/src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic
- `apps/worker/src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts`
- `apps/worker/src/types/` — Consolidated types: `Result<T,E>`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc.
- `apps/worker/src/utils/` — Shared utilities (file I/O, formatting, concurrency)
### Temporal Orchestration
Durable workflow orchestration with crash recovery, queryable progress, intelligent retry, and parallel execution (5 concurrent agents in vuln/exploit phases).
- `src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`)
- `src/temporal/activities.ts` — Thin wrappers — heartbeat loop, error classification, container lifecycle. Business logic delegated to `src/services/`
- `src/temporal/activity-logger.ts``TemporalActivityLogger` implementation of `ActivityLogger` interface
- `src/temporal/summary-mapper.ts` — Maps `PipelineSummary` to `WorkflowSummary`
- `src/temporal/worker.ts` — Combined worker + client entry point (per-invocation task queue, submits workflow, waits for result)
- `src/temporal/shared.ts` — Types, interfaces, query definitions
- `apps/worker/src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`)
- `apps/worker/src/temporal/activities.ts` — Thin wrappers — heartbeat loop, error classification, container lifecycle. Business logic delegated to `apps/worker/src/services/`
- `apps/worker/src/temporal/activity-logger.ts``TemporalActivityLogger` implementation of `ActivityLogger` interface
- `apps/worker/src/temporal/summary-mapper.ts` — Maps `PipelineSummary` to `WorkflowSummary`
- `apps/worker/src/temporal/worker.ts` — Combined worker + client entry point (per-invocation task queue, submits workflow, waits for result)
- `apps/worker/src/temporal/shared.ts` — Types, interfaces, query definitions
### Five-Phase Pipeline
1. **Pre-Recon** (`pre-recon`) — External scans (nmap, subfinder, whatweb) + source code analysis
@@ -133,24 +146,24 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
5. **Reporting** (`report`) — Executive-level security report
### Supporting Systems
- **Configuration** — YAML configs in `configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters. Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `src/services/prompt-manager.ts`
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`audit/log-stream.ts`) shared stream primitive
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters. Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts`
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts`
## Development Notes
### Adding a New Agent
1. Define agent in `src/session-manager.ts` (add to `AGENTS` record). `ALL_AGENTS`/`AgentName` types live in `src/types/agents.ts`
2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt`)
3. Two-layer pattern: add a thin activity wrapper in `src/temporal/activities.ts` (heartbeat + error classification). `AgentExecutionService` in `src/services/agent-execution.ts` handles the agent lifecycle automatically via the `AGENTS` registry
4. Register activity in `src/temporal/workflows.ts` within the appropriate phase
1. Define agent in `apps/worker/src/session-manager.ts` (add to `AGENTS` record). `ALL_AGENTS`/`AgentName` types live in `apps/worker/src/types/agents.ts`
2. Create prompt template in `apps/worker/prompts/` (e.g., `vuln-newtype.txt`)
3. Two-layer pattern: add a thin activity wrapper in `apps/worker/src/temporal/activities.ts` (heartbeat + error classification). `AgentExecutionService` in `apps/worker/src/services/agent-execution.ts` handles the agent lifecycle automatically via the `AGENTS` registry
4. Register activity in `apps/worker/src/temporal/workflows.ts` within the appropriate phase
### Modifying Prompts
- Variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}`
- Shared partials in `prompts/shared/` included via `src/services/prompt-manager.ts`
- Shared partials in `apps/worker/prompts/shared/` included via `apps/worker/src/services/prompt-manager.ts`
- Test with `--pipeline-testing` for fast iteration
### Key Design Patterns
@@ -158,8 +171,8 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
- **Progressive Analysis** — Each phase builds on previous results
- **SDK-First** — Claude Agent SDK handles autonomous analysis
- **Modular Error Handling** — `ErrorCode` enum, `Result<T,E>` for explicit error propagation, automatic retry (3 attempts per agent)
- **Services Boundary** — Activities are thin Temporal wrappers; `src/services/` owns business logic, accepts `ActivityLogger`, returns `Result<T,E>`. No Temporal imports in services
- **DI Container** — Per-workflow in `src/services/container.ts`. `AuditSession` excluded (parallel safety)
- **Services Boundary** — Activities are thin Temporal wrappers; `apps/worker/src/services/` owns business logic, accepts `ActivityLogger`, returns `Result<T,E>`. No Temporal imports in services
- **DI Container** — Per-workflow in `apps/worker/src/services/container.ts`. `AuditSession` excluded (parallel safety)
- **Ephemeral Workers** — Each scan runs in its own `docker run --rm` container with a per-invocation task queue. Temporal routes activities by queue name, so per-scan queues ensure activities never land on a worker with the wrong repo mounted
### Security
@@ -167,6 +180,9 @@ Defensive security tool only. Use only on systems you own or have explicit permi
## Code Style Guidelines
### Formatting
Biome handles formatting and linting. Run `pnpm biome:fix` to auto-fix. Config in `biome.json`: single quotes, semicolons, trailing commas, 2-space indent, 120 char line width.
### Clarity Over Brevity
- Optimize for readability, not line count — three clear lines beat one dense expression
- Use descriptive names that convey intent
@@ -209,15 +225,15 @@ Comments must be **timeless** — no references to this conversation, refactorin
## Key Files
**CLI:** `shannon` (entry point), `cli/src/index.ts` (dispatcher), `cli/src/docker.ts` (orchestration), `cli/src/mode.ts` (auto-detection)
**CLI:** `shannon` (entry point), `apps/cli/src/index.ts` (dispatcher), `apps/cli/src/docker.ts` (orchestration), `apps/cli/src/mode.ts` (auto-detection)
**Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`
**Entry Points:** `apps/worker/src/temporal/workflows.ts`, `apps/worker/src/temporal/activities.ts`, `apps/worker/src/temporal/worker.ts`
**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/services/`, `src/audit/`
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/config-parser.ts`, `apps/worker/src/services/`, `apps/worker/src/audit/`
**Config:** `docker-compose.yml`, `cli/infra/compose.yml`, `configs/`, `prompts/`, `tsconfig.base.json` (shared compiler options)
**Config:** `docker-compose.yml`, `apps/cli/infra/compose.yml`, `apps/worker/configs/`, `apps/worker/prompts/`, `tsconfig.base.json` (shared compiler options), `turbo.json`, `biome.json`
**CI/CD:** `.github/workflows/release.yml` (semantic-release: npm publish + Docker push + GitHub release), `.github/workflows/rollback.yml` (rollback npm and Docker latest tags)
**CI/CD:** `.github/workflows/release.yml` (Docker Hub push + npm publish + GitHub release, manual dispatch)
## Troubleshooting
+26 -11
View File
@@ -50,16 +50,27 @@ RUN git clone --depth 1 https://github.com/urbanadventurer/WhatWeb.git /opt/what
# Install Python-based tools
RUN pip3 install --no-cache-dir schemathesis
# Install pnpm
RUN npm install -g pnpm@10
# Build Node.js application in builder to avoid QEMU emulation failures in CI
WORKDIR /app
COPY package*.json ./
COPY cli/package.json ./cli/
COPY mcp-server/package*.json ./mcp-server/
RUN npm ci && npm cache clean --force
# Copy workspace manifests for install layer caching
COPY package.json pnpm-workspace.yaml pnpm-lock.yaml .npmrc ./
COPY apps/worker/package.json ./apps/worker/
COPY apps/cli/package.json ./apps/cli/
COPY packages/mcp-server/package.json ./packages/mcp-server/
RUN pnpm install --frozen-lockfile
COPY . .
RUN cd mcp-server && npm run build && cd .. && npm run build
RUN npm prune --production && \
cd mcp-server && npm prune --production
# Build mcp-server (dependency) then worker. CLI not needed in Docker
RUN pnpm --filter @shannon/mcp-server run build && \
pnpm --filter @shannon/worker run build
RUN pnpm prune --prod
# Runtime stage - Minimal production image
FROM cgr.dev/chainguard/wolfi-base:latest AS runtime
@@ -119,13 +130,17 @@ RUN addgroup -g 1001 pentest && \
# Set working directory
WORKDIR /app
# Copy built application from builder
COPY --from=builder /app /app
# Copy only what the worker needs (skip CLI source, infra, tsdown artifacts)
COPY --from=builder /app/package.json /app/pnpm-workspace.yaml /app/pnpm-lock.yaml /app/.npmrc /app/
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/apps/worker /app/apps/worker
COPY --from=builder /app/apps/cli/package.json /app/apps/cli/package.json
COPY --from=builder /app/packages /app/packages
RUN npm install -g @anthropic-ai/claude-code
# Create directories for session data and ensure proper permissions
RUN mkdir -p /app/sessions /app/deliverables /app/repos /app/configs /app/workspaces && \
RUN mkdir -p /app/sessions /app/deliverables /app/repos /app/workspaces && \
mkdir -p /tmp/.cache /tmp/.config /tmp/.npm && \
chmod 777 /app && \
chmod 777 /tmp/.cache && \
@@ -152,4 +167,4 @@ RUN git config --global user.email "agent@localhost" && \
git config --global user.name "Pentest Agent" && \
git config --global --add safe.directory '*'
CMD ["node", "dist/temporal/worker.js"]
CMD ["node", "apps/worker/dist/temporal/worker.js"]
@@ -19,9 +19,7 @@
"name": "openrouter",
"api_base_url": "https://openrouter.ai/api/v1/chat/completions",
"api_key": "$OPENROUTER_API_KEY",
"models": [
"google/gemini-3-flash-preview"
],
"models": ["google/gemini-3-flash-preview"],
"transformer": {
"use": ["openrouter"]
}
+10 -5
View File
@@ -3,20 +3,22 @@
"version": "0.0.0",
"description": "Shannon - Autonomous white-box AI pentester for web applications and APIs by Keygraph",
"type": "module",
"main": "dist/index.js",
"main": "dist/index.mjs",
"bin": {
"shannon": "dist/index.js"
"shannon": "dist/index.mjs"
},
"files": [
"dist",
"infra"
],
"scripts": {
"build": "tsc",
"dev": "tsc --watch"
"build": "tsdown",
"check": "tsc --noEmit",
"clean": "rm -rf dist"
},
"dependencies": {
"@clack/prompts": "^1.1.0",
"chokidar": "^5.0.0",
"dotenv": "^17.3.1",
"smol-toml": "^1.6.0"
},
@@ -37,9 +39,12 @@
"repository": {
"type": "git",
"url": "git+https://github.com/KeygraphHQ/shannon.git",
"directory": "cli"
"directory": "apps/cli"
},
"engines": {
"node": ">=18"
},
"devDependencies": {
"tsdown": "^0.21.2"
}
}
@@ -3,8 +3,8 @@
* Only available in local mode (running from cloned repository).
*/
import { isLocal } from '../mode.js';
import { buildImage } from '../docker.js';
import { isLocal } from '../mode.js';
export function build(noCache: boolean): void {
if (!isLocal()) {
+106
View File
@@ -0,0 +1,106 @@
/**
* `shannon logs` command — tail a workspace's workflow log.
*
* Uses chokidar for reliable cross-platform file watching and
* bounded synchronous reads to prevent duplicate output.
*/
import fs from 'node:fs';
import path from 'node:path';
import { watch } from 'chokidar';
import { getWorkspacesDir } from '../home.js';
// Match the exact line the worker writes — anchored to prevent false positives from agent output
const COMPLETION_PATTERN = /^Workflow (COMPLETED|FAILED)$/m;
/** Read a byte range from a file and return it as a UTF-8 string. */
function readRange(filePath: string, start: number, end: number): string {
const length = end - start;
const buffer = Buffer.alloc(length);
const fd = fs.openSync(filePath, 'r');
try {
fs.readSync(fd, buffer, 0, length, start);
} finally {
fs.closeSync(fd);
}
return buffer.toString('utf-8');
}
/** Resolve a workspace ID to its workflow.log path, or exit with an error. */
function resolveLogFile(workspaceId: string): string {
const workspacesDir = getWorkspacesDir();
// 1. Direct match
const directPath = path.join(workspacesDir, workspaceId, 'workflow.log');
if (fs.existsSync(directPath)) return directPath;
// 2. Resume workflow ID (e.g. workspace_resume_123)
const resumeBase = workspaceId.replace(/_resume_\d+$/, '');
if (resumeBase !== workspaceId) {
const resumePath = path.join(workspacesDir, resumeBase, 'workflow.log');
if (fs.existsSync(resumePath)) return resumePath;
}
// 3. Named workspace ID (e.g. workspace_shannon-123)
const namedBase = workspaceId.replace(/_shannon-\d+$/, '');
if (namedBase !== workspaceId) {
const namedPath = path.join(workspacesDir, namedBase, 'workflow.log');
if (fs.existsSync(namedPath)) return namedPath;
}
console.error(`ERROR: Workflow log not found for: ${workspaceId}`);
console.error('');
console.error('Possible causes:');
console.error(" - Workflow hasn't started yet");
console.error(' - Workspace ID is incorrect');
console.error('');
console.error('Check the Temporal Web UI at http://localhost:8233 for workflow details');
process.exit(1);
}
export function logs(workspaceId: string): void {
const logFile = resolveLogFile(workspaceId);
let position = 0;
/**
* Output any new content appended since the last read.
* Returns true when the workflow completion marker is detected.
*/
function flush(): boolean {
try {
const { size } = fs.statSync(logFile);
if (size <= position) return false;
const data = readRange(logFile, position, size);
process.stdout.write(data);
position = size;
return COMPLETION_PATTERN.test(data);
} catch {
// File deleted or unreadable — treat as done
return true;
}
}
console.log(`Tailing workflow log: ${logFile}`);
// 1. Output existing content
if (flush()) {
process.exit(0);
}
// 2. Watch for appended content via chokidar
const watcher = watch(logFile, { persistent: true });
const shutdown = (): void => {
watcher.close().finally(() => process.exit(0));
// Safety net — force exit if watcher.close() stalls
setTimeout(() => process.exit(0), 1000).unref();
};
watcher.on('change', () => {
if (flush()) shutdown();
});
process.on('SIGINT', shutdown);
}
@@ -6,8 +6,8 @@
*/
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import path from 'node:path';
import * as p from '@clack/prompts';
import { type ShannonConfig, saveConfig } from '../config/writer.js';
@@ -42,10 +42,14 @@ export async function setup(): Promise<void> {
async function setupProvider(provider: Provider): Promise<ShannonConfig> {
switch (provider) {
case 'anthropic': return setupAnthropic();
case 'bedrock': return setupBedrock();
case 'vertex': return setupVertex();
case 'router': return setupRouter();
case 'anthropic':
return setupAnthropic();
case 'bedrock':
return setupBedrock();
case 'vertex':
return setupVertex();
case 'router':
return setupRouter();
}
}
@@ -190,7 +194,7 @@ async function setupRouter(): Promise<ShannonConfig> {
if (p.isCancel(routerProvider)) return cancelAndExit();
const apiKey = await promptSecret(
routerProvider === 'openai' ? 'Enter your OpenAI API key' : 'Enter your OpenRouter API key'
routerProvider === 'openai' ? 'Enter your OpenAI API key' : 'Enter your OpenRouter API key',
);
let defaultModel: string;
@@ -207,9 +211,7 @@ async function setupRouter(): Promise<ShannonConfig> {
} else {
const model = await p.select({
message: 'Default model',
options: [
{ value: 'google/gemini-3-flash-preview' as const, label: 'Google Gemini 3 Flash Preview' },
],
options: [{ value: 'google/gemini-3-flash-preview' as const, label: 'Google Gemini 3 Flash Preview' }],
});
if (p.isCancel(model)) return cancelAndExit();
defaultModel = `openrouter,${model}`;
@@ -5,14 +5,14 @@
* and npx mode (Docker Hub pull, ~/.shannon/).
*/
import { execFileSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync } from 'node:child_process';
import { ensureImage, ensureInfra, randomSuffix, spawnWorker } from '../docker.js';
import { buildEnvFlags, isRouterConfigured, loadEnv, validateCredentials } from '../env.js';
import { getCredentialsDir, getCredentialsPath, getWorkspacesDir, initHome } from '../home.js';
import { isLocal } from '../mode.js';
import { initHome, getWorkspacesDir, getCredentialsPath, getCredentialsDir } from '../home.js';
import { loadEnv, buildEnvFlags, validateCredentials, isRouterConfigured } from '../env.js';
import { resolveRepo, resolveConfig, ensureDeliverables } from '../paths.js';
import { ensureInfra, ensureImage, spawnWorker, randomSuffix } from '../docker.js';
import { ensureDeliverables, resolveConfig, resolveRepo } from '../paths.js';
import { displaySplash } from '../splash.js';
export interface StartArgs {
@@ -83,7 +83,7 @@ export function start(args: StartArgs): void {
}
// 11. Resolve prompts directory (local mode only)
const promptsDir = isLocal() ? path.resolve('prompts') : undefined;
const promptsDir = isLocal() ? path.resolve('apps/worker/prompts') : undefined;
// 12. Display splash screen
displaySplash(isLocal() ? undefined : args.version);
@@ -165,8 +165,14 @@ export function start(args: StartArgs): void {
}
};
process.on('SIGINT', () => { cleanup(); process.exit(0); });
process.on('SIGTERM', () => { cleanup(); process.exit(0); });
process.on('SIGINT', () => {
cleanup();
process.exit(0);
});
process.on('SIGTERM', () => {
cleanup();
process.exit(0);
});
process.on('exit', cleanup);
}
@@ -176,7 +182,7 @@ function printInfo(
workspace: string,
workflowId: string,
repoPath: string,
workspacesDir: string
workspacesDir: string,
): void {
const logsCmd = isLocal() ? `./shannon logs ${workspace}` : `npx @keygraph/shannon logs ${workspace}`;
const reportsPath = path.join(workspacesDir, workspace);
@@ -3,7 +3,7 @@
*/
import * as p from '@clack/prompts';
import { stopWorkers, stopInfra } from '../docker.js';
import { stopInfra, stopWorkers } from '../docker.js';
export async function stop(clean: boolean): Promise<void> {
if (clean) {
@@ -3,10 +3,10 @@
*/
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import path from 'node:path';
import * as p from '@clack/prompts';
import { stopWorkers, stopInfra } from '../docker.js';
import { stopInfra, stopWorkers } from '../docker.js';
const SHANNON_HOME = path.join(os.homedir(), '.shannon');
@@ -3,21 +3,29 @@
*/
import { execFileSync } from 'node:child_process';
import { getWorkspacesDir } from '../home.js';
import { getWorkerImage } from '../docker.js';
import { getWorkspacesDir } from '../home.js';
export function workspaces(version: string): void {
const workspacesDir = getWorkspacesDir();
const image = getWorkerImage(version);
try {
execFileSync('docker', [
'run', '--rm',
'-v', `${workspacesDir}:/app/workspaces`,
'-e', 'WORKSPACES_DIR=/app/workspaces',
image,
'node', 'dist/temporal/workspaces.js',
], { stdio: 'inherit' });
execFileSync(
'docker',
[
'run',
'--rm',
'-v',
`${workspacesDir}:/app/workspaces`,
'-e',
'WORKSPACES_DIR=/app/workspaces',
image,
'node',
'apps/worker/dist/temporal/workspaces.js',
],
{ stdio: 'inherit' },
);
} catch {
console.error('ERROR: Failed to list workspaces. Is the Docker image available?');
console.error(` Run: docker pull ${image}`);
@@ -118,11 +118,7 @@ function buildSchema(): Map<string, Map<string, TOMLType>> {
}
/** Check that a provider section has all required fields and dependencies. */
function validateProviderFields(
config: TOMLConfig,
provider: string,
errors: string[],
): void {
function validateProviderFields(config: TOMLConfig, provider: string, errors: string[]): void {
const section = config[provider] as Record<string, unknown> | undefined;
if (!section) return;
const keys = Object.keys(section);
@@ -171,11 +167,7 @@ function validateProviderFields(
}
/** Bedrock and Vertex require a [models] section with all three tiers. */
function validateModelTiers(
config: TOMLConfig,
provider: string,
errors: string[],
): void {
function validateModelTiers(config: TOMLConfig, provider: string, errors: string[]): void {
const models = config.models as Record<string, unknown> | undefined;
if (!models || typeof models !== 'object') {
errors.push(`[${provider}] requires a [models] section with small, medium, and large`);
@@ -222,9 +214,7 @@ function validateConfig(config: TOMLConfig): string[] {
}
if (typeof value !== expectedType) {
errors.push(
`[${section}].${key} must be ${expectedType}, got ${typeof value}`
);
errors.push(`[${section}].${key} must be ${expectedType}, got ${typeof value}`);
continue;
}
@@ -243,7 +233,7 @@ function validateConfig(config: TOMLConfig): string[] {
});
if (present.length > 1) {
errors.push(
`Multiple providers configured: [${present.join('], [')}]. Only one provider section is allowed at a time`
`Multiple providers configured: [${present.join('], [')}]. Only one provider section is allowed at a time`,
);
}
+63 -22
View File
@@ -5,10 +5,10 @@
* NPX mode: pulls from Docker Hub, uses bundled compose.yml.
*/
import { execFileSync, spawn, type ChildProcess } from 'node:child_process';
import path from 'node:path';
import os from 'node:os';
import { type ChildProcess, execFileSync, spawn } from 'node:child_process';
import crypto from 'node:crypto';
import os from 'node:os';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { getMode } from './mode.js';
@@ -56,37 +56,77 @@ function runOutput(cmd: string, args: string[]): string {
*/
export function isTemporalReady(): boolean {
const output = runOutput('docker', [
'exec', 'shannon-temporal',
'temporal', 'operator', 'cluster', 'health', '--address', 'localhost:7233',
'exec',
'shannon-temporal',
'temporal',
'operator',
'cluster',
'health',
'--address',
'localhost:7233',
]);
return output.includes('SERVING');
}
/** Check if the router container is running and healthy. */
function isRouterReady(): boolean {
const status = runOutput('docker', ['inspect', '--format', '{{.State.Health.Status}}', 'shannon-router']);
return status === 'healthy';
}
/**
* Ensure Temporal (and optionally router) are running via compose.
* If Temporal is already up but router is needed and missing, starts router only.
*/
export function ensureInfra(useRouter: boolean): void {
if (isTemporalReady()) {
const temporalReady = isTemporalReady();
const routerNeeded = useRouter && !isRouterReady();
if (temporalReady && !routerNeeded) {
return;
}
const composeFile = getComposeFile();
console.log('Starting Shannon infrastructure...');
const composeArgs = ['compose', '-f', composeFile];
if (useRouter) composeArgs.push('--profile', 'router');
composeArgs.push('up', '-d');
if (temporalReady && routerNeeded) {
console.log('Starting router...');
} else {
console.log('Starting Shannon infrastructure...');
}
execFileSync('docker', composeArgs, { stdio: 'inherit' });
console.log('Waiting for Temporal to be ready...');
for (let i = 0; i < 30; i++) {
if (isTemporalReady()) {
console.log('Temporal is ready!');
return;
// Wait for Temporal if it wasn't already running
if (!temporalReady) {
console.log('Waiting for Temporal to be ready...');
for (let i = 0; i < 30; i++) {
if (isTemporalReady()) {
console.log('Temporal is ready!');
break;
}
if (i === 29) {
console.error('Timeout waiting for Temporal');
process.exit(1);
}
execFileSync('sleep', ['2']);
}
execFileSync('sleep', ['2']);
}
console.error('Timeout waiting for Temporal');
process.exit(1);
// Wait for router if needed
if (routerNeeded) {
console.log('Waiting for router to be ready...');
for (let i = 0; i < 15; i++) {
if (isRouterReady()) {
console.log('Router is ready!');
return;
}
execFileSync('sleep', ['2']);
}
console.error('Timeout waiting for router');
process.exit(1);
}
}
/**
@@ -173,7 +213,7 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
// Local mode: mount prompts for live editing
if (opts.promptsDir) {
args.push('-v', `${opts.promptsDir}:/app/prompts:ro`);
args.push('-v', `${opts.promptsDir}:/app/apps/worker/prompts:ro`);
}
if (opts.config) {
@@ -202,7 +242,7 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
args.push(getWorkerImage(opts.version));
// Worker command
args.push('node', 'dist/temporal/worker.js', opts.url, opts.repo.containerPath);
args.push('node', 'apps/worker/dist/temporal/worker.js', opts.url, opts.repo.containerPath);
args.push('--task-queue', opts.taskQueue);
if (opts.config) {
args.push('--config', opts.config.containerPath);
@@ -262,9 +302,7 @@ export function pullImage(version: string): void {
* Remove old keygraph/shannon images that don't match the current version.
*/
function pruneOldImages(currentVersion: string): void {
const output = runOutput('docker', [
'images', NPX_IMAGE_REPO, '--format', '{{.Tag}}',
]);
const output = runOutput('docker', ['images', NPX_IMAGE_REPO, '--format', '{{.Tag}}']);
if (!output) return;
const currentTag = currentVersion;
@@ -279,7 +317,10 @@ function pruneOldImages(currentVersion: string): void {
*/
export function listRunningWorkers(): string {
return runOutput('docker', [
'ps', '--filter', 'name=shannon-worker-',
'--format', 'table {{.Names}}\t{{.Status}}\t{{.RunningFor}}',
'ps',
'--filter',
'name=shannon-worker-',
'--format',
'table {{.Names}}\t{{.Status}}\t{{.RunningFor}}',
]);
}
+5 -4
View File
@@ -6,8 +6,8 @@
*/
import dotenv from 'dotenv';
import { getMode } from './mode.js';
import { resolveConfig } from './config/resolver.js';
import { getMode } from './mode.js';
/** Environment variables forwarded to worker containers. */
const FORWARD_VARS = [
@@ -148,9 +148,10 @@ export function validateCredentials(): CredentialValidation {
return { valid: true, mode: 'router' };
}
const hint = getMode() === 'local'
? `No credentials found. Set ANTHROPIC_API_KEY in .env or export it.`
: `Authentication not configured. Export variables or run 'npx @keygraph/shannon setup'.`;
const hint =
getMode() === 'local'
? `No credentials found. Set ANTHROPIC_API_KEY in .env or export it.`
: `Authentication not configured. Export variables or run 'npx @keygraph/shannon setup'.`;
return {
valid: false,
mode: 'api-key',
+2 -4
View File
@@ -6,8 +6,8 @@
*/
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import path from 'node:path';
import { getMode } from './mode.js';
const SHANNON_HOME = path.join(os.homedir(), '.shannon');
@@ -17,9 +17,7 @@ export function getConfigFile(): string {
}
export function getWorkspacesDir(): string {
return getMode() === 'local'
? path.resolve('workspaces')
: path.join(SHANNON_HOME, 'workspaces');
return getMode() === 'local' ? path.resolve('workspaces') : path.join(SHANNON_HOME, 'workspaces');
}
/**
+56 -23
View File
@@ -1,5 +1,3 @@
#!/usr/bin/env node
/**
* Shannon CLI AI Penetration Testing Framework
*
@@ -14,16 +12,16 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { getMode } from './mode.js';
import { start } from './commands/start.js';
import { stop } from './commands/stop.js';
import { logs } from './commands/logs.js';
import { workspaces } from './commands/workspaces.js';
import { status } from './commands/status.js';
import { update } from './commands/update.js';
import { build } from './commands/build.js';
import { logs } from './commands/logs.js';
import { setup } from './commands/setup.js';
import { start } from './commands/start.js';
import { status } from './commands/status.js';
import { stop } from './commands/stop.js';
import { uninstall } from './commands/uninstall.js';
import { update } from './commands/update.js';
import { workspaces } from './commands/workspaces.js';
import { getMode } from './mode.js';
import { displaySplash } from './splash.js';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -45,16 +43,24 @@ function showHelp(): void {
console.log(`
Shannon - AI Penetration Testing Framework
Usage:${mode === 'local' ? '' : `
${prefix} setup Configure credentials`}
Usage:${
mode === 'local'
? ''
: `
${prefix} setup Configure credentials`
}
${prefix} start --url <url> --repo <path> [options] Start a pentest scan
${prefix} stop [--clean] Stop all containers
${prefix} workspaces List all workspaces
${prefix} logs <workspace> Tail workflow log
${prefix} status Show running workers${mode === 'local' ? `
${prefix} build [--no-cache] Build worker image` : `
${prefix} status Show running workers${
mode === 'local'
? `
${prefix} build [--no-cache] Build worker image`
: `
${prefix} update Pull latest image
${prefix} uninstall Remove ~/.shannon/ and all data`}
${prefix} uninstall Remove ~/.shannon/ and all data`
}
${prefix} info Show splash screen
${prefix} help Show this help
@@ -72,9 +78,13 @@ Examples:
${prefix} start -u https://example.com -r /path/to/repo -c config.yaml -w q1-audit
${prefix} logs q1-audit
${prefix} stop --clean
${mode === 'local' ? `
State directory: ./workspaces/` : `
State directory: ~/.shannon/`}
${
mode === 'local'
? `
State directory: ./workspaces/`
: `
State directory: ~/.shannon/`
}
Monitor workflows at http://localhost:8233
`);
}
@@ -105,23 +115,38 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
switch (arg) {
case '-u':
case '--url':
if (next && !next.startsWith('-')) { url = next; i++; }
if (next && !next.startsWith('-')) {
url = next;
i++;
}
break;
case '-r':
case '--repo':
if (next && !next.startsWith('-')) { repo = next; i++; }
if (next && !next.startsWith('-')) {
repo = next;
i++;
}
break;
case '-c':
case '--config':
if (next && !next.startsWith('-')) { config = next; i++; }
if (next && !next.startsWith('-')) {
config = next;
i++;
}
break;
case '-w':
case '--workspace':
if (next && !next.startsWith('-')) { workspace = next; i++; }
if (next && !next.startsWith('-')) {
workspace = next;
i++;
}
break;
case '-o':
case '--output':
if (next && !next.startsWith('-')) { output = next; i++; }
if (next && !next.startsWith('-')) {
output = next;
i++;
}
break;
case '--pipeline-testing':
pipelineTesting = true;
@@ -142,7 +167,15 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
process.exit(1);
}
return { url, repo, pipelineTesting, router, ...(config && { config }), ...(workspace && { workspace }), ...(output && { output }) };
return {
url,
repo,
pipelineTesting,
router,
...(config && { config }),
...(workspace && { workspace }),
...(output && { output }),
};
}
// === Main Dispatch ===
+1 -1
View File
@@ -1,5 +1,5 @@
{
"extends": "../tsconfig.base.json",
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "./src",
"outDir": "./dist"
+11
View File
@@ -0,0 +1,11 @@
import { defineConfig } from 'tsdown';
export default defineConfig({
entry: ['src/index.ts'],
format: 'esm',
target: 'node18',
outDir: 'dist',
clean: true,
deps: { neverBundle: ['@clack/prompts', 'dotenv', 'smol-toml'] },
banner: { js: '#!/usr/bin/env node' },
});
@@ -124,11 +124,7 @@
"deprecated": true
}
},
"anyOf": [
{"required": ["authentication"]},
{"required": ["rules"]},
{"required": ["authentication", "rules"]}
],
"anyOf": [{ "required": ["authentication"] }, { "required": ["rules"] }, { "required": ["authentication", "rules"] }],
"additionalProperties": false,
"$defs": {
"rule": {
@@ -157,4 +153,4 @@
"additionalProperties": false
}
}
}
}
+27
View File
@@ -0,0 +1,27 @@
{
"name": "@shannon/worker",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"build": "tsc",
"check": "tsc --noEmit",
"clean": "rm -rf dist"
},
"dependencies": {
"@anthropic-ai/claude-agent-sdk": "catalog:",
"@shannon/mcp-server": "workspace:*",
"@temporalio/activity": "^1.11.0",
"@temporalio/client": "^1.11.0",
"@temporalio/worker": "^1.11.0",
"@temporalio/workflow": "^1.11.0",
"ajv": "^8.12.0",
"ajv-formats": "^2.1.1",
"dotenv": "^16.4.5",
"js-yaml": "^4.1.0",
"zx": "^8.0.0"
},
"devDependencies": {
"@types/js-yaml": "^4.0.9"
}
}
@@ -6,26 +6,23 @@
// Production Claude agent execution with retry, git checkpoints, and audit logging
import { fs, path } from 'zx';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { createShannonHelperServer } from '@shannon/mcp-server';
import { fs, path } from 'zx';
import type { AuditSession } from '../audit/index.js';
import { isRetryableError, PentestError } from '../services/error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { Timer } from '../utils/metrics.js';
import { formatTimestamp } from '../utils/formatting.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js';
import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
import { AGENTS } from '../session-manager.js';
import type { AgentName } from '../types/index.js';
import { dispatchMessage } from './message-handlers.js';
import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
import { createProgressManager } from './progress-manager.js';
import { createAuditLogger } from './audit-logger.js';
import { getActualModelName } from './router-utils.js';
import { resolveModel, type ModelTier } from './models.js';
import { AGENT_VALIDATORS, AGENTS, MCP_AGENT_MAPPING } from '../session-manager.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AgentName } from '../types/index.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { formatTimestamp } from '../utils/formatting.js';
import { Timer } from '../utils/metrics.js';
import { createAuditLogger } from './audit-logger.js';
import { dispatchMessage } from './message-handlers.js';
import { type ModelTier, resolveModel } from './models.js';
import { detectExecutionContext, formatCompletionMessage, formatErrorOutput } from './output-formatters.js';
import { createProgressManager } from './progress-manager.js';
import { getActualModelName } from './router-utils.js';
declare global {
var SHANNON_DISABLE_LOADER: boolean | undefined;
@@ -59,7 +56,7 @@ type McpServer = ReturnType<typeof createShannonHelperServer> | StdioMcpServer;
function buildMcpServers(
sourceDir: string,
agentName: string | null,
logger: ActivityLogger
logger: ActivityLogger,
): Record<string, McpServer> {
// 1. Create the shannon-helper server (always present)
const shannonHelperServer = createShannonHelperServer(sourceDir);
@@ -81,11 +78,7 @@ function buildMcpServers(
// 3. Configure Playwright MCP args with Docker/local browser handling
const isDocker = process.env.SHANNON_DOCKER === 'true';
const mcpArgs: string[] = [
'@playwright/mcp@0.0.68',
'--isolated',
'--user-data-dir', userDataDir,
];
const mcpArgs: string[] = ['@playwright/mcp@0.0.68', '--isolated', '--user-data-dir', userDataDir];
if (isDocker) {
mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
@@ -139,7 +132,7 @@ async function writeErrorLog(
err: Error & { code?: string; status?: number },
sourceDir: string,
fullPrompt: string,
duration: number
duration: number,
): Promise<void> {
try {
const errorLog = {
@@ -150,17 +143,17 @@ async function writeErrorLog(
message: err.message,
code: err.code,
status: err.status,
stack: err.stack
stack: err.stack,
},
context: {
sourceDir,
prompt: fullPrompt.slice(0, 200) + '...',
retryable: isRetryableError(err)
prompt: `${fullPrompt.slice(0, 200)}...`,
retryable: isRetryableError(err),
},
duration
duration,
};
const logPath = path.join(sourceDir, 'error.log');
await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
await fs.appendFile(logPath, `${JSON.stringify(errorLog)}\n`);
} catch {
// Best-effort error log writing - don't propagate failures
}
@@ -170,7 +163,7 @@ export async function validateAgentOutput(
result: ClaudePromptResult,
agentName: string | null,
sourceDir: string,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<boolean> {
logger.info(`Validating ${agentName} agent output`);
@@ -202,7 +195,6 @@ export async function validateAgentOutput(
}
return validationResult;
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
logger.error(`Validation failed with error: ${errMsg}`);
@@ -220,7 +212,7 @@ export async function runClaudePrompt(
agentName: string | null = null,
auditSession: AuditSession | null = null,
logger: ActivityLogger,
modelTier: ModelTier = 'medium'
modelTier: ModelTier = 'medium',
): Promise<ClaudePromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
@@ -230,7 +222,7 @@ export async function runClaudePrompt(
const execContext = detectExecutionContext(description);
const progress = createProgressManager(
{ description, useCleanOutput: execContext.useCleanOutput },
global.SHANNON_DISABLE_LOADER ?? false
global.SHANNON_DISABLE_LOADER ?? false,
);
const auditLogger = createAuditLogger(auditSession);
@@ -293,7 +285,7 @@ export async function runClaudePrompt(
fullPrompt,
options,
{ execContext, description, progress, auditLogger, logger },
timer
timer,
);
turnCount = messageLoopResult.turnCount;
@@ -309,7 +301,7 @@ export async function runClaudePrompt(
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
true, // Retryable - Temporal will use 5-30 min backoff
);
}
@@ -330,9 +322,8 @@ export async function runClaudePrompt(
cost: totalCost,
model,
partialCost: totalCost,
apiErrorDetected
apiErrorDetected,
};
} catch (error) {
// 9. Handle errors — log, write error file, return failure
const duration = timer.stop();
@@ -347,16 +338,15 @@ export async function runClaudePrompt(
return {
error: err.message,
errorType: err.constructor.name,
prompt: fullPrompt.slice(0, 100) + '...',
prompt: `${fullPrompt.slice(0, 100)}...`,
success: false,
duration,
cost: totalCost,
retryable: isRetryableError(err)
retryable: isRetryableError(err),
};
}
}
interface MessageLoopResult {
turnCount: number;
result: string | null;
@@ -377,7 +367,7 @@ async function processMessageStream(
fullPrompt: string,
options: NonNullable<Parameters<typeof query>[0]['options']>,
deps: MessageLoopDeps,
timer: Timer
timer: Timer,
): Promise<MessageLoopResult> {
const { execContext, description, progress, auditLogger, logger } = deps;
const HEARTBEAT_INTERVAL = 30000;
@@ -402,11 +392,13 @@ async function processMessageStream(
turnCount++;
}
const dispatchResult = await dispatchMessage(
message as { type: string; subtype?: string },
turnCount,
{ execContext, description, progress, auditLogger, logger }
);
const dispatchResult = await dispatchMessage(message as { type: string; subtype?: string }, turnCount, {
execContext,
description,
progress,
auditLogger,
logger,
});
if (dispatchResult.type === 'throw') {
throw dispatchResult.error;
@@ -4,35 +4,35 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
import { PentestError } from '../services/error-handling.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { ErrorCode } from '../types/errors.js';
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
import { filterJsonToolCalls } from './output-formatters.js';
import { formatTimestamp } from '../utils/formatting.js';
import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AuditLogger } from './audit-logger.js';
import {
filterJsonToolCalls,
formatAssistantOutput,
formatResultOutput,
formatToolUseOutput,
formatToolResultOutput,
formatToolUseOutput,
} from './output-formatters.js';
import type { AuditLogger } from './audit-logger.js';
import type { ProgressManager } from './progress-manager.js';
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
import { getActualModelName } from './router-utils.js';
import type {
AssistantMessage,
ResultMessage,
ToolUseMessage,
ToolResultMessage,
AssistantResult,
ResultData,
ToolUseData,
ToolResultData,
ApiErrorDetection,
AssistantMessage,
AssistantResult,
ContentBlock,
SystemInitMessage,
ExecutionContext,
ResultData,
ResultMessage,
SystemInitMessage,
ToolResultData,
ToolResultMessage,
ToolUseData,
ToolUseMessage,
} from './types.js';
// Handles both array and string content formats from SDK
@@ -40,9 +40,7 @@ function extractMessageContent(message: AssistantMessage): string {
const messageContent = message.message;
if (Array.isArray(messageContent.content)) {
return messageContent.content
.map((c: ContentBlock) => c.text || JSON.stringify(c))
.join('\n');
return messageContent.content.map((c: ContentBlock) => c.text || JSON.stringify(c)).join('\n');
}
return String(messageContent.content);
@@ -81,7 +79,7 @@ function detectApiError(content: string): ApiErrorDetection {
'billing',
true, // RETRYABLE - Temporal will use 5-30 min backoff
{},
ErrorCode.SPENDING_CAP_REACHED
ErrorCode.SPENDING_CAP_REACHED,
),
};
}
@@ -104,10 +102,7 @@ function detectApiError(content: string): ApiErrorDetection {
}
// Maps SDK structured error types to our error handling.
function handleStructuredError(
errorType: SDKAssistantMessageError,
content: string
): ApiErrorDetection {
function handleStructuredError(errorType: SDKAssistantMessageError, content: string): ApiErrorDetection {
switch (errorType) {
case 'billing_error':
return {
@@ -117,7 +112,7 @@ function handleStructuredError(
'billing',
true, // Retryable with backoff
{},
ErrorCode.INSUFFICIENT_CREDITS
ErrorCode.INSUFFICIENT_CREDITS,
),
};
case 'rate_limit':
@@ -128,7 +123,7 @@ function handleStructuredError(
'network',
true, // Retryable with backoff
{},
ErrorCode.API_RATE_LIMITED
ErrorCode.API_RATE_LIMITED,
),
};
case 'authentication_failed':
@@ -137,7 +132,7 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Authentication failed: ${content.slice(0, 100)}`,
'config',
false // Not retryable - needs API key fix
false, // Not retryable - needs API key fix
),
};
case 'server_error':
@@ -146,7 +141,7 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Server error (structured): ${content.slice(0, 100)}`,
'network',
true // Retryable
true, // Retryable
),
};
case 'invalid_request':
@@ -155,7 +150,7 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Invalid request: ${content.slice(0, 100)}`,
'config',
false // Not retryable - needs code fix
false, // Not retryable - needs code fix
),
};
case 'max_output_tokens':
@@ -164,19 +159,15 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Max output tokens reached: ${content.slice(0, 100)}`,
'billing',
true // Retryable - may succeed with different content
true, // Retryable - may succeed with different content
),
};
case 'unknown':
default:
return { detected: true };
}
}
function handleAssistantMessage(
message: AssistantMessage,
turnCount: number
): AssistantResult {
function handleAssistantMessage(message: AssistantMessage, turnCount: number): AssistantResult {
const content = extractMessageContent(message);
const cleanedContent = filterJsonToolCalls(content);
@@ -246,8 +237,7 @@ function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
// Truncates long results for display (500 char limit), preserves full content for logging
function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
const content = message.content;
const contentStr =
typeof content === 'string' ? content : JSON.stringify(content, null, 2);
const contentStr = typeof content === 'string' ? content : JSON.stringify(content, null, 2);
const displayContent =
contentStr.length > 500
@@ -284,7 +274,7 @@ export interface MessageDispatchDeps {
export async function dispatchMessage(
message: { type: string; subtype?: string },
turnCount: number,
deps: MessageDispatchDeps
deps: MessageDispatchDeps,
): Promise<MessageDispatchAction> {
const { execContext, description, progress, auditLogger, logger } = deps;
@@ -298,12 +288,7 @@ export async function dispatchMessage(
if (assistantResult.cleanedContent.trim()) {
progress.stop();
outputLines(formatAssistantOutput(
assistantResult.cleanedContent,
execContext,
turnCount,
description
));
outputLines(formatAssistantOutput(assistantResult.cleanedContent, execContext, turnCount, description));
progress.start();
}
@@ -324,7 +309,7 @@ export async function dispatchMessage(
if (!execContext.useCleanOutput) {
logger.info(`Model: ${actualModel}, Permission: ${initMsg.permissionMode}`);
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
const mcpStatus = initMsg.mcp_servers.map((s) => `${s.name}(${s.status})`).join(', ');
logger.info(`MCP: ${mcpStatus}`);
}
}
@@ -4,8 +4,8 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { extractAgentType, formatDuration } from '../utils/formatting.js';
import { AGENTS } from '../session-manager.js';
import { extractAgentType, formatDuration } from '../utils/formatting.js';
import type { ExecutionContext, ResultData } from './types.js';
interface ToolCallInput {
@@ -247,11 +247,7 @@ export function filterJsonToolCalls(content: string | null | undefined): string
if (browserAction) {
processedLines.push(browserAction);
}
continue;
}
// Hide all other tool calls (Read, Write, Grep, etc.)
continue;
} catch {
// If JSON parsing fails, treat as regular text
processedLines.push(line);
@@ -266,8 +262,7 @@ export function filterJsonToolCalls(content: string | null | undefined): string
}
export function detectExecutionContext(description: string): ExecutionContext {
const isParallelExecution =
description.includes('vuln agent') || description.includes('exploit agent');
const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent');
const useCleanOutput =
description.includes('Pre-recon agent') ||
@@ -287,7 +282,7 @@ export function formatAssistantOutput(
cleanedContent: string,
context: ExecutionContext,
turnCount: number,
description: string
description: string,
): string[] {
if (!cleanedContent.trim()) {
return [];
@@ -341,7 +336,7 @@ export function formatErrorOutput(
description: string,
duration: number,
sourceDir: string,
isRetryable: boolean
isRetryable: boolean,
): string[] {
const lines: string[] = [];
@@ -374,7 +369,7 @@ export function formatCompletionMessage(
context: ExecutionContext,
description: string,
turnCount: number,
duration: number
duration: number,
): string {
if (context.isParallelExecution) {
const prefix = getAgentPrefix(description);
@@ -388,10 +383,7 @@ export function formatCompletionMessage(
return ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
}
export function formatToolUseOutput(
toolName: string,
input: Record<string, unknown> | undefined
): string[] {
export function formatToolUseOutput(toolName: string, input: Record<string, unknown> | undefined): string[] {
const lines: string[] = [];
lines.push(`\n Using Tool: ${toolName}`);
@@ -63,10 +63,7 @@ class NullProgressManager implements ProgressManager {
}
// Returns no-op when disabled
export function createProgressManager(
context: ProgressContext,
disableLoader: boolean
): ProgressManager {
export function createProgressManager(context: ProgressContext, disableLoader: boolean): ProgressManager {
if (!context.useCleanOutput || disableLoader) {
return new NullProgressManager();
}
@@ -25,4 +25,3 @@ export function getActualModelName(sdkReportedModel?: string): string | undefine
// Fall back to SDK-reported model
return sdkReportedModel;
}
@@ -53,7 +53,6 @@ export interface ContentBlock {
text?: string;
}
export interface AssistantMessage {
type: 'assistant';
error?: SDKAssistantMessageError;
@@ -99,4 +98,3 @@ export interface SystemInitMessage {
export interface UserMessage {
type: 'user';
}
@@ -11,15 +11,15 @@
* crash-safe audit logging.
*/
import { AgentLogger } from './logger.js';
import { WorkflowLogger, type AgentLogDetails, type WorkflowSummary } from './workflow-logger.js';
import { MetricsTracker } from './metrics-tracker.js';
import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { formatTimestamp } from '../utils/formatting.js';
import { SessionMutex } from '../utils/concurrency.js';
import type { AgentEndResult } from '../types/index.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentEndResult } from '../types/index.js';
import { SessionMutex } from '../utils/concurrency.js';
import { formatTimestamp } from '../utils/formatting.js';
import { AgentLogger } from './logger.js';
import { MetricsTracker } from './metrics-tracker.js';
import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { type AgentLogDetails, WorkflowLogger, type WorkflowSummary } from './workflow-logger.js';
// Global mutex instance
const sessionMutex = new SessionMutex();
@@ -47,7 +47,7 @@ export class AuditSession {
'config',
false,
{ field: 'sessionMetadata.id' },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
if (!this.sessionMetadata.webUrl) {
@@ -56,7 +56,7 @@ export class AuditSession {
'config',
false,
{ field: 'sessionMetadata.webUrl' },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -100,11 +100,7 @@ export class AuditSession {
/**
* Start agent execution
*/
async startAgent(
agentName: string,
promptContent: string,
attemptNumber: number = 1
): Promise<void> {
async startAgent(agentName: string, promptContent: string, attemptNumber: number = 1): Promise<void> {
await this.ensureInitialized();
// 1. Save prompt snapshot (only on first attempt)
@@ -140,7 +136,7 @@ export class AuditSession {
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
ErrorCode.AGENT_EXECUTION_FAILED,
);
}
@@ -152,18 +148,10 @@ export class AuditSession {
const agentName = this.currentAgentName || 'unknown';
switch (eventType) {
case 'tool_start':
await this.workflowLogger.logToolStart(
agentName,
String(data.toolName || ''),
data.parameters
);
await this.workflowLogger.logToolStart(agentName, String(data.toolName || ''), data.parameters);
break;
case 'llm_response':
await this.workflowLogger.logLlmResponse(
agentName,
Number(data.turn || 0),
String(data.content || '')
);
await this.workflowLogger.logLlmResponse(agentName, Number(data.turn || 0), String(data.content || ''));
break;
// tool_end and error events are intentionally not logged to workflow log
// to reduce noise - the agent completion message captures the outcome
@@ -266,11 +254,7 @@ export class AuditSession {
* @param terminatedWorkflows - IDs of workflows that were terminated
* @param checkpointHash - Git checkpoint hash that was restored
*/
async addResumeAttempt(
workflowId: string,
terminatedWorkflows: string[],
checkpointHash?: string
): Promise<void> {
async addResumeAttempt(workflowId: string, terminatedWorkflows: string[], checkpointHash?: string): Promise<void> {
await this.ensureInitialized();
const unlock = await sessionMutex.lock(this.sessionId);
@@ -12,8 +12,8 @@
* and proper cleanup.
*/
import fs from 'fs';
import path from 'path';
import fs from 'node:fs';
import path from 'node:path';
import { ensureDirectory } from '../utils/file-io.js';
/**
@@ -103,7 +103,7 @@ export class LogStream {
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.stream?.end(() => {
this._isOpen = false;
this.stream = null;
resolve();
@@ -11,14 +11,10 @@
* Uses LogStream for stream management with backpressure handling.
*/
import {
generateLogPath,
generatePromptPath,
type SessionMetadata,
} from './utils.js';
import { atomicWrite } from '../utils/file-io.js';
import { formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
import { generateLogPath, generatePromptPath, type SessionMetadata } from './utils.js';
interface LogEvent {
type: string;
@@ -103,11 +99,7 @@ export class AgentLogger {
* Save prompt snapshot to prompts directory
* Static method - doesn't require logger instance
*/
static async savePrompt(
sessionMetadata: SessionMetadata,
agentName: string,
promptContent: string
): Promise<void> {
static async savePrompt(sessionMetadata: SessionMetadata, agentName: string, promptContent: string): Promise<void> {
const promptPath = generatePromptPath(sessionMetadata, agentName);
// Create header with metadata
@@ -11,16 +11,13 @@
* Tracks attempt-level data for complete forensic trail.
*/
import {
generateSessionJsonPath,
type SessionMetadata,
} from './utils.js';
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import { PentestError } from '../services/error-handling.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentName, AgentEndResult } from '../types/index.js';
import type { AgentEndResult, AgentName } from '../types/index.js';
import { atomicWrite, fileExists, readJson } from '../utils/file-io.js';
import { calculatePercentage, formatTimestamp } from '../utils/formatting.js';
import { generateSessionJsonPath, type SessionMetadata } from './utils.js';
interface AttemptData {
attempt_number: number;
@@ -166,7 +163,7 @@ export class MetricsTracker {
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
ErrorCode.AGENT_EXECUTION_FAILED,
);
}
@@ -254,18 +251,14 @@ export class MetricsTracker {
* @param terminatedWorkflows - IDs of workflows that were terminated
* @param checkpointHash - Git checkpoint hash that was restored
*/
async addResumeAttempt(
workflowId: string,
terminatedWorkflows: string[],
checkpointHash?: string
): Promise<void> {
async addResumeAttempt(workflowId: string, terminatedWorkflows: string[], checkpointHash?: string): Promise<void> {
if (!this.data) {
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
ErrorCode.AGENT_EXECUTION_FAILED,
);
}
@@ -307,15 +300,10 @@ export class MetricsTracker {
const agents = this.data.metrics.agents;
// Only count successful agents
const successfulAgents = Object.entries(agents).filter(
([, data]) => data.status === 'success'
);
const successfulAgents = Object.entries(agents).filter(([, data]) => data.status === 'success');
// Calculate total duration and cost
const totalDuration = successfulAgents.reduce(
(sum, [, data]) => sum + data.final_duration_ms,
0
);
const totalDuration = successfulAgents.reduce((sum, [, data]) => sum + data.final_duration_ms, 0);
const totalCost = successfulAgents.reduce((sum, [, data]) => sum + data.total_cost_usd, 0);
@@ -329,15 +317,13 @@ export class MetricsTracker {
/**
* Calculate phase-level metrics
*/
private calculatePhaseMetrics(
successfulAgents: Array<[string, AgentAuditMetrics]>
): Record<string, PhaseMetrics> {
private calculatePhaseMetrics(successfulAgents: Array<[string, AgentAuditMetrics]>): Record<string, PhaseMetrics> {
const phases: Record<PhaseName, AgentAuditMetrics[]> = {
'pre-recon': [],
'recon': [],
recon: [],
'vulnerability-analysis': [],
'exploitation': [],
'reporting': [],
exploitation: [],
reporting: [],
};
// Group agents by phase using imported AGENT_PHASE_MAP
@@ -11,22 +11,15 @@
* All functions are pure and crash-safe.
*/
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import fs from 'node:fs/promises';
import path from 'node:path';
import { WORKSPACES_DIR } from '../paths.js';
import { ensureDirectory } from '../utils/file-io.js';
export type { SessionMetadata } from '../types/audit.js';
import type { SessionMetadata } from '../types/audit.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Get Shannon repository root
const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
const WORKSPACES_DIR = path.join(SHANNON_ROOT, 'workspaces');
/**
* Extract and sanitize hostname from URL for use in identifiers
*/
@@ -59,7 +52,7 @@ export function generateLogPath(
sessionMetadata: SessionMetadata,
agentName: string,
timestamp: number,
attemptNumber: number
attemptNumber: number,
): string {
const auditPath = generateAuditPath(sessionMetadata);
const filename = `${timestamp}_${agentName}_attempt-${attemptNumber}.log`;
@@ -110,10 +103,7 @@ export async function initializeAuditStructure(sessionMetadata: SessionMetadata)
* Copy deliverable files from repo to workspaces for self-contained audit trail.
* No-ops if source directory doesn't exist. Idempotent and parallel-safe.
*/
export async function copyDeliverablesToAudit(
sessionMetadata: SessionMetadata,
repoPath: string
): Promise<void> {
export async function copyDeliverablesToAudit(sessionMetadata: SessionMetadata, repoPath: string): Promise<void> {
const sourceDir = path.join(repoPath, 'deliverables');
const destDir = path.join(generateAuditPath(sessionMetadata), 'deliverables');
@@ -11,10 +11,10 @@
* Optimized for `tail -f` viewing during concurrent workflow execution.
*/
import fs from 'fs/promises';
import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
import fs from 'node:fs/promises';
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
export interface AgentLogDetails {
attemptNumber?: number;
@@ -147,11 +147,7 @@ export class WorkflowLogger {
/**
* Log an agent event
*/
async logAgent(
agentName: string,
event: 'start' | 'end',
details?: AgentLogDetails
): Promise<void> {
async logAgent(agentName: string, event: 'start' | 'end', details?: AgentLogDetails): Promise<void> {
await this.ensureInitialized();
let message: string;
@@ -160,7 +156,7 @@ export class WorkflowLogger {
const attempt = details?.attemptNumber ?? 1;
message = `${agentName}: Starting (attempt ${attempt})`;
} else {
const parts: string[] = [agentName + ':'];
const parts: string[] = [`${agentName}:`];
if (details?.success === false) {
parts.push('Failed');
@@ -213,7 +209,7 @@ export class WorkflowLogger {
*/
private truncate(str: string, maxLen: number): string {
if (str.length <= maxLen) return str;
return str.slice(0, maxLen - 3) + '...';
return `${str.slice(0, maxLen - 3)}...`;
}
/**
@@ -327,11 +323,9 @@ export class WorkflowLogger {
const label = 'Error: ';
const indent = ' '.repeat(label.length);
const lines = segments.map((segment, i) =>
i === 0 ? `${label}${segment.trim()}` : `${indent}${segment.trim()}`
);
const lines = segments.map((segment, i) => (i === 0 ? `${label}${segment.trim()}` : `${indent}${segment.trim()}`));
return lines.join('\n') + '\n';
return `${lines.join('\n')}\n`;
}
/**
@@ -342,35 +336,40 @@ export class WorkflowLogger {
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
await this.logStream.write('\n');
await this.logStream.write(`================================================================================\n`);
await this.logStream.write(`Workflow ${status}\n`);
await this.logStream.write(`────────────────────────────────────────\n`);
await this.logStream.write(`Workflow ID: ${this.workflowId ?? this.sessionMetadata.id}\n`);
await this.logStream.write(`Status: ${summary.status}\n`);
await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`);
const lines: string[] = [
'',
'================================================================================',
`Workflow ${status}`,
'────────────────────────────────────────',
`Workflow ID: ${this.workflowId ?? this.sessionMetadata.id}`,
`Status: ${summary.status}`,
`Duration: ${formatDuration(summary.totalDurationMs)}`,
`Total Cost: $${summary.totalCostUsd.toFixed(4)}`,
`Agents: ${summary.completedAgents.length} completed`,
];
if (summary.error) {
await this.logStream.write(this.formatErrorBlock(summary.error));
lines.push(this.formatErrorBlock(summary.error).trimEnd());
}
await this.logStream.write(`\n`);
await this.logStream.write(`Agent Breakdown:\n`);
lines.push('');
lines.push('Agent Breakdown:');
for (const agentName of summary.completedAgents) {
const metrics = summary.agentMetrics[agentName];
if (metrics) {
const duration = formatDuration(metrics.durationMs);
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`);
lines.push(` - ${agentName} (${duration}, ${cost})`);
} else {
await this.logStream.write(` - ${agentName}\n`);
lines.push(` - ${agentName}`);
}
}
await this.logStream.write(`================================================================================\n`);
lines.push('================================================================================');
// Single atomic write to prevent interleaved/duplicate output in log tailers
await this.logStream.write(`${lines.join('\n')}\n`);
}
/**
@@ -4,19 +4,14 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { createRequire } from 'module';
import { fs } from 'zx';
import yaml from 'js-yaml';
import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
import { createRequire } from 'node:module';
import { Ajv, type ErrorObject, type ValidateFunction } from 'ajv';
import type { FormatsPlugin } from 'ajv-formats';
import yaml from 'js-yaml';
import { fs } from 'zx';
import { PentestError } from './services/error-handling.js';
import type { Authentication, Config, DistributedConfig, Rule } from './types/config.js';
import { ErrorCode } from './types/errors.js';
import type {
Config,
Rule,
Authentication,
DistributedConfig,
} from './types/config.js';
// Handle ESM/CJS interop for ajv-formats using require
const require = createRequire(import.meta.url);
@@ -35,12 +30,10 @@ try {
validateSchema = ajv.compile(configSchema);
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Failed to load configuration schema: ${errMsg}`,
'config',
false,
{ schemaPath: '../configs/config-schema.json', originalError: errMsg }
);
throw new PentestError(`Failed to load configuration schema: ${errMsg}`, 'config', false, {
schemaPath: '../configs/config-schema.json',
originalError: errMsg,
});
}
const DANGEROUS_PATTERNS: RegExp[] = [
@@ -185,7 +178,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
'config',
false,
{ configPath },
ErrorCode.CONFIG_NOT_FOUND
ErrorCode.CONFIG_NOT_FOUND,
);
}
@@ -198,7 +191,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
'config',
false,
{ configPath, fileSize: stats.size, maxFileSize },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -211,7 +204,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
'config',
false,
{ configPath },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -230,7 +223,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
ErrorCode.CONFIG_PARSE_ERROR,
);
}
@@ -241,7 +234,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
'config',
false,
{ configPath },
ErrorCode.CONFIG_PARSE_ERROR
ErrorCode.CONFIG_PARSE_ERROR,
);
}
@@ -260,7 +253,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
ErrorCode.CONFIG_PARSE_ERROR,
);
}
};
@@ -272,7 +265,7 @@ const validateConfig = (config: Config): void => {
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -282,7 +275,7 @@ const validateConfig = (config: Config): void => {
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -295,7 +288,7 @@ const validateConfig = (config: Config): void => {
'config',
false,
{ validationErrors: errorMessages },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -303,12 +296,10 @@ const validateConfig = (config: Config): void => {
if (!config.rules && !config.authentication) {
console.warn(
'⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.'
'⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.',
);
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
console.warn(
'⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.'
);
console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
}
};
@@ -325,7 +316,7 @@ const performSecurityValidation = (config: Config): void => {
'config',
false,
{ field: 'login_url', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
@@ -339,7 +330,7 @@ const performSecurityValidation = (config: Config): void => {
'config',
false,
{ field: 'credentials.username', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
if (pattern.test(auth.credentials.password)) {
@@ -348,7 +339,7 @@ const performSecurityValidation = (config: Config): void => {
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
@@ -363,7 +354,7 @@ const performSecurityValidation = (config: Config): void => {
'config',
false,
{ field: `login_flow[${index}]`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
@@ -392,7 +383,7 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
'config',
false,
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
if (pattern.test(rule.description)) {
@@ -401,7 +392,7 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
'config',
false,
{ field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
@@ -421,7 +412,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
@@ -435,7 +426,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
// Must contain at least one dot for domains
@@ -445,7 +436,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
@@ -458,7 +449,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
'config',
false,
{ field, ruleType: rule.type, allowedMethods },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
@@ -471,7 +462,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
@@ -483,7 +474,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
@@ -500,7 +491,7 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
'config',
false,
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
seen.add(key);
@@ -518,7 +509,7 @@ const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): vo
'config',
false,
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
});
+30
View File
@@ -0,0 +1,30 @@
/** Centralized path constants for the worker package */
import fs from 'node:fs';
import path from 'node:path';
/** Worker package root (apps/worker/) resolved from compiled dist/ files */
const WORKER_ROOT = path.resolve(import.meta.dirname, '..');
export const PROMPTS_DIR = path.join(WORKER_ROOT, 'prompts');
export const CONFIGS_DIR = path.join(WORKER_ROOT, 'configs');
/**
* Repository root — walk up from WORKER_ROOT looking for pnpm-workspace.yaml.
* Falls back to two levels up (apps/worker/ → repo root) if not found.
*/
function findRepoRoot(): string {
let dir = WORKER_ROOT;
for (let i = 0; i < 5; i++) {
if (fs.existsSync(path.join(dir, 'pnpm-workspace.yaml'))) {
return dir;
}
const parent = path.dirname(dir);
if (parent === dir) break;
dir = parent;
}
return path.resolve(WORKER_ROOT, '..', '..');
}
const REPO_ROOT = findRepoRoot();
export const WORKSPACES_DIR = path.join(REPO_ROOT, 'workspaces');
@@ -37,7 +37,7 @@ export class ProgressIndicator {
}
// Clear the spinner line
process.stdout.write('\r' + ' '.repeat(this.message.length + 5) + '\r');
process.stdout.write(`\r${' '.repeat(this.message.length + 5)}\r`);
this.isRunning = false;
}
@@ -21,29 +21,20 @@
* No Temporal dependencies - pure domain logic.
*/
import type { ActivityLogger } from '../types/activity-logger.js';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode, type PentestErrorType } from '../types/errors.js';
import { PentestError } from './error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js';
import type { AuditSession } from '../audit/index.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from './prompt-manager.js';
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from './git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { AgentName } from '../types/agents.js';
import type { ConfigLoaderService } from './config-loader.js';
import type { AgentEndResult } from '../types/audit.js';
import { ErrorCode, type PentestErrorType } from '../types/errors.js';
import type { AgentMetrics } from '../types/metrics.js';
import { err, isErr, ok, type Result } from '../types/result.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import type { ConfigLoaderService } from './config-loader.js';
import { PentestError } from './error-handling.js';
import { commitGitSuccess, createGitCheckpoint, getGitCommitHash, rollbackGitWorkspace } from './git-manager.js';
import { loadPrompt } from './prompt-manager.js';
/**
* Input for agent execution.
@@ -94,7 +85,7 @@ export class AgentExecutionService {
agentName: AgentName,
input: AgentExecutionInput,
auditSession: AuditSession,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<Result<AgentEndResult, PentestError>> {
const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
@@ -109,13 +100,7 @@ export class AgentExecutionService {
const promptTemplate = AGENTS[agentName].promptTemplate;
let prompt: string;
try {
prompt = await loadPrompt(
promptTemplate,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode,
logger
);
prompt = await loadPrompt(promptTemplate, { webUrl, repoPath }, distributedConfig, pipelineTestingMode, logger);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
return err(
@@ -124,8 +109,8 @@ export class AgentExecutionService {
'prompt',
false,
{ agentName, promptTemplate, originalError: errorMessage },
ErrorCode.PROMPT_LOAD_FAILED
)
ErrorCode.PROMPT_LOAD_FAILED,
),
);
}
@@ -140,8 +125,8 @@ export class AgentExecutionService {
'filesystem',
false,
{ agentName, repoPath, originalError: errorMessage },
ErrorCode.GIT_CHECKPOINT_FAILED
)
ErrorCode.GIT_CHECKPOINT_FAILED,
),
);
}
@@ -157,7 +142,7 @@ export class AgentExecutionService {
agentName,
auditSession,
logger,
AGENTS[agentName].modelTier
AGENTS[agentName].modelTier,
);
// 6. Spending cap check - defense-in-depth
@@ -165,7 +150,8 @@ export class AgentExecutionService {
const resultText = result.result || '';
if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
attemptNumber,
result,
rollbackReason: 'spending cap detected',
errorMessage: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
errorCode: ErrorCode.SPENDING_CAP_REACHED,
@@ -179,7 +165,8 @@ export class AgentExecutionService {
// 7. Handle execution failure
if (!result.success) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
attemptNumber,
result,
rollbackReason: 'execution failure',
errorMessage: result.error || 'Agent execution failed',
errorCode: ErrorCode.AGENT_EXECUTION_FAILED,
@@ -193,7 +180,8 @@ export class AgentExecutionService {
const validationPassed = await validateAgentOutput(result, agentName, repoPath, logger);
if (!validationPassed) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
attemptNumber,
result,
rollbackReason: 'validation failure',
errorMessage: `Agent ${agentName} failed output validation`,
errorCode: ErrorCode.OUTPUT_VALIDATION_FAILED,
@@ -225,7 +213,7 @@ export class AgentExecutionService {
repoPath: string,
auditSession: AuditSession,
logger: ActivityLogger,
opts: FailAgentOpts
opts: FailAgentOpts,
): Promise<Result<AgentEndResult, PentestError>> {
await rollbackGitWorkspace(repoPath, opts.rollbackReason, logger);
@@ -239,15 +227,7 @@ export class AgentExecutionService {
};
await auditSession.endAgent(agentName, endResult);
return err(
new PentestError(
opts.errorMessage,
opts.category,
opts.retryable,
opts.context,
opts.errorCode
)
);
return err(new PentestError(opts.errorMessage, opts.category, opts.retryable, opts.context, opts.errorCode));
}
/**
@@ -267,7 +247,7 @@ export class AgentExecutionService {
agentName: AgentName,
input: AgentExecutionInput,
auditSession: AuditSession,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<AgentEndResult> {
const result = await this.execute(agentName, input, auditSession, logger);
if (isErr(result)) {
@@ -11,11 +11,11 @@
* Pure service with no Temporal dependencies.
*/
import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from './error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import { distributeConfig, parseConfig } from '../config-parser.js';
import type { DistributedConfig } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { err, ok, type Result } from '../types/result.js';
import { PentestError } from './error-handling.js';
/**
* Service for loading and distributing configuration files.
@@ -52,8 +52,8 @@ export class ConfigLoaderService {
'config',
false,
{ configPath, originalError: errorMessage },
errorCode
)
errorCode,
),
);
}
}
@@ -64,9 +64,7 @@ export class ConfigLoaderService {
* @param configPath - Optional path to the YAML configuration file
* @returns Result containing DistributedConfig (or null) on success, PentestError on failure
*/
async loadOptional(
configPath: string | undefined
): Promise<Result<DistributedConfig | null, PentestError>> {
async loadOptional(configPath: string | undefined): Promise<Result<DistributedConfig | null, PentestError>> {
if (!configPath) {
return ok(null);
}
@@ -75,10 +75,7 @@ const containers = new Map<string, Container>();
* @param sessionMetadata - Session metadata for audit paths
* @returns Container instance for the workflow
*/
export function getOrCreateContainer(
workflowId: string,
sessionMetadata: SessionMetadata
): Container {
export function getOrCreateContainer(workflowId: string, sessionMetadata: SessionMetadata): Container {
let container = containers.get(workflowId);
if (!container) {
@@ -4,16 +4,8 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import {
ErrorCode,
type PentestErrorType,
type PentestErrorContext,
type PromptErrorResult,
} from '../types/errors.js';
import {
matchesBillingApiPattern,
matchesBillingTextPattern,
} from '../utils/billing-detection.js';
import { ErrorCode, type PentestErrorContext, type PentestErrorType, type PromptErrorResult } from '../types/errors.js';
import { matchesBillingApiPattern, matchesBillingTextPattern } from '../utils/billing-detection.js';
export class PentestError extends Error {
override name = 'PentestError' as const;
@@ -29,7 +21,7 @@ export class PentestError extends Error {
type: PentestErrorType,
retryable: boolean = false,
context: PentestErrorContext = {},
code?: ErrorCode
code?: ErrorCode,
) {
super(message);
this.type = type;
@@ -42,18 +34,13 @@ export class PentestError extends Error {
}
}
export function handlePromptError(
promptName: string,
error: Error
): PromptErrorResult {
export function handlePromptError(promptName: string, error: Error): PromptErrorResult {
return {
success: false,
error: new PentestError(
`Failed to load prompt '${promptName}': ${error.message}`,
'prompt',
false,
{ promptName, originalError: error.message }
),
error: new PentestError(`Failed to load prompt '${promptName}': ${error.message}`, 'prompt', false, {
promptName,
originalError: error.message,
}),
};
}
@@ -111,10 +98,7 @@ export function isRetryableError(error: Error): boolean {
* Classifies errors by ErrorCode for reliable, code-based classification.
* Used when error is a PentestError with a specific ErrorCode.
*/
function classifyByErrorCode(
code: ErrorCode,
retryableFromError: boolean
): { type: string; retryable: boolean } {
function classifyByErrorCode(code: ErrorCode, retryableFromError: boolean): { type: string; retryable: boolean } {
switch (code) {
// Billing errors - retryable (wait for cap reset or credits added)
case ErrorCode.SPENDING_CAP_REACHED:
@@ -206,49 +190,30 @@ export function classifyErrorForTemporal(error: unknown): { type: string; retrya
}
// Permission (403) - access won't be granted
if (
message.includes('permission') ||
message.includes('forbidden') ||
message.includes('403')
) {
if (message.includes('permission') || message.includes('forbidden') || message.includes('403')) {
return { type: 'PermissionError', retryable: false };
}
// === OUTPUT VALIDATION ERRORS (Retryable) ===
// Agent didn't produce expected deliverables - retry may succeed
// IMPORTANT: Must come BEFORE generic 'validation' check below
if (
message.includes('failed output validation') ||
message.includes('output validation failed')
) {
if (message.includes('failed output validation') || message.includes('output validation failed')) {
return { type: 'OutputValidationError', retryable: true };
}
// Invalid Request (400) - malformed request is permanent
// Note: Checked AFTER billing and AFTER output validation
if (
message.includes('invalid_request_error') ||
message.includes('malformed') ||
message.includes('validation')
) {
if (message.includes('invalid_request_error') || message.includes('malformed') || message.includes('validation')) {
return { type: 'InvalidRequestError', retryable: false };
}
// Request Too Large (413) - won't fit no matter how many retries
if (
message.includes('request_too_large') ||
message.includes('too large') ||
message.includes('413')
) {
if (message.includes('request_too_large') || message.includes('too large') || message.includes('413')) {
return { type: 'RequestTooLargeError', retryable: false };
}
// Configuration errors - missing files need manual fix
if (
message.includes('enoent') ||
message.includes('no such file') ||
message.includes('cli not installed')
) {
if (message.includes('enoent') || message.includes('no such file') || message.includes('cli not installed')) {
return { type: 'ConfigurationError', retryable: false };
}
@@ -13,13 +13,9 @@
* No Temporal dependencies - this is pure business logic.
*/
import {
validateQueueSafe,
type VulnType,
type ExploitationDecision,
} from './queue-validation.js';
import { isOk } from '../types/result.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { isOk } from '../types/result.js';
import { type ExploitationDecision, type VulnType, validateQueueSafe } from './queue-validation.js';
/**
* Service for checking exploitation queue decisions.
@@ -46,7 +42,7 @@ export class ExploitationCheckerService {
if (isOk(result)) {
const decision = result.value;
logger.info(
`${vulnType}: ${decision.shouldExploit ? `${decision.vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
`${vulnType}: ${decision.shouldExploit ? `${decision.vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`,
);
return decision;
}
@@ -5,9 +5,9 @@
// as published by the Free Software Foundation.
import { $ } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { ErrorCode } from '../types/errors.js';
import { PentestError } from './error-handling.js';
/**
* Check if a directory is a git repository.
@@ -31,15 +31,8 @@ interface GitOperationResult {
/**
* Get list of changed files from git status --porcelain output
*/
async function getChangedFiles(
sourceDir: string,
operationDescription: string
): Promise<string[]> {
const status = await executeGitCommandWithRetry(
['git', 'status', '--porcelain'],
sourceDir,
operationDescription
);
async function getChangedFiles(sourceDir: string, operationDescription: string): Promise<string[]> {
const status = await executeGitCommandWithRetry(['git', 'status', '--porcelain'], sourceDir, operationDescription);
return status.stdout
.trim()
.split('\n')
@@ -55,14 +48,15 @@ function logChangeSummary(
messageWithoutChanges: string,
logger: ActivityLogger,
level: 'info' | 'warn' = 'info',
maxToShow: number = 5
maxToShow: number = 5,
): void {
if (changes.length > 0) {
const msg = messageWithChanges.replace('{count}', String(changes.length));
const fileList = changes.slice(0, maxToShow).map((c) => ` ${c}`).join(', ');
const suffix = changes.length > maxToShow
? ` ... and ${changes.length - maxToShow} more files`
: '';
const fileList = changes
.slice(0, maxToShow)
.map((c) => ` ${c}`)
.join(', ');
const suffix = changes.length > maxToShow ? ` ... and ${changes.length - maxToShow} more files` : '';
logger[level](`${msg} ${fileList}${suffix}`);
} else {
logger[level](messageWithoutChanges);
@@ -101,7 +95,7 @@ class GitSemaphore {
if (!this.running && this.queue.length > 0) {
this.running = true;
const resolve = this.queue.shift();
resolve!();
resolve?.();
}
}
}
@@ -125,7 +119,7 @@ export async function executeGitCommandWithRetry(
commandArgs: string[],
sourceDir: string,
description: string,
maxRetries: number = 5
maxRetries: number = 5,
): Promise<{ stdout: string; stderr: string }> {
await gitSemaphore.acquire();
@@ -139,11 +133,11 @@ export async function executeGitCommandWithRetry(
const errMsg = error instanceof Error ? error.message : String(error);
if (isGitLockError(errMsg) && attempt < maxRetries) {
const delay = Math.pow(2, attempt - 1) * 1000;
const delay = 2 ** (attempt - 1) * 1000;
// executeGitCommandWithRetry is also called outside activity context
// (e.g., from resume logic), so we use console.warn as a fallback here
console.warn(
`Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
`Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`,
);
await new Promise((resolve) => setTimeout(resolve, delay));
continue;
@@ -157,7 +151,7 @@ export async function executeGitCommandWithRetry(
'filesystem',
true, // Retryable - transient git lock issues
{ maxRetries, description },
ErrorCode.GIT_CHECKPOINT_FAILED
ErrorCode.GIT_CHECKPOINT_FAILED,
);
} finally {
gitSemaphore.release();
@@ -168,7 +162,7 @@ export async function executeGitCommandWithRetry(
export async function rollbackGitWorkspace(
sourceDir: string,
reason: string = 'retry preparation',
logger: ActivityLogger
logger: ActivityLogger,
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
@@ -180,16 +174,8 @@ export async function rollbackGitWorkspace(
try {
const changes = await getChangedFiles(sourceDir, 'status check for rollback');
await executeGitCommandWithRetry(
['git', 'reset', '--hard', 'HEAD'],
sourceDir,
'hard reset for rollback'
);
await executeGitCommandWithRetry(
['git', 'clean', '-fd'],
sourceDir,
'cleaning untracked files for rollback'
);
await executeGitCommandWithRetry(['git', 'reset', '--hard', 'HEAD'], sourceDir, 'hard reset for rollback');
await executeGitCommandWithRetry(['git', 'clean', '-fd'], sourceDir, 'cleaning untracked files for rollback');
logChangeSummary(
changes,
@@ -197,7 +183,7 @@ export async function rollbackGitWorkspace(
'Rollback completed - no changes to remove',
logger,
'info',
3
3,
);
return { success: true };
} catch (error) {
@@ -210,7 +196,7 @@ export async function rollbackGitWorkspace(
'filesystem',
false, // Non-retryable - rollback is best-effort cleanup
{ sourceDir, reason },
ErrorCode.GIT_ROLLBACK_FAILED
ErrorCode.GIT_ROLLBACK_FAILED,
),
};
}
@@ -221,7 +207,7 @@ export async function createGitCheckpoint(
sourceDir: string,
description: string,
attempt: number,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
@@ -248,7 +234,7 @@ export async function createGitCheckpoint(
await executeGitCommandWithRetry(
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
sourceDir,
'creating commit'
'creating commit',
);
// 4. Log result
@@ -268,7 +254,7 @@ export async function createGitCheckpoint(
export async function commitGitSuccess(
sourceDir: string,
description: string,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
@@ -280,22 +266,18 @@ export async function commitGitSuccess(
try {
const changes = await getChangedFiles(sourceDir, 'status check for success commit');
await executeGitCommandWithRetry(
['git', 'add', '-A'],
sourceDir,
'staging changes for success commit'
);
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes for success commit');
await executeGitCommandWithRetry(
['git', 'commit', '-m', `${description}: completed successfully`, '--allow-empty'],
sourceDir,
'creating success commit'
'creating success commit',
);
logChangeSummary(
changes,
'Success commit created with {count} file changes:',
'Empty success commit created (agent made no file changes)',
logger
logger,
);
return { success: true };
} catch (error) {
@@ -11,13 +11,12 @@
* Services are pure domain logic with no Temporal dependencies.
*/
export { Container, getOrCreateContainer, removeContainer } from './container.js';
export type { ContainerDependencies } from './container.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { AgentExecutionService } from './agent-execution.js';
export { ConfigLoaderService } from './config-loader.js';
export type { ContainerDependencies } from './container.js';
export { Container, getOrCreateContainer, removeContainer } from './container.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { AgentExecutionService } from './agent-execution.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
export { loadPrompt } from './prompt-manager.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
@@ -17,22 +17,19 @@
* 3. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, Vertex AI, or router mode)
*/
import fs from 'fs/promises';
import { query } from '@anthropic-ai/claude-agent-sdk';
import fs from 'node:fs/promises';
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
import { PentestError, isRetryableError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { parseConfig } from '../config-parser.js';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { resolveModel } from '../ai/models.js';
import { parseConfig } from '../config-parser.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { ErrorCode } from '../types/errors.js';
import { err, ok, type Result } from '../types/result.js';
import { isRetryableError, PentestError } from './error-handling.js';
// === Repository Validation ===
async function validateRepo(
repoPath: string,
logger: ActivityLogger
): Promise<Result<void, PentestError>> {
async function validateRepo(repoPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
logger.info('Checking repository path...', { repoPath });
// 1. Check repo directory exists
@@ -45,8 +42,8 @@ async function validateRepo(
'config',
false,
{ repoPath },
ErrorCode.REPO_NOT_FOUND
)
ErrorCode.REPO_NOT_FOUND,
),
);
}
} catch {
@@ -56,8 +53,8 @@ async function validateRepo(
'config',
false,
{ repoPath },
ErrorCode.REPO_NOT_FOUND
)
ErrorCode.REPO_NOT_FOUND,
),
);
}
@@ -71,8 +68,8 @@ async function validateRepo(
'config',
false,
{ repoPath },
ErrorCode.REPO_NOT_FOUND
)
ErrorCode.REPO_NOT_FOUND,
),
);
}
} catch {
@@ -82,8 +79,8 @@ async function validateRepo(
'config',
false,
{ repoPath },
ErrorCode.REPO_NOT_FOUND
)
ErrorCode.REPO_NOT_FOUND,
),
);
}
@@ -93,10 +90,7 @@ async function validateRepo(
// === Config Validation ===
async function validateConfig(
configPath: string,
logger: ActivityLogger
): Promise<Result<void, PentestError>> {
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
logger.info('Validating configuration file...', { configPath });
try {
@@ -114,8 +108,8 @@ async function validateConfig(
'config',
false,
{ configPath },
ErrorCode.CONFIG_VALIDATION_FAILED
)
ErrorCode.CONFIG_VALIDATION_FAILED,
),
);
}
}
@@ -123,43 +117,60 @@ async function validateConfig(
// === Credential Validation ===
/** Map SDK error type to a human-readable preflight PentestError. */
function classifySdkError(
sdkError: SDKAssistantMessageError,
authType: string
): Result<void, PentestError> {
function classifySdkError(sdkError: SDKAssistantMessageError, authType: string): Result<void, PentestError> {
switch (sdkError) {
case 'authentication_failed':
return err(new PentestError(
`Invalid ${authType}. Check your credentials in .env and try again.`,
'config', false, { authType, sdkError }, ErrorCode.AUTH_FAILED
));
return err(
new PentestError(
`Invalid ${authType}. Check your credentials in .env and try again.`,
'config',
false,
{ authType, sdkError },
ErrorCode.AUTH_FAILED,
),
);
case 'billing_error':
return err(new PentestError(
`Anthropic account has a billing issue. Add credits or check your billing dashboard.`,
'billing', true, { authType, sdkError }, ErrorCode.BILLING_ERROR
));
return err(
new PentestError(
`Anthropic account has a billing issue. Add credits or check your billing dashboard.`,
'billing',
true,
{ authType, sdkError },
ErrorCode.BILLING_ERROR,
),
);
case 'rate_limit':
return err(new PentestError(
`Anthropic rate limit or spending cap reached. Wait a few minutes and try again.`,
'billing', true, { authType, sdkError }, ErrorCode.BILLING_ERROR
));
return err(
new PentestError(
`Anthropic rate limit or spending cap reached. Wait a few minutes and try again.`,
'billing',
true,
{ authType, sdkError },
ErrorCode.BILLING_ERROR,
),
);
case 'server_error':
return err(new PentestError(
`Anthropic API is temporarily unavailable. Try again shortly.`,
'network', true, { authType, sdkError }
));
return err(
new PentestError(`Anthropic API is temporarily unavailable. Try again shortly.`, 'network', true, {
authType,
sdkError,
}),
);
default:
return err(new PentestError(
`${authType} validation failed unexpectedly. Check your credentials in .env.`,
'config', false, { authType, sdkError }, ErrorCode.AUTH_FAILED
));
return err(
new PentestError(
`${authType} validation failed unexpectedly. Check your credentials in .env.`,
'config',
false,
{ authType, sdkError },
ErrorCode.AUTH_FAILED,
),
);
}
}
/** Validate credentials via a minimal Claude Agent SDK query. */
async function validateCredentials(
logger: ActivityLogger
): Promise<Result<void, PentestError>> {
async function validateCredentials(logger: ActivityLogger): Promise<Result<void, PentestError>> {
// 1. Custom base URL — validate endpoint is reachable via SDK query
if (process.env.ANTHROPIC_BASE_URL) {
const baseUrl = process.env.ANTHROPIC_BASE_URL;
@@ -193,8 +204,14 @@ async function validateCredentials(
// 2. Bedrock mode — validate required AWS credentials are present
if (process.env.CLAUDE_CODE_USE_BEDROCK === '1') {
const required = ['AWS_REGION', 'AWS_BEARER_TOKEN_BEDROCK', 'ANTHROPIC_SMALL_MODEL', 'ANTHROPIC_MEDIUM_MODEL', 'ANTHROPIC_LARGE_MODEL'];
const missing = required.filter(v => !process.env[v]);
const required = [
'AWS_REGION',
'AWS_BEARER_TOKEN_BEDROCK',
'ANTHROPIC_SMALL_MODEL',
'ANTHROPIC_MEDIUM_MODEL',
'ANTHROPIC_LARGE_MODEL',
];
const missing = required.filter((v) => !process.env[v]);
if (missing.length > 0) {
return err(
new PentestError(
@@ -202,8 +219,8 @@ async function validateCredentials(
'config',
false,
{ missing },
ErrorCode.AUTH_FAILED
)
ErrorCode.AUTH_FAILED,
),
);
}
logger.info('Bedrock credentials OK');
@@ -212,8 +229,14 @@ async function validateCredentials(
// 3. Vertex AI mode — validate required GCP credentials are present
if (process.env.CLAUDE_CODE_USE_VERTEX === '1') {
const required = ['CLOUD_ML_REGION', 'ANTHROPIC_VERTEX_PROJECT_ID', 'ANTHROPIC_SMALL_MODEL', 'ANTHROPIC_MEDIUM_MODEL', 'ANTHROPIC_LARGE_MODEL'];
const missing = required.filter(v => !process.env[v]);
const required = [
'CLOUD_ML_REGION',
'ANTHROPIC_VERTEX_PROJECT_ID',
'ANTHROPIC_SMALL_MODEL',
'ANTHROPIC_MEDIUM_MODEL',
'ANTHROPIC_LARGE_MODEL',
];
const missing = required.filter((v) => !process.env[v]);
if (missing.length > 0) {
return err(
new PentestError(
@@ -221,8 +244,8 @@ async function validateCredentials(
'config',
false,
{ missing },
ErrorCode.AUTH_FAILED
)
ErrorCode.AUTH_FAILED,
),
);
}
// Validate service account credentials file is accessible
@@ -234,8 +257,8 @@ async function validateCredentials(
'config',
false,
{},
ErrorCode.AUTH_FAILED
)
ErrorCode.AUTH_FAILED,
),
);
}
try {
@@ -247,8 +270,8 @@ async function validateCredentials(
'config',
false,
{ credPath },
ErrorCode.AUTH_FAILED
)
ErrorCode.AUTH_FAILED,
),
);
}
logger.info('Vertex AI credentials OK');
@@ -263,8 +286,8 @@ async function validateCredentials(
'config',
false,
{},
ErrorCode.AUTH_FAILED
)
ErrorCode.AUTH_FAILED,
),
);
}
@@ -296,8 +319,8 @@ async function validateCredentials(
retryable ? 'network' : 'config',
retryable,
{ authType },
retryable ? undefined : ErrorCode.AUTH_FAILED
)
retryable ? undefined : ErrorCode.AUTH_FAILED,
),
);
}
}
@@ -316,7 +339,7 @@ async function validateCredentials(
export async function runPreflightChecks(
repoPath: string,
configPath: string | undefined,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<Result<void, PentestError>> {
// 1. Repository check (free — filesystem only)
const repoResult = await validateRepo(repoPath, logger);
@@ -5,10 +5,11 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError, handlePromptError } from './error-handling.js';
import { PROMPTS_DIR } from '../paths.js';
import { MCP_AGENT_MAPPING } from '../session-manager.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import { handlePromptError, PentestError } from './error-handling.js';
interface PromptVariables {
webUrl: string;
@@ -25,15 +26,10 @@ interface IncludeReplacement {
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
try {
// 1. Load the login instructions template
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
const loginInstructionsPath = path.join(PROMPTS_DIR, 'shared', 'login-instructions.txt');
if (!await fs.pathExists(loginInstructionsPath)) {
throw new PentestError(
'Login instructions template not found',
'filesystem',
false,
{ loginInstructionsPath }
);
if (!(await fs.pathExists(loginInstructionsPath))) {
throw new PentestError('Login instructions template not found', 'filesystem', false, { loginInstructionsPath });
}
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
@@ -57,9 +53,7 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
logger.warn('Section markers not found, using full login instructions template');
loginInstructions = fullTemplate;
} else {
loginInstructions = [commonSection, authSection, verificationSection]
.filter(section => section)
.join('\n\n');
loginInstructions = [commonSection, authSection, verificationSection].filter((section) => section).join('\n\n');
}
// 4. Interpolate login flow and credential placeholders
@@ -73,7 +67,10 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
userInstructions = userInstructions.replace(/\$password/g, authentication.credentials.password);
}
if (authentication.credentials.totp_secret) {
userInstructions = userInstructions.replace(/\$totp/g, `generated TOTP code using secret "${authentication.credentials.totp_secret}"`);
userInstructions = userInstructions.replace(
/\$totp/g,
`generated TOTP code using secret "${authentication.credentials.totp_secret}"`,
);
}
}
@@ -90,12 +87,10 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
throw error;
}
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Failed to build login instructions: ${errMsg}`,
'config',
false,
{ authentication, originalError: errMsg }
);
throw new PentestError(`Failed to build login instructions: ${errMsg}`, 'config', false, {
authentication,
originalError: errMsg,
});
}
}
@@ -112,7 +107,7 @@ async function processIncludes(content: string, baseDir: string): Promise<string
`Path traversal detected in @include(): ${match[1]}`,
'prompt',
false,
{ includePath, baseDir: resolvedBase }
{ includePath, baseDir: resolvedBase },
);
}
const sharedContent = await fs.readFile(includePath, 'utf8');
@@ -120,7 +115,7 @@ async function processIncludes(content: string, baseDir: string): Promise<string
placeholder: match[0],
content: sharedContent,
};
})
}),
);
for (const replacement of replacements) {
@@ -134,25 +129,20 @@ async function interpolateVariables(
template: string,
variables: PromptVariables,
config: DistributedConfig | null = null,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<string> {
try {
if (!template || typeof template !== 'string') {
throw new PentestError(
'Template must be a non-empty string',
'validation',
false,
{ templateType: typeof template, templateLength: template?.length }
);
throw new PentestError('Template must be a non-empty string', 'validation', false, {
templateType: typeof template,
templateLength: template?.length,
});
}
if (!variables || !variables.webUrl || !variables.repoPath) {
throw new PentestError(
'Variables must include webUrl and repoPath',
'validation',
false,
{ variables: Object.keys(variables || {}) }
);
throw new PentestError('Variables must include webUrl and repoPath', 'validation', false, {
variables: Object.keys(variables || {}),
});
}
let result = template
@@ -170,12 +160,10 @@ async function interpolateVariables(
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
} else {
const avoidRules = hasAvoidRules ? config.avoid!.map(r => `- ${r.description}`).join('\n') : 'None';
const focusRules = hasFocusRules ? config.focus!.map(r => `- ${r.description}`).join('\n') : 'None';
const avoidRules = hasAvoidRules ? config.avoid?.map((r) => `- ${r.description}`).join('\n') : 'None';
const focusRules = hasFocusRules ? config.focus?.map((r) => `- ${r.description}`).join('\n') : 'None';
result = result
.replace(/{{RULES_AVOID}}/g, avoidRules)
.replace(/{{RULES_FOCUS}}/g, focusRules);
result = result.replace(/{{RULES_AVOID}}/g, avoidRules).replace(/{{RULES_FOCUS}}/g, focusRules);
}
// Extract and inject login instructions from config
@@ -204,12 +192,7 @@ async function interpolateVariables(
throw error;
}
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Variable interpolation failed: ${errMsg}`,
'prompt',
false,
{ originalError: errMsg }
);
throw new PentestError(`Variable interpolation failed: ${errMsg}`, 'prompt', false, { originalError: errMsg });
}
}
@@ -219,25 +202,19 @@ export async function loadPrompt(
variables: PromptVariables,
config: DistributedConfig | null = null,
pipelineTestingMode: boolean = false,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<string> {
try {
// 1. Resolve prompt file path
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
const promptsDir = pipelineTestingMode ? path.join(PROMPTS_DIR, 'pipeline-testing') : PROMPTS_DIR;
const promptPath = path.join(promptsDir, `${promptName}.txt`);
if (pipelineTestingMode) {
logger.info(`Using pipeline testing prompt: ${promptPath}`);
}
if (!await fs.pathExists(promptPath)) {
throw new PentestError(
`Prompt file not found: ${promptPath}`,
'prompt',
false,
{ promptName, promptPath }
);
if (!(await fs.pathExists(promptPath))) {
throw new PentestError(`Prompt file not found: ${promptPath}`, 'prompt', false, { promptName, promptPath });
}
// 2. Assign MCP server based on agent name
@@ -5,13 +5,13 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import type { ExploitationDecision, VulnType } from '../types/agents.js';
import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { err, ok, type Result } from '../types/result.js';
import { asyncPipe } from '../utils/functional.js';
import type { VulnType, ExploitationDecision } from '../types/agents.js';
import { PentestError } from './error-handling.js';
export type { VulnType, ExploitationDecision } from '../types/agents.js';
export type { ExploitationDecision, VulnType } from '../types/agents.js';
interface VulnTypeConfigItem {
deliverable: string;
@@ -63,7 +63,6 @@ interface QueueValidationResult {
error: string | null;
}
/**
* Result type for safe validation - explicit error handling.
*/
@@ -97,7 +96,7 @@ const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
function createValidationRule(
predicate: (existence: FileExistence) => boolean,
errorMessage: ErrorMessageResolver,
retryable: boolean = true
retryable: boolean = true,
): ValidationRule {
return Object.freeze({ predicate, errorMessage, retryable });
}
@@ -106,7 +105,7 @@ function createValidationRule(
const fileExistenceRules: readonly ValidationRule[] = Object.freeze([
createValidationRule(
({ deliverableExists, queueExists }) => deliverableExists && queueExists,
getExistenceErrorMessage
getExistenceErrorMessage,
),
]);
@@ -124,19 +123,11 @@ function getExistenceErrorMessage(existence: FileExistence): string {
}
// Pure function to create file paths
const createPaths = (
vulnType: VulnType,
sourceDir: string
): PathsBase | PathsWithError => {
const createPaths = (vulnType: VulnType, sourceDir: string): PathsBase | PathsWithError => {
const config = VULN_TYPE_CONFIG[vulnType];
if (!config) {
return {
error: new PentestError(
`Unknown vulnerability type: ${vulnType}`,
'validation',
false,
{ vulnType }
),
error: new PentestError(`Unknown vulnerability type: ${vulnType}`, 'validation', false, { vulnType }),
};
}
@@ -149,9 +140,7 @@ const createPaths = (
};
// Pure function to check file existence
const checkFileExistence = async (
paths: PathsBase | PathsWithError
): Promise<PathsWithExistence | PathsWithError> => {
const checkFileExistence = async (paths: PathsBase | PathsWithError): Promise<PathsWithExistence | PathsWithError> => {
if ('error' in paths) return paths;
const [deliverableExists, queueExists] = await Promise.all([
@@ -167,7 +156,7 @@ const checkFileExistence = async (
// Validates deliverable/queue symmetry - both must exist or neither
const validateExistenceRules = (
pathsWithExistence: PathsWithExistence | PathsWithError
pathsWithExistence: PathsWithExistence | PathsWithError,
): PathsWithExistence | PathsWithError => {
if ('error' in pathsWithExistence) return pathsWithExistence;
@@ -178,9 +167,7 @@ const validateExistenceRules = (
if (failedRule) {
const message =
typeof failedRule.errorMessage === 'function'
? failedRule.errorMessage(existence)
: failedRule.errorMessage;
typeof failedRule.errorMessage === 'function' ? failedRule.errorMessage(existence) : failedRule.errorMessage;
return {
error: new PentestError(
@@ -193,7 +180,7 @@ const validateExistenceRules = (
queuePath: pathsWithExistence.queue,
existence,
},
ErrorCode.DELIVERABLE_NOT_FOUND
ErrorCode.DELIVERABLE_NOT_FOUND,
),
};
}
@@ -227,7 +214,7 @@ const validateQueueStructure = (content: string): QueueValidationResult => {
// Queue parse failures are retryable - agent can fix malformed JSON on retry
const validateQueueContent = async (
pathsWithExistence: PathsWithExistence | PathsWithError
pathsWithExistence: PathsWithExistence | PathsWithError,
): Promise<PathsWithQueue | PathsWithError> => {
if ('error' in pathsWithExistence) return pathsWithExistence;
@@ -249,7 +236,7 @@ const validateQueueContent = async (
queuePath: pathsWithExistence.queue,
originalError: queueValidation.error,
queueStructure: queueValidation.data ? Object.keys(queueValidation.data) : [],
}
},
),
};
}
@@ -268,16 +255,14 @@ const validateQueueContent = async (
vulnType: pathsWithExistence.vulnType,
queuePath: pathsWithExistence.queue,
originalError: readError instanceof Error ? readError.message : String(readError),
}
},
),
};
}
};
// Final decision: skip if queue says no vulns, proceed if vulns found, error otherwise
const determineExploitationDecision = (
validatedData: PathsWithQueue | PathsWithError
): ExploitationDecision => {
const determineExploitationDecision = (validatedData: PathsWithQueue | PathsWithError): ExploitationDecision => {
if ('error' in validatedData) {
throw validatedData.error;
}
@@ -297,14 +282,14 @@ const determineExploitationDecision = (
// Main functional validation pipeline
export async function validateQueueAndDeliverable(
vulnType: VulnType,
sourceDir: string
sourceDir: string,
): Promise<ExploitationDecision> {
return asyncPipe<ExploitationDecision>(
createPaths(vulnType, sourceDir),
checkFileExistence,
validateExistenceRules,
validateQueueContent,
determineExploitationDecision
determineExploitationDecision,
);
}
@@ -312,10 +297,7 @@ export async function validateQueueAndDeliverable(
* Safely validate queue and deliverable files.
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
*/
export async function validateQueueSafe(
vulnType: VulnType,
sourceDir: string
): Promise<SafeValidationResult> {
export async function validateQueueSafe(vulnType: VulnType, sourceDir: string): Promise<SafeValidationResult> {
try {
const result = await validateQueueAndDeliverable(vulnType, sourceDir);
return ok(result);
@@ -5,9 +5,9 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { ErrorCode } from '../types/errors.js';
import { PentestError } from './error-handling.js';
interface DeliverableFile {
name: string;
@@ -22,7 +22,7 @@ export async function assembleFinalReport(sourceDir: string, logger: ActivityLog
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false },
];
const sections: string[] = [];
@@ -40,7 +40,7 @@ export async function assembleFinalReport(sourceDir: string, logger: ActivityLog
'filesystem',
false,
{ deliverableFile: file.path, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND
ErrorCode.DELIVERABLE_NOT_FOUND,
);
} else {
logger.info(`No ${file.name} deliverable found`);
@@ -65,12 +65,10 @@ export async function assembleFinalReport(sourceDir: string, logger: ActivityLog
logger.info(`Final report assembled at ${finalReportPath}`);
} catch (error) {
const err = error as Error;
throw new PentestError(
`Failed to write final report: ${err.message}`,
'filesystem',
false,
{ finalReportPath, originalError: err.message }
);
throw new PentestError(`Failed to write final report: ${err.message}`, 'filesystem', false, {
finalReportPath,
originalError: err.message,
});
}
return finalContent;
@@ -84,7 +82,7 @@ export async function assembleFinalReport(sourceDir: string, logger: ActivityLog
export async function injectModelIntoReport(
repoPath: string,
outputPath: string,
logger: ActivityLogger
logger: ActivityLogger,
): Promise<void> {
// 1. Read session.json to get model information
const sessionJsonPath = path.join(outputPath, 'session.json');
@@ -136,20 +134,14 @@ export async function injectModelIntoReport(
if (match) {
// Inject model line after Assessment Date
const modelLine = `- Model: ${modelStr}`;
reportContent = reportContent.replace(
assessmentDatePattern,
`$1\n${modelLine}`
);
reportContent = reportContent.replace(assessmentDatePattern, `$1\n${modelLine}`);
logger.info('Model info injected into Executive Summary');
} else {
// If no Assessment Date line found, try to add after Executive Summary header
const execSummaryPattern = /^## Executive Summary$/m;
if (reportContent.match(execSummaryPattern)) {
// Add model as first item in Executive Summary
reportContent = reportContent.replace(
execSummaryPattern,
`## Executive Summary\n- Model: ${modelStr}`
);
reportContent = reportContent.replace(execSummaryPattern, `## Executive Summary\n- Model: ${modelStr}`);
logger.info('Model info added to Executive Summary header');
} else {
logger.warn('Could not find Executive Summary section');
@@ -4,10 +4,10 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { path, fs } from 'zx';
import { fs, path } from 'zx';
import { validateQueueAndDeliverable } from './services/queue-validation.js';
import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js';
import type { ActivityLogger } from './types/activity-logger.js';
import type { AgentDefinition, AgentName, AgentValidator, PlaywrightAgent, VulnType } from './types/index.js';
// Agent definitions according to PRD
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
@@ -20,7 +20,7 @@ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freez
deliverableFilename: 'code_analysis_deliverable.md',
modelTier: 'large',
},
'recon': {
recon: {
name: 'recon',
displayName: 'Recon agent',
prerequisites: ['pre-recon'],
@@ -97,7 +97,7 @@ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freez
promptTemplate: 'exploit-authz',
deliverableFilename: 'authz_exploitation_evidence.md',
},
'report': {
report: {
name: 'report',
displayName: 'Report agent',
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
@@ -113,7 +113,7 @@ export type PhaseName = 'pre-recon' | 'recon' | 'vulnerability-analysis' | 'expl
// Map agents to their corresponding phases (single source of truth)
export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.freeze({
'pre-recon': 'pre-recon',
'recon': 'recon',
recon: 'recon',
'injection-vuln': 'vulnerability-analysis',
'xss-vuln': 'vulnerability-analysis',
'auth-vuln': 'vulnerability-analysis',
@@ -124,7 +124,7 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
'auth-exploit': 'exploitation',
'authz-exploit': 'exploitation',
'ssrf-exploit': 'exploitation',
'report': 'reporting',
report: 'reporting',
});
// Factory function for vulnerability queue validators
@@ -210,11 +210,7 @@ export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze
// Executive report agent
report: async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
const reportFile = path.join(
sourceDir,
'deliverables',
'comprehensive_security_assessment_report.md'
);
const reportFile = path.join(sourceDir, 'deliverables', 'comprehensive_security_assessment_report.md');
const reportExists = await fs.pathExists(reportFile);
@@ -15,29 +15,28 @@
* Business logic is delegated to services in src/services/.
*/
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
import path from 'path';
import fs from 'fs/promises';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import type { VulnType, ExploitationDecision } from '../services/queue-validation.js';
import fs from 'node:fs/promises';
import path from 'node:path';
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import { runPreflightChecks } from '../services/preflight.js';
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { AGENTS } from '../session-manager.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { createActivityLogger } from './activity-logger.js';
import { runPreflightChecks } from '../services/preflight.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import { ErrorCode } from '../types/errors.js';
import { isErr } from '../types/result.js';
import { fileExists, readJson } from '../utils/file-io.js';
import { createActivityLogger } from './activity-logger.js';
import type { AgentMetrics, ResumeState } from './shared.js';
// Max lengths to prevent Temporal protobuf buffer overflow
const MAX_ERROR_MESSAGE_LENGTH = 2000;
@@ -68,7 +67,7 @@ function truncateErrorMessage(message: string): string {
if (message.length <= MAX_ERROR_MESSAGE_LENGTH) {
return message;
}
return message.slice(0, MAX_ERROR_MESSAGE_LENGTH - 20) + '\n[truncated]';
return `${message.slice(0, MAX_ERROR_MESSAGE_LENGTH - 20)}\n[truncated]`;
}
/**
@@ -76,7 +75,7 @@ function truncateErrorMessage(message: string): string {
*/
function truncateStackTrace(failure: ApplicationFailure): void {
if (failure.stack && failure.stack.length > MAX_STACK_TRACE_LENGTH) {
failure.stack = failure.stack.slice(0, MAX_STACK_TRACE_LENGTH) + '\n[stack truncated]';
failure.stack = `${failure.stack.slice(0, MAX_STACK_TRACE_LENGTH)}\n[stack truncated]`;
}
}
@@ -102,10 +101,7 @@ function buildSessionMetadata(input: ActivityInput): SessionMetadata {
* 3. Service-based agent execution
* 4. Error classification for Temporal retry
*/
async function runAgentActivity(
agentName: AgentName,
input: ActivityInput
): Promise<AgentMetrics> {
async function runAgentActivity(agentName: AgentName, input: ActivityInput): Promise<AgentMetrics> {
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
const startTime = Date.now();
const attemptNumber = Context.current().info.attempt;
@@ -140,7 +136,7 @@ async function runAgentActivity(
attemptNumber,
},
auditSession,
logger
logger,
);
// 4. Return metrics
@@ -167,7 +163,7 @@ async function runAgentActivity(
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }],
);
}
@@ -335,9 +331,7 @@ export async function assembleReportActivity(input: ActivityInput): Promise<void
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
const { repoPath, sessionId, outputPath } = input;
const logger = createActivityLogger();
const effectiveOutputPath = outputPath
? path.join(outputPath, sessionId)
: path.join('./workspaces', sessionId);
const effectiveOutputPath = outputPath ? path.join(outputPath, sessionId) : path.join('./workspaces', sessionId);
try {
await injectModelIntoReport(repoPath, effectiveOutputPath, logger);
} catch (error) {
@@ -352,10 +346,7 @@ export async function injectReportMetadataActivity(input: ActivityInput): Promis
* Uses existing container if available (from prior agent runs),
* otherwise creates service directly (stateless, no dependencies).
*/
export async function checkExploitationQueue(
input: ActivityInput,
vulnType: VulnType
): Promise<ExploitationDecision> {
export async function checkExploitationQueue(input: ActivityInput, vulnType: VulnType): Promise<ExploitationDecision> {
const { repoPath, workflowId } = input;
const logger = createActivityLogger();
@@ -391,7 +382,7 @@ interface SessionJson {
export async function loadResumeState(
workspaceName: string,
expectedUrl: string,
expectedRepoPath: string
expectedRepoPath: string,
): Promise<ResumeState> {
// 1. Validate workspace exists
const sessionPath = path.join('./workspaces', workspaceName, 'session.json');
@@ -400,7 +391,7 @@ export async function loadResumeState(
if (!exists) {
throw ApplicationFailure.nonRetryable(
`Workspace not found: ${workspaceName}\nExpected path: ${sessionPath}`,
'WorkspaceNotFoundError'
'WorkspaceNotFoundError',
);
}
@@ -412,14 +403,14 @@ export async function loadResumeState(
const errorMsg = error instanceof Error ? error.message : String(error);
throw ApplicationFailure.nonRetryable(
`Corrupted session.json in workspace ${workspaceName}: ${errorMsg}`,
'CorruptedSessionError'
'CorruptedSessionError',
);
}
if (session.session.webUrl !== expectedUrl) {
throw ApplicationFailure.nonRetryable(
`URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`,
'URLMismatchError'
'URLMismatchError',
);
}
@@ -463,7 +454,7 @@ export async function loadResumeState(
`but their deliverable files are missing from disk. ` +
`Start a fresh run instead.`
: `No agents completed successfully. Start a fresh run instead.`),
'NoCheckpointsError'
'NoCheckpointsError',
);
}
@@ -497,7 +488,7 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
'filesystem',
false, // Non-retryable - corrupt workspace state
{ phase: 'resume' },
ErrorCode.GIT_CHECKPOINT_FAILED
ErrorCode.GIT_CHECKPOINT_FAILED,
);
}
return hash;
@@ -506,7 +497,7 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
const result = await executeGitCommandWithRetry(
['git', 'rev-list', '--max-count=1', ...commitHashes],
repoPath,
'find latest commit'
'find latest commit',
);
return result.stdout.trim();
@@ -518,7 +509,7 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
export async function restoreGitCheckpoint(
repoPath: string,
checkpointHash: string,
incompleteAgents: AgentName[]
incompleteAgents: AgentName[],
): Promise<void> {
const logger = createActivityLogger();
logger.info(`Restoring git workspace to ${checkpointHash}...`);
@@ -526,13 +517,9 @@ export async function restoreGitCheckpoint(
await executeGitCommandWithRetry(
['git', 'reset', '--hard', checkpointHash],
repoPath,
'reset to checkpoint for resume'
);
await executeGitCommandWithRetry(
['git', 'clean', '-fd'],
repoPath,
'clean untracked files for resume'
'reset to checkpoint for resume',
);
await executeGitCommandWithRetry(['git', 'clean', '-fd'], repoPath, 'clean untracked files for resume');
for (const agentName of incompleteAgents) {
const deliverableFilename = AGENTS[agentName].deliverableFilename;
@@ -559,7 +546,7 @@ export async function recordResumeAttempt(
terminatedWorkflows: string[],
checkpointHash: string,
previousWorkflowId: string,
completedAgents: string[]
completedAgents: string[],
): Promise<void> {
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
@@ -583,7 +570,7 @@ export async function recordResumeAttempt(
export async function logPhaseTransition(
input: ActivityInput,
phase: string,
event: 'start' | 'complete'
event: 'start' | 'complete',
): Promise<void> {
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
@@ -600,10 +587,7 @@ export async function logPhaseTransition(
* Log workflow completion with full summary.
* Cleans up container when done.
*/
export async function logWorkflowComplete(
input: ActivityInput,
summary: WorkflowSummary
): Promise<void> {
export async function logWorkflowComplete(input: ActivityInput, summary: WorkflowSummary): Promise<void> {
const { repoPath, workflowId } = input;
const sessionMetadata = buildSessionMetadata(input);
@@ -1,8 +1,9 @@
import { defineQuery } from '@temporalio/workflow';
export type { AgentMetrics } from '../types/metrics.js';
import type { AgentMetrics } from '../types/metrics.js';
import type { PipelineConfig } from '../types/config.js';
import type { AgentMetrics } from '../types/metrics.js';
export interface PipelineInput {
webUrl: string;
@@ -9,8 +9,8 @@
* Pure function with no side effects.
*/
import type { PipelineState } from './shared.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { PipelineState } from './shared.js';
/**
* Maps PipelineState to WorkflowSummary.
@@ -19,10 +19,7 @@ import type { WorkflowSummary } from '../audit/workflow-logger.js';
* safely imported into Temporal workflows. The caller must ensure
* state.summary is set before calling (via computeSummary).
*/
export function toWorkflowSummary(
state: PipelineState,
status: 'completed' | 'failed'
): WorkflowSummary {
export function toWorkflowSummary(state: PipelineState, status: 'completed' | 'failed'): WorkflowSummary {
// state.summary must be computed before calling this mapper
const summary = state.summary;
if (!summary) {
@@ -35,10 +32,7 @@ export function toWorkflowSummary(
totalCostUsd: summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
Object.entries(state.agentMetrics).map(([name, m]) => [name, { durationMs: m.durationMs, costUsd: m.costUsd }]),
),
...(state.error && { error: state.error }),
};
@@ -1,4 +1,5 @@
#!/usr/bin/env node
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
@@ -26,18 +27,18 @@
* TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
*/
import { NativeConnection, Worker, bundleWorkflowCode } from '@temporalio/worker';
import { Connection, Client, WorkflowNotFoundError, type WorkflowHandle } from '@temporalio/client';
import { fileURLToPath } from 'node:url';
import path from 'node:path';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { Client, Connection, type WorkflowHandle, WorkflowNotFoundError } from '@temporalio/client';
import { bundleWorkflowCode, NativeConnection, Worker } from '@temporalio/worker';
import dotenv from 'dotenv';
import * as activities from './activities.js';
import { sanitizeHostname } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import { parseConfig } from '../config-parser.js';
import type { PipelineConfig } from '../types/config.js';
import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
import { fileExists, readJson } from '../utils/file-io.js';
import * as activities from './activities.js';
import type { PipelineInput, PipelineProgress, PipelineState } from './shared.js';
dotenv.config();
@@ -61,9 +62,7 @@ function showUsage(): void {
console.log('\nShannon Worker');
console.log('Combined worker + client for pentest pipeline\n');
console.log('Usage:');
console.log(
' node dist/temporal/worker.js <webUrl> <repoPath> --task-queue <name> [options]\n'
);
console.log(' node dist/temporal/worker.js <webUrl> <repoPath> --task-queue <name> [options]\n');
console.log('Options:');
console.log(' --task-queue <name> Task queue name (required)');
console.log(' --config <path> Configuration file path');
@@ -135,7 +134,10 @@ function parseCliArgs(argv: string[]): CliArgs {
}
return {
webUrl, repoPath, taskQueue, pipelineTestingMode,
webUrl,
repoPath,
taskQueue,
pipelineTestingMode,
...(configPath && { configPath }),
...(outputPath && { outputPath }),
...(resumeFromWorkspace && { resumeFromWorkspace }),
@@ -167,17 +169,11 @@ interface WorkspaceResolution {
terminatedWorkflows: string[];
}
async function terminateExistingWorkflows(
client: Client,
workspaceName: string
): Promise<string[]> {
async function terminateExistingWorkflows(client: Client, workspaceName: string): Promise<string[]> {
const sessionPath = path.join('./workspaces', workspaceName, 'session.json');
if (!(await fileExists(sessionPath))) {
throw new Error(
`Workspace not found: ${workspaceName}\n` +
`Expected path: ${sessionPath}`
);
throw new Error(`Workspace not found: ${workspaceName}\n` + `Expected path: ${sessionPath}`);
}
const session = await readJson<SessionJson>(sessionPath);
@@ -214,10 +210,7 @@ async function terminateExistingWorkflows(
return terminated;
}
async function resolveWorkspace(
client: Client,
args: CliArgs
): Promise<WorkspaceResolution> {
async function resolveWorkspace(client: Client, args: CliArgs): Promise<WorkspaceResolution> {
if (!args.resumeFromWorkspace) {
const hostname = sanitizeHostname(args.webUrl);
const workflowId = `${hostname}_shannon-${Date.now()}`;
@@ -269,9 +262,7 @@ async function resolveWorkspace(
// If the workspace name already looks like a CLI-generated ID
// (ends with _shannon-<digits>), use it directly to avoid double _shannon- suffixes
const workflowId = /_shannon-\d+$/.test(workspace)
? workspace
: `${workspace}_shannon-${Date.now()}`;
const workflowId = /_shannon-\d+$/.test(workspace) ? workspace : `${workspace}_shannon-${Date.now()}`;
return {
workflowId,
@@ -304,7 +295,9 @@ async function loadPipelineConfig(configPath: string | undefined): Promise<Pipel
}
function buildPipelineInput(
args: CliArgs, workspace: WorkspaceResolution, pipelineConfig: PipelineConfig
args: CliArgs,
workspace: WorkspaceResolution,
pipelineConfig: PipelineConfig,
): PipelineInput {
return {
webUrl: args.webUrl,
@@ -323,14 +316,14 @@ function buildPipelineInput(
async function waitForWorkflowResult(
handle: WorkflowHandle<(input: PipelineInput) => Promise<PipelineState>>,
workspace: WorkspaceResolution
workspace: WorkspaceResolution,
): Promise<void> {
const progressInterval = setInterval(async () => {
try {
const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY);
const elapsed = Math.floor(progress.elapsedMs / 1000);
console.log(
`[${elapsed}s] Phase: ${progress.currentPhase || 'unknown'} | Agent: ${progress.currentAgent || 'none'} | Completed: ${progress.completedAgents.length}/13`
`[${elapsed}s] Phase: ${progress.currentPhase || 'unknown'} | Agent: ${progress.currentAgent || 'none'} | Completed: ${progress.completedAgents.length}/13`,
);
} catch {
// Workflow may have completed
@@ -350,9 +343,7 @@ async function waitForWorkflowResult(
if (workspace.isResume) {
try {
const session = await readJson<SessionJson>(
path.join('./workspaces', workspace.sessionId, 'session.json')
);
const session = await readJson<SessionJson>(path.join('./workspaces', workspace.sessionId, 'session.json'));
console.log(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`);
} catch {
// Non-fatal
@@ -437,7 +428,7 @@ async function run(): Promise<void> {
taskQueue: args.taskQueue,
workflowId: workspace.workflowId,
args: [input],
}
},
);
// 7. Wait for workflow result

Some files were not shown because too many files have changed in this diff Show More