mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-07-01 11:05:36 +02:00
Compare commits
64 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9d69e43ecf | |||
| f3cb5dce8f | |||
| 78a0a61208 | |||
| 7dc8cfe5c7 | |||
| 636ae6fb19 | |||
| eb8ab3be86 | |||
| 3b391ec54c | |||
| eaff84b847 | |||
| c12eca046c | |||
| 65b9bc4690 | |||
| 50629a24ab | |||
| e521e98a8f | |||
| 89cc30bb94 | |||
| 1f303b02b8 | |||
| 4de1508cb8 | |||
| 69f2d8ffe7 | |||
| b84c1d3bb0 | |||
| 5bda6fa634 | |||
| b26c69023d | |||
| cbb2b4acc0 | |||
| 05f8e2382c | |||
| eb7eced23f | |||
| 322e427c38 | |||
| 6fdfdcb96a | |||
| 49e53b9e0c | |||
| f84414d5ca | |||
| bc52d67dd5 | |||
| 264b16991a | |||
| dd18f4629b | |||
| b4d2c35b91 | |||
| f44d4d6fb8 | |||
| f92e3f6840 | |||
| b54d0fcc9b | |||
| fc8b122cca | |||
| 6d55352a13 | |||
| 00b5511028 | |||
| aecca9cec4 | |||
| 4d61d4af3f | |||
| 81ceabac1f | |||
| 41d3d3912d | |||
| 1e784e650d | |||
| 906d464abd | |||
| fba798ac49 | |||
| c655e8a716 | |||
| accb9562ba | |||
| 38e49eb1eb | |||
| c664000458 | |||
| af41570ae9 | |||
| 2c410d90b3 | |||
| 534b18e303 | |||
| 8f2825b32f | |||
| 369e3a34cf | |||
| 7f7285702e | |||
| deb4e51f98 | |||
| 2b14282ff6 | |||
| 5bbd757b45 | |||
| d2519322d2 | |||
| 456d852b87 | |||
| 341448c8a3 | |||
| b32e71a9b4 | |||
| 30f324be5e | |||
| 378585a4a3 | |||
| c040efc6b5 | |||
| 1051d40527 |
@@ -0,0 +1,139 @@
|
|||||||
|
---
|
||||||
|
description: Systematically debug errors using context analysis and structured recovery
|
||||||
|
---
|
||||||
|
|
||||||
|
You are debugging an issue. Follow this structured approach to avoid spinning in circles.
|
||||||
|
|
||||||
|
## Step 1: Capture Error Context
|
||||||
|
- Read the full error message and stack trace
|
||||||
|
- Identify the layer where the error originated:
|
||||||
|
- **CLI/Args** - Input validation, path resolution
|
||||||
|
- **Config Parsing** - YAML parsing, JSON Schema validation
|
||||||
|
- **Session Management** - Mutex, session.json, lock files
|
||||||
|
- **Audit System** - Logging, metrics tracking, atomic writes
|
||||||
|
- **Claude SDK** - Agent execution, MCP servers, turn handling
|
||||||
|
- **Git Operations** - Checkpoints, rollback, commit
|
||||||
|
- **Tool Execution** - nmap, subfinder, whatweb
|
||||||
|
- **Validation** - Deliverable checks, queue validation
|
||||||
|
|
||||||
|
## Step 2: Check Relevant Logs
|
||||||
|
|
||||||
|
**Session audit logs:**
|
||||||
|
```bash
|
||||||
|
# Find most recent session
|
||||||
|
ls -lt audit-logs/ | head -5
|
||||||
|
|
||||||
|
# Check session metrics and errors
|
||||||
|
cat audit-logs/<session>/session.json | jq '.errors, .agentMetrics'
|
||||||
|
|
||||||
|
# Check agent execution logs
|
||||||
|
ls -lt audit-logs/<session>/agents/
|
||||||
|
cat audit-logs/<session>/agents/<latest>.log
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 3: Trace the Call Path
|
||||||
|
|
||||||
|
For Shannon, trace through these layers:
|
||||||
|
|
||||||
|
1. **Temporal Client** → `src/temporal/client.ts` - Workflow initiation
|
||||||
|
2. **Workflow** → `src/temporal/workflows.ts` - Pipeline orchestration
|
||||||
|
3. **Activities** → `src/temporal/activities.ts` - Agent execution with heartbeats
|
||||||
|
4. **Config** → `src/config-parser.ts` - YAML loading, schema validation
|
||||||
|
5. **Session** → `src/session-manager.ts` - Agent definitions, execution order
|
||||||
|
6. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking
|
||||||
|
7. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic
|
||||||
|
8. **Validation** → `src/queue-validation.ts` - Deliverable checks
|
||||||
|
|
||||||
|
## Step 4: Identify Root Cause
|
||||||
|
|
||||||
|
**Common Shannon-specific issues:**
|
||||||
|
|
||||||
|
| Symptom | Likely Cause | Fix |
|
||||||
|
|---------|--------------|-----|
|
||||||
|
| Agent hangs indefinitely | MCP server crashed, Playwright timeout | Check Playwright logs in `/tmp/playwright-*` |
|
||||||
|
| "Validation failed: Missing deliverable" | Agent didn't create expected file | Check `deliverables/` dir, review prompt |
|
||||||
|
| Git checkpoint fails | Uncommitted changes, git lock | Run `git status`, remove `.git/index.lock` |
|
||||||
|
| "Session limit reached" | Claude API billing limit | Not retryable - check API usage |
|
||||||
|
| Parallel agents all fail | Shared resource contention | Check mutex usage, stagger startup timing |
|
||||||
|
| Cost/timing not tracked | Metrics not reloaded before update | Add `metricsTracker.reload()` before updates |
|
||||||
|
| session.json corrupted | Partial write during crash | Delete and restart, or restore from backup |
|
||||||
|
| YAML config rejected | Invalid schema or unsafe content | Run through AJV validator manually |
|
||||||
|
| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `prompt-manager.ts` interpolation |
|
||||||
|
|
||||||
|
**MCP Server Issues:**
|
||||||
|
```bash
|
||||||
|
# Check if Playwright browsers are installed
|
||||||
|
npx playwright install chromium
|
||||||
|
|
||||||
|
# Check MCP server startup (look for connection errors)
|
||||||
|
grep -i "mcp\|playwright" audit-logs/<session>/agents/*.log
|
||||||
|
```
|
||||||
|
|
||||||
|
**Git State Issues:**
|
||||||
|
```bash
|
||||||
|
# Check for uncommitted changes
|
||||||
|
git status
|
||||||
|
|
||||||
|
# Check for git locks
|
||||||
|
ls -la .git/*.lock
|
||||||
|
|
||||||
|
# View recent git operations from Shannon
|
||||||
|
git reflog | head -10
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 5: Apply Fix with Retry Limit
|
||||||
|
|
||||||
|
- **CRITICAL**: Track consecutive failed attempts
|
||||||
|
- After **3 consecutive failures** on the same issue, STOP and:
|
||||||
|
- Summarize what was tried
|
||||||
|
- Explain what's blocking progress
|
||||||
|
- Ask the user for guidance or additional context
|
||||||
|
- After a successful fix, reset the failure counter
|
||||||
|
|
||||||
|
## Step 6: Validate the Fix
|
||||||
|
|
||||||
|
**For code changes:**
|
||||||
|
```bash
|
||||||
|
# Compile TypeScript
|
||||||
|
npx tsc --noEmit
|
||||||
|
|
||||||
|
# Quick validation run
|
||||||
|
./shannon start URL=<url> REPO=<path> PIPELINE_TESTING=true
|
||||||
|
```
|
||||||
|
|
||||||
|
**For audit/session issues:**
|
||||||
|
- Verify `session.json` is valid JSON after fix
|
||||||
|
- Check that atomic writes complete without errors
|
||||||
|
- Confirm mutex release in `finally` blocks
|
||||||
|
|
||||||
|
**For agent issues:**
|
||||||
|
- Verify deliverable files are created in correct location
|
||||||
|
- Check that validation functions return expected results
|
||||||
|
- Confirm retry logic triggers on appropriate errors
|
||||||
|
|
||||||
|
## Anti-Patterns to Avoid
|
||||||
|
|
||||||
|
- Don't delete `session.json` without checking if session is active
|
||||||
|
- Don't modify git state while an agent is running
|
||||||
|
- Don't retry billing/quota errors (they're not retryable)
|
||||||
|
- Don't ignore PentestError type - it indicates the error category
|
||||||
|
- Don't make random changes hoping something works
|
||||||
|
- Don't fix symptoms without understanding root cause
|
||||||
|
- Don't bypass mutex protection for "quick fixes"
|
||||||
|
|
||||||
|
## Quick Reference: Error Types
|
||||||
|
|
||||||
|
| PentestError Type | Meaning | Retryable? |
|
||||||
|
|-------------------|---------|------------|
|
||||||
|
| `config` | Configuration file issues | No |
|
||||||
|
| `network` | Connection/timeout issues | Yes |
|
||||||
|
| `tool` | External tool (nmap, etc.) failed | Yes |
|
||||||
|
| `prompt` | Claude SDK/API issues | Sometimes |
|
||||||
|
| `filesystem` | File read/write errors | Sometimes |
|
||||||
|
| `validation` | Deliverable validation failed | Yes (via retry) |
|
||||||
|
| `billing` | API quota/billing limit | No |
|
||||||
|
| `unknown` | Unexpected error | Depends |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Now analyze the error and begin debugging systematically.
|
||||||
@@ -0,0 +1,120 @@
|
|||||||
|
---
|
||||||
|
description: Review code changes for Shannon-specific patterns, security, and common mistakes
|
||||||
|
---
|
||||||
|
|
||||||
|
Review the current changes (staged or working directory) with focus on Shannon-specific patterns and common mistakes.
|
||||||
|
|
||||||
|
## Step 1: Gather Changes
|
||||||
|
Run these commands to understand the scope:
|
||||||
|
```bash
|
||||||
|
git diff --stat HEAD
|
||||||
|
git diff HEAD
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 2: Check Shannon-Specific Patterns
|
||||||
|
|
||||||
|
### Error Handling (CRITICAL)
|
||||||
|
- [ ] **All errors use PentestError** - Never use raw `Error`. Use `new PentestError(message, type, retryable, context)`
|
||||||
|
- [ ] **Error type is appropriate** - Use correct type: 'config', 'network', 'tool', 'prompt', 'filesystem', 'validation', 'billing', 'unknown'
|
||||||
|
- [ ] **Retryable flag matches behavior** - If error will be retried, set `retryable: true`
|
||||||
|
- [ ] **Context includes debugging info** - Add relevant paths, tool names, error codes to context object
|
||||||
|
- [ ] **Never swallow errors silently** - Always log or propagate errors
|
||||||
|
|
||||||
|
### Audit System & Concurrency (CRITICAL)
|
||||||
|
- [ ] **Mutex protection for parallel operations** - Use `sessionMutex.lock()` when updating `session.json` during parallel agent execution
|
||||||
|
- [ ] **Reload before modify** - Always call `this.metricsTracker.reload()` before updating metrics in mutex block
|
||||||
|
- [ ] **Atomic writes for session.json** - Use `atomicWrite()` for session metadata, never `fs.writeFile()` directly
|
||||||
|
- [ ] **Stream drain handling** - Log writes must wait for buffer drain before resolving
|
||||||
|
- [ ] **Semaphore release in finally** - Git semaphore must be released in `finally` block
|
||||||
|
|
||||||
|
### Claude SDK Integration (CRITICAL)
|
||||||
|
- [ ] **MCP server configuration** - Verify Playwright MCP uses `--isolated` and unique `--user-data-dir`
|
||||||
|
- [ ] **Prompt variable interpolation** - Check all `{{VARIABLE}}` placeholders are replaced
|
||||||
|
- [ ] **Turn counting** - Increment `turnCount` on assistant messages, not tool calls
|
||||||
|
- [ ] **Cost tracking** - Extract cost from final `result` message, track even on failure
|
||||||
|
- [ ] **API error detection** - Check for "session limit reached" (fatal) vs other errors
|
||||||
|
|
||||||
|
### Configuration & Validation (CRITICAL)
|
||||||
|
- [ ] **FAILSAFE_SCHEMA for YAML** - Never use default schema (prevents code execution)
|
||||||
|
- [ ] **Security pattern detection** - Check for path traversal (`../`), HTML injection (`<>`), JavaScript URLs
|
||||||
|
- [ ] **Rule conflict detection** - Rules cannot appear in both `avoid` AND `focus`
|
||||||
|
- [ ] **Duplicate rule detection** - Same `type:url_path` cannot appear twice
|
||||||
|
- [ ] **JSON Schema validation before use** - Config must pass AJV validation
|
||||||
|
|
||||||
|
### Session & Agent Management (CRITICAL)
|
||||||
|
- [ ] **Deliverable dependencies respected** - Exploitation agents only run if vulnerability queue exists AND has items
|
||||||
|
- [ ] **Queue validation before exploitation** - Use `safeValidateQueueAndDeliverable()` to check eligibility
|
||||||
|
- [ ] **Git checkpoint before agent run** - Create checkpoint for rollback on failure
|
||||||
|
- [ ] **Git rollback on retry** - Call `rollbackGitWorkspace()` before each retry attempt
|
||||||
|
- [ ] **Agent prerequisites checked** - Verify prerequisite agents completed before running dependent agent
|
||||||
|
|
||||||
|
### Parallel Execution
|
||||||
|
- [ ] **Promise.allSettled for parallel agents** - Never use `Promise.all` (partial failures should not crash batch)
|
||||||
|
- [ ] **Staggered startup** - 2-second delay between parallel agent starts to prevent API throttling
|
||||||
|
- [ ] **Individual retry loops** - Each agent retries independently (3 attempts max)
|
||||||
|
- [ ] **Results aggregated correctly** - Handle both 'fulfilled' and 'rejected' results from `Promise.allSettled`
|
||||||
|
|
||||||
|
## Step 3: TypeScript Safety
|
||||||
|
|
||||||
|
### Type Assertions (WARNING)
|
||||||
|
- [ ] **No double casting** - Never use `as unknown as SomeType` (bypasses type safety)
|
||||||
|
- [ ] **Validate before casting** - JSON parsed data should be validated (JSON Schema) before `as Type`
|
||||||
|
- [ ] **Prefer type guards** - Use `instanceof` or property checks instead of assertions where possible
|
||||||
|
|
||||||
|
### Null/Undefined Handling
|
||||||
|
- [ ] **Explicit null checks** - Use `if (x === null || x === undefined)` not truthy checks for critical paths
|
||||||
|
- [ ] **Nullish coalescing** - Use `??` for null/undefined, not `||` which also catches empty string/0
|
||||||
|
- [ ] **Optional chaining** - Use `?.` for nested property access on potentially undefined objects
|
||||||
|
|
||||||
|
### Imports & Types
|
||||||
|
- [ ] **Type imports** - Use `import type { ... }` for type-only imports
|
||||||
|
- [ ] **No implicit any** - All function parameters and returns must have explicit types
|
||||||
|
- [ ] **Readonly for constants** - Use `Object.freeze()` and `Readonly<>` for immutable data
|
||||||
|
|
||||||
|
## Step 4: Security Review
|
||||||
|
|
||||||
|
### Defensive Tool Security
|
||||||
|
- [ ] **No credentials in logs** - Check that passwords, tokens, TOTP secrets are not logged to audit files
|
||||||
|
- [ ] **Config file size limit** - Ensure 1MB max for config files (DoS prevention)
|
||||||
|
- [ ] **Safe shell execution** - Command arguments must be escaped/sanitized
|
||||||
|
|
||||||
|
### Code Injection Prevention
|
||||||
|
- [ ] **YAML safe parsing** - FAILSAFE_SCHEMA only
|
||||||
|
- [ ] **No eval/Function** - Never use dynamic code evaluation
|
||||||
|
- [ ] **Input validation at boundaries** - URLs, paths validated before use
|
||||||
|
|
||||||
|
## Step 5: Common Mistakes to Avoid
|
||||||
|
|
||||||
|
### Anti-Patterns Found in Codebase
|
||||||
|
- [ ] **Catch + re-throw without context** - Don't just `throw error`, wrap with additional context
|
||||||
|
- [ ] **Silent failures in session loading** - Corrupted session files should warn user, not silently reset
|
||||||
|
- [ ] **Duplicate retry logic** - Don't implement retry at both caller and callee level
|
||||||
|
- [ ] **Hardcoded error message matching** - Prefer error codes over regex on error.message
|
||||||
|
- [ ] **Missing timeout on long operations** - Git operations and API calls should have timeouts
|
||||||
|
|
||||||
|
### Code Quality
|
||||||
|
- [ ] **No dead code added** - Remove unused imports, functions, variables
|
||||||
|
- [ ] **No over-engineering** - Don't add abstractions for single-use operations
|
||||||
|
- [ ] **Comments only where needed** - Self-documenting code preferred over excessive comments
|
||||||
|
- [ ] **Consistent file naming** - kebab-case for files (e.g., `queue-validation.ts`)
|
||||||
|
|
||||||
|
## Step 6: Provide Feedback
|
||||||
|
|
||||||
|
For each issue found:
|
||||||
|
1. **Location**: File and line number
|
||||||
|
2. **Issue**: What's wrong and why it matters
|
||||||
|
3. **Fix**: How to correct it (with code example if helpful)
|
||||||
|
4. **Severity**: Critical / Warning / Suggestion
|
||||||
|
|
||||||
|
### Severity Definitions
|
||||||
|
- **Critical**: Will cause bugs, crashes, data loss, or security issues
|
||||||
|
- **Warning**: Code smell, inconsistent pattern, or potential future issue
|
||||||
|
- **Suggestion**: Style improvement or minor enhancement
|
||||||
|
|
||||||
|
Summarize with:
|
||||||
|
- Total issues by severity
|
||||||
|
- Overall assessment (Ready to commit / Needs fixes / Needs discussion)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Now review the current changes.
|
||||||
+1
-1
@@ -7,6 +7,7 @@ yarn-error.log*
|
|||||||
# Runtime directories
|
# Runtime directories
|
||||||
sessions/
|
sessions/
|
||||||
deliverables/
|
deliverables/
|
||||||
|
xben-benchmark-results/
|
||||||
.claude/
|
.claude/
|
||||||
|
|
||||||
# Git
|
# Git
|
||||||
@@ -17,7 +18,6 @@ deliverables/
|
|||||||
# Development files
|
# Development files
|
||||||
*.md
|
*.md
|
||||||
!CLAUDE.md
|
!CLAUDE.md
|
||||||
.env*
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
# CLAUDE_CODE_MAX_TOKENS controls the maximum token limit for Claude Code sessions
|
|
||||||
# This is used by the Claude Agent SDK to set the context window size for AI analysis
|
|
||||||
# Higher values allow for more comprehensive code analysis but consume more tokens
|
|
||||||
# Default: 64000 tokens (sufficient for most penetration testing tasks)
|
|
||||||
CLAUDE_CODE_MAX_TOKENS=64000
|
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
# Shannon Environment Configuration
|
||||||
|
# Copy this file to .env and fill in your credentials
|
||||||
|
|
||||||
|
# Anthropic authentication (required - set either the API key or the OAuth token below)
|
||||||
|
ANTHROPIC_API_KEY=your-api-key-here
|
||||||
|
|
||||||
|
# OR use OAuth token instead
|
||||||
|
# CLAUDE_CODE_OAUTH_TOKEN=your-oauth-token-here
|
||||||
+3
-3
@@ -1,4 +1,4 @@
|
|||||||
node_modules/
|
node_modules/
|
||||||
.shannon-store.json
|
.env
|
||||||
agent-logs/
|
audit-logs/
|
||||||
/audit-logs/
|
dist/
|
||||||
|
|||||||
@@ -8,119 +8,97 @@ This is an AI-powered penetration testing agent designed for defensive security
|
|||||||
|
|
||||||
## Commands
|
## Commands
|
||||||
|
|
||||||
### Installation & Setup
|
### Prerequisites
|
||||||
|
- **Docker** - Container runtime
|
||||||
|
- **Anthropic API key** - Set in `.env` file
|
||||||
|
|
||||||
|
### Running the Penetration Testing Agent (Docker + Temporal)
|
||||||
```bash
|
```bash
|
||||||
npm install
|
# Configure credentials
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env:
|
||||||
|
# ANTHROPIC_API_KEY=your-key
|
||||||
|
# CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000 # Avoids hitting the output token limit during long reports
|
||||||
|
|
||||||
|
# Start a pentest workflow
|
||||||
|
./shannon start URL=<url> REPO=<path>
|
||||||
```
|
```
|
||||||
|
|
||||||
### Running the Penetration Testing Agent
|
Examples:
|
||||||
```bash
|
```bash
|
||||||
./shannon.mjs <WEB_URL> <REPO_PATH> --config <CONFIG_FILE>
|
./shannon start URL=https://example.com REPO=/path/to/repo
|
||||||
|
./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./configs/my-config.yaml
|
||||||
|
./shannon start URL=https://example.com REPO=/path/to/repo OUTPUT=./my-reports
|
||||||
```
|
```
|
||||||
|
|
||||||
Example:
|
### Monitoring Progress
|
||||||
```bash
|
```bash
|
||||||
./shannon.mjs "https://example.com" "/path/to/local/repo"
|
./shannon logs # View real-time worker logs
|
||||||
./shannon.mjs "https://juice-shop.herokuapp.com" "/home/user/juice-shop" --config juice-shop-config.yaml
|
./shannon query ID=<workflow-id> # Query specific workflow progress
|
||||||
|
# Temporal Web UI available at http://localhost:8233
|
||||||
```
|
```
|
||||||
|
|
||||||
### Alternative Execution
|
### Stopping Shannon
|
||||||
```bash
|
```bash
|
||||||
npm start <WEB_URL> <REPO_PATH> --config <CONFIG_FILE>
|
./shannon stop # Stop containers (preserves workflow data)
|
||||||
|
./shannon stop CLEAN=true # Full cleanup including volumes
|
||||||
```
|
```
|
||||||
|
|
||||||
### Configuration Validation
|
### Options
|
||||||
```bash
|
```bash
|
||||||
# Configuration validation is built into the main script
|
CONFIG=<file> YAML configuration file for authentication and testing parameters
|
||||||
./shannon.mjs --help # Shows usage and validates config on execution
|
OUTPUT=<path> Custom output directory for session folder (default: ./audit-logs/)
|
||||||
|
PIPELINE_TESTING=true Use minimal prompts and fast retry intervals (10s instead of 5min)
|
||||||
|
REBUILD=true Force Docker rebuild with --no-cache (use when code changes aren't picked up)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Generate TOTP for Authentication
|
### Generate TOTP for Authentication
|
||||||
TOTP generation is now handled automatically via the `generate_totp` MCP tool during authentication flows.
|
TOTP generation is handled automatically via the `generate_totp` MCP tool during authentication flows.
|
||||||
|
|
||||||
### Development Commands
|
### Development Commands
|
||||||
```bash
|
```bash
|
||||||
# No linting or testing commands available in this project
|
# Build TypeScript
|
||||||
# Development is done by running the agent in pipeline-testing mode
|
npm run build
|
||||||
./shannon.mjs <commands> --pipeline-testing
|
|
||||||
```
|
|
||||||
|
|
||||||
### Session Management Commands
|
# Run with pipeline testing mode (fast, minimal deliverables)
|
||||||
```bash
|
./shannon start URL=<url> REPO=<path> PIPELINE_TESTING=true
|
||||||
# Setup session without running
|
|
||||||
./shannon.mjs --setup-only <WEB_URL> <REPO_PATH> --config <CONFIG_FILE>
|
|
||||||
|
|
||||||
# Check session status (shows progress, timing, costs)
|
|
||||||
./shannon.mjs --status
|
|
||||||
|
|
||||||
# List all available agents by phase
|
|
||||||
./shannon.mjs --list-agents
|
|
||||||
|
|
||||||
# Show help
|
|
||||||
./shannon.mjs --help
|
|
||||||
```
|
|
||||||
|
|
||||||
### Execution Commands
|
|
||||||
```bash
|
|
||||||
# Run all remaining agents to completion
|
|
||||||
./shannon.mjs --run-all [--pipeline-testing]
|
|
||||||
|
|
||||||
# Run a specific agent
|
|
||||||
./shannon.mjs --run-agent <agent-name> [--pipeline-testing]
|
|
||||||
|
|
||||||
# Run a range of agents
|
|
||||||
./shannon.mjs --run-agents <start-agent>:<end-agent> [--pipeline-testing]
|
|
||||||
|
|
||||||
# Run a specific phase
|
|
||||||
./shannon.mjs --run-phase <phase-name> [--pipeline-testing]
|
|
||||||
|
|
||||||
# Pipeline testing mode (minimal prompts for fast testing)
|
|
||||||
./shannon.mjs <command> --pipeline-testing
|
|
||||||
```
|
|
||||||
|
|
||||||
### Rollback & Recovery Commands
|
|
||||||
```bash
|
|
||||||
# Rollback to specific checkpoint
|
|
||||||
./shannon.mjs --rollback-to <agent-name>
|
|
||||||
|
|
||||||
# Rollback and re-execute specific agent
|
|
||||||
./shannon.mjs --rerun <agent-name> [--pipeline-testing]
|
|
||||||
```
|
|
||||||
|
|
||||||
### Session Cleanup Commands
|
|
||||||
```bash
|
|
||||||
# Delete all sessions (with confirmation)
|
|
||||||
./shannon.mjs --cleanup
|
|
||||||
|
|
||||||
# Delete specific session by ID
|
|
||||||
./shannon.mjs --cleanup <session-id>
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Architecture & Components
|
## Architecture & Components
|
||||||
|
|
||||||
### Main Entry Point
|
|
||||||
- `shannon.mjs` - Main orchestration script that coordinates the entire penetration testing workflow
|
|
||||||
|
|
||||||
### Core Modules
|
### Core Modules
|
||||||
- `src/config-parser.js` - Handles YAML configuration parsing, validation, and distribution to agents
|
- `src/config-parser.ts` - Handles YAML configuration parsing, validation, and distribution to agents
|
||||||
- `src/error-handling.js` - Comprehensive error handling with retry logic and categorized error types
|
- `src/error-handling.ts` - Comprehensive error handling with retry logic and categorized error types
|
||||||
- `src/tool-checker.js` - Validates availability of external security tools before execution
|
- `src/tool-checker.ts` - Validates availability of external security tools before execution
|
||||||
- `src/session-manager.js` - Manages persistent session state and agent lifecycle
|
- `src/session-manager.ts` - Agent definitions, execution order, and parallel groups
|
||||||
- `src/checkpoint-manager.js` - Git-based checkpointing system for rollback capabilities
|
- `src/queue-validation.ts` - Validates deliverables and agent prerequisites
|
||||||
- Pipeline orchestration is built into the main `shannon.mjs` script
|
|
||||||
- `src/queue-validation.js` - Validates deliverables and agent prerequisites
|
### Temporal Orchestration Layer
|
||||||
|
Shannon uses Temporal for durable workflow orchestration:
|
||||||
|
- `src/temporal/shared.ts` - Types, interfaces, query definitions
|
||||||
|
- `src/temporal/workflows.ts` - Main workflow (pentestPipelineWorkflow)
|
||||||
|
- `src/temporal/activities.ts` - Activity implementations with heartbeats
|
||||||
|
- `src/temporal/worker.ts` - Worker process entry point
|
||||||
|
- `src/temporal/client.ts` - CLI client for starting workflows
|
||||||
|
- `src/temporal/query.ts` - Query tool for progress inspection
|
||||||
|
|
||||||
|
Key features:
|
||||||
|
- **Crash recovery** - Workflows resume automatically after worker restart
|
||||||
|
- **Queryable progress** - Real-time status via `./shannon query` or Temporal Web UI
|
||||||
|
- **Intelligent retry** - Distinguishes transient vs permanent errors
|
||||||
|
- **Parallel execution** - 5 concurrent agents in vulnerability/exploitation phases
|
||||||
|
|
||||||
### Five-Phase Testing Workflow
|
### Five-Phase Testing Workflow
|
||||||
|
|
||||||
1. **Pre-Reconnaissance** (`pre-recon`) - External tool scans (nmap, subfinder, whatweb) + source code analysis
|
1. **Pre-Reconnaissance** (`pre-recon`) - External tool scans (nmap, subfinder, whatweb) + source code analysis
|
||||||
2. **Reconnaissance** (`recon`) - Analysis of initial findings and attack surface mapping
|
2. **Reconnaissance** (`recon`) - Analysis of initial findings and attack surface mapping
|
||||||
3. **Vulnerability Analysis** (5 agents)
|
3. **Vulnerability Analysis** (5 agents run in parallel)
|
||||||
- `injection-vuln` - SQL injection, command injection
|
- `injection-vuln` - SQL injection, command injection
|
||||||
- `xss-vuln` - Cross-site scripting
|
- `xss-vuln` - Cross-site scripting
|
||||||
- `auth-vuln` - Authentication bypasses
|
- `auth-vuln` - Authentication bypasses
|
||||||
- `authz-vuln` - Authorization flaws
|
- `authz-vuln` - Authorization flaws
|
||||||
- `ssrf-vuln` - Server-side request forgery
|
- `ssrf-vuln` - Server-side request forgery
|
||||||
4. **Exploitation** (5 agents)
|
4. **Exploitation** (5 agents run in parallel, only if vulnerabilities found)
|
||||||
- `injection-exploit` - Exploit injection vulnerabilities
|
- `injection-exploit` - Exploit injection vulnerabilities
|
||||||
- `xss-exploit` - Exploit XSS vulnerabilities
|
- `xss-exploit` - Exploit XSS vulnerabilities
|
||||||
- `auth-exploit` - Exploit authentication issues
|
- `auth-exploit` - Exploit authentication issues
|
||||||
@@ -181,71 +159,80 @@ The agent integrates with external security tools:
|
|||||||
|
|
||||||
Tools are validated for availability before execution using the tool-checker module.
|
Tools are validated for availability before execution using the tool-checker module.
|
||||||
|
|
||||||
### Git-Based Checkpointing System
|
### Audit & Metrics System
|
||||||
The agent implements a sophisticated checkpoint system using git:
|
The agent implements a crash-safe audit system with the following features:
|
||||||
- Every agent creates a git checkpoint before execution
|
|
||||||
- Rollback to any previous agent state using `--rollback-to` or `--rerun`
|
|
||||||
- Failed agents don't affect completed work
|
|
||||||
- Rolled-back agents marked in audit system with status: "rolled-back"
|
|
||||||
- Reconciliation automatically syncs Shannon store with audit logs after rollback
|
|
||||||
- Fail-fast safety prevents accidental re-execution of completed agents
|
|
||||||
|
|
||||||
### Unified Audit & Metrics System
|
|
||||||
The agent implements a crash-safe, self-healing audit system (v3.0) with the following guarantees:
|
|
||||||
|
|
||||||
**Architecture:**
|
**Architecture:**
|
||||||
- **audit-logs/**: Centralized metrics and forensic logs (source of truth)
|
- **audit-logs/** (or custom `OUTPUT=<path>` directory): Centralized metrics and forensic logs
|
||||||
- `{hostname}_{sessionId}/session.json` - Comprehensive metrics with attempt-level detail
|
- `{hostname}_{sessionId}/session.json` - Comprehensive metrics with attempt-level detail
|
||||||
- `{hostname}_{sessionId}/prompts/` - Exact prompts used for reproducibility
|
- `{hostname}_{sessionId}/prompts/` - Exact prompts used for reproducibility
|
||||||
- `{hostname}_{sessionId}/agents/` - Turn-by-turn execution logs
|
- `{hostname}_{sessionId}/agents/` - Turn-by-turn execution logs
|
||||||
- **.shannon-store.json**: Minimal orchestration state (completedAgents, checkpoints)
|
- `{hostname}_{sessionId}/deliverables/` - Security reports and findings
|
||||||
|
|
||||||
**Crash Safety:**
|
**Crash Safety:**
|
||||||
- Append-only logging with immediate flush (survives kill -9)
|
- Append-only logging with immediate flush (survives kill -9)
|
||||||
- Atomic writes for session.json (no partial writes)
|
- Atomic writes for session.json (no partial writes)
|
||||||
- Event-based logging (tool_start, tool_end, llm_response) closes data loss windows
|
- Event-based logging (tool_start, tool_end, llm_response)
|
||||||
|
|
||||||
**Self-Healing:**
|
|
||||||
- Automatic reconciliation before every CLI command
|
|
||||||
- Recovers from crashes during rollback
|
|
||||||
- Audit logs are source of truth; Shannon store follows
|
|
||||||
|
|
||||||
**Forensic Completeness:**
|
|
||||||
- All retry attempts logged with errors, costs, durations
|
|
||||||
- Rolled-back agents preserved with status: "rolled-back"
|
|
||||||
- Partial cost capture for failed attempts
|
|
||||||
- Complete event trail for debugging
|
|
||||||
|
|
||||||
**Concurrency Safety:**
|
**Concurrency Safety:**
|
||||||
- SessionMutex prevents race conditions during parallel agent execution
|
- SessionMutex prevents race conditions during parallel agent execution
|
||||||
- Safe parallel execution of vulnerability and exploitation phases
|
- 5x faster execution with parallel vulnerability and exploitation phases
|
||||||
|
|
||||||
**Metrics & Reporting:**
|
**Metrics & Reporting:**
|
||||||
- Export metrics to CSV with `./scripts/export-metrics.js`
|
|
||||||
- Phase-level and agent-level timing/cost aggregations
|
- Phase-level and agent-level timing/cost aggregations
|
||||||
- Validation results integrated with metrics
|
- Validation results integrated with metrics
|
||||||
|
|
||||||
For detailed design, see `docs/unified-audit-system-design.md`.
|
|
||||||
|
|
||||||
## Development Notes
|
## Development Notes
|
||||||
|
|
||||||
|
### Learning from Reference Implementations
|
||||||
|
|
||||||
|
A working POC exists at `/Users/arjunmalleswaran/Code/shannon-pocs` that demonstrates the ideal Temporal + Claude Agent SDK integration. When implementing Temporal features, agents can ask questions in the chat, and the user will relay them to another Claude Code session working in that POC directory.
|
||||||
|
|
||||||
|
**How to use this approach:**
|
||||||
|
1. When stuck or unsure about Temporal patterns, write a specific question in the chat
|
||||||
|
2. The user will ask an agent working on the POC to answer
|
||||||
|
3. The user relays the answer (code snippets, patterns, explanations) back
|
||||||
|
4. Apply the learned patterns to Shannon's codebase
|
||||||
|
|
||||||
|
**Example questions to ask:**
|
||||||
|
- "How does the POC structure its workflow to handle parallel activities?"
|
||||||
|
- "Show me how heartbeats are implemented in the POC's activities"
|
||||||
|
- "What retry configuration does the POC use for long-running agent activities?"
|
||||||
|
- "How does the POC integrate Claude Agent SDK calls within Temporal activities?"
|
||||||
|
|
||||||
|
**Reference implementation:**
|
||||||
|
- **Temporal + Claude Agent SDK**: `/Users/arjunmalleswaran/Code/shannon-pocs` - working implementation demonstrating workflows, activities, worker setup, and SDK integration
|
||||||
|
|
||||||
|
### Adding a New Agent
|
||||||
|
1. Define the agent in `src/session-manager.ts` (add to `AGENT_QUEUE` and appropriate parallel group)
|
||||||
|
2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt` or `exploit-newtype.txt`)
|
||||||
|
3. Add activity function in `src/temporal/activities.ts`
|
||||||
|
4. Register activity in `src/temporal/workflows.ts` within the appropriate phase
|
||||||
|
|
||||||
|
### Modifying Prompts
|
||||||
|
- Prompt templates use variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}`
|
||||||
|
- Shared partials in `prompts/shared/` are included via `prompt-manager.ts`
|
||||||
|
- Test changes with `PIPELINE_TESTING=true` for faster iteration
|
||||||
|
|
||||||
### Key Design Patterns
|
### Key Design Patterns
|
||||||
- **Configuration-Driven Architecture**: YAML configs with JSON Schema validation
|
- **Configuration-Driven Architecture**: YAML configs with JSON Schema validation
|
||||||
- **Modular Error Handling**: Categorized error types with retry logic
|
- **Modular Error Handling**: Categorized error types with retry logic
|
||||||
- **Pure Functions**: Most functionality is implemented as pure functions for testability
|
|
||||||
- **SDK-First Approach**: Heavy reliance on Claude Agent SDK for autonomous AI operations
|
- **SDK-First Approach**: Heavy reliance on Claude Agent SDK for autonomous AI operations
|
||||||
- **Progressive Analysis**: Each phase builds on previous phase results
|
- **Progressive Analysis**: Each phase builds on previous phase results
|
||||||
- **Local Repository Setup**: Target applications are accessed directly from user-provided local directories
|
|
||||||
|
|
||||||
### Error Handling Strategy
|
### Error Handling Strategy
|
||||||
The application uses a comprehensive error handling system with:
|
The application uses a comprehensive error handling system with:
|
||||||
- Categorized error types (PentestError, ConfigError, NetworkError, etc.)
|
- Categorized error types (PentestError, ConfigError, NetworkError, etc.)
|
||||||
- Automatic retry logic for transient failures
|
- Automatic retry logic for transient failures (3 attempts per agent)
|
||||||
- Graceful degradation when external tools are unavailable
|
- Graceful degradation when external tools are unavailable
|
||||||
- Detailed error logging and user-friendly error messages
|
- Detailed error logging and user-friendly error messages
|
||||||
|
|
||||||
### Testing Mode
|
### Testing Mode
|
||||||
The agent includes a testing mode that skips external tool execution for faster development cycles.
|
The agent includes a testing mode that skips external tool execution for faster development cycles:
|
||||||
|
```bash
|
||||||
|
./shannon start URL=<url> REPO=<path> PIPELINE_TESTING=true
|
||||||
|
```
|
||||||
|
|
||||||
### Security Focus
|
### Security Focus
|
||||||
This is explicitly designed as a **defensive security tool** for:
|
This is explicitly designed as a **defensive security tool** for:
|
||||||
@@ -256,82 +243,49 @@ This is explicitly designed as a **defensive security tool** for:
|
|||||||
|
|
||||||
The tool should only be used on systems you own or have explicit permission to test.
|
The tool should only be used on systems you own or have explicit permission to test.
|
||||||
|
|
||||||
## File Structure
|
## Key Files & Directories
|
||||||
|
|
||||||
```
|
**Entry Points:**
|
||||||
shannon.mjs # Main orchestration script
|
- `src/temporal/workflows.ts` - Temporal workflow definition
|
||||||
package.json # Node.js dependencies
|
- `src/temporal/activities.ts` - Activity implementations with heartbeats
|
||||||
.shannon-store.json # Orchestration state (minimal)
|
- `src/temporal/worker.ts` - Worker process entry point
|
||||||
src/ # Core modules
|
- `src/temporal/client.ts` - CLI client for starting workflows
|
||||||
├── audit/ # Unified audit system (v3.0)
|
|
||||||
│ ├── index.js # Public API
|
**Core Logic:**
|
||||||
│ ├── audit-session.js # Main facade (logger + metrics + mutex)
|
- `src/session-manager.ts` - Agent definitions, execution order, parallel groups
|
||||||
│ ├── logger.js # Append-only crash-safe logging
|
- `src/ai/claude-executor.ts` - Claude Agent SDK integration
|
||||||
│ ├── metrics-tracker.js # Timing, cost, attempt tracking
|
- `src/config-parser.ts` - YAML config parsing with JSON Schema validation
|
||||||
│ └── utils.js # Path generation, atomic writes
|
- `src/audit/` - Crash-safe logging and metrics system
|
||||||
├── config-parser.js # Configuration handling
|
|
||||||
├── error-handling.js # Error management
|
**Configuration:**
|
||||||
├── tool-checker.js # Tool validation
|
- `shannon` - CLI script for running pentests
|
||||||
├── session-manager.js # Session state + reconciliation
|
- `docker-compose.yml` - Temporal server + worker containers
|
||||||
├── checkpoint-manager.js # Git-based checkpointing + rollback
|
- `configs/` - YAML configs with `config-schema.json` for validation
|
||||||
├── queue-validation.js # Deliverable validation
|
- `prompts/` - AI prompt templates (`vuln-*.txt`, `exploit-*.txt`, etc.)
|
||||||
├── ai/
|
|
||||||
│ └── claude-executor.js # Claude Agent SDK integration
|
**Output:**
|
||||||
└── utils/
|
- `audit-logs/{hostname}_{sessionId}/` - Session metrics, agent logs, deliverables
|
||||||
audit-logs/ # Centralized audit data (v3.0)
|
|
||||||
└── {hostname}_{sessionId}/
|
|
||||||
├── session.json # Comprehensive metrics
|
|
||||||
├── prompts/ # Prompt snapshots
|
|
||||||
│ └── {agent}.md
|
|
||||||
└── agents/ # Agent execution logs
|
|
||||||
└── {timestamp}_{agent}_attempt-{N}.log
|
|
||||||
configs/ # Configuration files
|
|
||||||
├── config-schema.json # JSON Schema validation
|
|
||||||
├── example-config.yaml # Template configuration
|
|
||||||
├── juice-shop-config.yaml # Juice Shop example
|
|
||||||
├── keygraph-config.yaml # Keygraph configuration
|
|
||||||
├── chatwoot-config.yaml # Chatwoot configuration
|
|
||||||
├── metabase-config.yaml # Metabase configuration
|
|
||||||
└── cal-com-config.yaml # Cal.com configuration
|
|
||||||
prompts/ # AI prompt templates
|
|
||||||
├── shared/ # Shared content for all prompts
|
|
||||||
│ ├── _target.txt # Target URL template
|
|
||||||
│ ├── _rules.txt # Rules template
|
|
||||||
│ ├── _vuln-scope.txt # Vulnerability scope template
|
|
||||||
│ ├── _exploit-scope.txt # Exploitation scope template
|
|
||||||
│ └── login-instructions.txt # Login flow template
|
|
||||||
├── pre-recon-code.txt # Code analysis
|
|
||||||
├── recon.txt # Reconnaissance
|
|
||||||
├── vuln-*.txt # Vulnerability assessment
|
|
||||||
├── exploit-*.txt # Exploitation
|
|
||||||
└── report-executive.txt # Executive reporting
|
|
||||||
scripts/ # Utility scripts
|
|
||||||
└── export-metrics.js # Export metrics to CSV
|
|
||||||
deliverables/ # Output directory (in target repo)
|
|
||||||
docs/ # Documentation
|
|
||||||
├── unified-audit-system-design.md
|
|
||||||
└── migration-guide.md
|
|
||||||
```
|
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
### Common Issues
|
### Common Issues
|
||||||
- **"Agent already completed"**: Use `--rerun <agent>` for explicit re-execution
|
|
||||||
- **"Missing prerequisites"**: Check `--status` and run prerequisite agents first
|
|
||||||
- **"No sessions found"**: Create a session with `--setup-only` first
|
|
||||||
- **"Repository not found"**: Ensure target local directory exists and is accessible
|
- **"Repository not found"**: Ensure target local directory exists and is accessible
|
||||||
- **"Too many test sessions"**: Use `--cleanup` to remove old sessions and free disk space
|
|
||||||
|
### Temporal & Docker Issues
|
||||||
|
- **"Temporal not ready"**: Wait for health check or run `docker compose logs temporal`
|
||||||
|
- **Worker not processing**: Check that the worker container is running (`docker compose ps`)
|
||||||
|
- **Reset workflow state**: `./shannon stop CLEAN=true` removes all Temporal data and volumes
|
||||||
|
- **Local apps unreachable**: Use `host.docker.internal` instead of `localhost` for URLs
|
||||||
|
- **Container permissions**: On Linux, you may need `sudo` for docker commands
|
||||||
|
|
||||||
### External Tool Dependencies
|
### External Tool Dependencies
|
||||||
Missing tools can be skipped using `--pipeline-testing` mode during development:
|
Missing tools can be skipped using `PIPELINE_TESTING=true` mode during development:
|
||||||
- `nmap` - Network scanning
|
- `nmap` - Network scanning
|
||||||
- `subfinder` - Subdomain discovery
|
- `subfinder` - Subdomain discovery
|
||||||
- `whatweb` - Web technology detection
|
- `whatweb` - Web technology detection
|
||||||
|
|
||||||
### Diagnostic & Utility Scripts
|
### Diagnostic & Utility Scripts
|
||||||
```bash
|
```bash
|
||||||
# Export metrics to CSV
|
# View Temporal workflow history
|
||||||
./scripts/export-metrics.js --session-id <id> --output metrics.csv
|
open http://localhost:8233
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: For recovery from corrupted state, simply delete `.shannon-store.json` or edit JSON files directly.
|
|
||||||
|
|||||||
+29
-14
@@ -1,3 +1,4 @@
|
|||||||
|
#
|
||||||
# Multi-stage Dockerfile for Pentest Agent
|
# Multi-stage Dockerfile for Pentest Agent
|
||||||
# Uses Chainguard Wolfi for minimal attack surface and supply chain security
|
# Uses Chainguard Wolfi for minimal attack surface and supply chain security
|
||||||
|
|
||||||
@@ -107,36 +108,50 @@ RUN addgroup -g 1001 pentest && \
|
|||||||
# Set working directory
|
# Set working directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy package.json and package-lock.json first for better caching
|
# Copy package files first for better caching
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
COPY .env ./.env
|
COPY mcp-server/package*.json ./mcp-server/
|
||||||
|
|
||||||
# Install Node.js dependencies as root
|
# Install Node.js dependencies (including devDependencies for TypeScript build)
|
||||||
RUN npm ci --only=production && \
|
RUN npm ci && \
|
||||||
npm install -g zx && \
|
cd mcp-server && npm ci && cd .. && \
|
||||||
npm install -g @anthropic-ai/claude-agent-sdk && \
|
|
||||||
npm cache clean --force
|
npm cache clean --force
|
||||||
|
|
||||||
# Copy application code
|
# Copy application source code
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
# Build TypeScript (mcp-server first, then main project)
|
||||||
|
RUN cd mcp-server && npm run build && cd .. && npm run build
|
||||||
|
|
||||||
|
# Remove devDependencies after build to reduce image size
|
||||||
|
RUN npm prune --production && \
|
||||||
|
cd mcp-server && npm prune --production
|
||||||
|
|
||||||
# Create directories for session data and ensure proper permissions
|
# Create directories for session data and ensure proper permissions
|
||||||
|
RUN mkdir -p /app/sessions /app/deliverables /app/repos /app/configs && \
|
||||||
RUN mkdir -p /app/sessions /app/deliverables /app/repos && \
|
mkdir -p /tmp/.cache /tmp/.config /tmp/.npm && \
|
||||||
chown -R pentest:pentest /app /app/repos && \
|
chmod 777 /app && \
|
||||||
chmod +x /app/shannon.mjs
|
chmod 777 /tmp/.cache && \
|
||||||
|
chmod 777 /tmp/.config && \
|
||||||
|
chmod 777 /tmp/.npm && \
|
||||||
|
chown -R pentest:pentest /app
|
||||||
|
|
||||||
# Switch to non-root user
|
# Switch to non-root user
|
||||||
USER pentest
|
USER pentest
|
||||||
|
|
||||||
|
# Configure Git to trust all directories
|
||||||
|
RUN git config --global --add safe.directory '*'
|
||||||
|
|
||||||
# Set environment variables
|
# Set environment variables
|
||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
ENV PATH="/usr/local/bin:$PATH"
|
ENV PATH="/usr/local/bin:$PATH"
|
||||||
ENV SHANNON_DOCKER=true
|
ENV SHANNON_DOCKER=true
|
||||||
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
|
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
|
||||||
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium-browser
|
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium-browser
|
||||||
|
ENV npm_config_cache=/tmp/.npm
|
||||||
|
ENV HOME=/tmp
|
||||||
|
ENV XDG_CACHE_HOME=/tmp/.cache
|
||||||
|
ENV XDG_CONFIG_HOME=/tmp/.config
|
||||||
|
|
||||||
# Set entrypoint
|
# Set entrypoint
|
||||||
ENTRYPOINT ["./shannon.mjs"]
|
ENTRYPOINT ["node", "dist/shannon.js"]
|
||||||
|
|||||||
@@ -1,95 +1,661 @@
|
|||||||
# Business Source License 1.1
|
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||||
|
Version 3, 19 November 2007
|
||||||
|
|
||||||
## Parameters
|
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
**Licensor:** Keygraph, Inc.
|
Preamble
|
||||||
|
|
||||||
**Licensed Work:** Shannon
|
The GNU Affero General Public License is a free, copyleft license for
|
||||||
The Licensed Work is (c) 2024 - 2025 Keygraph, Inc.
|
software and other kinds of works, specifically designed to ensure
|
||||||
|
cooperation with the community in the case of network server software.
|
||||||
|
|
||||||
**Additional Use Grant:** You may make use of the Licensed Work, provided that you may not use the Licensed Work for a Restricted Commercial Service.
|
The licenses for most software and other practical works are designed
|
||||||
|
to take away your freedom to share and change the works. By contrast,
|
||||||
|
our General Public Licenses are intended to guarantee your freedom to
|
||||||
|
share and change all versions of a program--to make sure it remains free
|
||||||
|
software for all its users.
|
||||||
|
|
||||||
A "Restricted Commercial Service" includes any of the following:
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
them if you wish), that you receive source code or can get it if you
|
||||||
|
want it, that you can change the software or use pieces of it in new
|
||||||
|
free programs, and that you know you can do these things.
|
||||||
|
|
||||||
1. **Commercial Penetration Testing Services**: Offering penetration testing, security auditing, or vulnerability assessment services to third parties (other than your employees and contractors) where Shannon is used as part of the service delivery.
|
Developers that use our General Public Licenses protect your rights
|
||||||
|
with two steps: (1) assert copyright on the software, and (2) offer
|
||||||
|
you this License which gives you legal permission to copy, distribute
|
||||||
|
and/or modify the software.
|
||||||
|
|
||||||
2. **Hosted Shannon Platform**: Operating a managed service or hosted platform that allows third parties (other than your employees and contractors) to access Shannon's functionality, APIs, or penetration testing capabilities through that managed service.
|
A secondary benefit of defending all users' freedom is that
|
||||||
|
improvements made in alternate versions of the program, if they
|
||||||
|
receive widespread use, become available for other developers to
|
||||||
|
incorporate. Many developers of free software are heartened and
|
||||||
|
encouraged by the resulting cooperation. However, in the case of
|
||||||
|
software used on network servers, this result may fail to come about.
|
||||||
|
The GNU General Public License permits making a modified version and
|
||||||
|
letting the public access it on a server without ever releasing its
|
||||||
|
source code to the public.
|
||||||
|
|
||||||
3. **Compliance and Audit Services**: Using Shannon to provide compliance audits, regulatory security assessments, or certification services (such as SOC2, PCI-DSS, ISO 27001, HIPAA, or similar frameworks) to third parties as a commercial offering.
|
The GNU Affero General Public License is designed specifically to
|
||||||
|
ensure that, in such cases, the modified source code becomes available
|
||||||
|
to the community. It requires the operator of a network server to
|
||||||
|
provide the source code of the modified version running there to the
|
||||||
|
users of that server. Therefore, public use of a modified version, on
|
||||||
|
a publicly accessible server, gives the public access to the source
|
||||||
|
code of the modified version.
|
||||||
|
|
||||||
4. **GRC Platform Integration**: Bundling, integrating, or embedding Shannon into a Governance, Risk, and Compliance (GRC) platform, security platform, or similar product that is sold, licensed, or provided as a service to third parties.
|
An older license, called the Affero General Public License and
|
||||||
|
published by Affero, was designed to accomplish similar goals. This is
|
||||||
|
a different license, not a version of the Affero GPL, but Affero has
|
||||||
|
released a new version of the Affero GPL which permits relicensing under
|
||||||
|
this license.
|
||||||
|
|
||||||
**Permitted Use:** For the avoidance of doubt, the following scenarios are explicitly permitted under this license and do not constitute a "Restricted Commercial Service":
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
- Using Shannon to test your own applications, infrastructure, or systems in any environment (development, staging, production)
|
TERMS AND CONDITIONS
|
||||||
- Using Shannon within your organization for internal security testing by your employees and contractors
|
|
||||||
- Academic research, security research, or educational purposes
|
|
||||||
- Contributing to Shannon's development or creating derivative works for your own use
|
|
||||||
- Using Shannon to learn penetration testing or security research skills
|
|
||||||
- Testing applications you are developing or maintaining, whether commercial or non-commercial
|
|
||||||
- Internal security teams using Shannon for their organization's security program
|
|
||||||
|
|
||||||
**Not Permitted:** For the avoidance of doubt, the following scenarios are not permitted under this license:
|
0. Definitions.
|
||||||
|
|
||||||
- Security consulting firms using Shannon to deliver penetration testing services to clients
|
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||||
- Managed security service providers (MSSPs) using Shannon as part of their service offerings
|
|
||||||
- Offering "Pentesting-as-a-Service" powered by Shannon
|
|
||||||
- Including Shannon in a commercial security scanning or testing product sold to customers
|
|
||||||
- Building a multi-tenant Shannon platform that customers can access
|
|
||||||
- Using Shannon to generate compliance reports or certifications that you sell to third parties
|
|
||||||
|
|
||||||
**Change Date:** 4 years after release
|
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||||
|
works, such as semiconductor masks.
|
||||||
|
|
||||||
**Change License:** Apache License, Version 2.0
|
"The Program" refers to any copyrightable work licensed under this
|
||||||
|
License. Each licensee is addressed as "you". "Licensees" and
|
||||||
|
"recipients" may be individuals or organizations.
|
||||||
|
|
||||||
---
|
To "modify" a work means to copy from or adapt all or part of the work
|
||||||
|
in a fashion requiring copyright permission, other than the making of an
|
||||||
|
exact copy. The resulting work is called a "modified version" of the
|
||||||
|
earlier work or a work "based on" the earlier work.
|
||||||
|
|
||||||
## Notice
|
A "covered work" means either the unmodified Program or a work based
|
||||||
|
on the Program.
|
||||||
|
|
||||||
The Business Source License (this document, or the "License") is not an Open Source license. However, the Licensed Work will eventually be made available under an Open Source License, as stated in this License.
|
To "propagate" a work means to do anything with it that, without
|
||||||
|
permission, would make you directly or secondarily liable for
|
||||||
|
infringement under applicable copyright law, except executing it on a
|
||||||
|
computer or modifying a private copy. Propagation includes copying,
|
||||||
|
distribution (with or without modification), making available to the
|
||||||
|
public, and in some countries other activities as well.
|
||||||
|
|
||||||
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
To "convey" a work means any kind of propagation that enables other
|
||||||
"Business Source License" is a trademark of MariaDB Corporation Ab.
|
parties to make or receive copies. Mere interaction with a user through
|
||||||
|
a computer network, with no transfer of a copy, is not conveying.
|
||||||
|
|
||||||
---
|
An interactive user interface displays "Appropriate Legal Notices"
|
||||||
|
to the extent that it includes a convenient and prominently visible
|
||||||
|
feature that (1) displays an appropriate copyright notice, and (2)
|
||||||
|
tells the user that there is no warranty for the work (except to the
|
||||||
|
extent that warranties are provided), that licensees may convey the
|
||||||
|
work under this License, and how to view a copy of this License. If
|
||||||
|
the interface presents a list of user commands or options, such as a
|
||||||
|
menu, a prominent item in the list meets this criterion.
|
||||||
|
|
||||||
## Terms
|
1. Source Code.
|
||||||
|
|
||||||
The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensor may make an Additional Use Grant, above, permitting limited production use.
|
The "source code" for a work means the preferred form of the work
|
||||||
|
for making modifications to it. "Object code" means any non-source
|
||||||
|
form of a work.
|
||||||
|
|
||||||
Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and the rights granted in the paragraph above terminate.
|
A "Standard Interface" means an interface that either is an official
|
||||||
|
standard defined by a recognized standards body, or, in the case of
|
||||||
|
interfaces specified for a particular programming language, one that
|
||||||
|
is widely used among developers working in that language.
|
||||||
|
|
||||||
If your use of the Licensed Work does not comply with the requirements currently in effect as described in this License, you must purchase a commercial license from the Licensor, its affiliated entities, or authorized resellers, or you must refrain from using the Licensed Work.
|
The "System Libraries" of an executable work include anything, other
|
||||||
|
than the work as a whole, that (a) is included in the normal form of
|
||||||
|
packaging a Major Component, but which is not part of that Major
|
||||||
|
Component, and (b) serves only to enable use of the work with that
|
||||||
|
Major Component, or to implement a Standard Interface for which an
|
||||||
|
implementation is available to the public in source code form. A
|
||||||
|
"Major Component", in this context, means a major essential component
|
||||||
|
(kernel, window system, and so on) of the specific operating system
|
||||||
|
(if any) on which the executable work runs, or a compiler used to
|
||||||
|
produce the work, or an object code interpreter used to run it.
|
||||||
|
|
||||||
All copies of the original and modified Licensed Work, and derivative works of the Licensed Work, are subject to this License. This License applies separately for each version of the Licensed Work and the Change Date may vary for each version of the Licensed Work released by Licensor.
|
The "Corresponding Source" for a work in object code form means all
|
||||||
|
the source code needed to generate, install, and (for an executable
|
||||||
|
work) run the object code and to modify the work, including scripts to
|
||||||
|
control those activities. However, it does not include the work's
|
||||||
|
System Libraries, or general-purpose tools or generally available free
|
||||||
|
programs which are used unmodified in performing those activities but
|
||||||
|
which are not part of the work. For example, Corresponding Source
|
||||||
|
includes interface definition files associated with source files for
|
||||||
|
the work, and the source code for shared libraries and dynamically
|
||||||
|
linked subprograms that the work is specifically designed to require,
|
||||||
|
such as by intimate data communication or control flow between those
|
||||||
|
subprograms and other parts of the work.
|
||||||
|
|
||||||
You must conspicuously display this License on each original or modified copy of the Licensed Work. If you receive the Licensed Work in original or modified form from a third party, the terms and conditions set forth in this License apply to your use of that work.
|
The Corresponding Source need not include anything that users
|
||||||
|
can regenerate automatically from other parts of the Corresponding
|
||||||
|
Source.
|
||||||
|
|
||||||
Any use of the Licensed Work in violation of this License will automatically terminate your rights under this License for the current and all other versions of the Licensed Work.
|
The Corresponding Source for a work in source code form is that
|
||||||
|
same work.
|
||||||
|
|
||||||
This License does not grant you any right in any trademark or logo of Licensor or its affiliates (provided that you may use a trademark or logo of Licensor as expressly required by this License).
|
2. Basic Permissions.
|
||||||
|
|
||||||
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE.
|
All rights granted under this License are granted for the term of
|
||||||
|
copyright on the Program, and are irrevocable provided the stated
|
||||||
|
conditions are met. This License explicitly affirms your unlimited
|
||||||
|
permission to run the unmodified Program. The output from running a
|
||||||
|
covered work is covered by this License only if the output, given its
|
||||||
|
content, constitutes a covered work. This License acknowledges your
|
||||||
|
rights of fair use or other equivalent, as provided by copyright law.
|
||||||
|
|
||||||
MariaDB hereby grants you permission to use this License's text to license your works, and to refer to it using the trademark "Business Source License", as long as you comply with the Covenants of Licensor below.
|
You may make, run and propagate covered works that you do not
|
||||||
|
convey, without conditions so long as your license otherwise remains
|
||||||
|
in force. You may convey covered works to others for the sole purpose
|
||||||
|
of having them make modifications exclusively for you, or provide you
|
||||||
|
with facilities for running those works, provided that you comply with
|
||||||
|
the terms of this License in conveying all material for which you do
|
||||||
|
not control copyright. Those thus making or running the covered works
|
||||||
|
for you must do so exclusively on your behalf, under your direction
|
||||||
|
and control, on terms that prohibit them from making any copies of
|
||||||
|
your copyrighted material outside their relationship with you.
|
||||||
|
|
||||||
---
|
Conveying under any other circumstances is permitted solely under
|
||||||
|
the conditions stated below. Sublicensing is not allowed; section 10
|
||||||
|
makes it unnecessary.
|
||||||
|
|
||||||
## Covenants of Licensor
|
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||||
|
|
||||||
In consideration of the right to use this License's text and the "Business Source License" name and trademark, Licensor covenants to MariaDB, and to all other recipients of the licensed work to be provided by Licensor:
|
No covered work shall be deemed part of an effective technological
|
||||||
|
measure under any applicable law fulfilling obligations under article
|
||||||
|
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||||
|
similar laws prohibiting or restricting circumvention of such
|
||||||
|
measures.
|
||||||
|
|
||||||
1. To specify as the Change License the GPL Version 2.0 or any later version, or a license that is compatible with GPL Version 2.0 or a later version, where "compatible" means that software provided under the Change License can be included in a program with software provided under GPL Version 2.0 or a later version. Licensor may specify additional Change Licenses without limitation.
|
When you convey a covered work, you waive any legal power to forbid
|
||||||
|
circumvention of technological measures to the extent such circumvention
|
||||||
|
is effected by exercising rights under this License with respect to
|
||||||
|
the covered work, and you disclaim any intention to limit operation or
|
||||||
|
modification of the work as a means of enforcing, against the work's
|
||||||
|
users, your or third parties' legal rights to forbid circumvention of
|
||||||
|
technological measures.
|
||||||
|
|
||||||
2. To either: (a) specify an additional grant of rights to use that does not impose any additional restriction on the right granted in this License, as the Additional Use Grant; or (b) insert the text "None".
|
4. Conveying Verbatim Copies.
|
||||||
|
|
||||||
3. To specify a Change Date.
|
You may convey verbatim copies of the Program's source code as you
|
||||||
|
receive it, in any medium, provided that you conspicuously and
|
||||||
|
appropriately publish on each copy an appropriate copyright notice;
|
||||||
|
keep intact all notices stating that this License and any
|
||||||
|
non-permissive terms added in accord with section 7 apply to the code;
|
||||||
|
keep intact all notices of the absence of any warranty; and give all
|
||||||
|
recipients a copy of this License along with the Program.
|
||||||
|
|
||||||
4. Not to modify this License in any other way.
|
You may charge any price or no price for each copy that you convey,
|
||||||
|
and you may offer support or warranty protection for a fee.
|
||||||
|
|
||||||
---
|
5. Conveying Modified Source Versions.
|
||||||
|
|
||||||
## Questions?
|
You may convey a work based on the Program, or the modifications to
|
||||||
|
produce it from the Program, in the form of source code under the
|
||||||
|
terms of section 4, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
Not sure your use case is covered by this license? Email [legal@keygraph.io](mailto:legal@keygraph.io).
|
a) The work must carry prominent notices stating that you modified
|
||||||
**Shannon Pro** is our commercial edition with **unlimited commercial use**.
|
it, and giving a relevant date.
|
||||||
|
|
||||||
|
b) The work must carry prominent notices stating that it is
|
||||||
|
released under this License and any conditions added under section
|
||||||
|
7. This requirement modifies the requirement in section 4 to
|
||||||
|
"keep intact all notices".
|
||||||
|
|
||||||
|
c) You must license the entire work, as a whole, under this
|
||||||
|
License to anyone who comes into possession of a copy. This
|
||||||
|
License will therefore apply, along with any applicable section 7
|
||||||
|
additional terms, to the whole of the work, and all its parts,
|
||||||
|
regardless of how they are packaged. This License gives no
|
||||||
|
permission to license the work in any other way, but it does not
|
||||||
|
invalidate such permission if you have separately received it.
|
||||||
|
|
||||||
|
d) If the work has interactive user interfaces, each must display
|
||||||
|
Appropriate Legal Notices; however, if the Program has interactive
|
||||||
|
interfaces that do not display Appropriate Legal Notices, your
|
||||||
|
work need not make them do so.
|
||||||
|
|
||||||
|
A compilation of a covered work with other separate and independent
|
||||||
|
works, which are not by their nature extensions of the covered work,
|
||||||
|
and which are not combined with it such as to form a larger program,
|
||||||
|
in or on a volume of a storage or distribution medium, is called an
|
||||||
|
"aggregate" if the compilation and its resulting copyright are not
|
||||||
|
used to limit the access or legal rights of the compilation's users
|
||||||
|
beyond what the individual works permit. Inclusion of a covered work
|
||||||
|
in an aggregate does not cause this License to apply to the other
|
||||||
|
parts of the aggregate.
|
||||||
|
|
||||||
|
6. Conveying Non-Source Forms.
|
||||||
|
|
||||||
|
You may convey a covered work in object code form under the terms
|
||||||
|
of sections 4 and 5, provided that you also convey the
|
||||||
|
machine-readable Corresponding Source under the terms of this License,
|
||||||
|
in one of these ways:
|
||||||
|
|
||||||
|
a) Convey the object code in, or embodied in, a physical product
|
||||||
|
(including a physical distribution medium), accompanied by the
|
||||||
|
Corresponding Source fixed on a durable physical medium
|
||||||
|
customarily used for software interchange.
|
||||||
|
|
||||||
|
b) Convey the object code in, or embodied in, a physical product
|
||||||
|
(including a physical distribution medium), accompanied by a
|
||||||
|
written offer, valid for at least three years and valid for as
|
||||||
|
long as you offer spare parts or customer support for that product
|
||||||
|
model, to give anyone who possesses the object code either (1) a
|
||||||
|
copy of the Corresponding Source for all the software in the
|
||||||
|
product that is covered by this License, on a durable physical
|
||||||
|
medium customarily used for software interchange, for a price no
|
||||||
|
more than your reasonable cost of physically performing this
|
||||||
|
conveying of source, or (2) access to copy the
|
||||||
|
Corresponding Source from a network server at no charge.
|
||||||
|
|
||||||
|
c) Convey individual copies of the object code with a copy of the
|
||||||
|
written offer to provide the Corresponding Source. This
|
||||||
|
alternative is allowed only occasionally and noncommercially, and
|
||||||
|
only if you received the object code with such an offer, in accord
|
||||||
|
with subsection 6b.
|
||||||
|
|
||||||
|
d) Convey the object code by offering access from a designated
|
||||||
|
place (gratis or for a charge), and offer equivalent access to the
|
||||||
|
Corresponding Source in the same way through the same place at no
|
||||||
|
further charge. You need not require recipients to copy the
|
||||||
|
Corresponding Source along with the object code. If the place to
|
||||||
|
copy the object code is a network server, the Corresponding Source
|
||||||
|
may be on a different server (operated by you or a third party)
|
||||||
|
that supports equivalent copying facilities, provided you maintain
|
||||||
|
clear directions next to the object code saying where to find the
|
||||||
|
Corresponding Source. Regardless of what server hosts the
|
||||||
|
Corresponding Source, you remain obligated to ensure that it is
|
||||||
|
available for as long as needed to satisfy these requirements.
|
||||||
|
|
||||||
|
e) Convey the object code using peer-to-peer transmission, provided
|
||||||
|
you inform other peers where the object code and Corresponding
|
||||||
|
Source of the work are being offered to the general public at no
|
||||||
|
charge under subsection 6d.
|
||||||
|
|
||||||
|
A separable portion of the object code, whose source code is excluded
|
||||||
|
from the Corresponding Source as a System Library, need not be
|
||||||
|
included in conveying the object code work.
|
||||||
|
|
||||||
|
A "User Product" is either (1) a "consumer product", which means any
|
||||||
|
tangible personal property which is normally used for personal, family,
|
||||||
|
or household purposes, or (2) anything designed or sold for incorporation
|
||||||
|
into a dwelling. In determining whether a product is a consumer product,
|
||||||
|
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||||
|
product received by a particular user, "normally used" refers to a
|
||||||
|
typical or common use of that class of product, regardless of the status
|
||||||
|
of the particular user or of the way in which the particular user
|
||||||
|
actually uses, or expects or is expected to use, the product. A product
|
||||||
|
is a consumer product regardless of whether the product has substantial
|
||||||
|
commercial, industrial or non-consumer uses, unless such uses represent
|
||||||
|
the only significant mode of use of the product.
|
||||||
|
|
||||||
|
"Installation Information" for a User Product means any methods,
|
||||||
|
procedures, authorization keys, or other information required to install
|
||||||
|
and execute modified versions of a covered work in that User Product from
|
||||||
|
a modified version of its Corresponding Source. The information must
|
||||||
|
suffice to ensure that the continued functioning of the modified object
|
||||||
|
code is in no case prevented or interfered with solely because
|
||||||
|
modification has been made.
|
||||||
|
|
||||||
|
If you convey an object code work under this section in, or with, or
|
||||||
|
specifically for use in, a User Product, and the conveying occurs as
|
||||||
|
part of a transaction in which the right of possession and use of the
|
||||||
|
User Product is transferred to the recipient in perpetuity or for a
|
||||||
|
fixed term (regardless of how the transaction is characterized), the
|
||||||
|
Corresponding Source conveyed under this section must be accompanied
|
||||||
|
by the Installation Information. But this requirement does not apply
|
||||||
|
if neither you nor any third party retains the ability to install
|
||||||
|
modified object code on the User Product (for example, the work has
|
||||||
|
been installed in ROM).
|
||||||
|
|
||||||
|
The requirement to provide Installation Information does not include a
|
||||||
|
requirement to continue to provide support service, warranty, or updates
|
||||||
|
for a work that has been modified or installed by the recipient, or for
|
||||||
|
the User Product in which it has been modified or installed. Access to a
|
||||||
|
network may be denied when the modification itself materially and
|
||||||
|
adversely affects the operation of the network or violates the rules and
|
||||||
|
protocols for communication across the network.
|
||||||
|
|
||||||
|
Corresponding Source conveyed, and Installation Information provided,
|
||||||
|
in accord with this section must be in a format that is publicly
|
||||||
|
documented (and with an implementation available to the public in
|
||||||
|
source code form), and must require no special password or key for
|
||||||
|
unpacking, reading or copying.
|
||||||
|
|
||||||
|
7. Additional Terms.
|
||||||
|
|
||||||
|
"Additional permissions" are terms that supplement the terms of this
|
||||||
|
License by making exceptions from one or more of its conditions.
|
||||||
|
Additional permissions that are applicable to the entire Program shall
|
||||||
|
be treated as though they were included in this License, to the extent
|
||||||
|
that they are valid under applicable law. If additional permissions
|
||||||
|
apply only to part of the Program, that part may be used separately
|
||||||
|
under those permissions, but the entire Program remains governed by
|
||||||
|
this License without regard to the additional permissions.
|
||||||
|
|
||||||
|
When you convey a copy of a covered work, you may at your option
|
||||||
|
remove any additional permissions from that copy, or from any part of
|
||||||
|
it. (Additional permissions may be written to require their own
|
||||||
|
removal in certain cases when you modify the work.) You may place
|
||||||
|
additional permissions on material, added by you to a covered work,
|
||||||
|
for which you have or can give appropriate copyright permission.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, for material you
|
||||||
|
add to a covered work, you may (if authorized by the copyright holders of
|
||||||
|
that material) supplement the terms of this License with terms:
|
||||||
|
|
||||||
|
a) Disclaiming warranty or limiting liability differently from the
|
||||||
|
terms of sections 15 and 16 of this License; or
|
||||||
|
|
||||||
|
b) Requiring preservation of specified reasonable legal notices or
|
||||||
|
author attributions in that material or in the Appropriate Legal
|
||||||
|
Notices displayed by works containing it; or
|
||||||
|
|
||||||
|
c) Prohibiting misrepresentation of the origin of that material, or
|
||||||
|
requiring that modified versions of such material be marked in
|
||||||
|
reasonable ways as different from the original version; or
|
||||||
|
|
||||||
|
d) Limiting the use for publicity purposes of names of licensors or
|
||||||
|
authors of the material; or
|
||||||
|
|
||||||
|
e) Declining to grant rights under trademark law for use of some
|
||||||
|
trade names, trademarks, or service marks; or
|
||||||
|
|
||||||
|
f) Requiring indemnification of licensors and authors of that
|
||||||
|
material by anyone who conveys the material (or modified versions of
|
||||||
|
it) with contractual assumptions of liability to the recipient, for
|
||||||
|
any liability that these contractual assumptions directly impose on
|
||||||
|
those licensors and authors.
|
||||||
|
|
||||||
|
All other non-permissive additional terms are considered "further
|
||||||
|
restrictions" within the meaning of section 10. If the Program as you
|
||||||
|
received it, or any part of it, contains a notice stating that it is
|
||||||
|
governed by this License along with a term that is a further
|
||||||
|
restriction, you may remove that term. If a license document contains
|
||||||
|
a further restriction but permits relicensing or conveying under this
|
||||||
|
License, you may add to a covered work material governed by the terms
|
||||||
|
of that license document, provided that the further restriction does
|
||||||
|
not survive such relicensing or conveying.
|
||||||
|
|
||||||
|
If you add terms to a covered work in accord with this section, you
|
||||||
|
must place, in the relevant source files, a statement of the
|
||||||
|
additional terms that apply to those files, or a notice indicating
|
||||||
|
where to find the applicable terms.
|
||||||
|
|
||||||
|
Additional terms, permissive or non-permissive, may be stated in the
|
||||||
|
form of a separately written license, or stated as exceptions;
|
||||||
|
the above requirements apply either way.
|
||||||
|
|
||||||
|
8. Termination.
|
||||||
|
|
||||||
|
You may not propagate or modify a covered work except as expressly
|
||||||
|
provided under this License. Any attempt otherwise to propagate or
|
||||||
|
modify it is void, and will automatically terminate your rights under
|
||||||
|
this License (including any patent licenses granted under the third
|
||||||
|
paragraph of section 11).
|
||||||
|
|
||||||
|
However, if you cease all violation of this License, then your
|
||||||
|
license from a particular copyright holder is reinstated (a)
|
||||||
|
provisionally, unless and until the copyright holder explicitly and
|
||||||
|
finally terminates your license, and (b) permanently, if the copyright
|
||||||
|
holder fails to notify you of the violation by some reasonable means
|
||||||
|
prior to 60 days after the cessation.
|
||||||
|
|
||||||
|
Moreover, your license from a particular copyright holder is
|
||||||
|
reinstated permanently if the copyright holder notifies you of the
|
||||||
|
violation by some reasonable means, this is the first time you have
|
||||||
|
received notice of violation of this License (for any work) from that
|
||||||
|
copyright holder, and you cure the violation prior to 30 days after
|
||||||
|
your receipt of the notice.
|
||||||
|
|
||||||
|
Termination of your rights under this section does not terminate the
|
||||||
|
licenses of parties who have received copies or rights from you under
|
||||||
|
this License. If your rights have been terminated and not permanently
|
||||||
|
reinstated, you do not qualify to receive new licenses for the same
|
||||||
|
material under section 10.
|
||||||
|
|
||||||
|
9. Acceptance Not Required for Having Copies.
|
||||||
|
|
||||||
|
You are not required to accept this License in order to receive or
|
||||||
|
run a copy of the Program. Ancillary propagation of a covered work
|
||||||
|
occurring solely as a consequence of using peer-to-peer transmission
|
||||||
|
to receive a copy likewise does not require acceptance. However,
|
||||||
|
nothing other than this License grants you permission to propagate or
|
||||||
|
modify any covered work. These actions infringe copyright if you do
|
||||||
|
not accept this License. Therefore, by modifying or propagating a
|
||||||
|
covered work, you indicate your acceptance of this License to do so.
|
||||||
|
|
||||||
|
10. Automatic Licensing of Downstream Recipients.
|
||||||
|
|
||||||
|
Each time you convey a covered work, the recipient automatically
|
||||||
|
receives a license from the original licensors, to run, modify and
|
||||||
|
propagate that work, subject to this License. You are not responsible
|
||||||
|
for enforcing compliance by third parties with this License.
|
||||||
|
|
||||||
|
An "entity transaction" is a transaction transferring control of an
|
||||||
|
organization, or substantially all assets of one, or subdividing an
|
||||||
|
organization, or merging organizations. If propagation of a covered
|
||||||
|
work results from an entity transaction, each party to that
|
||||||
|
transaction who receives a copy of the work also receives whatever
|
||||||
|
licenses to the work the party's predecessor in interest had or could
|
||||||
|
give under the previous paragraph, plus a right to possession of the
|
||||||
|
Corresponding Source of the work from the predecessor in interest, if
|
||||||
|
the predecessor has it or can get it with reasonable efforts.
|
||||||
|
|
||||||
|
You may not impose any further restrictions on the exercise of the
|
||||||
|
rights granted or affirmed under this License. For example, you may
|
||||||
|
not impose a license fee, royalty, or other charge for exercise of
|
||||||
|
rights granted under this License, and you may not initiate litigation
|
||||||
|
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||||
|
any patent claim is infringed by making, using, selling, offering for
|
||||||
|
sale, or importing the Program or any portion of it.
|
||||||
|
|
||||||
|
11. Patents.
|
||||||
|
|
||||||
|
A "contributor" is a copyright holder who authorizes use under this
|
||||||
|
License of the Program or a work on which the Program is based. The
|
||||||
|
work thus licensed is called the contributor's "contributor version".
|
||||||
|
|
||||||
|
A contributor's "essential patent claims" are all patent claims
|
||||||
|
owned or controlled by the contributor, whether already acquired or
|
||||||
|
hereafter acquired, that would be infringed by some manner, permitted
|
||||||
|
by this License, of making, using, or selling its contributor version,
|
||||||
|
but do not include claims that would be infringed only as a
|
||||||
|
consequence of further modification of the contributor version. For
|
||||||
|
purposes of this definition, "control" includes the right to grant
|
||||||
|
patent sublicenses in a manner consistent with the requirements of
|
||||||
|
this License.
|
||||||
|
|
||||||
|
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||||
|
patent license under the contributor's essential patent claims, to
|
||||||
|
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||||
|
propagate the contents of its contributor version.
|
||||||
|
|
||||||
|
In the following three paragraphs, a "patent license" is any express
|
||||||
|
agreement or commitment, however denominated, not to enforce a patent
|
||||||
|
(such as an express permission to practice a patent or covenant not to
|
||||||
|
sue for patent infringement). To "grant" such a patent license to a
|
||||||
|
party means to make such an agreement or commitment not to enforce a
|
||||||
|
patent against the party.
|
||||||
|
|
||||||
|
If you convey a covered work, knowingly relying on a patent license,
|
||||||
|
and the Corresponding Source of the work is not available for anyone
|
||||||
|
to copy, free of charge and under the terms of this License, through a
|
||||||
|
publicly available network server or other readily accessible means,
|
||||||
|
then you must either (1) cause the Corresponding Source to be so
|
||||||
|
available, or (2) arrange to deprive yourself of the benefit of the
|
||||||
|
patent license for this particular work, or (3) arrange, in a manner
|
||||||
|
consistent with the requirements of this License, to extend the patent
|
||||||
|
license to downstream recipients. "Knowingly relying" means you have
|
||||||
|
actual knowledge that, but for the patent license, your conveying the
|
||||||
|
covered work in a country, or your recipient's use of the covered work
|
||||||
|
in a country, would infringe one or more identifiable patents in that
|
||||||
|
country that you have reason to believe are valid.
|
||||||
|
|
||||||
|
If, pursuant to or in connection with a single transaction or
|
||||||
|
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||||
|
covered work, and grant a patent license to some of the parties
|
||||||
|
receiving the covered work authorizing them to use, propagate, modify
|
||||||
|
or convey a specific copy of the covered work, then the patent license
|
||||||
|
you grant is automatically extended to all recipients of the covered
|
||||||
|
work and works based on it.
|
||||||
|
|
||||||
|
A patent license is "discriminatory" if it does not include within
|
||||||
|
the scope of its coverage, prohibits the exercise of, or is
|
||||||
|
conditioned on the non-exercise of one or more of the rights that are
|
||||||
|
specifically granted under this License. You may not convey a covered
|
||||||
|
work if you are a party to an arrangement with a third party that is
|
||||||
|
in the business of distributing software, under which you make payment
|
||||||
|
to the third party based on the extent of your activity of conveying
|
||||||
|
the work, and under which the third party grants, to any of the
|
||||||
|
parties who would receive the covered work from you, a discriminatory
|
||||||
|
patent license (a) in connection with copies of the covered work
|
||||||
|
conveyed by you (or copies made from those copies), or (b) primarily
|
||||||
|
for and in connection with specific products or compilations that
|
||||||
|
contain the covered work, unless you entered into that arrangement,
|
||||||
|
or that patent license was granted, prior to 28 March 2007.
|
||||||
|
|
||||||
|
Nothing in this License shall be construed as excluding or limiting
|
||||||
|
any implied license or other defenses to infringement that may
|
||||||
|
otherwise be available to you under applicable patent law.
|
||||||
|
|
||||||
|
12. No Surrender of Others' Freedom.
|
||||||
|
|
||||||
|
If conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot convey a
|
||||||
|
covered work so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you may
|
||||||
|
not convey it at all. For example, if you agree to terms that obligate you
|
||||||
|
to collect a royalty for further conveying from those to whom you convey
|
||||||
|
the Program, the only way you could satisfy both those terms and this
|
||||||
|
License would be to refrain entirely from conveying the Program.
|
||||||
|
|
||||||
|
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, if you modify the
|
||||||
|
Program, your modified version must prominently offer all users
|
||||||
|
interacting with it remotely through a computer network (if your version
|
||||||
|
supports such interaction) an opportunity to receive the Corresponding
|
||||||
|
Source of your version by providing access to the Corresponding Source
|
||||||
|
from a network server at no charge, through some standard or customary
|
||||||
|
means of facilitating copying of software. This Corresponding Source
|
||||||
|
shall include the Corresponding Source for any work covered by version 3
|
||||||
|
of the GNU General Public License that is incorporated pursuant to the
|
||||||
|
following paragraph.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, you have
|
||||||
|
permission to link or combine any covered work with a work licensed
|
||||||
|
under version 3 of the GNU General Public License into a single
|
||||||
|
combined work, and to convey the resulting work. The terms of this
|
||||||
|
License will continue to apply to the part which is the covered work,
|
||||||
|
but the work with which it is combined will remain governed by version
|
||||||
|
3 of the GNU General Public License.
|
||||||
|
|
||||||
|
14. Revised Versions of this License.
|
||||||
|
|
||||||
|
The Free Software Foundation may publish revised and/or new versions of
|
||||||
|
the GNU Affero General Public License from time to time. Such new versions
|
||||||
|
will be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the
|
||||||
|
Program specifies that a certain numbered version of the GNU Affero General
|
||||||
|
Public License "or any later version" applies to it, you have the
|
||||||
|
option of following the terms and conditions either of that numbered
|
||||||
|
version or of any later version published by the Free Software
|
||||||
|
Foundation. If the Program does not specify a version number of the
|
||||||
|
GNU Affero General Public License, you may choose any version ever published
|
||||||
|
by the Free Software Foundation.
|
||||||
|
|
||||||
|
If the Program specifies that a proxy can decide which future
|
||||||
|
versions of the GNU Affero General Public License can be used, that proxy's
|
||||||
|
public statement of acceptance of a version permanently authorizes you
|
||||||
|
to choose that version for the Program.
|
||||||
|
|
||||||
|
Later license versions may give you additional or different
|
||||||
|
permissions. However, no additional obligations are imposed on any
|
||||||
|
author or copyright holder as a result of your choosing to follow a
|
||||||
|
later version.
|
||||||
|
|
||||||
|
15. Disclaimer of Warranty.
|
||||||
|
|
||||||
|
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||||
|
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||||
|
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||||
|
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||||
|
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||||
|
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
16. Limitation of Liability.
|
||||||
|
|
||||||
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||||
|
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||||
|
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||||
|
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||||
|
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||||
|
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||||
|
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGES.
|
||||||
|
|
||||||
|
17. Interpretation of Sections 15 and 16.
|
||||||
|
|
||||||
|
If the disclaimer of warranty and limitation of liability provided
|
||||||
|
above cannot be given local legal effect according to their terms,
|
||||||
|
reviewing courts shall apply local law that most closely approximates
|
||||||
|
an absolute waiver of all civil liability in connection with the
|
||||||
|
Program, unless a warranty or assumption of liability accompanies a
|
||||||
|
copy of the Program in return for a fee.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
state the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as published
|
||||||
|
by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If your software can interact with users remotely through a computer
|
||||||
|
network, you should also make sure that it provides a way for users to
|
||||||
|
get its source. For example, if your program is a web application, its
|
||||||
|
interface could display a "Source" link that leads users to an archive
|
||||||
|
of the code. There are many ways you could offer source, and different
|
||||||
|
solutions will be better for different programs; see section 13 for the
|
||||||
|
specific requirements.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or school,
|
||||||
|
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||||
|
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||||
|
<https://www.gnu.org/licenses/>.
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
> [!NOTE]
|
||||||
|
> **[Shannon Lite achieves a 96.15% success rate on a hint-free, source-aware XBOW benchmark. →](https://github.com/KeygraphHQ/shannon/tree/main/xben-benchmark-results/README.md)**
|
||||||
|
|
||||||
|
|
||||||
<div align="center">
|
<div align="center">
|
||||||
|
|
||||||
@@ -11,7 +14,7 @@ Every Claude (coder) deserves their Shannon.
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[Website](https://keygraph.io) • [Discord](https://discord.gg/aWY8rRUCxc)
|
[Website](https://keygraph.io) • [Discord](https://discord.gg/KAqzSHHpRt)
|
||||||
|
|
||||||
---
|
---
|
||||||
</div>
|
</div>
|
||||||
@@ -37,12 +40,6 @@ Shannon closes this gap by acting as your on-demand whitebox pentester. It doesn
|
|||||||
>
|
>
|
||||||
> ➡️ **[Learn more about the Keygraph Platform](https://keygraph.io)**
|
> ➡️ **[Learn more about the Keygraph Platform](https://keygraph.io)**
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> **CTF Mode**: For Capture-The-Flag challenges, use the `ctf-mode` branch which includes optimized prompts for flag extraction and CTF-specific testing workflows.
|
|
||||||
> ```bash
|
|
||||||
> git checkout ctf-mode
|
|
||||||
> ```
|
|
||||||
|
|
||||||
## 🎬 See Shannon in Action
|
## 🎬 See Shannon in Action
|
||||||
|
|
||||||
**Real Results**: Shannon discovered 20+ critical vulnerabilities in OWASP Juice Shop, including complete auth bypass and database exfiltration. [See full report →](sample-reports/shannon-report-juice-shop.md)
|
**Real Results**: Shannon discovered 20+ critical vulnerabilities in OWASP Juice Shop, including complete auth bypass and database exfiltration. [See full report →](sample-reports/shannon-report-juice-shop.md)
|
||||||
@@ -58,18 +55,21 @@ Shannon closes this gap by acting as your on-demand whitebox pentester. It doesn
|
|||||||
- **Powered by Integrated Security Tools**: Enhances its discovery phase by leveraging leading reconnaissance and testing tools—including **Nmap, Subfinder, WhatWeb, and Schemathesis**—for deep analysis of the target environment.
|
- **Powered by Integrated Security Tools**: Enhances its discovery phase by leveraging leading reconnaissance and testing tools—including **Nmap, Subfinder, WhatWeb, and Schemathesis**—for deep analysis of the target environment.
|
||||||
- **Parallel Processing for Faster Results**: Get your report faster. The system parallelizes the most time-intensive phases, running analysis and exploitation for all vulnerability types concurrently.
|
- **Parallel Processing for Faster Results**: Get your report faster. The system parallelizes the most time-intensive phases, running analysis and exploitation for all vulnerability types concurrently.
|
||||||
|
|
||||||
|
|
||||||
## 📦 Product Line
|
## 📦 Product Line
|
||||||
|
|
||||||
Shannon is available in two editions:
|
Shannon is available in two editions:
|
||||||
|
|
||||||
| Edition | License | Best For |
|
| Edition | License | Best For |
|
||||||
|---------|---------|----------|
|
|---------|---------|----------|
|
||||||
| **Shannon Lite** | BSL | Security teams, independent researchers, testing your own applications |
|
| **Shannon Lite** | AGPL-3.0 | Security teams, independent researchers, testing your own applications |
|
||||||
| **Shannon Pro** | Commercial | Enterprises requiring advanced features, CI/CD integration, and dedicated support |
|
| **Shannon Pro** | Commercial | Enterprises requiring advanced features, CI/CD integration, and dedicated support |
|
||||||
|
|
||||||
> **This repository contains Shannon Lite,** which utilizes our core autonomous AI pentesting framework. **Shannon Pro** enhances this foundation with an advanced, LLM-powered data flow analysis engine (inspired by the [LLMDFA paper](https://arxiv.org/abs/2402.10754)) for enterprise-grade code analysis and deeper vulnerability detection.
|
> **This repository contains Shannon Lite,** which utilizes our core autonomous AI pentesting framework. **Shannon Pro** enhances this foundation with an advanced, LLM-powered data flow analysis engine (inspired by the [LLMDFA paper](https://arxiv.org/abs/2402.10754)) for enterprise-grade code analysis and deeper vulnerability detection.
|
||||||
>
|
|
||||||
|
> [!IMPORTANT]
|
||||||
|
> **White-box only.** Shannon Lite is designed for **white-box (source-available)** application security testing.
|
||||||
|
> It expects access to your application's source code and repository layout.
|
||||||
|
|
||||||
[See feature comparison](./SHANNON-PRO.md)
|
[See feature comparison](./SHANNON-PRO.md)
|
||||||
## 📑 Table of Contents
|
## 📑 Table of Contents
|
||||||
|
|
||||||
@@ -79,15 +79,17 @@ Shannon is available in two editions:
|
|||||||
- [Product Line](#-product-line)
|
- [Product Line](#-product-line)
|
||||||
- [Setup & Usage Instructions](#-setup--usage-instructions)
|
- [Setup & Usage Instructions](#-setup--usage-instructions)
|
||||||
- [Prerequisites](#prerequisites)
|
- [Prerequisites](#prerequisites)
|
||||||
- [Authentication Setup](#authentication-setup)
|
- [Quick Start](#quick-start)
|
||||||
- [Quick Start with Docker](#quick-start-with-docker)
|
- [Monitoring Progress](#monitoring-progress)
|
||||||
|
- [Stopping Shannon](#stopping-shannon)
|
||||||
|
- [Usage Examples](#usage-examples)
|
||||||
- [Configuration (Optional)](#configuration-optional)
|
- [Configuration (Optional)](#configuration-optional)
|
||||||
- [Usage Patterns](#usage-patterns)
|
|
||||||
- [Output and Results](#output-and-results)
|
- [Output and Results](#output-and-results)
|
||||||
- [Sample Reports & Benchmarks](#-sample-reports--benchmarks)
|
- [Sample Reports & Benchmarks](#-sample-reports--benchmarks)
|
||||||
- [Architecture](#-architecture)
|
- [Architecture](#-architecture)
|
||||||
- [Coverage and Roadmap](#-coverage-and-roadmap)
|
- [Coverage and Roadmap](#-coverage-and-roadmap)
|
||||||
- [Disclaimers](#-disclaimers)
|
- [Disclaimers](#-disclaimers)
|
||||||
|
- [Telemetry](#-telemetry)
|
||||||
- [License](#-license)
|
- [License](#-license)
|
||||||
- [Community & Support](#-community--support)
|
- [Community & Support](#-community--support)
|
||||||
- [Get in Touch](#-get-in-touch)
|
- [Get in Touch](#-get-in-touch)
|
||||||
@@ -98,32 +100,71 @@ Shannon is available in two editions:
|
|||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- **Claude Console account with credits** - Required for AI-powered analysis
|
- **Docker** - Container runtime ([Install Docker](https://docs.docker.com/get-docker/))
|
||||||
- **Docker installed** - Primary deployment method
|
- **Anthropic API key or Claude Code OAuth token** - Get one from the [Anthropic Console](https://console.anthropic.com)
|
||||||
|
|
||||||
### Authentication Setup
|
### Quick Start
|
||||||
|
|
||||||
You need either a **Claude Code OAuth token** or an **Anthropic API key** to run Shannon. Get your token from the [Anthropic Console](https://console.anthropic.com) and pass it to Docker via the `-e` flag.
|
|
||||||
|
|
||||||
### Environment Configuration (Optional)
|
|
||||||
|
|
||||||
To prevent Claude Code from hitting token limits during long report generation, set the max output tokens before running Shannon:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
|
# 1. Clone Shannon
|
||||||
|
git clone https://github.com/KeygraphHQ/shannon.git
|
||||||
|
cd shannon
|
||||||
|
|
||||||
|
# 2. Configure credentials (choose one method)
|
||||||
|
|
||||||
|
# Option A: Export environment variables
|
||||||
|
export ANTHROPIC_API_KEY="your-api-key" # or CLAUDE_CODE_OAUTH_TOKEN
|
||||||
|
export CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000 # recommended
|
||||||
|
|
||||||
|
# Option B: Create a .env file
|
||||||
|
cat > .env << 'EOF'
|
||||||
|
ANTHROPIC_API_KEY=your-api-key
|
||||||
|
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# 3. Run a pentest
|
||||||
|
./shannon start URL=https://your-app.com REPO=/path/to/your/repo
|
||||||
```
|
```
|
||||||
|
|
||||||
This is especially useful for extensive penetration testing reports or when analyzing large codebases.
|
Shannon will build the containers, start the workflow, and return a workflow ID. The pentest runs in the background.
|
||||||
|
|
||||||
### Quick Start with Docker
|
### Monitoring Progress
|
||||||
|
|
||||||
#### Build the Container
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t shannon:latest .
|
# View real-time worker logs
|
||||||
|
./shannon logs
|
||||||
|
|
||||||
|
# Query a specific workflow's progress
|
||||||
|
./shannon query ID=shannon-1234567890
|
||||||
|
|
||||||
|
# Open the Temporal Web UI for detailed monitoring
|
||||||
|
open http://localhost:8233
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Prepare Your Repository
|
### Stopping Shannon
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop all containers (preserves workflow data)
|
||||||
|
./shannon stop
|
||||||
|
|
||||||
|
# Full cleanup (removes all data)
|
||||||
|
./shannon stop CLEAN=true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Basic pentest
|
||||||
|
./shannon start URL=https://example.com REPO=/path/to/repo
|
||||||
|
|
||||||
|
# With a configuration file
|
||||||
|
./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./configs/my-config.yaml
|
||||||
|
|
||||||
|
# Custom output directory
|
||||||
|
./shannon start URL=https://example.com REPO=/path/to/repo OUTPUT=./my-reports
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prepare Your Repository
|
||||||
|
|
||||||
Shannon is designed for **web application security testing** and expects all application code to be available in a single directory structure. This works well for:
|
Shannon is designed for **web application security testing** and expects all application code to be available in a single directory structure. This works well for:
|
||||||
|
|
||||||
@@ -133,65 +174,37 @@ Shannon is designed for **web application security testing** and expects all app
|
|||||||
**For monorepos:**
|
**For monorepos:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/your-org/your-monorepo.git repos/your-app
|
git clone https://github.com/your-org/your-monorepo.git /path/to/your-app
|
||||||
```
|
```
|
||||||
|
|
||||||
**For multi-repository applications** (e.g., separate frontend/backend):
|
**For multi-repository applications** (e.g., separate frontend/backend):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir repos/your-app
|
mkdir /path/to/your-app
|
||||||
cd repos/your-app
|
cd /path/to/your-app
|
||||||
git clone https://github.com/your-org/frontend.git
|
git clone https://github.com/your-org/frontend.git
|
||||||
git clone https://github.com/your-org/backend.git
|
git clone https://github.com/your-org/backend.git
|
||||||
git clone https://github.com/your-org/api.git
|
git clone https://github.com/your-org/api.git
|
||||||
```
|
```
|
||||||
|
|
||||||
**For existing local repositories:**
|
### Platform-Specific Instructions
|
||||||
|
|
||||||
|
**For Linux (Native Docker):**
|
||||||
|
|
||||||
|
You may need to run commands with `sudo` depending on your Docker setup. If you encounter permission issues with output files, ensure your user has access to the Docker socket.
|
||||||
|
|
||||||
|
**For macOS:**
|
||||||
|
|
||||||
|
Works out of the box with Docker Desktop installed.
|
||||||
|
|
||||||
|
**Testing Local Applications:**
|
||||||
|
|
||||||
|
Docker containers cannot reach `localhost` on your host machine. Use `host.docker.internal` in place of `localhost`:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp -r /path/to/your-existing-repo repos/your-app
|
./shannon start URL=http://host.docker.internal:3000 REPO=/path/to/repo
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Run Your First Pentest
|
|
||||||
|
|
||||||
**With Claude Console OAuth Token:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run --rm -it \
|
|
||||||
--network host \
|
|
||||||
--cap-add=NET_RAW \
|
|
||||||
--cap-add=NET_ADMIN \
|
|
||||||
-e CLAUDE_CODE_OAUTH_TOKEN="$CLAUDE_CODE_OAUTH_TOKEN" \
|
|
||||||
-v "$(pwd)/repos:/app/repos" \
|
|
||||||
-v "$(pwd)/configs:/app/configs" \
|
|
||||||
shannon:latest \
|
|
||||||
"https://your-app.com/" \
|
|
||||||
"/app/repos/your-app" \
|
|
||||||
--config /app/configs/example-config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
**With Anthropic API Key:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run --rm -it \
|
|
||||||
--network host \
|
|
||||||
--cap-add=NET_RAW \
|
|
||||||
--cap-add=NET_ADMIN \
|
|
||||||
-e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
|
|
||||||
-v "$(pwd)/repos:/app/repos" \
|
|
||||||
-v "$(pwd)/configs:/app/configs" \
|
|
||||||
shannon:latest \
|
|
||||||
"https://your-app.com/" \
|
|
||||||
"/app/repos/your-app" \
|
|
||||||
--config /app/configs/example-config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
**Network Capabilities:**
|
|
||||||
|
|
||||||
- `--cap-add=NET_RAW` - Enables advanced port scanning with nmap
|
|
||||||
- `--cap-add=NET_ADMIN` - Allows network administration for security tools
|
|
||||||
- `--network host` - Provides access to target network interfaces
|
|
||||||
|
|
||||||
### Configuration (Optional)
|
### Configuration (Optional)
|
||||||
|
|
||||||
While you can run without a config file, creating one enables authenticated testing and customized analysis.
|
While you can run without a config file, creating one enables authenticated testing and customized analysis.
|
||||||
@@ -240,30 +253,27 @@ rules:
|
|||||||
|
|
||||||
If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
|
If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
|
||||||
|
|
||||||
### Check Status
|
|
||||||
|
|
||||||
View progress of previous runs:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run --rm shannon:latest --status
|
|
||||||
```
|
|
||||||
|
|
||||||
### Output and Results
|
### Output and Results
|
||||||
|
|
||||||
All analysis results are saved to the `deliverables/` directory:
|
All results are saved to `./audit-logs/{hostname}_{sessionId}/` by default. Use `OUTPUT=<path>` (for example, `./shannon start URL=... REPO=... OUTPUT=./my-reports`) to specify a custom directory.
|
||||||
|
|
||||||
- **Pre-reconnaissance reports** - External scan results
|
Output structure:
|
||||||
- **Vulnerability assessments** - Potential vulnerabilities from thorough code analysis and network mapping
|
```
|
||||||
- **Exploitation results** - Proof-of-concept attempts
|
audit-logs/{hostname}_{sessionId}/
|
||||||
- **Executive reports** - Business-focused security summaries
|
├── session.json # Metrics and session data
|
||||||
|
├── agents/ # Per-agent execution logs
|
||||||
|
├── prompts/ # Prompt snapshots for reproducibility
|
||||||
|
└── deliverables/
|
||||||
|
└── comprehensive_security_assessment_report.md # Final comprehensive security report
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 📊 Sample Reports & Benchmarks
|
## 📊 Sample Reports
|
||||||
|
|
||||||
See Shannon's capabilities in action with real penetration test results from industry-standard vulnerable applications:
|
> **Looking for quantitative benchmarks?** [See full benchmark methodology and results →](./xben-benchmark-results/README.md)
|
||||||
|
|
||||||
### Benchmark Results
|
See Shannon's capabilities in action with penetration test results from industry-standard vulnerable applications:
|
||||||
|
|
||||||
#### 🧃 **OWASP Juice Shop** • [GitHub](https://github.com/juice-shop/juice-shop)
|
#### 🧃 **OWASP Juice Shop** • [GitHub](https://github.com/juice-shop/juice-shop)
|
||||||
|
|
||||||
@@ -423,20 +433,64 @@ Shannon is designed for legitimate security auditing purposes only.
|
|||||||
- **Time**: As of the current version, a full test run typically takes **1 to 1.5 hours** to complete.
|
- **Time**: As of the current version, a full test run typically takes **1 to 1.5 hours** to complete.
|
||||||
- **Cost**: Running the full test using Anthropic's Claude 4.5 Sonnet model may incur costs of approximately **$50 USD**. Please note that costs are subject to change based on model pricing and the complexity of the target application.
|
- **Cost**: Running the full test using Anthropic's Claude 4.5 Sonnet model may incur costs of approximately **$50 USD**. Please note that costs are subject to change based on model pricing and the complexity of the target application.
|
||||||
|
|
||||||
|
#### **6. Windows Antivirus False Positives**
|
||||||
|
|
||||||
|
Windows Defender may flag files in `xben-benchmark-results/` or `deliverables/` as malware. These are false positives caused by exploit code in the reports. Add an exclusion for the Shannon directory in Windows Defender, or use Docker/WSL2.
|
||||||
|
|
||||||
|
|
||||||
|
## 📊 Telemetry
|
||||||
|
|
||||||
|
Shannon collects anonymous usage telemetry to help improve the tool.
|
||||||
|
|
||||||
|
### What We Collect
|
||||||
|
|
||||||
|
- Workflow and agent lifecycle events (start, complete, fail)
|
||||||
|
- Timing and cost metrics (duration, API costs)
|
||||||
|
- Error types (NOT error messages or stack traces)
|
||||||
|
|
||||||
|
### What We DO NOT Collect
|
||||||
|
|
||||||
|
- Target URLs, repository paths, or configuration
|
||||||
|
- Vulnerability findings or security reports
|
||||||
|
- Error messages, stack traces, or debugging info
|
||||||
|
- Any personally identifiable information (PII)
|
||||||
|
|
||||||
|
### Opting Out
|
||||||
|
|
||||||
|
Telemetry is enabled by default. To disable it, set one of the following environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Standard opt-out
|
||||||
|
export DO_NOT_TRACK=1
|
||||||
|
|
||||||
|
# Shannon-specific opt-out
|
||||||
|
export SHANNON_TELEMETRY=off
|
||||||
|
```
|
||||||
|
|
||||||
|
Or add `DO_NOT_TRACK=1` to your `.env` file.
|
||||||
|
|
||||||
|
|
||||||
## 📜 License
|
## 📜 License
|
||||||
|
|
||||||
Shannon Lite is released under the [Business Source License 1.1 (BSL)](LICENSE).
|
Shannon Lite is released under the [GNU Affero General Public License v3.0 (AGPL-3.0)](LICENSE).
|
||||||
|
|
||||||
|
Shannon is open source (AGPL v3). This license allows you to:
|
||||||
|
- Use it freely for all internal security testing.
|
||||||
|
- Modify the code privately for internal use without sharing your changes.
|
||||||
|
|
||||||
|
The AGPL's sharing requirements primarily apply to organizations offering Shannon as a public or managed service (such as a SaaS platform). In those specific cases, any modifications made to the core software must be open-sourced.
|
||||||
|
|
||||||
|
|
||||||
## 👥 Community & Support
|
## 👥 Community & Support
|
||||||
|
|
||||||
### Community Resources
|
### Community Resources
|
||||||
|
|
||||||
- 🐛 **Report bugs** via [GitHub Issues](https://github.com/keygraph/shannon/issues)
|
**Contributing:** At this time, we’re not accepting external code contributions (PRs).
|
||||||
- 💡 **Suggest features** in [Discussions](https://github.com/keygraph/shannon/discussions)
|
Issues are welcome for bug reports and feature requests.
|
||||||
- 💬 **Join our [Discord](https://discord.gg/aWY8rRUCxc)** for real-time community support
|
|
||||||
|
- 🐛 **Report bugs** via [GitHub Issues](https://github.com/KeygraphHQ/shannon/issues)
|
||||||
|
- 💡 **Suggest features** in [Discussions](https://github.com/KeygraphHQ/shannon/discussions)
|
||||||
|
- 💬 **Join our [Discord](https://discord.gg/KAqzSHHpRt)** for real-time community support
|
||||||
|
|
||||||
### Stay Connected
|
### Stay Connected
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
### Feature Comparison
|
### Feature Comparison
|
||||||
|
|
||||||
| Feature | Shannon Lite<br>(BSL 1.1) | Shannon Pro<br>(Commercial) |
|
| Feature | Shannon Lite<br>(AGPL-3.0) | Shannon Pro<br>(Commercial) |
|
||||||
|---------|:-------------------------:|:---------------------------:|
|
|---------|:-------------------------:|:---------------------------:|
|
||||||
| **Core Scanning** |
|
| **Core Scanning** |
|
||||||
| Source-Sink Analysis | Basic | LLM-powered data flow analysis for high-precision, source-to-sink vulnerability detection |
|
| Source-Sink Analysis | Basic | LLM-powered data flow analysis for high-precision, source-to-sink vulnerability detection |
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 160 KiB After Width: | Height: | Size: 170 KiB |
@@ -0,0 +1,40 @@
|
|||||||
|
services:
|
||||||
|
temporal:
|
||||||
|
image: temporalio/temporal:latest
|
||||||
|
command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"]
|
||||||
|
ports:
|
||||||
|
- "7233:7233" # gRPC
|
||||||
|
- "8233:8233" # Web UI (built-in)
|
||||||
|
volumes:
|
||||||
|
- temporal-data:/home/temporal
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "localhost:7233"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
start_period: 30s
|
||||||
|
|
||||||
|
worker:
|
||||||
|
build: .
|
||||||
|
entrypoint: ["node", "dist/temporal/worker.js"]
|
||||||
|
environment:
|
||||||
|
- TEMPORAL_ADDRESS=temporal:7233
|
||||||
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||||
|
- CLAUDE_CODE_OAUTH_TOKEN=${CLAUDE_CODE_OAUTH_TOKEN:-}
|
||||||
|
- CLAUDE_CODE_MAX_OUTPUT_TOKENS=${CLAUDE_CODE_MAX_OUTPUT_TOKENS:-64000}
|
||||||
|
depends_on:
|
||||||
|
temporal:
|
||||||
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- ./prompts:/app/prompts
|
||||||
|
- ./audit-logs:/app/audit-logs
|
||||||
|
- ${TARGET_REPO:-.}:/target-repo
|
||||||
|
- ${BENCHMARKS_BASE:-.}:/benchmarks
|
||||||
|
- ${HOME}/.shannon:/tmp/.shannon
|
||||||
|
shm_size: 2gb
|
||||||
|
ipc: host
|
||||||
|
security_opt:
|
||||||
|
- seccomp:unconfined
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
temporal-data:
|
||||||
Generated
+35
@@ -10,6 +10,10 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^25.0.3",
|
||||||
|
"typescript": "^5.9.3"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@anthropic-ai/claude-agent-sdk": {
|
"node_modules/@anthropic-ai/claude-agent-sdk": {
|
||||||
@@ -241,6 +245,37 @@
|
|||||||
"url": "https://opencollective.com/libvips"
|
"url": "https://opencollective.com/libvips"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/node": {
|
||||||
|
"version": "25.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.3.tgz",
|
||||||
|
"integrity": "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~7.16.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/typescript": {
|
||||||
|
"version": "5.9.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||||
|
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bin": {
|
||||||
|
"tsc": "bin/tsc",
|
||||||
|
"tsserver": "bin/tsserver"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14.17"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/undici-types": {
|
||||||
|
"version": "7.16.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||||
|
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/zod": {
|
"node_modules/zod": {
|
||||||
"version": "3.25.76",
|
"version": "3.25.76",
|
||||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||||
|
|||||||
@@ -2,12 +2,17 @@
|
|||||||
"name": "@shannon/mcp-server",
|
"name": "@shannon/mcp-server",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"main": "./src/index.js",
|
"main": "./dist/index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
"clean": "rm -rf dist"
|
"clean": "rm -rf dist"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^25.0.3",
|
||||||
|
"typescript": "^5.9.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,35 +0,0 @@
|
|||||||
/**
|
|
||||||
* Shannon Helper MCP Server
|
|
||||||
*
|
|
||||||
* In-process MCP server providing save_deliverable and generate_totp tools
|
|
||||||
* for Shannon penetration testing agents.
|
|
||||||
*
|
|
||||||
* Replaces bash script invocations with native tool access.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { createSdkMcpServer } from '@anthropic-ai/claude-agent-sdk';
|
|
||||||
import { saveDeliverableTool } from './tools/save-deliverable.js';
|
|
||||||
import { generateTotpTool } from './tools/generate-totp.js';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create Shannon Helper MCP Server with target directory context
|
|
||||||
*
|
|
||||||
* @param {string} targetDir - The target repository directory where deliverables should be saved
|
|
||||||
* @returns {Object} MCP server instance
|
|
||||||
*/
|
|
||||||
export function createShannonHelperServer(targetDir) {
|
|
||||||
// Store target directory for tool access
|
|
||||||
global.__SHANNON_TARGET_DIR = targetDir;
|
|
||||||
|
|
||||||
return createSdkMcpServer({
|
|
||||||
name: 'shannon-helper',
|
|
||||||
version: '1.0.0',
|
|
||||||
tools: [saveDeliverableTool, generateTotpTool],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Export tools for direct usage if needed
|
|
||||||
export { saveDeliverableTool, generateTotpTool };
|
|
||||||
|
|
||||||
// Export types for external use
|
|
||||||
export * from './types/index.js';
|
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shannon Helper MCP Server
|
||||||
|
*
|
||||||
|
* In-process MCP server providing save_deliverable and generate_totp tools
|
||||||
|
* for Shannon penetration testing agents.
|
||||||
|
*
|
||||||
|
* Replaces bash script invocations with native tool access.
|
||||||
|
*
|
||||||
|
* Uses factory pattern to create tools with targetDir captured in closure,
|
||||||
|
* ensuring thread-safety when multiple workflows run in parallel.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { createSdkMcpServer } from '@anthropic-ai/claude-agent-sdk';
|
||||||
|
import { createSaveDeliverableTool } from './tools/save-deliverable.js';
|
||||||
|
import { generateTotpTool } from './tools/generate-totp.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Shannon Helper MCP Server with target directory context
|
||||||
|
*
|
||||||
|
* Each workflow should create its own MCP server instance with its targetDir.
|
||||||
|
* The save_deliverable tool captures targetDir in a closure, preventing race
|
||||||
|
* conditions when multiple workflows run in parallel.
|
||||||
|
*/
|
||||||
|
export function createShannonHelperServer(targetDir: string): ReturnType<typeof createSdkMcpServer> {
|
||||||
|
// Create save_deliverable tool with targetDir in closure (no global variable)
|
||||||
|
const saveDeliverableTool = createSaveDeliverableTool(targetDir);
|
||||||
|
|
||||||
|
return createSdkMcpServer({
|
||||||
|
name: 'shannon-helper',
|
||||||
|
version: '1.0.0',
|
||||||
|
tools: [saveDeliverableTool, generateTotpTool],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Export factory for direct usage if needed
|
||||||
|
export { createSaveDeliverableTool } from './tools/save-deliverable.js';
|
||||||
|
export { generateTotpTool } from './tools/generate-totp.js';
|
||||||
|
|
||||||
|
// Export types for external use
|
||||||
|
export * from './types/index.js';
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* generate_totp MCP Tool
|
* generate_totp MCP Tool
|
||||||
*
|
*
|
||||||
@@ -9,7 +15,7 @@
|
|||||||
import { tool } from '@anthropic-ai/claude-agent-sdk';
|
import { tool } from '@anthropic-ai/claude-agent-sdk';
|
||||||
import { createHmac } from 'crypto';
|
import { createHmac } from 'crypto';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import { createToolResult } from '../types/tool-responses.js';
|
import { createToolResult, type ToolResult, type GenerateTotpResponse } from '../types/tool-responses.js';
|
||||||
import { base32Decode, validateTotpSecret } from '../validation/totp-validator.js';
|
import { base32Decode, validateTotpSecret } from '../validation/totp-validator.js';
|
||||||
import { createCryptoError, createGenericError } from '../utils/error-formatter.js';
|
import { createCryptoError, createGenericError } from '../utils/error-formatter.js';
|
||||||
|
|
||||||
@@ -24,16 +30,13 @@ export const GenerateTotpInputSchema = z.object({
|
|||||||
.describe('Base32-encoded TOTP secret'),
|
.describe('Base32-encoded TOTP secret'),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
export type GenerateTotpInput = z.infer<typeof GenerateTotpInputSchema>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate HOTP code (RFC 4226)
|
* Generate HOTP code (RFC 4226)
|
||||||
* Ported from generate-totp-standalone.mjs (lines 74-99)
|
* Ported from generate-totp-standalone.mjs (lines 74-99)
|
||||||
*
|
|
||||||
* @param {string} secret - Base32-encoded secret
|
|
||||||
* @param {number} counter - Counter value
|
|
||||||
* @param {number} [digits=6] - Number of digits in OTP
|
|
||||||
* @returns {string} OTP code
|
|
||||||
*/
|
*/
|
||||||
function generateHOTP(secret, counter, digits = 6) {
|
function generateHOTP(secret: string, counter: number, digits: number = 6): string {
|
||||||
const key = base32Decode(secret);
|
const key = base32Decode(secret);
|
||||||
|
|
||||||
// Convert counter to 8-byte buffer (big-endian)
|
// Convert counter to 8-byte buffer (big-endian)
|
||||||
@@ -46,12 +49,12 @@ function generateHOTP(secret, counter, digits = 6) {
|
|||||||
const hash = hmac.digest();
|
const hash = hmac.digest();
|
||||||
|
|
||||||
// Dynamic truncation
|
// Dynamic truncation
|
||||||
const offset = hash[hash.length - 1] & 0x0f;
|
const offset = hash[hash.length - 1]! & 0x0f;
|
||||||
const code =
|
const code =
|
||||||
((hash[offset] & 0x7f) << 24) |
|
((hash[offset]! & 0x7f) << 24) |
|
||||||
((hash[offset + 1] & 0xff) << 16) |
|
((hash[offset + 1]! & 0xff) << 16) |
|
||||||
((hash[offset + 2] & 0xff) << 8) |
|
((hash[offset + 2]! & 0xff) << 8) |
|
||||||
(hash[offset + 3] & 0xff);
|
(hash[offset + 3]! & 0xff);
|
||||||
|
|
||||||
// Generate digits
|
// Generate digits
|
||||||
const otp = (code % Math.pow(10, digits)).toString().padStart(digits, '0');
|
const otp = (code % Math.pow(10, digits)).toString().padStart(digits, '0');
|
||||||
@@ -61,13 +64,8 @@ function generateHOTP(secret, counter, digits = 6) {
|
|||||||
/**
|
/**
|
||||||
* Generate TOTP code (RFC 6238)
|
* Generate TOTP code (RFC 6238)
|
||||||
* Ported from generate-totp-standalone.mjs (lines 101-106)
|
* Ported from generate-totp-standalone.mjs (lines 101-106)
|
||||||
*
|
|
||||||
* @param {string} secret - Base32-encoded secret
|
|
||||||
* @param {number} [timeStep=30] - Time step in seconds
|
|
||||||
* @param {number} [digits=6] - Number of digits in OTP
|
|
||||||
* @returns {string} OTP code
|
|
||||||
*/
|
*/
|
||||||
function generateTOTP(secret, timeStep = 30, digits = 6) {
|
function generateTOTP(secret: string, timeStep: number = 30, digits: number = 6): string {
|
||||||
const currentTime = Math.floor(Date.now() / 1000);
|
const currentTime = Math.floor(Date.now() / 1000);
|
||||||
const counter = Math.floor(currentTime / timeStep);
|
const counter = Math.floor(currentTime / timeStep);
|
||||||
return generateHOTP(secret, counter, digits);
|
return generateHOTP(secret, counter, digits);
|
||||||
@@ -75,23 +73,16 @@ function generateTOTP(secret, timeStep = 30, digits = 6) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Get seconds until TOTP code expires
|
* Get seconds until TOTP code expires
|
||||||
*
|
|
||||||
* @param {number} [timeStep=30] - Time step in seconds
|
|
||||||
* @returns {number} Seconds until expiration
|
|
||||||
*/
|
*/
|
||||||
function getSecondsUntilExpiration(timeStep = 30) {
|
function getSecondsUntilExpiration(timeStep: number = 30): number {
|
||||||
const currentTime = Math.floor(Date.now() / 1000);
|
const currentTime = Math.floor(Date.now() / 1000);
|
||||||
return timeStep - (currentTime % timeStep);
|
return timeStep - (currentTime % timeStep);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* generate_totp tool implementation
|
* generate_totp tool implementation
|
||||||
*
|
|
||||||
* @param {Object} args
|
|
||||||
* @param {string} args.secret - Base32-encoded TOTP secret
|
|
||||||
* @returns {Promise<Object>} Tool result
|
|
||||||
*/
|
*/
|
||||||
export async function generateTotp(args) {
|
export async function generateTotp(args: GenerateTotpInput): Promise<ToolResult> {
|
||||||
try {
|
try {
|
||||||
const { secret } = args;
|
const { secret } = args;
|
||||||
|
|
||||||
@@ -104,7 +95,7 @@ export async function generateTotp(args) {
|
|||||||
const timestamp = new Date().toISOString();
|
const timestamp = new Date().toISOString();
|
||||||
|
|
||||||
// Success response
|
// Success response
|
||||||
const successResponse = {
|
const successResponse: GenerateTotpResponse = {
|
||||||
status: 'success',
|
status: 'success',
|
||||||
message: 'TOTP code generated successfully',
|
message: 'TOTP code generated successfully',
|
||||||
totpCode,
|
totpCode,
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
/**
|
|
||||||
* save_deliverable MCP Tool
|
|
||||||
*
|
|
||||||
* Saves deliverable files with automatic validation.
|
|
||||||
* Replaces tools/save_deliverable.js bash script.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { tool } from '@anthropic-ai/claude-agent-sdk';
|
|
||||||
import { z } from 'zod';
|
|
||||||
import { DeliverableType, DELIVERABLE_FILENAMES, isQueueType } from '../types/deliverables.js';
|
|
||||||
import { createToolResult } from '../types/tool-responses.js';
|
|
||||||
import { validateQueueJson } from '../validation/queue-validator.js';
|
|
||||||
import { saveDeliverableFile } from '../utils/file-operations.js';
|
|
||||||
import { createValidationError, createGenericError } from '../utils/error-formatter.js';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Input schema for save_deliverable tool
|
|
||||||
*/
|
|
||||||
export const SaveDeliverableInputSchema = z.object({
|
|
||||||
deliverable_type: z.nativeEnum(DeliverableType).describe('Type of deliverable to save'),
|
|
||||||
content: z.string().min(1).describe('File content (markdown for analysis/evidence, JSON for queues)'),
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* save_deliverable tool implementation
|
|
||||||
*
|
|
||||||
* @param {Object} args
|
|
||||||
* @param {string} args.deliverable_type - Type of deliverable to save
|
|
||||||
* @param {string} args.content - File content
|
|
||||||
* @returns {Promise<Object>} Tool result
|
|
||||||
*/
|
|
||||||
export async function saveDeliverable(args) {
|
|
||||||
try {
|
|
||||||
const { deliverable_type, content } = args;
|
|
||||||
|
|
||||||
// Validate queue JSON if applicable
|
|
||||||
if (isQueueType(deliverable_type)) {
|
|
||||||
const queueValidation = validateQueueJson(content);
|
|
||||||
if (!queueValidation.valid) {
|
|
||||||
const errorResponse = createValidationError(
|
|
||||||
queueValidation.message,
|
|
||||||
true,
|
|
||||||
{
|
|
||||||
deliverableType: deliverable_type,
|
|
||||||
expectedFormat: '{"vulnerabilities": [...]}',
|
|
||||||
}
|
|
||||||
);
|
|
||||||
return createToolResult(errorResponse);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get filename and save file
|
|
||||||
const filename = DELIVERABLE_FILENAMES[deliverable_type];
|
|
||||||
const filepath = saveDeliverableFile(filename, content);
|
|
||||||
|
|
||||||
// Success response
|
|
||||||
const successResponse = {
|
|
||||||
status: 'success',
|
|
||||||
message: `Deliverable saved successfully: ${filename}`,
|
|
||||||
filepath,
|
|
||||||
deliverableType: deliverable_type,
|
|
||||||
validated: isQueueType(deliverable_type),
|
|
||||||
};
|
|
||||||
|
|
||||||
return createToolResult(successResponse);
|
|
||||||
} catch (error) {
|
|
||||||
const errorResponse = createGenericError(
|
|
||||||
error,
|
|
||||||
false,
|
|
||||||
{ deliverableType: args.deliverable_type }
|
|
||||||
);
|
|
||||||
|
|
||||||
return createToolResult(errorResponse);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tool definition for MCP server - created using SDK's tool() function
|
|
||||||
*/
|
|
||||||
export const saveDeliverableTool = tool(
|
|
||||||
'save_deliverable',
|
|
||||||
'Saves deliverable files with automatic validation. Queue files must have {"vulnerabilities": [...]} structure.',
|
|
||||||
SaveDeliverableInputSchema.shape,
|
|
||||||
saveDeliverable
|
|
||||||
);
|
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* save_deliverable MCP Tool
|
||||||
|
*
|
||||||
|
* Saves deliverable files with automatic validation.
|
||||||
|
* Replaces tools/save_deliverable.js bash script.
|
||||||
|
*
|
||||||
|
* Uses factory pattern to capture targetDir in closure, avoiding race conditions
|
||||||
|
* when multiple workflows run in parallel.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { tool } from '@anthropic-ai/claude-agent-sdk';
|
||||||
|
import { z } from 'zod';
|
||||||
|
import { DeliverableType, DELIVERABLE_FILENAMES, isQueueType } from '../types/deliverables.js';
|
||||||
|
import { createToolResult, type ToolResult, type SaveDeliverableResponse } from '../types/tool-responses.js';
|
||||||
|
import { validateQueueJson } from '../validation/queue-validator.js';
|
||||||
|
import { saveDeliverableFile } from '../utils/file-operations.js';
|
||||||
|
import { createValidationError, createGenericError } from '../utils/error-formatter.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Input schema for save_deliverable tool
|
||||||
|
*/
|
||||||
|
export const SaveDeliverableInputSchema = z.object({
|
||||||
|
deliverable_type: z.nativeEnum(DeliverableType).describe('Type of deliverable to save'),
|
||||||
|
content: z.string().min(1).describe('File content (markdown for analysis/evidence, JSON for queues)'),
|
||||||
|
});
|
||||||
|
|
||||||
|
export type SaveDeliverableInput = z.infer<typeof SaveDeliverableInputSchema>;
|
||||||
|
|
||||||
|
/**
 * Build the save_deliverable handler for one target directory.
 *
 * `targetDir` is held in the returned function's closure rather than in any
 * shared state, so every caller of this factory gets a handler bound to its
 * own directory.
 *
 * The returned handler:
 *  - runs `validateQueueJson` on the content when the deliverable is a queue
 *    type and answers with a retryable validation error if it is rejected;
 *  - otherwise writes the content via `saveDeliverableFile` under the
 *    filename mapped in `DELIVERABLE_FILENAMES`;
 *  - converts anything thrown along the way into a non-retryable generic
 *    error result instead of letting it propagate.
 */
function createSaveDeliverableHandler(targetDir: string) {
  return async function saveDeliverable(args: SaveDeliverableInput): Promise<ToolResult> {
    try {
      const { deliverable_type, content } = args;
      const requiresQueueValidation = isQueueType(deliverable_type);

      // Queue deliverables must be well-formed before anything touches disk
      if (requiresQueueValidation) {
        const outcome = validateQueueJson(content);
        if (!outcome.valid) {
          return createToolResult(
            createValidationError(outcome.message ?? 'Invalid queue JSON', true, {
              deliverableType: deliverable_type,
              expectedFormat: '{"vulnerabilities": [...]}',
            })
          );
        }
      }

      // Resolve the fixed filename and write into the closure-captured directory
      const filename = DELIVERABLE_FILENAMES[deliverable_type];
      const filepath = saveDeliverableFile(targetDir, filename, content);

      const saved: SaveDeliverableResponse = {
        status: 'success',
        message: `Deliverable saved successfully: ${filename}`,
        filepath,
        deliverableType: deliverable_type,
        validated: requiresQueueValidation,
      };
      return createToolResult(saved);
    } catch (error) {
      // Report failures as a structured tool result rather than throwing
      const failure = createGenericError(error, false, {
        deliverableType: args.deliverable_type,
      });
      return createToolResult(failure);
    }
  };
}
|
||||||
|
|
||||||
|
/**
 * Create the `save_deliverable` tool bound to a specific target directory.
 *
 * The directory is handed to `createSaveDeliverableHandler`, which keeps it in
 * a closure; callers that need deliverables written to different directories
 * should create one tool per directory.
 *
 * @param targetDir - Directory the resulting tool saves deliverables under.
 * @returns The tool definition produced by the SDK's `tool()` helper.
 */
export function createSaveDeliverableTool(targetDir: string) {
  const handler = createSaveDeliverableHandler(targetDir);

  return tool(
    'save_deliverable',
    'Saves deliverable files with automatic validation. Queue files must have {"vulnerabilities": [...]} structure.',
    SaveDeliverableInputSchema.shape,
    handler
  );
}
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deliverable Type Definitions
|
* Deliverable Type Definitions
|
||||||
*
|
*
|
||||||
@@ -5,63 +11,42 @@
|
|||||||
* Must match the exact mappings from tools/save_deliverable.js.
|
* Must match the exact mappings from tools/save_deliverable.js.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
export enum DeliverableType {
|
||||||
* @typedef {Object} DeliverableType
|
|
||||||
* @property {string} CODE_ANALYSIS
|
|
||||||
* @property {string} RECON
|
|
||||||
* @property {string} INJECTION_ANALYSIS
|
|
||||||
* @property {string} INJECTION_QUEUE
|
|
||||||
* @property {string} XSS_ANALYSIS
|
|
||||||
* @property {string} XSS_QUEUE
|
|
||||||
* @property {string} AUTH_ANALYSIS
|
|
||||||
* @property {string} AUTH_QUEUE
|
|
||||||
* @property {string} AUTHZ_ANALYSIS
|
|
||||||
* @property {string} AUTHZ_QUEUE
|
|
||||||
* @property {string} SSRF_ANALYSIS
|
|
||||||
* @property {string} SSRF_QUEUE
|
|
||||||
* @property {string} INJECTION_EVIDENCE
|
|
||||||
* @property {string} XSS_EVIDENCE
|
|
||||||
* @property {string} AUTH_EVIDENCE
|
|
||||||
* @property {string} AUTHZ_EVIDENCE
|
|
||||||
* @property {string} SSRF_EVIDENCE
|
|
||||||
*/
|
|
||||||
|
|
||||||
export const DeliverableType = {
|
|
||||||
// Pre-recon agent
|
// Pre-recon agent
|
||||||
CODE_ANALYSIS: 'CODE_ANALYSIS',
|
CODE_ANALYSIS = 'CODE_ANALYSIS',
|
||||||
|
|
||||||
// Recon agent
|
// Recon agent
|
||||||
RECON: 'RECON',
|
RECON = 'RECON',
|
||||||
|
|
||||||
// Vulnerability analysis agents
|
// Vulnerability analysis agents
|
||||||
INJECTION_ANALYSIS: 'INJECTION_ANALYSIS',
|
INJECTION_ANALYSIS = 'INJECTION_ANALYSIS',
|
||||||
INJECTION_QUEUE: 'INJECTION_QUEUE',
|
INJECTION_QUEUE = 'INJECTION_QUEUE',
|
||||||
|
|
||||||
XSS_ANALYSIS: 'XSS_ANALYSIS',
|
XSS_ANALYSIS = 'XSS_ANALYSIS',
|
||||||
XSS_QUEUE: 'XSS_QUEUE',
|
XSS_QUEUE = 'XSS_QUEUE',
|
||||||
|
|
||||||
AUTH_ANALYSIS: 'AUTH_ANALYSIS',
|
AUTH_ANALYSIS = 'AUTH_ANALYSIS',
|
||||||
AUTH_QUEUE: 'AUTH_QUEUE',
|
AUTH_QUEUE = 'AUTH_QUEUE',
|
||||||
|
|
||||||
AUTHZ_ANALYSIS: 'AUTHZ_ANALYSIS',
|
AUTHZ_ANALYSIS = 'AUTHZ_ANALYSIS',
|
||||||
AUTHZ_QUEUE: 'AUTHZ_QUEUE',
|
AUTHZ_QUEUE = 'AUTHZ_QUEUE',
|
||||||
|
|
||||||
SSRF_ANALYSIS: 'SSRF_ANALYSIS',
|
SSRF_ANALYSIS = 'SSRF_ANALYSIS',
|
||||||
SSRF_QUEUE: 'SSRF_QUEUE',
|
SSRF_QUEUE = 'SSRF_QUEUE',
|
||||||
|
|
||||||
// Exploitation agents
|
// Exploitation agents
|
||||||
INJECTION_EVIDENCE: 'INJECTION_EVIDENCE',
|
INJECTION_EVIDENCE = 'INJECTION_EVIDENCE',
|
||||||
XSS_EVIDENCE: 'XSS_EVIDENCE',
|
XSS_EVIDENCE = 'XSS_EVIDENCE',
|
||||||
AUTH_EVIDENCE: 'AUTH_EVIDENCE',
|
AUTH_EVIDENCE = 'AUTH_EVIDENCE',
|
||||||
AUTHZ_EVIDENCE: 'AUTHZ_EVIDENCE',
|
AUTHZ_EVIDENCE = 'AUTHZ_EVIDENCE',
|
||||||
SSRF_EVIDENCE: 'SSRF_EVIDENCE',
|
SSRF_EVIDENCE = 'SSRF_EVIDENCE',
|
||||||
};
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hard-coded filename mappings from agent prompts
|
* Hard-coded filename mappings from agent prompts
|
||||||
* Must match tools/save_deliverable.js exactly
|
* Must match tools/save_deliverable.js exactly
|
||||||
*/
|
*/
|
||||||
export const DELIVERABLE_FILENAMES = {
|
export const DELIVERABLE_FILENAMES: Record<DeliverableType, string> = {
|
||||||
[DeliverableType.CODE_ANALYSIS]: 'code_analysis_deliverable.md',
|
[DeliverableType.CODE_ANALYSIS]: 'code_analysis_deliverable.md',
|
||||||
[DeliverableType.RECON]: 'recon_deliverable.md',
|
[DeliverableType.RECON]: 'recon_deliverable.md',
|
||||||
[DeliverableType.INJECTION_ANALYSIS]: 'injection_analysis_deliverable.md',
|
[DeliverableType.INJECTION_ANALYSIS]: 'injection_analysis_deliverable.md',
|
||||||
@@ -84,7 +69,7 @@ export const DELIVERABLE_FILENAMES = {
|
|||||||
/**
|
/**
|
||||||
* Queue types that require JSON validation
|
* Queue types that require JSON validation
|
||||||
*/
|
*/
|
||||||
export const QUEUE_TYPES = [
|
export const QUEUE_TYPES: DeliverableType[] = [
|
||||||
DeliverableType.INJECTION_QUEUE,
|
DeliverableType.INJECTION_QUEUE,
|
||||||
DeliverableType.XSS_QUEUE,
|
DeliverableType.XSS_QUEUE,
|
||||||
DeliverableType.AUTH_QUEUE,
|
DeliverableType.AUTH_QUEUE,
|
||||||
@@ -94,14 +79,18 @@ export const QUEUE_TYPES = [
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Type guard to check if a deliverable type is a queue
|
* Type guard to check if a deliverable type is a queue
|
||||||
* @param {string} type - Deliverable type to check
|
|
||||||
* @returns {boolean} True if the type is a queue type
|
|
||||||
*/
|
*/
|
||||||
export function isQueueType(type) {
|
export function isQueueType(type: string): boolean {
|
||||||
return QUEUE_TYPES.includes(type);
|
return QUEUE_TYPES.includes(type as DeliverableType);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef {Object} VulnerabilityQueue
|
* Vulnerability queue structure
|
||||||
* @property {Array<Object>} vulnerabilities - Array of vulnerability objects
|
|
||||||
*/
|
*/
|
||||||
|
export interface VulnerabilityQueue {
|
||||||
|
vulnerabilities: VulnerabilityItem[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface VulnerabilityItem {
|
||||||
|
[key: string]: unknown;
|
||||||
|
}
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
/**
|
|
||||||
* Type definitions barrel export
|
|
||||||
*/
|
|
||||||
|
|
||||||
export * from './deliverables.js';
|
|
||||||
export * from './tool-responses.js';
|
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Type definitions barrel export
|
||||||
|
*/
|
||||||
|
|
||||||
|
export * from './deliverables.js';
|
||||||
|
export * from './tool-responses.js';
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
/**
|
|
||||||
* Tool Response Type Definitions
|
|
||||||
*
|
|
||||||
* Defines structured response formats for MCP tools to ensure
|
|
||||||
* consistent error handling and success reporting.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @typedef {Object} ErrorResponse
|
|
||||||
* @property {'error'} status
|
|
||||||
* @property {string} message
|
|
||||||
* @property {string} errorType - ValidationError, FileSystemError, CryptoError, etc.
|
|
||||||
* @property {boolean} retryable
|
|
||||||
* @property {Record<string, unknown>} [context]
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @typedef {Object} SuccessResponse
|
|
||||||
* @property {'success'} status
|
|
||||||
* @property {string} message
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @typedef {Object} SaveDeliverableResponse
|
|
||||||
* @property {'success'} status
|
|
||||||
* @property {string} message
|
|
||||||
* @property {string} filepath
|
|
||||||
* @property {string} deliverableType
|
|
||||||
* @property {boolean} validated - true if queue JSON was validated
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @typedef {Object} GenerateTotpResponse
|
|
||||||
* @property {'success'} status
|
|
||||||
* @property {string} message
|
|
||||||
* @property {string} totpCode
|
|
||||||
* @property {string} timestamp
|
|
||||||
* @property {number} expiresIn - seconds until expiration
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper to create tool result from response
|
|
||||||
* MCP tools should return this format
|
|
||||||
*
|
|
||||||
* @param {ErrorResponse | SaveDeliverableResponse | GenerateTotpResponse} response
|
|
||||||
* @returns {{ content: Array<{ type: string; text: string }>; isError: boolean }}
|
|
||||||
*/
|
|
||||||
export function createToolResult(response) {
|
|
||||||
return {
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: 'text',
|
|
||||||
text: JSON.stringify(response, null, 2),
|
|
||||||
},
|
|
||||||
],
|
|
||||||
isError: response.status === 'error',
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tool Response Type Definitions
|
||||||
|
*
|
||||||
|
* Defines structured response formats for MCP tools to ensure
|
||||||
|
* consistent error handling and success reporting.
|
||||||
|
*/
|
||||||
|
|
||||||
|
export interface ErrorResponse {
|
||||||
|
status: 'error';
|
||||||
|
message: string;
|
||||||
|
errorType: string; // ValidationError, FileSystemError, CryptoError, etc.
|
||||||
|
retryable: boolean;
|
||||||
|
context?: Record<string, unknown>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SuccessResponse {
|
||||||
|
status: 'success';
|
||||||
|
message: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SaveDeliverableResponse {
|
||||||
|
status: 'success';
|
||||||
|
message: string;
|
||||||
|
filepath: string;
|
||||||
|
deliverableType: string;
|
||||||
|
validated: boolean; // true if queue JSON was validated
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface GenerateTotpResponse {
|
||||||
|
status: 'success';
|
||||||
|
message: string;
|
||||||
|
totpCode: string;
|
||||||
|
timestamp: string;
|
||||||
|
expiresIn: number; // seconds until expiration
|
||||||
|
}
|
||||||
|
|
||||||
|
export type ToolResponse =
|
||||||
|
| ErrorResponse
|
||||||
|
| SuccessResponse
|
||||||
|
| SaveDeliverableResponse
|
||||||
|
| GenerateTotpResponse;
|
||||||
|
|
||||||
|
export interface ToolResultContent {
|
||||||
|
type: string;
|
||||||
|
text: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ToolResult {
|
||||||
|
content: ToolResultContent[];
|
||||||
|
isError: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Wrap a structured tool response in the result envelope MCP tools return.
 *
 * The response is serialized as pretty-printed JSON (2-space indent) into a
 * single `text` content entry, and `isError` is set exactly when the
 * response's `status` is `'error'`.
 *
 * @param response - The structured response to wrap.
 * @returns A result with one text content item and the error flag.
 */
export function createToolResult(response: ToolResponse): ToolResult {
  const serialized = JSON.stringify(response, null, 2);
  const failed = response.status === 'error';

  return {
    content: [{ type: 'text', text: serialized }],
    isError: failed,
  };
}
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
/**
|
|
||||||
* Error Formatting Utilities
|
|
||||||
*
|
|
||||||
* Helper functions for creating structured error responses.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @typedef {Object} ErrorResponse
|
|
||||||
* @property {'error'} status
|
|
||||||
* @property {string} message
|
|
||||||
* @property {string} errorType
|
|
||||||
* @property {boolean} retryable
|
|
||||||
* @property {Record<string, unknown>} [context]
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a validation error response
|
|
||||||
*
|
|
||||||
* @param {string} message
|
|
||||||
* @param {boolean} [retryable=true]
|
|
||||||
* @param {Record<string, unknown>} [context]
|
|
||||||
* @returns {ErrorResponse}
|
|
||||||
*/
|
|
||||||
export function createValidationError(message, retryable = true, context) {
|
|
||||||
return {
|
|
||||||
status: 'error',
|
|
||||||
message,
|
|
||||||
errorType: 'ValidationError',
|
|
||||||
retryable,
|
|
||||||
context,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a crypto error response
|
|
||||||
*
|
|
||||||
* @param {string} message
|
|
||||||
* @param {boolean} [retryable=false]
|
|
||||||
* @param {Record<string, unknown>} [context]
|
|
||||||
* @returns {ErrorResponse}
|
|
||||||
*/
|
|
||||||
export function createCryptoError(message, retryable = false, context) {
|
|
||||||
return {
|
|
||||||
status: 'error',
|
|
||||||
message,
|
|
||||||
errorType: 'CryptoError',
|
|
||||||
retryable,
|
|
||||||
context,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a generic error response
|
|
||||||
*
|
|
||||||
* @param {unknown} error
|
|
||||||
* @param {boolean} [retryable=false]
|
|
||||||
* @param {Record<string, unknown>} [context]
|
|
||||||
* @returns {ErrorResponse}
|
|
||||||
*/
|
|
||||||
export function createGenericError(error, retryable = false, context) {
|
|
||||||
const message = error instanceof Error ? error.message : String(error);
|
|
||||||
const errorType = error instanceof Error ? error.constructor.name : 'UnknownError';
|
|
||||||
|
|
||||||
return {
|
|
||||||
status: 'error',
|
|
||||||
message,
|
|
||||||
errorType,
|
|
||||||
retryable,
|
|
||||||
context,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Error Formatting Utilities
|
||||||
|
*
|
||||||
|
* Helper functions for creating structured error responses.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { ErrorResponse } from '../types/tool-responses.js';
|
||||||
|
|
||||||
|
/**
 * Build an error response tagged as a `ValidationError`.
 *
 * @param message - Human-readable description of what failed validation.
 * @param retryable - Whether the caller may retry; defaults to `true`.
 * @param context - Optional extra details. The `context` key is left off the
 *   result entirely when this is `undefined`.
 * @returns The structured error response.
 */
export function createValidationError(
  message: string,
  retryable: boolean = true,
  context?: Record<string, unknown>
): ErrorResponse {
  const response: ErrorResponse = {
    status: 'error',
    message,
    errorType: 'ValidationError',
    retryable,
  };

  // Attach context only when supplied so the key is absent, not undefined
  if (context !== undefined) {
    response.context = context;
  }

  return response;
}
|
||||||
|
|
||||||
|
/**
 * Build an error response tagged as a `CryptoError`.
 *
 * @param message - Human-readable description of the failure.
 * @param retryable - Whether the caller may retry; defaults to `false`.
 * @param context - Optional extra details. The `context` key is left off the
 *   result entirely when this is `undefined`.
 * @returns The structured error response.
 */
export function createCryptoError(
  message: string,
  retryable: boolean = false,
  context?: Record<string, unknown>
): ErrorResponse {
  // Empty object when no context was given, so the spread adds no key
  const optionalContext = context === undefined ? {} : { context };

  return {
    status: 'error',
    message,
    errorType: 'CryptoError',
    retryable,
    ...optionalContext,
  };
}
|
||||||
|
|
||||||
|
/**
 * Build an error response from an arbitrary thrown value.
 *
 * For `Error` instances the message is `error.message` and the error type is
 * the constructor's name (e.g. `TypeError`). Any other thrown value is
 * reported with type `UnknownError` and a best-effort textual description.
 *
 * @param error - Whatever was caught; may be any value, not just an `Error`.
 * @param retryable - Whether the caller may retry; defaults to `false`.
 * @param context - Optional extra details. The `context` key is left off the
 *   result entirely when this is `undefined`.
 * @returns The structured error response. This function does not throw.
 */
export function createGenericError(
  error: unknown,
  retryable: boolean = false,
  context?: Record<string, unknown>
): ErrorResponse {
  let message: string;
  let errorType: string;

  if (error instanceof Error) {
    message = error.message;
    errorType = error.constructor.name;
  } else {
    message = describeThrownValue(error);
    errorType = 'UnknownError';
  }

  return {
    status: 'error',
    message,
    errorType,
    retryable,
    ...(context !== undefined && { context }),
  };
}

/**
 * Describe a non-Error thrown value without ever throwing.
 *
 * `String(value)` is used whenever it yields something informative. Plain
 * objects stringify to the useless "[object Object]", and prototype-less
 * objects make `String()` throw, so both cases fall back to JSON.
 */
function describeThrownValue(value: unknown): string {
  let text: string | undefined;
  try {
    text = String(value);
  } catch {
    // e.g. Object.create(null) has no toString/valueOf to convert with
    text = undefined;
  }

  if (text !== undefined && text !== '[object Object]') {
    return text;
  }

  try {
    const json = JSON.stringify(value);
    if (typeof json === 'string') {
      return json;
    }
  } catch {
    // Circular structures (or a throwing toJSON) cannot be serialized
  }

  return text ?? 'Unknown error';
}
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* File Operations Utilities
|
* File Operations Utilities
|
||||||
*
|
*
|
||||||
@@ -11,20 +17,18 @@ import { join } from 'path';
|
|||||||
/**
|
/**
|
||||||
* Save deliverable file to deliverables/ directory
|
* Save deliverable file to deliverables/ directory
|
||||||
*
|
*
|
||||||
* @param {string} filename - Name of the file to save
|
* @param targetDir - Target directory for deliverables (passed explicitly to avoid race conditions)
|
||||||
* @param {string} content - Content to write to the file
|
* @param filename - Name of the deliverable file
|
||||||
* @returns {string} Full path to the saved file
|
* @param content - File content to save
|
||||||
*/
|
*/
|
||||||
export function saveDeliverableFile(filename, content) {
|
export function saveDeliverableFile(targetDir: string, filename: string, content: string): string {
|
||||||
// Use target directory from global context (set by createShannonHelperServer)
|
|
||||||
const targetDir = global.__SHANNON_TARGET_DIR || process.cwd();
|
|
||||||
const deliverablesDir = join(targetDir, 'deliverables');
|
const deliverablesDir = join(targetDir, 'deliverables');
|
||||||
const filepath = join(deliverablesDir, filename);
|
const filepath = join(deliverablesDir, filename);
|
||||||
|
|
||||||
// Ensure deliverables directory exists
|
// Ensure deliverables directory exists
|
||||||
try {
|
try {
|
||||||
mkdirSync(deliverablesDir, { recursive: true });
|
mkdirSync(deliverablesDir, { recursive: true });
|
||||||
} catch (error) {
|
} catch {
|
||||||
// Directory might already exist, ignore
|
// Directory might already exist, ignore
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
/**
|
|
||||||
* Queue Validator
|
|
||||||
*
|
|
||||||
* Validates JSON structure for vulnerability queue files.
|
|
||||||
* Ported from tools/save_deliverable.js (lines 56-75).
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @typedef {Object} ValidationResult
|
|
||||||
* @property {boolean} valid
|
|
||||||
* @property {string} [message]
|
|
||||||
* @property {Object} [data]
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Validate JSON structure for queue files
|
|
||||||
* Queue files must have a 'vulnerabilities' array
|
|
||||||
*
|
|
||||||
* @param {string} content - JSON string to validate
|
|
||||||
* @returns {ValidationResult} ValidationResult with valid flag, optional error message, and parsed data
|
|
||||||
*/
|
|
||||||
export function validateQueueJson(content) {
|
|
||||||
try {
|
|
||||||
const parsed = JSON.parse(content);
|
|
||||||
|
|
||||||
// Queue files must have a 'vulnerabilities' array
|
|
||||||
if (!parsed.vulnerabilities) {
|
|
||||||
return {
|
|
||||||
valid: false,
|
|
||||||
message: `Invalid queue structure: Missing 'vulnerabilities' property. Expected: {"vulnerabilities": [...]}`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!Array.isArray(parsed.vulnerabilities)) {
|
|
||||||
return {
|
|
||||||
valid: false,
|
|
||||||
message: `Invalid queue structure: 'vulnerabilities' must be an array. Expected: {"vulnerabilities": [...]}`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
valid: true,
|
|
||||||
data: parsed,
|
|
||||||
};
|
|
||||||
} catch (error) {
|
|
||||||
return {
|
|
||||||
valid: false,
|
|
||||||
message: `Invalid JSON: ${error instanceof Error ? error.message : String(error)}`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queue Validator
|
||||||
|
*
|
||||||
|
* Validates JSON structure for vulnerability queue files.
|
||||||
|
* Ported from tools/save_deliverable.js (lines 56-75).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { VulnerabilityQueue } from '../types/deliverables.js';
|
||||||
|
|
||||||
|
export interface ValidationResult {
|
||||||
|
valid: boolean;
|
||||||
|
message?: string;
|
||||||
|
data?: VulnerabilityQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Validate the JSON structure of a vulnerability queue file.
 *
 * A queue document must be a JSON object with a `vulnerabilities` array.
 * Problems are reported through the result rather than by throwing: parse
 * failures and structural errors both yield `valid: false` with a
 * human-readable `message`.
 *
 * Only the top-level shape is checked; the entries of the array are not
 * inspected.
 *
 * @param content - Raw JSON text to validate.
 * @returns `{ valid: true, data }` when the structure is acceptable,
 *   otherwise `{ valid: false, message }`.
 */
export function validateQueueJson(content: string): ValidationResult {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch (error) {
    return {
      valid: false,
      message: `Invalid JSON: ${error instanceof Error ? error.message : String(error)}`,
    };
  }

  // `typeof` says 'object' for both null and arrays, so name those explicitly
  // instead of reporting a misleading "Got: object" / missing-property error.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    let actual: string = typeof parsed;
    if (parsed === null) {
      actual = 'null';
    } else if (Array.isArray(parsed)) {
      actual = 'array';
    }
    return {
      valid: false,
      message: `Invalid queue structure: Expected an object. Got: ${actual}`,
    };
  }

  const obj = parsed as Record<string, unknown>;

  // Queue files must have a 'vulnerabilities' array
  if (!('vulnerabilities' in obj)) {
    return {
      valid: false,
      message: `Invalid queue structure: Missing 'vulnerabilities' property. Expected: {"vulnerabilities": [...]}`,
    };
  }

  if (!Array.isArray(obj.vulnerabilities)) {
    return {
      valid: false,
      message: `Invalid queue structure: 'vulnerabilities' must be an array. Expected: {"vulnerabilities": [...]}`,
    };
  }

  return {
    valid: true,
    data: parsed as VulnerabilityQueue,
  };
}
|
||||||
+10
-8
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TOTP Validator
|
* TOTP Validator
|
||||||
*
|
*
|
||||||
@@ -8,11 +14,8 @@
|
|||||||
/**
|
/**
|
||||||
* Base32 decode function
|
* Base32 decode function
|
||||||
* Ported from generate-totp-standalone.mjs
|
* Ported from generate-totp-standalone.mjs
|
||||||
*
|
|
||||||
* @param {string} encoded - Base32 encoded string
|
|
||||||
* @returns {Buffer} Buffer containing decoded bytes
|
|
||||||
*/
|
*/
|
||||||
export function base32Decode(encoded) {
|
export function base32Decode(encoded: string): Buffer {
|
||||||
const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567';
|
const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567';
|
||||||
const cleanInput = encoded.toUpperCase().replace(/[^A-Z2-7]/g, '');
|
const cleanInput = encoded.toUpperCase().replace(/[^A-Z2-7]/g, '');
|
||||||
|
|
||||||
@@ -20,7 +23,7 @@ export function base32Decode(encoded) {
|
|||||||
return Buffer.alloc(0);
|
return Buffer.alloc(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const output = [];
|
const output: number[] = [];
|
||||||
let bits = 0;
|
let bits = 0;
|
||||||
let value = 0;
|
let value = 0;
|
||||||
|
|
||||||
@@ -46,10 +49,9 @@ export function base32Decode(encoded) {
|
|||||||
* Validate TOTP secret
|
* Validate TOTP secret
|
||||||
* Must be base32-encoded string
|
* Must be base32-encoded string
|
||||||
*
|
*
|
||||||
* @param {string} secret - Secret to validate
|
* @returns true if valid, throws Error if invalid
|
||||||
* @returns {boolean} true if valid, throws Error if invalid
|
|
||||||
*/
|
*/
|
||||||
export function validateTotpSecret(secret) {
|
export function validateTotpSecret(secret: string): boolean {
|
||||||
if (!secret || secret.length === 0) {
|
if (!secret || secret.length === 0) {
|
||||||
throw new Error('TOTP secret cannot be empty');
|
throw new Error('TOTP secret cannot be empty');
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
{
|
||||||
|
// Visit https://aka.ms/tsconfig to read more about this file
|
||||||
|
"compilerOptions": {
|
||||||
|
// File Layout
|
||||||
|
"rootDir": "./src",
|
||||||
|
"outDir": "./dist",
|
||||||
|
|
||||||
|
// Environment Settings
|
||||||
|
// See also https://aka.ms/tsconfig/module
|
||||||
|
"module": "nodenext",
|
||||||
|
"moduleResolution": "nodenext",
|
||||||
|
|
||||||
|
"target": "es2022",
|
||||||
|
"lib": ["es2022"],
|
||||||
|
|
||||||
|
"types": ["node"],
|
||||||
|
// For nodejs:
|
||||||
|
// "lib": ["esnext"],
|
||||||
|
// "types": ["node"],
|
||||||
|
// and npm install -D @types/node
|
||||||
|
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"noEmitOnError": true,
|
||||||
|
|
||||||
|
// Other Outputs
|
||||||
|
"sourceMap": true,
|
||||||
|
"declaration": true,
|
||||||
|
"declarationMap": true,
|
||||||
|
|
||||||
|
// Stricter Typechecking Options
|
||||||
|
"noUncheckedIndexedAccess": true,
|
||||||
|
"exactOptionalPropertyTypes": true,
|
||||||
|
|
||||||
|
// Style Options
|
||||||
|
// "noImplicitReturns": true,
|
||||||
|
// "noImplicitOverride": true,
|
||||||
|
// "noUnusedLocals": true,
|
||||||
|
// "noUnusedParameters": true,
|
||||||
|
// "noFallthroughCasesInSwitch": true,
|
||||||
|
// "noPropertyAccessFromIndexSignature": true,
|
||||||
|
|
||||||
|
// Recommended Options
|
||||||
|
"strict": true,
|
||||||
|
"noUncheckedSideEffectImports": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"],
|
||||||
|
"exclude": ["node_modules", "dist"]
|
||||||
|
}
|
||||||
Generated
+1983
-2
File diff suppressed because it is too large
Load Diff
+15
-4
@@ -2,12 +2,20 @@
|
|||||||
"name": "shannon",
|
"name": "shannon",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"main": "shannon.mjs",
|
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "./shannon.mjs"
|
"build": "tsc",
|
||||||
|
"temporal:server": "docker compose -f docker/docker-compose.temporal.yml up temporal -d",
|
||||||
|
"temporal:server:stop": "docker compose -f docker/docker-compose.temporal.yml down",
|
||||||
|
"temporal:worker": "node dist/temporal/worker.js",
|
||||||
|
"temporal:start": "node dist/temporal/client.js",
|
||||||
|
"temporal:query": "node dist/temporal/query.js"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
||||||
|
"@temporalio/activity": "^1.11.0",
|
||||||
|
"@temporalio/client": "^1.11.0",
|
||||||
|
"@temporalio/worker": "^1.11.0",
|
||||||
|
"@temporalio/workflow": "^1.11.0",
|
||||||
"ajv": "^8.12.0",
|
"ajv": "^8.12.0",
|
||||||
"ajv-formats": "^2.1.1",
|
"ajv-formats": "^2.1.1",
|
||||||
"boxen": "^8.0.1",
|
"boxen": "^8.0.1",
|
||||||
@@ -16,10 +24,13 @@
|
|||||||
"figlet": "^1.9.3",
|
"figlet": "^1.9.3",
|
||||||
"gradient-string": "^3.0.0",
|
"gradient-string": "^3.0.0",
|
||||||
"js-yaml": "^4.1.0",
|
"js-yaml": "^4.1.0",
|
||||||
|
"posthog-node": "^5.20.0",
|
||||||
"zod": "^3.22.4",
|
"zod": "^3.22.4",
|
||||||
"zx": "^8.0.0"
|
"zx": "^8.0.0"
|
||||||
},
|
},
|
||||||
"bin": {
|
"devDependencies": {
|
||||||
"shannon": "./shannon.mjs"
|
"@types/js-yaml": "^4.0.9",
|
||||||
|
"@types/node": "^25.0.3",
|
||||||
|
"typescript": "^5.9.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,31 +1 @@
|
|||||||
Create a file at `deliverables/comprehensive_security_assessment_report.md` with this content:
|
Read `deliverables/comprehensive_security_assessment_report.md`, prepend "# Security Assessment Report\n\n**Target:** {{WEB_URL}}\n\n" to the content, and save it back. Say "Done".
|
||||||
|
|
||||||
```markdown
|
|
||||||
# Injection Exploitation Evidence
|
|
||||||
|
|
||||||
## Successfully Exploited Vulnerabilities
|
|
||||||
|
|
||||||
### INJ-VULN-01: SQL Injection Test
|
|
||||||
|
|
||||||
**Summary:** Test SQL injection vulnerability for pipeline testing.
|
|
||||||
|
|
||||||
# Authentication Exploitation Evidence
|
|
||||||
|
|
||||||
## Successfully Exploited Vulnerabilities
|
|
||||||
|
|
||||||
### AUTH-VULN-01: Authentication Bypass Test
|
|
||||||
|
|
||||||
**Summary:** Test authentication vulnerability for pipeline testing.
|
|
||||||
```
|
|
||||||
|
|
||||||
Then read and modify the file by adding these sections at the top:
|
|
||||||
|
|
||||||
1. **Executive Summary** section with:
|
|
||||||
- Target: {{WEB_URL}}
|
|
||||||
- Assessment Date: (current date)
|
|
||||||
- Scope: Authentication, XSS, SQL and Command Injection, SSRF, Authorization testing
|
|
||||||
- Brief summary of findings (2-3 vulnerabilities found for testing)
|
|
||||||
|
|
||||||
2. **Network Reconnaissance** section with test data for open ports and services
|
|
||||||
|
|
||||||
Save the modified content back to `deliverables/comprehensive_security_assessment_report.md`, then say "Done".
|
|
||||||
|
|||||||
@@ -1,175 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Export Metrics to CSV
|
|
||||||
*
|
|
||||||
* Converts session.json from audit-logs into CSV format for spreadsheet analysis.
|
|
||||||
*
|
|
||||||
* DATA SOURCE:
|
|
||||||
* - Reads from: audit-logs/{hostname}_{sessionId}/session.json
|
|
||||||
* - Source of truth for all metrics, timing, and cost data
|
|
||||||
* - Automatically created by Shannon during agent execution
|
|
||||||
*
|
|
||||||
* CSV OUTPUT:
|
|
||||||
* - One row per agent with: agent, phase, status, attempts, duration_ms, cost_usd
|
|
||||||
* - Perfect for importing into Excel/Google Sheets for analysis
|
|
||||||
*
|
|
||||||
* USE CASES:
|
|
||||||
* - Compare performance across multiple sessions
|
|
||||||
* - Track costs and optimize budget
|
|
||||||
* - Identify slow agents for optimization
|
|
||||||
* - Generate charts and visualizations
|
|
||||||
* - Export data for external reporting tools
|
|
||||||
*
|
|
||||||
* EXAMPLES:
|
|
||||||
* ```bash
|
|
||||||
* # Export to stdout
|
|
||||||
* ./scripts/export-metrics.js --session-id abc123
|
|
||||||
*
|
|
||||||
* # Export to file
|
|
||||||
* ./scripts/export-metrics.js --session-id abc123 --output metrics.csv
|
|
||||||
*
|
|
||||||
* # Find session ID from Shannon store
|
|
||||||
* cat .shannon-store.json | jq '.sessions | keys'
|
|
||||||
* ```
|
|
||||||
*
|
|
||||||
* NOTE: For raw metrics, just read audit-logs/.../session.json directly.
|
|
||||||
* This script only exists to provide a spreadsheet-friendly CSV format.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import chalk from 'chalk';
|
|
||||||
import { fs, path } from 'zx';
|
|
||||||
import { getSession } from '../src/session-manager.js';
|
|
||||||
import { AuditSession } from '../src/audit/index.js';
|
|
||||||
|
|
||||||
/**
 * Parse command-line arguments for the metrics exporter.
 *
 * Recognised flags are `--session-id <id>`, `--output <file>` and
 * `--help` / `-h`. `--help` prints the usage text and exits with status 0.
 * An unrecognised argument, or a value-taking flag that is not followed by a
 * value, prints an error plus the usage text and exits with status 1.
 *
 * @returns {{sessionId: (string|null), output: (string|null)}} Parsed options;
 *   a field stays `null` when its flag was not supplied.
 */
function parseArgs() {
  const args = {
    sessionId: null,
    output: null
  };

  for (let i = 2; i < process.argv.length; i++) {
    const arg = process.argv[i];

    switch (arg) {
      case '--session-id':
      case '--output': {
        const value = process.argv[i + 1];
        if (!value) {
          // Report the real problem instead of calling a known flag "unknown".
          console.log(chalk.red(`❌ Missing value for ${arg}`));
          printUsage();
          process.exit(1);
        }
        if (arg === '--session-id') {
          args.sessionId = value;
        } else {
          args.output = value;
        }
        i++; // The value has been consumed; skip over it.
        break;
      }
      case '--help':
      case '-h':
        printUsage();
        process.exit(0);
        break;
      default:
        console.log(chalk.red(`❌ Unknown argument: ${arg}`));
        printUsage();
        process.exit(1);
    }
  }

  return args;
}
|
|
||||||
|
|
||||||
/**
 * Print the exporter's usage text (title, options and examples) to stdout,
 * one `console.log` call per line of the help screen.
 */
function printUsage() {
  const helpLines = [
    chalk.cyan('\n📊 Export Metrics to CSV'),
    chalk.gray('\nUsage: ./scripts/export-metrics.js [options]\n'),
    chalk.white('Options:'),
    chalk.gray(' --session-id <id> Session ID to export (required)'),
    chalk.gray(' --output <file> Output CSV file path (default: stdout)'),
    chalk.gray(' --help, -h Show this help\n'),
    chalk.white('Examples:'),
    chalk.gray(' # Export to stdout'),
    chalk.gray(' ./scripts/export-metrics.js --session-id abc123\n'),
    chalk.gray(' # Export to file'),
    chalk.gray(' ./scripts/export-metrics.js --session-id abc123 --output metrics.csv\n')
  ];

  for (const helpLine of helpLines) {
    console.log(helpLine);
  }
}
|
|
||||||
|
|
||||||
/**
 * Load a session and render its audit metrics as CSV text.
 *
 * @param sessionId Identifier passed to `getSession`.
 * @returns The CSV produced by `exportAsCSV` for that session's metrics.
 * @throws {Error} When `getSession` yields no session for `sessionId`.
 */
async function exportMetrics(sessionId) {
  const session = await getSession(sessionId);

  if (session) {
    // The audit session must be initialized before its metrics are read.
    const audit = new AuditSession(session);
    await audit.initialize();
    const collected = await audit.getMetrics();
    return exportAsCSV(session, collected);
  }

  throw new Error(`Session ${sessionId} not found`);
}
|
|
||||||
|
|
||||||
// Export as CSV
|
|
||||||
function exportAsCSV(session, metrics) {
|
|
||||||
const lines = [];
|
|
||||||
|
|
||||||
// Header
|
|
||||||
lines.push('agent,phase,status,attempts,duration_ms,cost_usd');
|
|
||||||
|
|
||||||
// Phase mapping
|
|
||||||
const phaseMap = {
|
|
||||||
'pre-recon': 'pre-recon',
|
|
||||||
'recon': 'recon',
|
|
||||||
'injection-vuln': 'vulnerability-analysis',
|
|
||||||
'xss-vuln': 'vulnerability-analysis',
|
|
||||||
'auth-vuln': 'vulnerability-analysis',
|
|
||||||
'authz-vuln': 'vulnerability-analysis',
|
|
||||||
'ssrf-vuln': 'vulnerability-analysis',
|
|
||||||
'injection-exploit': 'exploitation',
|
|
||||||
'xss-exploit': 'exploitation',
|
|
||||||
'auth-exploit': 'exploitation',
|
|
||||||
'authz-exploit': 'exploitation',
|
|
||||||
'ssrf-exploit': 'exploitation',
|
|
||||||
'report': 'reporting'
|
|
||||||
};
|
|
||||||
|
|
||||||
// Agent rows
|
|
||||||
for (const [agentName, agentData] of Object.entries(metrics.metrics.agents)) {
|
|
||||||
const phase = phaseMap[agentName] || 'unknown';
|
|
||||||
|
|
||||||
lines.push([
|
|
||||||
agentName,
|
|
||||||
phase,
|
|
||||||
agentData.status,
|
|
||||||
agentData.attempts.length,
|
|
||||||
agentData.final_duration_ms,
|
|
||||||
agentData.total_cost_usd.toFixed(4)
|
|
||||||
].join(','));
|
|
||||||
}
|
|
||||||
|
|
||||||
return lines.join('\n');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * CLI entry point: parse arguments, export the requested session's metrics
 * and write the CSV either to the `--output` file or to stdout.
 *
 * Exits with status 1 (after printing usage) when `--session-id` is missing.
 * When the CSV is written to stdout, status messages go to stderr so that
 * redirecting or piping stdout captures only the CSV.
 */
async function main() {
  const args = parseArgs();

  if (!args.sessionId) {
    console.log(chalk.red('❌ Must specify --session-id'));
    printUsage();
    process.exit(1);
  }

  // Status chatter shares stdout only when the CSV itself goes to a file.
  const status = args.output ? console.log : console.error;

  status(chalk.cyan.bold('\n📊 Exporting Metrics to CSV\n'));
  status(chalk.gray(`Session ID: ${args.sessionId}\n`));

  const output = await exportMetrics(args.sessionId);

  if (args.output) {
    await fs.writeFile(args.output, output);
    status(chalk.green(`✅ Exported to: ${args.output}`));
  } else {
    status(chalk.cyan('CSV Output:\n'));
    console.log(output);
  }

  status();
}
|
|
||||||
|
|
||||||
// Run the CLI; on any rejection print the message (plus the stack trace when
// the DEBUG environment variable is set) and exit with status 1.
main().catch((fatal) => {
  const report = [chalk.red.bold(`\n🚨 Fatal error: ${fatal.message}`)];
  if (process.env.DEBUG) {
    report.push(chalk.gray(fatal.stack));
  }
  for (const line of report) {
    console.log(line);
  }
  process.exit(1);
});
|
|
||||||
@@ -0,0 +1,213 @@
|
|||||||
|
#!/bin/bash
# Shannon CLI - AI Penetration Testing Framework

# Abort on the first failing command.
set -e

# Compose file used by every docker compose invocation in this script.
COMPOSE_FILE="docker-compose.yml"

# Load .env if present; allexport makes every variable it defines visible to
# child processes such as docker compose.
if [[ -f .env ]]; then
  set -o allexport
  . .env
  set +o allexport
fi
|
||||||
|
|
||||||
|
# Print the CLI usage text to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally with no
# variable or command expansion.
show_help() {
  cat << 'EOF'
Shannon - AI Penetration Testing Framework

Usage:
./shannon start URL=<url> REPO=<path> Start a pentest workflow
./shannon logs ID=<workflow-id> Tail logs for a specific workflow
./shannon query ID=<workflow-id> Query workflow progress
./shannon stop Stop all containers
./shannon help Show this help message

Options for 'start':
CONFIG=<path> Configuration file (YAML)
OUTPUT=<path> Output directory for reports
PIPELINE_TESTING=true Use minimal prompts for fast testing
REBUILD=true Rebuild the worker image without cache when containers are started

Options for 'stop':
CLEAN=true Remove all data including volumes

Examples:
./shannon start URL=https://example.com REPO=/path/to/repo
./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./config.yaml
./shannon logs ID=example.com_shannon-1234567890
./shannon query ID=shannon-1234567890
./shannon stop CLEAN=true

Monitor workflows at http://localhost:8233
EOF
}
|
||||||
|
|
||||||
|
# Parse KEY=value arguments into the global variables of the same name.
# Recognised keys: URL, REPO, CONFIG, OUTPUT, ID, CLEAN, PIPELINE_TESTING,
# REBUILD. Everything after the first '=' is taken as the value, so values may
# themselves contain '='. Any other argument is reported on stderr and skipped,
# so a mistyped key no longer disappears silently.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      URL=*) URL="${arg#URL=}" ;;
      REPO=*) REPO="${arg#REPO=}" ;;
      CONFIG=*) CONFIG="${arg#CONFIG=}" ;;
      OUTPUT=*) OUTPUT="${arg#OUTPUT=}" ;;
      ID=*) ID="${arg#ID=}" ;;
      CLEAN=*) CLEAN="${arg#CLEAN=}" ;;
      PIPELINE_TESTING=*) PIPELINE_TESTING="${arg#PIPELINE_TESTING=}" ;;
      REBUILD=*) REBUILD="${arg#REBUILD=}" ;;
      *) echo "WARNING: Ignoring unrecognized argument: $arg" >&2 ;;
    esac
  done
}
|
||||||
|
|
||||||
|
# Check if Temporal is running and healthy.
# Succeeds only when the cluster-health command, run inside the temporal
# container, prints a line containing "SERVING". Its stderr is discarded.
is_temporal_ready() {
  local health
  health="$(docker compose -f "$COMPOSE_FILE" exec -T temporal \
    temporal operator cluster health --address localhost:7233 2>/dev/null)" || true
  grep -q "SERVING" <<< "$health"
}
|
||||||
|
|
||||||
|
# Ensure containers are running.
# Returns immediately when Temporal already reports healthy. Otherwise it
# optionally rebuilds the worker image without cache (REBUILD=true), brings the
# compose stack up, and polls Temporal up to 30 times, sleeping 2 seconds
# between checks. Exits the script with status 1 if the 30th check still fails.
ensure_containers() {
  # Quick check: if Temporal is already healthy, we're good
  if is_temporal_ready; then
    return 0
  fi

  # Need to start containers
  echo "Starting Shannon containers..."
  if [[ "$REBUILD" == "true" ]]; then
    # Force rebuild without cache (use when code changes aren't being picked up)
    echo "Rebuilding with --no-cache..."
    docker compose -f "$COMPOSE_FILE" build --no-cache worker
  fi
  docker compose -f "$COMPOSE_FILE" up -d --build

  # Wait for Temporal to be ready
  echo "Waiting for Temporal to be ready..."
  local attempt=1
  local max_attempts=30
  while true; do
    if is_temporal_ready; then
      echo "Temporal is ready!"
      return 0
    fi
    if (( attempt == max_attempts )); then
      echo "Timeout waiting for Temporal"
      exit 1
    fi
    sleep 2
    attempt=$(( attempt + 1 ))
  done
}
|
||||||
|
|
||||||
|
# Start a pentest workflow.
# Requires URL and REPO, plus ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in
# the environment. Optional: CONFIG, OUTPUT, PIPELINE_TESTING=true.
# Starts the containers if needed, then submits the workflow through the
# client inside the worker container. Exits with status 1 on missing input.
cmd_start() {
  parse_args "$@"

  # Validate required vars
  if [ -z "$URL" ] || [ -z "$REPO" ]; then
    echo "ERROR: URL and REPO are required"
    echo "Usage: ./shannon start URL=<url> REPO=<path>"
    exit 1
  fi

  # Check for API key
  if [ -z "$ANTHROPIC_API_KEY" ] && [ -z "$CLAUDE_CODE_OAUTH_TOKEN" ]; then
    echo "ERROR: Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env"
    exit 1
  fi

  # Determine container path for REPO
  # - If REPO is already a container path (/benchmarks/*, /target-repo), use as-is
  # - Otherwise, it's a host path - mount to /target-repo and use that
  case "$REPO" in
    /benchmarks/*|/target-repo|/target-repo/*)
      CONTAINER_REPO="$REPO"
      ;;
    *)
      # Host path - export for docker-compose mount
      export TARGET_REPO="$REPO"
      CONTAINER_REPO="/target-repo"
      ;;
  esac

  # Ensure containers are running (starts them if needed)
  ensure_containers

  # Build optional args as an array so that paths containing spaces or glob
  # characters reach the client as single, unmodified arguments.
  ARGS=()
  if [ -n "$CONFIG" ]; then
    ARGS+=(--config "$CONFIG")
  fi
  if [ -n "$OUTPUT" ]; then
    ARGS+=(--output "$OUTPUT")
  fi
  if [ "$PIPELINE_TESTING" = "true" ]; then
    ARGS+=(--pipeline-testing)
  fi

  # Run the client to submit workflow
  docker compose -f "$COMPOSE_FILE" exec -T worker \
    node dist/temporal/client.js "$URL" "$CONTAINER_REPO" "${ARGS[@]}"
}
|
||||||
|
|
||||||
|
# Tail the workflow log of one workflow.
# Requires ID; follows ./audit-logs/<ID>/workflow.log with `tail -f`.
# Exits with status 1 when ID is missing or the log file does not exist.
cmd_logs() {
  parse_args "$@"

  if [[ -z "$ID" ]]; then
    printf '%s\n' \
      "ERROR: ID is required" \
      "Usage: ./shannon logs ID=<workflow-id>"
    exit 1
  fi

  WORKFLOW_LOG="./audit-logs/${ID}/workflow.log"

  if [[ ! -f "$WORKFLOW_LOG" ]]; then
    printf '%s\n' \
      "ERROR: Workflow log not found: $WORKFLOW_LOG" \
      "" \
      "Possible causes:" \
      " - Workflow hasn't started yet" \
      " - Workflow ID is incorrect" \
      " - Workflow is using a custom OUTPUT path" \
      "" \
      "Check: ./shannon query ID=$ID for workflow details"
    exit 1
  fi

  echo "Tailing workflow log: $WORKFLOW_LOG"
  tail -f "$WORKFLOW_LOG"
}
|
||||||
|
|
||||||
|
# Query the progress of one workflow.
# Requires ID; runs the query client inside the worker container.
# Exits with status 1 when ID is missing.
cmd_query() {
  parse_args "$@"

  if [[ -z "$ID" ]]; then
    printf '%s\n' \
      "ERROR: ID is required" \
      "Usage: ./shannon query ID=<workflow-id>"
    exit 1
  fi

  docker compose -f "$COMPOSE_FILE" exec -T worker \
    node dist/temporal/query.js "$ID"
}
|
||||||
|
|
||||||
|
# Stop all containers of the compose stack.
# With CLEAN=true, `-v` is added to `docker compose down` so volumes are
# removed as well.
cmd_stop() {
  parse_args "$@"

  local -a down_cmd=(down)
  if [[ "$CLEAN" == "true" ]]; then
    down_cmd+=(-v)
  fi

  docker compose -f "$COMPOSE_FILE" "${down_cmd[@]}"
}
|
||||||
|
|
||||||
|
# Main command dispatch.
# The first argument selects the sub-command; the remaining arguments are
# passed through. No argument, or help/--help/-h, prints the help text.
# An unknown command prints an error and the help text, then exits with
# status 1 so that typos are not mistaken for success by calling scripts.
case "${1:-help}" in
  start)
    shift
    cmd_start "$@"
    ;;
  logs)
    shift
    cmd_logs "$@"
    ;;
  query)
    shift
    cmd_query "$@"
    ;;
  stop)
    shift
    cmd_stop "$@"
    ;;
  help|--help|-h)
    show_help
    ;;
  *)
    echo "ERROR: Unknown command: $1" >&2
    show_help
    exit 1
    ;;
esac
|
||||||
-481
@@ -1,481 +0,0 @@
|
|||||||
#!/usr/bin/env zx
|
|
||||||
|
|
||||||
import { path, fs } from 'zx';
|
|
||||||
import chalk from 'chalk';
|
|
||||||
import dotenv from 'dotenv';
|
|
||||||
|
|
||||||
dotenv.config();
|
|
||||||
|
|
||||||
// Config and Tools
|
|
||||||
import { parseConfig, distributeConfig } from './src/config-parser.js';
|
|
||||||
import { checkToolAvailability, handleMissingTools } from './src/tool-checker.js';
|
|
||||||
|
|
||||||
// Session and Checkpoints
|
|
||||||
import { createSession, updateSession, getSession, AGENTS } from './src/session-manager.js';
|
|
||||||
import { runPhase, getGitCommitHash } from './src/checkpoint-manager.js';
|
|
||||||
|
|
||||||
// Setup and Deliverables
|
|
||||||
import { setupLocalRepo } from './src/setup/environment.js';
|
|
||||||
|
|
||||||
// AI and Prompts
|
|
||||||
import { runClaudePromptWithRetry } from './src/ai/claude-executor.js';
|
|
||||||
import { loadPrompt } from './src/prompts/prompt-manager.js';
|
|
||||||
|
|
||||||
// Phases
|
|
||||||
import { executePreReconPhase } from './src/phases/pre-recon.js';
|
|
||||||
import { assembleFinalReport } from './src/phases/reporting.js';
|
|
||||||
|
|
||||||
// Utils
|
|
||||||
import { timingResults, costResults, displayTimingSummary, Timer } from './src/utils/metrics.js';
|
|
||||||
import { formatDuration, generateAuditPath } from './src/audit/utils.js';
|
|
||||||
|
|
||||||
// CLI
|
|
||||||
import { handleDeveloperCommand } from './src/cli/command-handler.js';
|
|
||||||
import { showHelp, displaySplashScreen } from './src/cli/ui.js';
|
|
||||||
import { validateWebUrl, validateRepoPath } from './src/cli/input-validator.js';
|
|
||||||
|
|
||||||
// Error Handling
|
|
||||||
import { PentestError, logError } from './src/error-handling.js';
|
|
||||||
|
|
||||||
// Session Manager Functions
|
|
||||||
import {
|
|
||||||
calculateVulnerabilityAnalysisSummary,
|
|
||||||
calculateExploitationSummary,
|
|
||||||
getNextAgent
|
|
||||||
} from './src/session-manager.js';
|
|
||||||
|
|
||||||
// Configure zx to disable timeouts (let tools run as long as needed)
$.timeout = 0;

// On SIGINT or SIGTERM, announce the signal and exit with status 0.
for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, async () => {
    console.log(chalk.yellow(`\n⚠️ Received ${signalName}, cleaning up...`));

    process.exit(0);
  });
}
|
|
||||||
|
|
||||||
/**
 * Map an agent name to the 1-based pipeline phase that runs it.
 * Unrecognised names map to phase 1, so the whole pipeline runs.
 */
function resolveStartPhase(agentName) {
  const phaseByAgent = new Map([
    ['pre-recon', 1],
    ['recon', 2],
    ['injection-vuln', 3],
    ['xss-vuln', 3],
    ['auth-vuln', 3],
    ['ssrf-vuln', 3],
    ['authz-vuln', 3],
    ['injection-exploit', 4],
    ['xss-exploit', 4],
    ['auth-exploit', 4],
    ['ssrf-exploit', 4],
    ['authz-exploit', 4],
    ['report', 5]
  ]);
  return phaseByAgent.has(agentName) ? phaseByAgent.get(agentName) : 1;
}

/**
 * Main orchestration function: runs the pentest pipeline phase by phase.
 *
 * Loads the optional configuration, checks tool availability, sets up the
 * local repository and creates a session. It then runs every phase from the
 * one owning the session's next pending agent onward: pre-recon,
 * reconnaissance, vulnerability analysis, exploitation and reporting.
 * Finally it stores timing and cost data on the session.
 *
 * The process exits directly, without returning, in three cases: repository
 * setup fails (status 1), `--setup-only` is present in `process.argv`
 * (status 0), or the session has no pending agent (status 0).
 *
 * @param webUrl Target URL under test.
 * @param repoPath Path of the source repository to analyse.
 * @param configPath Optional config file; a relative path is looked up in
 *   `<cwd>/configs` first and used as given when not found there.
 * @param pipelineTestingMode Passed through to prompt loading and phase runners.
 * @returns {Promise<{reportPath: string, auditLogsPath: *}>} Path of the final
 *   report inside the source directory and the audit log path of the session.
 * @throws Rethrows configuration loading errors after logging them; throws a
 *   `PentestError` when the output directories cannot be created.
 */
async function main(webUrl, repoPath, configPath = null, pipelineTestingMode = false) {
  const totalTimer = new Timer('total-execution');
  timingResults.total = totalTimer;

  // Display splash screen
  await displaySplashScreen();

  console.log(chalk.cyan.bold('🚀 AI PENETRATION TESTING AGENT'));
  console.log(chalk.cyan(`🎯 Target: ${webUrl}`));
  console.log(chalk.cyan(`📁 Source: ${repoPath}`));
  if (configPath) {
    console.log(chalk.cyan(`⚙️ Config: ${configPath}`));
  }
  console.log(chalk.gray('─'.repeat(60)));

  // Parse configuration if provided
  let distributedConfig = null;
  if (configPath) {
    try {
      // Resolve config path - check configs folder if relative path
      let resolvedConfigPath = configPath;
      if (!path.isAbsolute(configPath)) {
        const configInConfigsDir = path.join(process.cwd(), 'configs', configPath);
        // Check if file exists in configs directory, otherwise use original path
        if (await fs.pathExists(configInConfigsDir)) {
          resolvedConfigPath = configInConfigsDir;
        }
      }

      const config = await parseConfig(resolvedConfigPath);
      distributedConfig = distributeConfig(config);
      console.log(chalk.green(`✅ Configuration loaded successfully`));
    } catch (error) {
      await logError(error, `Configuration loading from ${configPath}`);
      throw error; // Let the main error boundary handle it
    }
  }

  // Check tool availability
  const toolAvailability = await checkToolAvailability();
  handleMissingTools(toolAvailability);

  // Setup local repository
  console.log(chalk.blue('📁 Setting up local repository...'));
  let sourceDir;
  try {
    sourceDir = await setupLocalRepo(repoPath);
    console.log(chalk.green('✅ Local repository setup successfully'));
  } catch (error) {
    console.log(chalk.red(`❌ Failed to setup local repository: ${error.message}`));
    console.log(chalk.gray('This could be due to:'));
    console.log(chalk.gray(' - Insufficient permissions'));
    console.log(chalk.gray(' - Repository path not accessible'));
    console.log(chalk.gray(' - Git initialization issues'));
    console.log(chalk.gray(' - Insufficient disk space'));
    process.exit(1);
  }

  // Template variables handed to prompt loading and the pre-recon phase.
  const variables = { webUrl, repoPath, sourceDir };

  // Create session for tracking (in normal mode)
  const session = await createSession(webUrl, repoPath, configPath, sourceDir);
  console.log(chalk.blue(`📝 Session created: ${session.id.substring(0, 8)}...`));

  // If setup-only mode, exit after session creation
  if (process.argv.includes('--setup-only')) {
    console.log(chalk.green('✅ Setup complete! Local repository setup and session created.'));
    console.log(chalk.gray('Use developer commands to run individual agents:'));
    console.log(chalk.gray(' ./shannon.mjs --run-agent pre-recon'));
    console.log(chalk.gray(' ./shannon.mjs --status'));
    process.exit(0);
  }

  // Helper function to update session progress. Failures are reported but do
  // not abort the run.
  const updateSessionProgress = async (agentName, commitHash = null) => {
    try {
      const updates = {
        completedAgents: [...new Set([...session.completedAgents, agentName])],
        failedAgents: session.failedAgents.filter(name => name !== agentName), // Remove from failed if it was there
        status: 'in-progress'
      };

      if (commitHash) {
        updates.checkpoints = { ...session.checkpoints, [agentName]: commitHash };
      }

      await updateSession(session.id, updates);
      // Update local session object for subsequent updates
      Object.assign(session, updates);
      console.log(chalk.gray(` 📝 Session updated: ${agentName} completed`));
    } catch (error) {
      console.log(chalk.yellow(` ⚠️ Failed to update session: ${error.message}`));
    }
  };

  // Create outputs directory in source directory
  try {
    const outputsDir = path.join(sourceDir, 'outputs');
    await fs.ensureDir(outputsDir);
    await fs.ensureDir(path.join(outputsDir, 'schemas'));
    await fs.ensureDir(path.join(outputsDir, 'scans'));
  } catch (error) {
    throw new PentestError(
      `Failed to create output directories: ${error.message}`,
      'filesystem',
      false,
      { sourceDir, originalError: error.message }
    );
  }

  // Check if we should continue from where session left off
  const nextAgent = getNextAgent(session);
  if (!nextAgent) {
    console.log(chalk.green(`✅ All agents completed! Session is finished.`));
    await displayTimingSummary(timingResults, costResults, session.completedAgents);
    process.exit(0);
  }

  console.log(chalk.blue(`🔄 Continuing from ${nextAgent.displayName} (${session.completedAgents.length}/${Object.keys(AGENTS).length} agents completed)`));

  // Determine which phase to start from based on next agent
  const startPhase = resolveStartPhase(nextAgent.name);

  // PHASE 1: PRE-RECONNAISSANCE
  if (startPhase <= 1) {
    const { duration: preReconDuration } = await executePreReconPhase(
      webUrl,
      sourceDir,
      variables,
      distributedConfig,
      toolAvailability,
      pipelineTestingMode,
      session.id // Pass session ID for logging
    );
    timingResults.phases['pre-recon'] = preReconDuration;
    await updateSessionProgress('pre-recon');
  }

  // PHASE 2: RECONNAISSANCE
  if (startPhase <= 2) {
    console.log(chalk.magenta.bold('\n🔎 PHASE 2: RECONNAISSANCE'));
    console.log(chalk.magenta('Analyzing initial findings...'));
    const reconTimer = new Timer('phase-2-recon');
    await runClaudePromptWithRetry(
      await loadPrompt('recon', variables, distributedConfig, pipelineTestingMode),
      sourceDir,
      '*',
      '',
      AGENTS['recon'].displayName,
      'recon', // Agent name for snapshot creation
      chalk.cyan,
      { id: session.id, webUrl } // Session metadata for audit logging (STANDARD: use 'id' field)
    );
    const reconDuration = reconTimer.stop();
    timingResults.phases['recon'] = reconDuration;

    console.log(chalk.green(`✅ Reconnaissance complete in ${formatDuration(reconDuration)}`));
    await updateSessionProgress('recon');
  }

  // PHASE 3: VULNERABILITY ANALYSIS
  if (startPhase <= 3) {
    const vulnTimer = new Timer('phase-3-vulnerability-analysis');
    console.log(chalk.red.bold('\n🚨 PHASE 3: VULNERABILITY ANALYSIS'));

    await runPhase('vulnerability-analysis', session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);

    // Display vulnerability analysis summary
    const currentSession = await getSession(session.id);
    const vulnSummary = calculateVulnerabilityAnalysisSummary(currentSession);
    console.log(chalk.blue(`\n📊 Vulnerability Analysis Summary: ${vulnSummary.totalAnalyses} analyses, ${vulnSummary.totalVulnerabilities} vulnerabilities found, ${vulnSummary.exploitationCandidates} ready for exploitation`));

    const vulnDuration = vulnTimer.stop();
    timingResults.phases['vulnerability-analysis'] = vulnDuration;

    console.log(chalk.green(`✅ Vulnerability analysis phase complete in ${formatDuration(vulnDuration)}`));
  }

  // PHASE 4: EXPLOITATION
  if (startPhase <= 4) {
    const exploitTimer = new Timer('phase-4-exploitation');
    console.log(chalk.red.bold('\n💥 PHASE 4: EXPLOITATION'));

    // Get fresh session data to ensure we have latest vulnerability analysis results
    const freshSession = await getSession(session.id);
    await runPhase('exploitation', freshSession, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);

    // Display exploitation summary
    const finalSession = await getSession(session.id);
    const exploitSummary = calculateExploitationSummary(finalSession);
    if (exploitSummary.eligibleExploits > 0) {
      console.log(chalk.blue(`\n🎯 Exploitation Summary: ${exploitSummary.totalAttempts}/${exploitSummary.eligibleExploits} attempted, ${exploitSummary.skippedExploits} skipped (no vulnerabilities)`));
    } else {
      console.log(chalk.gray(`\n🎯 Exploitation Summary: No exploitation attempts (no vulnerabilities found)`));
    }

    const exploitDuration = exploitTimer.stop();
    timingResults.phases['exploitation'] = exploitDuration;

    console.log(chalk.green(`✅ Exploitation phase complete in ${formatDuration(exploitDuration)}`));
  }

  // PHASE 5: REPORTING
  if (startPhase <= 5) {
    console.log(chalk.greenBright.bold('\n📊 PHASE 5: REPORTING'));
    console.log(chalk.greenBright('Generating executive summary and assembling final report...'));
    const reportTimer = new Timer('phase-5-reporting');

    // First, assemble all deliverables into a single concatenated report
    console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));

    try {
      await assembleFinalReport(sourceDir);
    } catch (error) {
      // Best effort: an assembly failure is reported and the reporter agent still runs.
      console.log(chalk.red(`❌ Error assembling final report: ${error.message}`));
    }

    // Then run reporter agent to create executive summary and clean up hallucinations
    console.log(chalk.blue('📋 Generating executive summary and cleaning up report...'));
    await runClaudePromptWithRetry(
      await loadPrompt('report-executive', variables, distributedConfig, pipelineTestingMode),
      sourceDir,
      '*',
      '',
      'Executive Summary and Report Cleanup',
      'report', // Agent name for snapshot creation
      chalk.cyan,
      { id: session.id, webUrl } // Session metadata for audit logging (STANDARD: use 'id' field)
    );

    const reportDuration = reportTimer.stop();
    timingResults.phases['reporting'] = reportDuration;

    console.log(chalk.green(`✅ Final report generated in ${formatDuration(reportDuration)}`));

    // Get the commit hash after successful report generation for checkpoint
    try {
      const reportCommitHash = await getGitCommitHash(sourceDir);
      await updateSessionProgress('report', reportCommitHash);
      console.log(chalk.gray(` 📍 Report checkpoint saved: ${reportCommitHash.substring(0, 8)}`));
    } catch (error) {
      console.log(chalk.yellow(` ⚠️ Failed to save report checkpoint: ${error.message}`));
      await updateSessionProgress('report'); // Fallback without checkpoint
    }
  }

  // Calculate final timing and cost data
  const totalDuration = timingResults.total.stop();
  const timingBreakdown = {
    total: totalDuration,
    phases: { ...timingResults.phases },
    agents: { ...timingResults.agents },
    commands: { ...timingResults.commands }
  };

  // Use accumulated cost data
  const costBreakdown = {
    total: costResults.total,
    agents: { ...costResults.agents }
  };

  // Mark session as completed with timing and cost data
  await updateSession(session.id, {
    status: 'completed',
    timingBreakdown,
    costBreakdown
  });

  // Display comprehensive timing summary
  // NOTE(review): called here without arguments and without await, but with
  // (timingResults, costResults, completedAgents) and await in the
  // "all agents completed" path above; confirm which call form is intended.
  displayTimingSummary();

  console.log(chalk.cyan.bold('\n🎉 PENETRATION TESTING COMPLETE!'));
  console.log(chalk.gray('─'.repeat(60)));

  // Calculate audit logs path
  const auditLogsPath = generateAuditPath(session);

  // Return final report path and audit logs path for clickable output
  return {
    reportPath: path.join(sourceDir, 'deliverables', 'comprehensive_security_assessment_report.md'),
    auditLogsPath
  };
}
|
|
||||||
|
|
||||||
// Entry point - handle both direct node execution and shebang execution
let args = process.argv.slice(2);

// If first arg is the script name (from shebang), remove it
if (args[0] && args[0].includes('shannon.mjs')) {
  args = args.slice(1);
}

// Handle help flag before any other parsing so that a help request always wins,
// even when another flag on the command line is malformed (e.g. a trailing --config
// without a value, which would otherwise exit with an error before help is shown).
if (args.includes('--help') || args.includes('-h') || args.includes('help')) {
  showHelp();
  process.exit(0);
}

// Parse flags and arguments
let configPath = null;
let pipelineTestingMode = false;
const nonFlagArgs = [];
let developerCommand = null;
const developerCommands = ['--run-phase', '--run-all', '--rollback-to', '--rerun', '--status', '--list-agents', '--cleanup'];

for (let i = 0; i < args.length; i++) {
  const arg = args[i];

  if (arg === '--config') {
    // --config consumes the following argument as its value
    if (i + 1 >= args.length) {
      console.log(chalk.red('❌ --config flag requires a file path'));
      process.exit(1);
    }
    configPath = args[i + 1];
    i++; // Skip the next argument
  } else if (arg === '--pipeline-testing') {
    pipelineTestingMode = true;
  } else if (developerCommands.includes(arg)) {
    developerCommand = arg;

    // Collect remaining args for the developer command: every other "--" flag is dropped,
    // --pipeline-testing is kept only long enough to be detected below.
    const remainingArgs = args.slice(i + 1).filter(rest => !rest.startsWith('--') || rest === '--pipeline-testing');

    // Check for --pipeline-testing in remaining args
    if (remainingArgs.includes('--pipeline-testing')) {
      pipelineTestingMode = true;
    }

    // Add non-flag args (excluding --pipeline-testing)
    nonFlagArgs.push(...remainingArgs.filter(rest => rest !== '--pipeline-testing'));
    break; // Stop parsing after developer command
  } else if (!arg.startsWith('-')) {
    // Positional argument; unrecognised "-" flags are ignored
    nonFlagArgs.push(arg);
  }
}

// Handle developer commands
if (developerCommand) {
  await handleDeveloperCommand(developerCommand, nonFlagArgs, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);

  process.exit(0);
}

// Handle no arguments - show help
if (nonFlagArgs.length === 0) {
  console.log(chalk.red.bold('❌ Error: No arguments provided\n'));
  showHelp();
  process.exit(1);
}

// Handle insufficient arguments
if (nonFlagArgs.length < 2) {
  console.log(chalk.red('❌ Both WEB_URL and REPO_PATH are required'));
  console.log(chalk.gray('Usage: ./shannon.mjs <WEB_URL> <REPO_PATH> [--config config.yaml]'));
  console.log(chalk.gray('Help: ./shannon.mjs --help'));
  process.exit(1);
}

const [webUrl, repoPath] = nonFlagArgs;

// Validate web URL
const webUrlValidation = validateWebUrl(webUrl);
if (!webUrlValidation.valid) {
  console.log(chalk.red(`❌ Invalid web URL: ${webUrlValidation.error}`));
  console.log(chalk.gray(`Expected format: https://example.com`));
  process.exit(1);
}

// Validate repository path
const repoPathValidation = await validateRepoPath(repoPath);
if (!repoPathValidation.valid) {
  console.log(chalk.red(`❌ Invalid repository path: ${repoPathValidation.error}`));
  console.log(chalk.gray(`Expected: Accessible local directory path`));
  process.exit(1);
}

// Success - show validated inputs
console.log(chalk.green('✅ Input validation passed:'));
console.log(chalk.gray(` Target Web URL: ${webUrl}`));
console.log(chalk.gray(` Target Repository: ${repoPathValidation.path}\n`));
// Show a readable placeholder instead of the literal "null" when --config was not given
console.log(chalk.gray(` Config Path: ${configPath ?? '(none)'}\n`));
if (pipelineTestingMode) {
  console.log(chalk.yellow('⚡ PIPELINE TESTING MODE ENABLED - Using minimal test prompts for fast pipeline validation\n'));
}

try {
  const result = await main(webUrl, repoPathValidation.path, configPath, pipelineTestingMode);
  console.log(chalk.green.bold('\n📄 FINAL REPORT AVAILABLE:'));
  console.log(chalk.cyan(result.reportPath));
  console.log(chalk.green.bold('\n📂 AUDIT LOGS AVAILABLE:'));
  console.log(chalk.cyan(result.auditLogsPath));

} catch (error) {
  // Enhanced error boundary with proper logging
  if (error instanceof PentestError) {
    await logError(error, 'Main execution failed');
    console.log(chalk.red.bold('\n🚨 PENTEST EXECUTION FAILED'));
    console.log(chalk.red(` Type: ${error.type}`));
    console.log(chalk.red(` Retryable: ${error.retryable ? 'Yes' : 'No'}`));

    if (error.retryable) {
      console.log(chalk.yellow(' Consider running the command again or checking network connectivity.'));
    }
  } else {
    console.log(chalk.red.bold('\n🚨 UNEXPECTED ERROR OCCURRED'));
    console.log(chalk.red(` Error: ${error?.message || error?.toString() || 'Unknown error'}`));

    // Stack traces are only printed when the DEBUG environment variable is set
    if (process.env.DEBUG) {
      console.log(chalk.gray(` Stack: ${error?.stack || 'No stack trace available'}`));
    }
  }

  process.exit(1);
}
|
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
// Null Object pattern for audit logging - callers never check for null
|
||||||
|
|
||||||
|
import type { AuditSession } from '../audit/index.js';
|
||||||
|
import { formatTimestamp } from '../utils/formatting.js';
|
||||||
|
|
||||||
|
/**
 * Contract for per-agent audit logging.
 *
 * Every method is asynchronous and resolves with no value, so callers can
 * `await` a logger without knowing which implementation they were handed.
 */
export interface AuditLogger {
  /** Records the content produced by the model on the given turn. */
  logLlmResponse(turn: number, content: string): Promise<void>;

  /** Records that a tool invocation started, together with its parameters. */
  logToolStart(toolName: string, parameters: unknown): Promise<void>;

  /** Records the result of a tool invocation. */
  logToolEnd(result: unknown): Promise<void>;

  /** Records a failure together with the elapsed duration and the number of turns taken. */
  logError(error: Error, duration: number, turns: number): Promise<void>;
}
|
||||||
|
|
||||||
|
/**
 * AuditLogger that forwards every call to an AuditSession.
 *
 * Each method maps to one `logEvent` call whose payload is the method's
 * arguments plus a `timestamp` produced by `formatTimestamp()`.
 */
class RealAuditLogger implements AuditLogger {
  private auditSession: AuditSession;

  constructor(auditSession: AuditSession) {
    this.auditSession = auditSession;
  }

  async logLlmResponse(turn: number, content: string): Promise<void> {
    await this.record('llm_response', { turn, content });
  }

  async logToolStart(toolName: string, parameters: unknown): Promise<void> {
    await this.record('tool_start', { toolName, parameters });
  }

  async logToolEnd(result: unknown): Promise<void> {
    await this.record('tool_end', { result });
  }

  async logError(error: Error, duration: number, turns: number): Promise<void> {
    await this.record('error', {
      message: error.message,
      errorType: error.constructor.name,
      stack: error.stack,
      duration,
      turns,
    });
  }

  /** Sends one event to the session, appending the timestamp as the last payload field. */
  private async record(
    eventType: Parameters<AuditSession['logEvent']>[0],
    details: Record<string, unknown>,
  ): Promise<void> {
    await this.auditSession.logEvent(eventType, {
      ...details,
      timestamp: formatTimestamp(),
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Null Object implementation of AuditLogger.
 *
 * Every method ignores its arguments and returns an already-resolved promise,
 * so it can stand in wherever no audit session is available.
 */
class NullAuditLogger implements AuditLogger {
  logLlmResponse(_turn: number, _content: string): Promise<void> {
    return Promise.resolve();
  }

  logToolStart(_toolName: string, _parameters: unknown): Promise<void> {
    return Promise.resolve();
  }

  logToolEnd(_result: unknown): Promise<void> {
    return Promise.resolve();
  }

  logError(_error: Error, _duration: number, _turns: number): Promise<void> {
    return Promise.resolve();
  }
}
|
||||||
|
|
||||||
|
/**
 * Picks the AuditLogger implementation for the given session: a logger bound
 * to the session when one is provided, otherwise the no-op logger.
 */
export function createAuditLogger(auditSession: AuditSession | null): AuditLogger {
  return auditSession ? new RealAuditLogger(auditSession) : new NullAuditLogger();
}
|
||||||
@@ -1,696 +0,0 @@
|
|||||||
import { $, fs, path } from 'zx';
|
|
||||||
import chalk from 'chalk';
|
|
||||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
||||||
import { fileURLToPath } from 'url';
|
|
||||||
import { dirname } from 'path';
|
|
||||||
|
|
||||||
import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
|
|
||||||
import { ProgressIndicator } from '../progress-indicator.js';
|
|
||||||
import { timingResults, costResults, Timer } from '../utils/metrics.js';
|
|
||||||
import { formatDuration } from '../audit/utils.js';
|
|
||||||
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace } from '../utils/git-manager.js';
|
|
||||||
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
|
|
||||||
import { filterJsonToolCalls, getAgentPrefix } from '../utils/output-formatter.js';
|
|
||||||
import { generateSessionLogPath } from '../session-manager.js';
|
|
||||||
import { AuditSession } from '../audit/index.js';
|
|
||||||
import { createShannonHelperServer } from '../../mcp-server/src/index.js';
|
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
|
||||||
const __dirname = dirname(__filename);
|
|
||||||
|
|
||||||
/**
 * Translate an agent name into the prompt name used as the MCP_AGENT_MAPPING key.
 *
 * Three names are mapped explicitly; names ending in "-vuln" or "-exploit"
 * have that suffix moved to the front; anything else is returned untouched.
 *
 * @param {string} agentName - Agent name (e.g., 'xss-vuln', 'injection-exploit')
 * @returns {string} Prompt name (e.g., 'vuln-xss', 'exploit-injection')
 */
function agentNameToPromptName(agentName) {
  // Explicitly mapped names
  switch (agentName) {
    case 'pre-recon':
      return 'pre-recon-code';
    case 'report':
      return 'report-executive';
    case 'recon':
      return 'recon';
  }

  // Pattern: {type}-vuln → vuln-{type}, {type}-exploit → exploit-{type}
  const suffixMatch = agentName.match(/^(.+)-(vuln|exploit)$/);
  if (!suffixMatch) {
    // No known pattern: the prompt name is the agent name itself
    return agentName;
  }

  const [, type, phase] = suffixMatch;
  return `${phase}-${type}`;
}
|
|
||||||
|
|
||||||
// Decide whether an agent run produced acceptable output.
// The run must report success with a non-empty result; after that the validator
// registered in AGENT_VALIDATORS under the agent's name is called with sourceDir.
// Agents without a registered validator are accepted. Any exception raised while
// validating is reported and treated as a failed validation.
async function validateAgentOutput(result, agentName, sourceDir) {
  console.log(chalk.blue(` 🔍 Validating ${agentName} agent output`));

  try {
    // Guard: the agent itself must have completed successfully
    const agentSucceeded = result.success && result.result;
    if (!agentSucceeded) {
      console.log(chalk.red(` ❌ Validation failed: Agent execution was unsuccessful`));
      return false;
    }

    // Guard: no validator registered for this agent means nothing further to check
    const checkDeliverables = AGENT_VALIDATORS[agentName];
    if (!checkDeliverables) {
      console.log(chalk.yellow(` ⚠️ No validator found for agent "${agentName}" - assuming success`));
      console.log(chalk.green(` ✅ Validation passed: Unknown agent with successful result`));
      return true;
    }

    console.log(chalk.blue(` 📋 Using validator for agent: ${agentName}`));
    console.log(chalk.blue(` 📂 Source directory: ${sourceDir}`));

    // Run the validator and report its verdict
    const outcome = await checkDeliverables(sourceDir);
    const verdictLine = outcome
      ? chalk.green(` ✅ Validation passed: Required files/structure present`)
      : chalk.red(` ❌ Validation failed: Missing required deliverable files`);
    console.log(verdictLine);

    return outcome;
  } catch (error) {
    console.log(chalk.red(` ❌ Validation failed with error: ${error.message}`));
    return false; // Assume invalid on validation error
  }
}
|
|
||||||
|
|
||||||
// Low-level runner: executes one Claude Agent SDK query for a single agent attempt.
// NOTE: this is NOT a pure function - it prints to the console, updates the shared
// timingResults/costResults registries, emits audit events when an auditSession is
// given and, on failure, appends a JSON line to <sourceDir>/error.log.
// WARNING: This is a low-level function. Use runClaudePromptWithRetry() for agent execution to ensure:
// - Retry logic and error handling
// - Output validation
// - Prompt snapshotting for debugging
// - Git checkpoint/rollback safety
//
// Parameters:
// - prompt, context: the text sent to the model; a non-empty context is prepended to the prompt
// - sourceDir: working directory for the SDK and the directory handed to the helper MCP server
// - allowedTools: accepted for interface compatibility; not read by this function
// - description: human-readable agent label; also drives output mode and metric keys
// - agentName: used to look up the agent's Playwright MCP server (optional)
// - colorFn: chalk function used to colour assistant output
// - sessionMetadata: when it has webUrl and id, a log file path is derived and returned as logFile
// - auditSession: optional audit sink receiving llm_response/tool_start/tool_end/error events
// - attemptNumber: attempt counter embedded in the derived log file name
//
// Returns:
// - success: { result, success: true, duration, turns, cost, partialCost, apiErrorDetected, logFile? }
// - failure inside the query: { error, errorType, prompt, success: false, duration, cost, retryable }
async function runClaudePrompt(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null, auditSession = null, attemptNumber = 1) {
  // Slug shared by the timer name, the timing/cost registries and the log file name
  const agentKey = description.toLowerCase().replace(/\s+/g, '-');
  const timer = new Timer(`agent-${agentKey}`);
  const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
  let totalCost = 0;
  let partialCost = 0; // Track partial cost for crash safety

  // Auto-detect execution mode to adjust logging behavior
  const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent');
  const useCleanOutput = description.includes('Pre-recon agent') ||
    description.includes('Recon agent') ||
    description.includes('Executive Summary and Report Cleanup') ||
    description.includes('vuln agent') ||
    description.includes('exploit agent');

  // Chooses a label by agent family; the checks run in this fixed order
  const labelFor = (preRecon, recon, report, fallback) =>
    description.includes('Pre-recon') ? preRecon :
    description.includes('Recon') ? recon :
    description.includes('Report') ? report : fallback;

  // Setup progress indicator for clean output agents
  let progressIndicator = null;
  if (useCleanOutput) {
    const agentType = labelFor('pre-reconnaissance', 'reconnaissance', 'report generation', 'analysis');
    progressIndicator = new ProgressIndicator(`Running ${agentType}...`);
  }

  // NOTE: Logging now handled by AuditSession (append-only, crash-safe)
  // Legacy log path generation kept for compatibility
  let logFilePath = null;
  if (sessionMetadata && sessionMetadata.webUrl && sessionMetadata.id) {
    const timestamp = new Date().toISOString().replace(/T/, '_').replace(/[:.]/g, '-').slice(0, 19);
    const logDir = generateSessionLogPath(sessionMetadata.webUrl, sessionMetadata.id);
    logFilePath = path.join(logDir, `${timestamp}_${agentKey}_attempt-${attemptNumber}.log`);
  } else {
    console.log(chalk.blue(` 🤖 Running Claude Code: ${description}...`));
  }

  // Declared outside the try so the catch block can report how far the run got
  let turnCount = 0;

  try {
    // Create MCP server with target directory context
    const shannonHelperServer = createShannonHelperServer(sourceDir);

    // Look up agent's assigned Playwright MCP server
    // Convert agent name (e.g., 'xss-vuln') to prompt name (e.g., 'vuln-xss')
    let playwrightMcpName = null;
    if (agentName) {
      const promptName = agentNameToPromptName(agentName);
      playwrightMcpName = MCP_AGENT_MAPPING[promptName];

      if (playwrightMcpName) {
        console.log(chalk.gray(` 🎭 Assigned ${agentName} → ${playwrightMcpName}`));
      }
    }

    // Configure MCP servers: shannon-helper (SDK) + playwright-agentN (stdio)
    const mcpServers = {
      'shannon-helper': shannonHelperServer,
    };

    // Add Playwright MCP server if this agent needs browser automation
    if (playwrightMcpName) {
      const userDataDir = `/tmp/${playwrightMcpName}`;

      // Detect if running in Docker via explicit environment variable
      const isDocker = process.env.SHANNON_DOCKER === 'true';

      // Build args array - conditionally add --executable-path for Docker
      const mcpArgs = [
        '@playwright/mcp@latest',
        '--isolated',
        '--user-data-dir', userDataDir,
      ];

      // Docker: Use system Chromium; Local: Use Playwright's bundled browsers
      if (isDocker) {
        mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
        mcpArgs.push('--browser', 'chromium');
      }

      mcpServers[playwrightMcpName] = {
        type: 'stdio',
        command: 'npx',
        args: mcpArgs,
        env: {
          ...process.env,
          PLAYWRIGHT_HEADLESS: 'true', // Ensure headless mode for security and CI compatibility
          ...(isDocker && { PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: '1' }), // Only skip in Docker
        },
      };
    }

    const options = {
      model: 'claude-sonnet-4-5-20250929', // Use latest Claude 4.5 Sonnet
      maxTurns: 10_000, // Maximum turns for autonomous work
      cwd: sourceDir, // Set working directory using SDK option
      permissionMode: 'bypassPermissions', // Bypass all permission checks for pentesting
      mcpServers,
    };

    // SDK Options only shown for verbose agents (not clean output)
    if (!useCleanOutput) {
      console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
    }

    let result = null;
    let apiErrorDetected = false;

    // Start progress indicator for clean output agents
    if (progressIndicator) {
      progressIndicator.start();
    }

    // Errors thrown while streaming propagate to the catch block at the end of this function
    for await (const message of query({ prompt: fullPrompt, options })) {
      if (message.type === "assistant") {
        turnCount++;

        // Flatten content blocks to text; blocks without text are shown as JSON
        const content = Array.isArray(message.message.content)
          ? message.message.content.map(c => c.text || JSON.stringify(c)).join('\n')
          : message.message.content;

        if (useCleanOutput) {
          // Clean output for all agents: filter JSON tool calls but show meaningful text
          const cleanedContent = filterJsonToolCalls(content);
          if (cleanedContent.trim()) {
            // Temporarily stop progress indicator to show output
            if (progressIndicator) {
              progressIndicator.stop();
            }

            if (isParallelExecution) {
              // Compact output for parallel agents with prefixes
              const prefix = getAgentPrefix(description);
              console.log(colorFn(`${prefix} ${cleanedContent}`));
            } else {
              // Full turn output for single agents
              console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
              console.log(colorFn(` ${cleanedContent}`));
            }

            // Restart progress indicator after output
            if (progressIndicator) {
              progressIndicator.start();
            }
          }
        } else {
          // Full streaming output - show complete messages with specialist color
          console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
          console.log(colorFn(` ${content}`));
        }

        // Log to audit system (crash-safe, append-only)
        if (auditSession) {
          await auditSession.logEvent('llm_response', {
            turn: turnCount,
            content,
            timestamp: new Date().toISOString()
          });
        }

        // Check for API error patterns in assistant message content
        if (content && typeof content === 'string') {
          const lowerContent = content.toLowerCase();
          if (lowerContent.includes('session limit reached')) {
            throw new PentestError('Session limit reached', 'billing', false);
          }
          if (lowerContent.includes('api error') || lowerContent.includes('terminated')) {
            apiErrorDetected = true;
            console.log(chalk.red(` ⚠️ API Error detected in assistant response: ${content.trim()}`));
          }
        }

      } else if (message.type === "system" && message.subtype === "init") {
        // Show useful system info only for verbose agents
        if (!useCleanOutput) {
          console.log(chalk.blue(` ℹ️ Model: ${message.model}, Permission: ${message.permissionMode}`));
          if (message.mcp_servers && message.mcp_servers.length > 0) {
            const mcpStatus = message.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
            console.log(chalk.blue(` 📦 MCP: ${mcpStatus}`));
          }
        }

      } else if (message.type === "user") {
        // Skip user messages (these are our own inputs echoed back)
        continue;

      } else if (message.type === "tool_use") {
        console.log(chalk.yellow(`\n 🔧 Using Tool: ${message.name}`));
        if (message.input && Object.keys(message.input).length > 0) {
          console.log(chalk.gray(` Input: ${JSON.stringify(message.input, null, 2)}`));
        }

        // Log tool start event
        if (auditSession) {
          await auditSession.logEvent('tool_start', {
            toolName: message.name,
            parameters: message.input,
            timestamp: new Date().toISOString()
          });
        }
      } else if (message.type === "tool_result") {
        console.log(chalk.green(` ✅ Tool Result:`));
        if (message.content) {
          // Show tool results but truncate if too long
          const resultStr = typeof message.content === 'string' ? message.content : JSON.stringify(message.content, null, 2);
          if (resultStr.length > 500) {
            console.log(chalk.gray(` ${resultStr.slice(0, 500)}...\n [Result truncated - ${resultStr.length} total chars]`));
          } else {
            console.log(chalk.gray(` ${resultStr}`));
          }
        }

        // Log tool end event
        if (auditSession) {
          await auditSession.logEvent('tool_end', {
            result: message.content,
            timestamp: new Date().toISOString()
          });
        }
      } else if (message.type === "result") {
        result = message.result;
        const cost = message.total_cost_usd || 0;

        // Completion summary - duration and cost are shown for every agent
        console.log(chalk.magenta(`\n 🏁 COMPLETED:`));
        console.log(chalk.gray(` ⏱️ Duration: ${(message.duration_ms/1000).toFixed(1)}s, Cost: $${cost.toFixed(4)}`));

        if (message.subtype === "error_max_turns") {
          console.log(chalk.red(` ⚠️ Stopped: Hit maximum turns limit`));
        } else if (message.subtype === "error_during_execution") {
          console.log(chalk.red(` ❌ Stopped: Execution error`));
        }

        if (message.permission_denials && message.permission_denials.length > 0) {
          console.log(chalk.yellow(` 🚫 ${message.permission_denials.length} permission denials`));
        }

        // Verbose agents additionally show the result content (if it's reasonable length)
        if (!useCleanOutput && result && typeof result === 'string') {
          if (result.length > 1000) {
            console.log(chalk.magenta(` 📄 ${result.slice(0, 1000)}... [${result.length} total chars]`));
          } else {
            console.log(chalk.magenta(` 📄 ${result}`));
          }
        }

        // Track cost for all agents
        costResults.agents[agentKey] = cost;
        costResults.total += cost;

        // Store cost for return value and partial tracking
        totalCost = cost;
        partialCost = cost;
        break; // The result message ends the run
      } else {
        // Log any other message types we might not be handling
        console.log(chalk.gray(` 💬 ${message.type}: ${JSON.stringify(message, null, 2)}`));
      }
    }

    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // API error detection is logged but not immediately failed
    // Let the retry logic handle validation first
    if (apiErrorDetected) {
      console.log(chalk.yellow(` ⚠️ API Error detected in ${description} - will validate deliverables before failing`));
    }

    // NOTE: Log writing now handled by AuditSession (crash-safe, append-only)
    // Legacy log writing removed - audit system handles this automatically

    // Show completion messages based on agent type.
    // Every clean-output agent (which includes the parallel vuln/exploit agents) has a
    // progress indicator, so the first branch handles all of them.
    if (progressIndicator) {
      const agentType = labelFor('Pre-recon analysis', 'Reconnaissance', 'Report generation', 'Analysis');
      progressIndicator.finish(`${agentType} complete! (${turnCount} turns, ${formatDuration(duration)})`);
    } else if (isParallelExecution) {
      // Compact completion for parallel agents
      const prefix = getAgentPrefix(description);
      console.log(chalk.green(`${prefix} ✅ Complete (${turnCount} turns, ${formatDuration(duration)})`));
    } else if (!useCleanOutput) {
      // Verbose completion for remaining agents
      console.log(chalk.green(` ✅ Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`));
    }

    // Return result, with the log file path when one was derived
    return {
      result,
      success: true,
      duration,
      turns: turnCount,
      cost: totalCost,
      partialCost, // Include partial cost for crash recovery
      apiErrorDetected,
      ...(logFilePath && { logFile: logFilePath }),
    };

  } catch (error) {
    // A thrown null/undefined/primitive has no message or constructor to report;
    // wrap it so the reporting below cannot itself throw. Objects pass through unchanged.
    const err = (error !== null && typeof error === 'object') ? error : new Error(String(error));
    const errorType = err.constructor?.name ?? 'Error';
    const retryable = isRetryableError(err);

    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // Log error to audit system
    if (auditSession) {
      await auditSession.logEvent('error', {
        message: err.message,
        errorType,
        stack: err.stack,
        duration,
        turns: turnCount,
        timestamp: new Date().toISOString()
      });
    }

    // Show error messages based on agent type (same branch order as the success path)
    if (progressIndicator) {
      progressIndicator.stop();
      const agentType = labelFor('Pre-recon analysis', 'Reconnaissance', 'Report generation', 'Analysis');
      console.log(chalk.red(`❌ ${agentType} failed (${formatDuration(duration)})`));
    } else if (isParallelExecution) {
      // Compact error for parallel agents
      const prefix = getAgentPrefix(description);
      console.log(chalk.red(`${prefix} ❌ Failed (${formatDuration(duration)})`));
    } else if (!useCleanOutput) {
      // Verbose error for remaining agents
      console.log(chalk.red(` ❌ Claude Code failed: ${description} (${formatDuration(duration)})`));
    }

    console.log(chalk.red(` Error Type: ${errorType}`));
    console.log(chalk.red(` Message: ${err.message}`));
    console.log(chalk.gray(` Agent: ${description}`));
    console.log(chalk.gray(` Working Directory: ${sourceDir}`));
    console.log(chalk.gray(` Retryable: ${retryable ? 'Yes' : 'No'}`));

    // Log additional context if available
    if (err.code) {
      console.log(chalk.gray(` Error Code: ${err.code}`));
    }
    if (err.status) {
      console.log(chalk.gray(` HTTP Status: ${err.status}`));
    }

    // Save detailed error to log file for debugging
    try {
      const errorLog = {
        timestamp: new Date().toISOString(),
        agent: description,
        error: {
          name: errorType,
          message: err.message,
          code: err.code,
          status: err.status,
          stack: err.stack
        },
        context: {
          sourceDir,
          prompt: fullPrompt.slice(0, 200) + '...',
          retryable
        },
        duration
      };

      const logPath = path.join(sourceDir, 'error.log');
      await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
    } catch (logError) {
      // Ignore logging errors to avoid cascading failures
      console.log(chalk.gray(` (Failed to write error log: ${logError.message})`));
    }

    return {
      error: err.message,
      errorType,
      prompt: fullPrompt.slice(0, 100) + '...',
      success: false,
      duration,
      cost: partialCost, // Include partial cost on error
      retryable
    };
  }
}
|
|
||||||
|
|
||||||
// PREFERRED: Production-ready Claude agent execution with full orchestration
// This is the standard function for all agent execution. Provides:
// - Intelligent retry logic with exponential backoff
// - Output validation to ensure deliverables are created
// - Prompt snapshotting for debugging and reproducibility
// - Git checkpoint/rollback safety for workspace protection
// - Comprehensive error handling and logging
// - Crash-safe audit logging via AuditSession
//
// Returns the runClaudePrompt() result of the first attempt that both succeeds and
// passes output validation. Throws the last error when a non-retryable error occurs
// or retries are exhausted, and a PentestError when output validation still fails on
// the final attempt.
export async function runClaudePromptWithRetry(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null) {
  const maxRetries = 3;
  let lastError;
  let retryContext = context; // Preserve context between retries

  console.log(chalk.cyan(`🚀 Starting ${description} with ${maxRetries} max attempts`));

  // Initialize audit session (crash-safe logging)
  let auditSession = null;
  if (sessionMetadata && agentName) {
    auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Create checkpoint before each attempt
    await createGitCheckpoint(sourceDir, description, attempt);

    // Start agent tracking in audit system (saves prompt snapshot automatically)
    if (auditSession) {
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await auditSession.startAgent(agentName, fullPrompt, attempt);
    }

    // Set once the validation-failure branch has written this attempt's audit record,
    // so the catch block does not record the same attempt a second time when the
    // final-attempt PentestError is thrown from inside the try.
    let attemptRecorded = false;

    try {
      const result = await runClaudePrompt(prompt, sourceDir, allowedTools, retryContext, description, agentName, colorFn, sessionMetadata, auditSession, attempt);

      if (!result.success) {
        // runClaudePrompt reports execution failures through its return value instead
        // of throwing. Convert the failure into an exception so the attempt is audited,
        // rolled back and retried by the catch block rather than silently skipped.
        const failure = new Error(result.error || `${description} failed`);
        failure.duration = result.duration;
        failure.cost = result.cost;
        failure.resultRetryable = result.retryable;
        throw failure;
      }

      // Validate output after successful run
      const validationPassed = await validateAgentOutput(result, agentName, sourceDir);

      if (validationPassed) {
        // Check if API error was detected but validation passed
        if (result.apiErrorDetected) {
          console.log(chalk.yellow(`📋 Validation: Ready for exploitation despite API error warnings`));
        }

        // Record successful attempt in audit system
        if (auditSession) {
          await auditSession.endAgent(agentName, {
            attemptNumber: attempt,
            duration_ms: result.duration,
            cost_usd: result.cost || 0,
            success: true,
            checkpoint: await getGitCommitHash(sourceDir)
          });
        }

        // Commit successful changes (will include the snapshot)
        await commitGitSuccess(sourceDir, description);
        console.log(chalk.green.bold(`🎉 ${description} completed successfully on attempt ${attempt}/${maxRetries}`));
        return result;
      }

      // Agent completed but output validation failed
      console.log(chalk.yellow(`⚠️ ${description} completed but output validation failed`));

      // Record failed validation attempt in audit system
      if (auditSession) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: result.duration,
          cost_usd: result.partialCost || result.cost || 0,
          success: false,
          error: 'Output validation failed',
          isFinalAttempt: attempt === maxRetries
        });
        attemptRecorded = true;
      }

      // If API error detected AND validation failed, this is a retryable error
      if (result.apiErrorDetected) {
        console.log(chalk.yellow(`⚠️ API Error detected with validation failure - treating as retryable`));
        lastError = new Error('API Error: terminated with validation failure');
      } else {
        lastError = new Error('Output validation failed');
      }

      if (attempt < maxRetries) {
        // Rollback contaminated workspace
        await rollbackGitWorkspace(sourceDir, 'validation failure');
        continue;
      }

      // FAIL FAST - Don't continue with broken pipeline
      throw new PentestError(
        `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
        'validation',
        false,
        { description, sourceDir, attemptsExhausted: maxRetries }
      );

    } catch (error) {
      lastError = error;

      // Record failed attempt in audit system (unless already recorded above)
      if (auditSession && !attemptRecorded) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: error.duration || 0,
          cost_usd: error.cost || 0,
          success: false,
          error: error.message,
          isFinalAttempt: attempt === maxRetries
        });
      }

      // Check if error is retryable. A failure converted from a runClaudePrompt result
      // carries the retryability that was computed from the original error.
      const retryable = typeof error.resultRetryable === 'boolean'
        ? error.resultRetryable
        : isRetryableError(error);

      if (!retryable) {
        console.log(chalk.red(`❌ ${description} failed with non-retryable error: ${error.message}`));
        await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
        throw error;
      }

      if (attempt < maxRetries) {
        // Rollback for clean retry
        await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');

        const delay = getRetryDelay(error, attempt);
        const delaySeconds = (delay / 1000).toFixed(1);
        console.log(chalk.yellow(`⚠️ ${description} failed (attempt ${attempt}/${maxRetries})`));
        console.log(chalk.gray(` Error: ${error.message}`));
        console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));

        // Preserve any partial results for next retry
        if (error.partialResults) {
          retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(error.partialResults)}`;
        }

        await new Promise(resolve => setTimeout(resolve, delay));
      } else {
        await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
        console.log(chalk.red(`❌ ${description} failed after ${maxRetries} attempts`));
        console.log(chalk.red(` Final error: ${error.message}`));
      }
    }
  }

  // Never throw `undefined`: fall back to a generic error if no attempt recorded one
  throw lastError || new Error(`${description} failed after ${maxRetries} attempts`);
}
|
|
||||||
|
|
||||||
// Looks up the HEAD commit hash of the git repository at `sourceDir`.
// Returns the trimmed hash, or null when the lookup fails for any reason.
async function getGitCommitHash(sourceDir) {
  let commitHash = null;

  try {
    const { stdout } = await $`cd ${sourceDir} && git rev-parse HEAD`;
    commitHash = stdout.trim();
  } catch (_gitError) {
    // Best-effort lookup: any failure is reported to the caller as null
  }

  return commitHash;
}
|
|
||||||
@@ -0,0 +1,529 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
// Production Claude agent execution with retry, git checkpoints, and audit logging
|
||||||
|
|
||||||
|
import { fs, path } from 'zx';
|
||||||
|
import chalk, { type ChalkInstance } from 'chalk';
|
||||||
|
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||||
|
|
||||||
|
import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
|
||||||
|
import { timingResults, Timer } from '../utils/metrics.js';
|
||||||
|
import { formatTimestamp } from '../utils/formatting.js';
|
||||||
|
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace, getGitCommitHash } from '../utils/git-manager.js';
|
||||||
|
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
|
||||||
|
import { AuditSession } from '../audit/index.js';
|
||||||
|
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
|
||||||
|
import type { SessionMetadata } from '../audit/utils.js';
|
||||||
|
import { getPromptNameForAgent } from '../types/agents.js';
|
||||||
|
import type { AgentName } from '../types/index.js';
|
||||||
|
|
||||||
|
import { dispatchMessage } from './message-handlers.js';
|
||||||
|
import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
|
||||||
|
import { createProgressManager } from './progress-manager.js';
|
||||||
|
import { createAuditLogger } from './audit-logger.js';
|
||||||
|
|
||||||
|
// Ambient declaration of a global flag read by this module. When truthy it is passed
// to createProgressManager and enables periodic heartbeat logging in the message loop.
declare global {
  var SHANNON_DISABLE_LOADER: boolean | undefined;
}
|
||||||
|
|
||||||
|
// Outcome of a single runClaudePrompt() execution, for both success and failure.
export interface ClaudePromptResult {
  // Final result text from the message stream; may be null
  result?: string | null;
  // true when the run completed without an error being caught
  success: boolean;
  // Value returned by the run's Timer when it was stopped
  duration: number;
  // Number of assistant messages seen during the run
  turns?: number;
  // Cost reported for the run
  cost: number;
  // Set to the same value as `cost` on successful runs
  partialCost?: number;
  // true when an API error was detected in the stream but the run still completed
  apiErrorDetected?: boolean;
  // Failure only: message of the caught error
  error?: string;
  // Failure only: constructor name of the caught error
  errorType?: string;
  // Failure only: first 100 characters of the full prompt followed by '...'
  prompt?: string;
  // Failure only: result of isRetryableError() for the caught error
  retryable?: boolean;
}
|
||||||
|
|
||||||
|
// Configuration for an MCP server that is launched as a child process over stdio.
interface StdioMcpServer {
  type: 'stdio';
  // Executable to launch (buildMcpServers uses 'npx')
  command: string;
  // Command-line arguments passed to `command`
  args: string[];
  // Environment variables for the launched process
  env: Record<string, string>;
}

// Either the server object returned by createShannonHelperServer or a stdio-launched server.
type McpServer = ReturnType<typeof createShannonHelperServer> | StdioMcpServer;
|
||||||
|
|
||||||
|
// Configures MCP servers for agent execution, with Docker-specific Chromium handling
//
// The 'shannon-helper' server is always registered. When `agentName` resolves to an entry
// in MCP_AGENT_MAPPING, a stdio Playwright MCP server is registered under that mapped name.
function buildMcpServers(
  sourceDir: string,
  agentName: string | null
): Record<string, McpServer> {
  const servers: Record<string, McpServer> = {
    'shannon-helper': createShannonHelperServer(sourceDir),
  };

  if (!agentName) {
    return servers;
  }

  const promptName = getPromptNameForAgent(agentName as AgentName);
  const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null;
  if (!playwrightMcpName) {
    return servers;
  }

  console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`));

  // Docker uses system Chromium; local dev uses Playwright's bundled browsers
  const isDocker = process.env.SHANNON_DOCKER === 'true';
  const dockerBrowserArgs: string[] = isDocker
    ? ['--executable-path', '/usr/bin/chromium-browser', '--browser', 'chromium']
    : [];

  const args: string[] = [
    '@playwright/mcp@latest',
    '--isolated',
    '--user-data-dir', `/tmp/${playwrightMcpName}`,
    ...dockerBrowserArgs,
  ];

  // Inherit the parent environment, then drop entries whose value is undefined
  const mergedEnv = {
    ...process.env,
    PLAYWRIGHT_HEADLESS: 'true',
    ...(isDocker && { PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: '1' }),
  };
  const env: Record<string, string> = {};
  for (const [key, value] of Object.entries(mergedEnv)) {
    if (value !== undefined) {
      env[key] = value;
    }
  }

  const playwrightServer: StdioMcpServer = {
    type: 'stdio',
    command: 'npx',
    args,
    env,
  };
  servers[playwrightMcpName] = playwrightServer;

  return servers;
}
|
||||||
|
|
||||||
|
// Prints every entry of `lines` with its own console.log call, in array order.
function outputLines(lines: string[]): void {
  for (let index = 0; index < lines.length; index += 1) {
    console.log(lines[index]);
  }
}
|
||||||
|
|
||||||
|
// Appends a one-line JSON record describing `err` to `<sourceDir>/error.log`.
// Never throws: if the record cannot be built or written, the failure is only
// reported on the console so that logging problems do not mask the original error.
async function writeErrorLog(
  err: Error & { code?: string; status?: number },
  sourceDir: string,
  fullPrompt: string,
  duration: number
): Promise<void> {
  try {
    const timestamp = formatTimestamp();
    const errorDetails = {
      name: err.constructor.name,
      message: err.message,
      code: err.code,
      status: err.status,
      stack: err.stack
    };
    const logContext = {
      sourceDir,
      // Only the first 200 characters of the prompt are kept in the log
      prompt: fullPrompt.slice(0, 200) + '...',
      retryable: isRetryableError(err)
    };
    const record = {
      timestamp,
      agent: 'claude-executor',
      error: errorDetails,
      context: logContext,
      duration
    };

    const serialized = JSON.stringify(record) + '\n';
    await fs.appendFile(path.join(sourceDir, 'error.log'), serialized);
  } catch (logError) {
    const reason = logError instanceof Error ? logError.message : String(logError);
    console.log(chalk.gray(` (Failed to write error log: ${reason})`));
  }
}
|
||||||
|
|
||||||
|
// Checks that an agent run produced its expected deliverables.
//
// Returns false when the run itself was unsuccessful or has no result, when the agent's
// validator reports failure, or when validation throws. Returns true when the validator
// passes, and also when no validator is registered for `agentName`.
export async function validateAgentOutput(
  result: ClaudePromptResult,
  agentName: string | null,
  sourceDir: string
): Promise<boolean> {
  console.log(chalk.blue(` Validating ${agentName} agent output`));

  try {
    // A run without success or without a result cannot pass validation
    if (!(result.success && result.result)) {
      console.log(chalk.red(` Validation failed: Agent execution was unsuccessful`));
      return false;
    }

    // Look up the validator registered for this agent, if any
    const validate = agentName
      ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS]
      : undefined;

    if (!validate) {
      console.log(chalk.yellow(` No validator found for agent "${agentName}" - assuming success`));
      console.log(chalk.green(` Validation passed: Unknown agent with successful result`));
      return true;
    }

    console.log(chalk.blue(` Using validator for agent: ${agentName}`));
    console.log(chalk.blue(` Source directory: ${sourceDir}`));

    const passed = await validate(sourceDir);
    const verdict = passed
      ? chalk.green(` Validation passed: Required files/structure present`)
      : chalk.red(` Validation failed: Missing required deliverable files`);
    console.log(verdict);

    return passed;
  } catch (validationError) {
    const reason = validationError instanceof Error ? validationError.message : String(validationError);
    console.log(chalk.red(` Validation failed with error: ${reason}`));
    return false;
  }
}
|
||||||
|
|
||||||
|
// Low-level SDK execution. Handles message streaming, progress, and audit logging.
// Exported for Temporal activities to call single-attempt execution.
//
// Runs one attempt of `prompt` (prefixed with `context` when non-empty) with `sourceDir`
// as the working directory. Failures are NOT thrown to the caller: any error raised while
// streaming is logged and converted into a result with `success: false`, so callers must
// inspect `result.success`.
//
// Note: `sessionMetadata` and `attemptNumber` are accepted but not referenced in this body.
export async function runClaudePrompt(
  prompt: string,
  sourceDir: string,
  context: string = '',
  description: string = 'Claude analysis',
  agentName: string | null = null,
  colorFn: ChalkInstance = chalk.cyan,
  sessionMetadata: SessionMetadata | null = null,
  auditSession: AuditSession | null = null,
  attemptNumber: number = 1
): Promise<ClaudePromptResult> {
  // Timer name is derived from the description: lowercased, whitespace runs replaced by '-'
  const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
  const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;

  const execContext = detectExecutionContext(description);
  const progress = createProgressManager(
    { description, useCleanOutput: execContext.useCleanOutput },
    global.SHANNON_DISABLE_LOADER ?? false
  );
  const auditLogger = createAuditLogger(auditSession);

  console.log(chalk.blue(` Running Claude Code: ${description}...`));

  const mcpServers = buildMcpServers(sourceDir, agentName);
  // Options handed to the SDK query; permission prompts are bypassed
  const options = {
    model: 'claude-sonnet-4-5-20250929',
    maxTurns: 10_000,
    cwd: sourceDir,
    permissionMode: 'bypassPermissions' as const,
    mcpServers,
  };

  if (!execContext.useCleanOutput) {
    console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
  }

  // Declared outside the try so the catch block can report them.
  // They are only updated after the message stream has completed.
  let turnCount = 0;
  let result: string | null = null;
  let apiErrorDetected = false;
  let totalCost = 0;

  progress.start();

  try {
    const messageLoopResult = await processMessageStream(
      fullPrompt,
      options,
      { execContext, description, colorFn, progress, auditLogger },
      timer
    );

    turnCount = messageLoopResult.turnCount;
    result = messageLoopResult.result;
    apiErrorDetected = messageLoopResult.apiErrorDetected;
    totalCost = messageLoopResult.cost;

    // === SPENDING CAP SAFEGUARD ===
    // Defense-in-depth: Detect spending cap that slipped through detectApiError().
    // When spending cap is hit, Claude returns a short message with $0 cost.
    // Legitimate agent work NEVER costs $0 with only 1-2 turns.
    if (turnCount <= 2 && totalCost === 0) {
      const resultLower = (result || '').toLowerCase();
      // Plain substring matches against the lowercased result text
      const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
      const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
        resultLower.includes(kw)
      );

      if (looksLikeBillingError) {
        // Thrown inside the try, so it is handled by the catch block below
        throw new PentestError(
          `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
          'billing',
          true // Retryable - Temporal will use 5-30 min backoff
        );
      }
    }

    const duration = timer.stop();
    timingResults.agents[execContext.agentKey] = duration;

    if (apiErrorDetected) {
      console.log(chalk.yellow(` API Error detected in ${description} - will validate deliverables before failing`));
    }

    progress.finish(formatCompletionMessage(execContext, description, turnCount, duration));

    return {
      result,
      success: true,
      duration,
      turns: turnCount,
      cost: totalCost,
      partialCost: totalCost,
      apiErrorDetected
    };

  } catch (error) {
    const duration = timer.stop();
    timingResults.agents[execContext.agentKey] = duration;

    const err = error as Error & { code?: string; status?: number };

    await auditLogger.logError(err, duration, turnCount);
    progress.stop();
    outputLines(formatErrorOutput(err, execContext, description, duration, sourceDir, isRetryableError(err)));
    await writeErrorLog(err, sourceDir, fullPrompt, duration);

    // The failure is returned, not rethrown. `cost` is still 0 here when
    // processMessageStream itself threw, because totalCost is only assigned after it returns.
    return {
      error: err.message,
      errorType: err.constructor.name,
      prompt: fullPrompt.slice(0, 100) + '...',
      success: false,
      duration,
      cost: totalCost,
      retryable: isRetryableError(err)
    };
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Summary returned by processMessageStream once the SDK message stream has ended.
interface MessageLoopResult {
  // Number of messages of type 'assistant' that were seen
  turnCount: number;
  // Result carried by the 'complete' dispatch outcome; null if none was received
  result: string | null;
  // true if any 'continue' dispatch outcome flagged an API error
  apiErrorDetected: boolean;
  // Cost carried by the 'complete' dispatch outcome; 0 if none was received
  cost: number;
}

// Collaborators handed to processMessageStream and forwarded to dispatchMessage.
interface MessageLoopDeps {
  execContext: ReturnType<typeof detectExecutionContext>;
  description: string;
  colorFn: ChalkInstance;
  progress: ReturnType<typeof createProgressManager>;
  auditLogger: ReturnType<typeof createAuditLogger>;
}
|
||||||
|
|
||||||
|
// Consumes the SDK message stream for `fullPrompt`, dispatching every message to
// dispatchMessage and accumulating the turn count, final result, cost and API-error flag.
// Rethrows the error carried by a 'throw' dispatch outcome and stops reading on 'complete'.
async function processMessageStream(
  fullPrompt: string,
  options: NonNullable<Parameters<typeof query>[0]['options']>,
  deps: MessageLoopDeps,
  timer: Timer
): Promise<MessageLoopResult> {
  const { execContext, description, colorFn, progress, auditLogger } = deps;
  const HEARTBEAT_INTERVAL = 30000;

  const summary: MessageLoopResult = {
    turnCount: 0,
    result: null,
    apiErrorDetected: false,
    cost: 0,
  };
  let lastHeartbeat = Date.now();

  for await (const message of query({ prompt: fullPrompt, options })) {
    // Heartbeat logging when loader is disabled
    const now = Date.now();
    const heartbeatDue = now - lastHeartbeat > HEARTBEAT_INTERVAL;
    if (global.SHANNON_DISABLE_LOADER && heartbeatDue) {
      const elapsedSeconds = Math.floor((now - timer.startTime) / 1000);
      console.log(chalk.blue(` [${elapsedSeconds}s] ${description} running... (Turn ${summary.turnCount})`));
      lastHeartbeat = now;
    }

    // Every assistant message counts as one turn
    if (message.type === 'assistant') {
      summary.turnCount += 1;
    }

    const outcome = await dispatchMessage(
      message as { type: string; subtype?: string },
      summary.turnCount,
      { execContext, description, colorFn, progress, auditLogger }
    );

    if (outcome.type === 'throw') {
      throw outcome.error;
    } else if (outcome.type === 'complete') {
      summary.result = outcome.result;
      summary.cost = outcome.cost;
      break;
    } else if (outcome.type === 'continue' && outcome.apiErrorDetected) {
      summary.apiErrorDetected = true;
    }
  }

  return summary;
}
|
||||||
|
|
||||||
|
// Main entry point for agent execution. Handles retries, git checkpoints, and validation.
//
// Each attempt creates a git checkpoint, runs the agent once via runClaudePrompt and validates
// its output. A successful, validated attempt is committed and its result returned. Failed
// attempts are rolled back and, when retryable, retried after a delay (up to 3 attempts).
//
// Throws the last error when a non-retryable error occurs or retries are exhausted, and a
// PentestError when output validation still fails on the final attempt.
// `_allowedTools` is accepted for signature compatibility and is not used.
export async function runClaudePromptWithRetry(
  prompt: string,
  sourceDir: string,
  _allowedTools: string = 'Read',
  context: string = '',
  description: string = 'Claude analysis',
  agentName: string | null = null,
  colorFn: ChalkInstance = chalk.cyan,
  sessionMetadata: SessionMetadata | null = null
): Promise<ClaudePromptResult> {
  const maxRetries = 3;
  let lastError: Error | undefined;
  let retryContext = context;

  console.log(chalk.cyan(`Starting ${description} with ${maxRetries} max attempts`));

  let auditSession: AuditSession | null = null;
  if (sessionMetadata && agentName) {
    auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    await createGitCheckpoint(sourceDir, description, attempt);

    if (auditSession && agentName) {
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await auditSession.startAgent(agentName, fullPrompt, attempt);
    }

    // Set once the validation-failure branch has written this attempt's audit record,
    // so the catch block does not record the same attempt a second time when the
    // final-attempt PentestError is thrown from inside the try.
    let attemptRecorded = false;

    try {
      const result = await runClaudePrompt(
        prompt, sourceDir, retryContext,
        description, agentName, colorFn, sessionMetadata, auditSession, attempt
      );

      if (!result.success) {
        // runClaudePrompt reports execution failures through its return value instead
        // of throwing. Convert the failure into an exception so the attempt is audited,
        // rolled back and retried by the catch block rather than silently skipped.
        throw Object.assign(new Error(result.error ?? `${description} failed`), {
          duration: result.duration,
          cost: result.cost,
          resultRetryable: result.retryable,
        });
      }

      const validationPassed = await validateAgentOutput(result, agentName, sourceDir);

      if (validationPassed) {
        if (result.apiErrorDetected) {
          console.log(chalk.yellow(`Validation: Ready for exploitation despite API error warnings`));
        }

        if (auditSession && agentName) {
          const commitHash = await getGitCommitHash(sourceDir);
          const endResult: {
            attemptNumber: number;
            duration_ms: number;
            cost_usd: number;
            success: true;
            checkpoint?: string;
          } = {
            attemptNumber: attempt,
            duration_ms: result.duration,
            cost_usd: result.cost || 0,
            success: true,
          };
          // Only attach the checkpoint when a commit hash could be resolved
          if (commitHash) {
            endResult.checkpoint = commitHash;
          }
          await auditSession.endAgent(agentName, endResult);
        }

        await commitGitSuccess(sourceDir, description);
        console.log(chalk.green.bold(`${description} completed successfully on attempt ${attempt}/${maxRetries}`));
        return result;
      }

      // Validation failure is retryable - agent might succeed on retry with cleaner workspace
      console.log(chalk.yellow(`${description} completed but output validation failed`));

      if (auditSession && agentName) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: result.duration,
          cost_usd: result.partialCost || result.cost || 0,
          success: false,
          error: 'Output validation failed',
          isFinalAttempt: attempt === maxRetries
        });
        attemptRecorded = true;
      }

      if (result.apiErrorDetected) {
        console.log(chalk.yellow(`API Error detected with validation failure - treating as retryable`));
        lastError = new Error('API Error: terminated with validation failure');
      } else {
        lastError = new Error('Output validation failed');
      }

      if (attempt < maxRetries) {
        await rollbackGitWorkspace(sourceDir, 'validation failure');
        continue;
      }

      throw new PentestError(
        `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
        'validation',
        false,
        { description, sourceDir, attemptsExhausted: maxRetries }
      );

    } catch (error) {
      const err = error as Error & {
        duration?: number;
        cost?: number;
        partialResults?: unknown;
        resultRetryable?: boolean;
      };
      lastError = err;

      // Record the failed attempt unless the validation branch already did
      if (auditSession && agentName && !attemptRecorded) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: err.duration || 0,
          cost_usd: err.cost || 0,
          success: false,
          error: err.message,
          isFinalAttempt: attempt === maxRetries
        });
      }

      // A failure converted from a runClaudePrompt result carries the retryability that
      // was computed from the original error; everything else is classified here.
      const retryable = err.resultRetryable ?? isRetryableError(err);

      if (!retryable) {
        console.log(chalk.red(`${description} failed with non-retryable error: ${err.message}`));
        await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
        throw err;
      }

      if (attempt < maxRetries) {
        await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');

        const delay = getRetryDelay(err, attempt);
        const delaySeconds = (delay / 1000).toFixed(1);
        console.log(chalk.yellow(`${description} failed (attempt ${attempt}/${maxRetries})`));
        console.log(chalk.gray(` Error: ${err.message}`));
        console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));

        // Carry partial results from the failed attempt into the next attempt's context
        if (err.partialResults) {
          retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(err.partialResults)}`;
        }

        await new Promise(resolve => setTimeout(resolve, delay));
      } else {
        await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
        console.log(chalk.red(`${description} failed after ${maxRetries} attempts`));
        console.log(chalk.red(` Final error: ${err.message}`));
      }
    }
  }

  // Never throw `undefined`: fall back to a generic error if no attempt recorded one
  throw lastError ?? new Error(`${description} failed after ${maxRetries} attempts`);
}
|
||||||
@@ -0,0 +1,272 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
// Pure functions for processing SDK message types
|
||||||
|
|
||||||
|
import { PentestError } from '../error-handling.js';
|
||||||
|
import { filterJsonToolCalls } from '../utils/output-formatter.js';
|
||||||
|
import { formatTimestamp } from '../utils/formatting.js';
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import {
|
||||||
|
formatAssistantOutput,
|
||||||
|
formatResultOutput,
|
||||||
|
formatToolUseOutput,
|
||||||
|
formatToolResultOutput,
|
||||||
|
} from './output-formatters.js';
|
||||||
|
import { costResults } from '../utils/metrics.js';
|
||||||
|
import type { AuditLogger } from './audit-logger.js';
|
||||||
|
import type { ProgressManager } from './progress-manager.js';
|
||||||
|
import type {
|
||||||
|
AssistantMessage,
|
||||||
|
ResultMessage,
|
||||||
|
ToolUseMessage,
|
||||||
|
ToolResultMessage,
|
||||||
|
AssistantResult,
|
||||||
|
ResultData,
|
||||||
|
ToolUseData,
|
||||||
|
ToolResultData,
|
||||||
|
ApiErrorDetection,
|
||||||
|
ContentBlock,
|
||||||
|
SystemInitMessage,
|
||||||
|
ExecutionContext,
|
||||||
|
} from './types.js';
|
||||||
|
import type { ChalkInstance } from 'chalk';
|
||||||
|
|
||||||
|
// Flattens an assistant message's content into a single string.
// Array content is joined with newlines, using each block's `text` when it is truthy and
// the block's JSON form otherwise; any other content is converted with String().
export function extractMessageContent(message: AssistantMessage): string {
  const { content } = message.message;

  if (!Array.isArray(content)) {
    return String(content);
  }

  const renderedBlocks = content.map((block: ContentBlock) => block.text || JSON.stringify(block));
  return renderedBlocks.join('\n');
}
|
||||||
|
|
||||||
|
// Scans assistant output for signs of API or billing problems (case-insensitive substring
// matching). Returns `{ detected: false }` for empty or non-string input. When an error
// should abort the run, the returned object carries it in `shouldThrow`.
export function detectApiError(content: string): ApiErrorDetection {
  if (typeof content !== 'string' || !content) {
    return { detected: false };
  }

  const haystack = content.toLowerCase();
  const mentions = (needle: string): boolean => haystack.includes(needle);

  // === BILLING/SPENDING CAP ERRORS (Retryable with long backoff) ===
  // When Claude Code hits its spending cap, it returns a short message like
  // "Spending cap reached resets 8am" instead of throwing an error.
  // These should retry with 5-30 min backoff so workflows can recover when cap resets.
  const billingPhrases = [
    'spending cap',
    'spending limit',
    'cap reached',
    'budget exceeded',
    'usage limit',
  ];

  if (billingPhrases.some((phrase) => mentions(phrase))) {
    const billingError = new PentestError(
      `Billing limit reached: ${content.slice(0, 100)}`,
      'billing',
      true // RETRYABLE - Temporal will use 5-30 min backoff
    );
    return { detected: true, shouldThrow: billingError };
  }

  // === SESSION LIMIT (Non-retryable) ===
  // Different from spending cap - usually means something is fundamentally wrong
  if (mentions('session limit reached')) {
    const sessionError = new PentestError('Session limit reached', 'billing', false);
    return { detected: true, shouldThrow: sessionError };
  }

  // Non-fatal API errors - detected but continue
  const nonFatalApiError = mentions('api error') || mentions('terminated');
  return { detected: nonFatalApiError };
}
|
||||||
|
|
||||||
|
// Processes one assistant message: extracts its text, produces a copy filtered through
// filterJsonToolCalls, runs API-error detection on the raw text and builds the log payload.
export function handleAssistantMessage(
  message: AssistantMessage,
  turnCount: number
): AssistantResult {
  const content = extractMessageContent(message);
  const cleanedContent = filterJsonToolCalls(content);
  const { detected, shouldThrow } = detectApiError(content);
  const timestamp = formatTimestamp();

  const assistantResult: AssistantResult = {
    content,
    cleanedContent,
    apiErrorDetected: detected,
    logData: { turn: turnCount, content, timestamp },
  };

  // Only add shouldThrow if it exists (exactOptionalPropertyTypes compliance)
  if (shouldThrow) {
    assistantResult.shouldThrow = shouldThrow;
  }

  return assistantResult;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes the final message of a query (the one carrying cost and duration
 * information) into a {@link ResultData} record.
 *
 * Missing or falsy fields fall back to defaults: `null` for the result text
 * and `0` for cost, duration and the permission-denial count.
 *
 * @param message - Result message from the SDK stream.
 * @returns Normalized result data; `subtype` is present only when the message
 *   carried a truthy one.
 */
export function handleResultMessage(message: ResultMessage): ResultData {
  const { result, total_cost_usd, duration_ms, permission_denials, subtype } = message;

  const summary: ResultData = {
    result: result || null,
    cost: total_cost_usd || 0,
    duration_ms: duration_ms || 0,
    permissionDenials: permission_denials?.length || 0,
  };

  // Assign conditionally instead of writing `subtype: undefined`
  // (exactOptionalPropertyTypes compliance).
  if (subtype) {
    summary.subtype = subtype;
  }

  return summary;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the tool name and input parameters from a tool-use message and
 * stamps the record with the current formatted timestamp.
 *
 * @param message - Tool-use message from the SDK stream.
 * @returns Tool name, parameters (an empty object when the message has no
 *   input) and timestamp.
 */
export function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
  const { name, input } = message;
  const parameters = input || {};

  return { toolName: name, parameters, timestamp: formatTimestamp() };
}
|
||||||
|
|
||||||
|
/**
 * Prepares a tool result for display and logging.
 *
 * The returned `content` is the untouched original value (for logging), while
 * `displayContent` is a string rendering capped at 500 characters with a
 * truncation notice appended when the cap is exceeded.
 *
 * @param message - Tool-result message from the SDK stream.
 * @returns Original content, display string and timestamp.
 */
export function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
  const DISPLAY_LIMIT = 500;
  const content = message.content;

  // `content` is optional, and JSON.stringify returns undefined (not a string)
  // for undefined, functions and symbols. Fall back to an empty string so the
  // length check below cannot throw on a missing result.
  const contentStr =
    typeof content === 'string' ? content : (JSON.stringify(content, null, 2) ?? '');

  const displayContent =
    contentStr.length > DISPLAY_LIMIT
      ? `${contentStr.slice(0, DISPLAY_LIMIT)}...\n[Result truncated - ${contentStr.length} total chars]`
      : contentStr;

  return {
    content,
    displayContent,
    timestamp: formatTimestamp(),
  };
}
|
||||||
|
|
||||||
|
/**
 * Writes each entry of `lines` to the console, one `console.log` call per
 * entry, in order.
 *
 * @param lines - Already-formatted lines to print.
 */
function outputLines(lines: string[]): void {
  lines.forEach((text) => {
    console.log(text);
  });
}
|
||||||
|
|
||||||
|
/**
 * Outcome of dispatching a single SDK message.
 *
 * - `continue`: keep consuming the stream; `apiErrorDetected` is set when an
 *   assistant response was flagged as containing an API error.
 * - `complete`: the final result message arrived, with its text and cost.
 * - `throw`: the caller should raise `error`.
 */
export type MessageDispatchAction =
  | { type: 'continue'; apiErrorDetected?: boolean }
  | { type: 'complete'; result: string | null; cost: number }
  | { type: 'throw'; error: Error };
|
||||||
|
|
||||||
|
/** Collaborators that `dispatchMessage` needs in order to print and log. */
export interface MessageDispatchDeps {
  /** Output-mode flags and agent identifiers for the current run. */
  execContext: ExecutionContext;
  /** Agent description passed through to the output formatters. */
  description: string;
  /** Color function applied to assistant output. */
  colorFn: ChalkInstance;
  /** Progress indicator that is paused while assistant output is printed. */
  progress: ProgressManager;
  /** Sink for LLM-response and tool start/end audit records. */
  auditLogger: AuditLogger;
}
|
||||||
|
|
||||||
|
/**
 * Routes one SDK stream message to its handler and formatter, printing
 * console output and writing audit records as a side effect.
 *
 * @param message - Raw SDK message; only `type` (and `subtype` for system
 *   messages) is inspected before the message is cast to its concrete shape.
 * @param turnCount - Current turn number, used for display and audit logging.
 * @param deps - Output, progress and audit collaborators.
 * @returns What the caller should do next: continue, complete, or throw.
 */
export async function dispatchMessage(
  message: { type: string; subtype?: string },
  turnCount: number,
  deps: MessageDispatchDeps
): Promise<MessageDispatchAction> {
  const { execContext, description, colorFn, progress, auditLogger } = deps;
  const kind = message.type;

  if (kind === 'assistant') {
    const handled = handleAssistantMessage(message as AssistantMessage, turnCount);

    // A fatal detection short-circuits before anything is printed or logged.
    if (handled.shouldThrow) {
      return { type: 'throw', error: handled.shouldThrow };
    }

    const printable = handled.cleanedContent;
    if (printable.trim()) {
      // Stop the progress indicator while printing, then restart it.
      progress.stop();
      const rendered = formatAssistantOutput(
        printable,
        execContext,
        turnCount,
        description,
        colorFn
      );
      outputLines(rendered);
      progress.start();
    }

    await auditLogger.logLlmResponse(turnCount, handled.content);

    if (!handled.apiErrorDetected) {
      return { type: 'continue' };
    }
    console.log(chalk.red(` API Error detected in assistant response`));
    return { type: 'continue', apiErrorDetected: true };
  }

  if (kind === 'system') {
    // Init details are printed only when clean output is not in effect.
    if (message.subtype === 'init' && !execContext.useCleanOutput) {
      const init = message as SystemInitMessage;
      console.log(chalk.blue(` Model: ${init.model}, Permission: ${init.permissionMode}`));

      const servers = init.mcp_servers;
      if (servers && servers.length > 0) {
        const mcpStatus = servers.map((s) => `${s.name}(${s.status})`).join(', ');
        console.log(chalk.blue(` MCP: ${mcpStatus}`));
      }
    }
    return { type: 'continue' };
  }

  if (kind === 'user') {
    return { type: 'continue' };
  }

  if (kind === 'tool_use') {
    const { toolName, parameters } = handleToolUseMessage(message as unknown as ToolUseMessage);
    outputLines(formatToolUseOutput(toolName, parameters));
    await auditLogger.logToolStart(toolName, parameters);
    return { type: 'continue' };
  }

  if (kind === 'tool_result') {
    const outcome = handleToolResultMessage(message as unknown as ToolResultMessage);
    outputLines(formatToolResultOutput(outcome.displayContent));
    await auditLogger.logToolEnd(outcome.content);
    return { type: 'continue' };
  }

  if (kind === 'result') {
    const summary = handleResultMessage(message as ResultMessage);
    outputLines(formatResultOutput(summary, !execContext.useCleanOutput));

    // Record this agent's cost and add it to the running total.
    costResults.agents[execContext.agentKey] = summary.cost;
    costResults.total += summary.cost;

    return { type: 'complete', result: summary.result, cost: summary.cost };
  }

  // Unrecognized message types are dumped verbatim and otherwise ignored.
  console.log(chalk.gray(` ${message.type}: ${JSON.stringify(message, null, 2)}`));
  return { type: 'continue' };
}
|
||||||
@@ -0,0 +1,169 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
// Pure functions for formatting console output
|
||||||
|
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import { extractAgentType, formatDuration } from '../utils/formatting.js';
|
||||||
|
import { getAgentPrefix } from '../utils/output-formatter.js';
|
||||||
|
import type { ExecutionContext, ResultData } from './types.js';
|
||||||
|
|
||||||
|
/**
 * Derives output-mode flags and agent identifiers from an agent description.
 *
 * Matching is by case-sensitive substring: descriptions containing
 * `vuln agent` or `exploit agent` are treated as parallel executions, and
 * those plus the pre-recon, recon and report-cleanup descriptions use clean
 * output.
 *
 * @param description - Human-readable agent description.
 * @returns The execution context; `agentKey` is the description lowercased
 *   with each whitespace run replaced by a hyphen.
 */
export function detectExecutionContext(description: string): ExecutionContext {
  const mentions = (needle: string): boolean => description.includes(needle);

  const isParallelExecution = mentions('vuln agent') || mentions('exploit agent');

  // Every parallel agent also uses clean output, so that check is reused.
  const useCleanOutput =
    mentions('Pre-recon agent') ||
    mentions('Recon agent') ||
    mentions('Executive Summary and Report Cleanup') ||
    isParallelExecution;

  return {
    isParallelExecution,
    useCleanOutput,
    agentType: extractAgentType(description),
    agentKey: description.toLowerCase().replace(/\s+/g, '-'),
  };
}
|
||||||
|
|
||||||
|
/**
 * Builds the console lines for an assistant turn.
 *
 * @param cleanedContent - Assistant text to display.
 * @param context - Execution context; selects compact vs. full layout.
 * @param turnCount - Turn number shown in the full layout.
 * @param description - Agent description (source of the parallel prefix and
 *   of the heading in the full layout).
 * @param colorFn - Color function applied to every line; defaults to cyan.
 * @returns No lines for blank content; one prefixed line for parallel agents;
 *   otherwise a turn heading followed by the content.
 */
export function formatAssistantOutput(
  cleanedContent: string,
  context: ExecutionContext,
  turnCount: number,
  description: string,
  colorFn: typeof chalk.cyan = chalk.cyan
): string[] {
  if (cleanedContent.trim().length === 0) {
    return [];
  }

  if (context.isParallelExecution) {
    // Compact single-line form, tagged with the agent prefix.
    const tag = getAgentPrefix(description);
    return [colorFn(`${tag} ${cleanedContent}`)];
  }

  // Sequential agents get a heading line plus the content line.
  const heading = colorFn(`\n Turn ${turnCount} (${description}):`);
  const body = colorFn(` ${cleanedContent}`);
  return [heading, body];
}
|
||||||
|
|
||||||
|
/**
 * Builds the completion summary lines for a finished query.
 *
 * @param data - Normalized result data.
 * @param showFullResult - When true, the result text is appended, cut to
 *   1000 characters with a total-length note if it is longer.
 * @returns A header, a duration/cost line, then optional stop-reason,
 *   permission-denial and result lines.
 */
export function formatResultOutput(data: ResultData, showFullResult: boolean): string[] {
  const out: string[] = [chalk.magenta(`\n COMPLETED:`)];

  const seconds = (data.duration_ms / 1000).toFixed(1);
  const dollars = data.cost.toFixed(4);
  out.push(chalk.gray(` Duration: ${seconds}s, Cost: $${dollars}`));

  switch (data.subtype) {
    case 'error_max_turns':
      out.push(chalk.red(` Stopped: Hit maximum turns limit`));
      break;
    case 'error_during_execution':
      out.push(chalk.red(` Stopped: Execution error`));
      break;
    default:
      break;
  }

  if (data.permissionDenials > 0) {
    out.push(chalk.yellow(` ${data.permissionDenials} permission denials`));
  }

  const text = data.result;
  if (showFullResult && text && typeof text === 'string') {
    const shown =
      text.length > 1000 ? `${text.slice(0, 1000)}... [${text.length} total chars]` : text;
    out.push(chalk.magenta(` ${shown}`));
  }

  return out;
}
|
||||||
|
|
||||||
|
/**
 * Builds the console lines describing a failed agent run.
 *
 * @param error - The failure; `code` and `status` are printed when truthy.
 * @param context - Execution context; selects the headline format.
 * @param description - Agent description.
 * @param duration - Elapsed time, rendered through `formatDuration`.
 * @param sourceDir - Working directory shown in the details.
 * @param isRetryable - Whether the failure is reported as retryable.
 * @returns A headline followed by detail lines.
 */
export function formatErrorOutput(
  error: Error & { code?: string; status?: number },
  context: ExecutionContext,
  description: string,
  duration: number,
  sourceDir: string,
  isRetryable: boolean
): string[] {
  let headline: string;
  if (context.isParallelExecution) {
    headline = `${getAgentPrefix(description)} Failed (${formatDuration(duration)})`;
  } else if (context.useCleanOutput) {
    headline = `${context.agentType} failed (${formatDuration(duration)})`;
  } else {
    headline = ` Claude Code failed: ${description} (${formatDuration(duration)})`;
  }

  const retryLabel = isRetryable ? 'Yes' : 'No';
  const out: string[] = [
    chalk.red(headline),
    chalk.red(` Error Type: ${error.constructor.name}`),
    chalk.red(` Message: ${error.message}`),
    chalk.gray(` Agent: ${description}`),
    chalk.gray(` Working Directory: ${sourceDir}`),
    chalk.gray(` Retryable: ${retryLabel}`),
  ];

  // Optional diagnostics, appended only when the error carries them.
  if (error.code) {
    out.push(chalk.gray(` Error Code: ${error.code}`));
  }
  if (error.status) {
    out.push(chalk.gray(` HTTP Status: ${error.status}`));
  }

  return out;
}
|
||||||
|
|
||||||
|
/**
 * Builds the single green success line shown when an agent finishes.
 *
 * @param context - Execution context; selects the message format.
 * @param description - Agent description.
 * @param turnCount - Number of turns the agent took.
 * @param duration - Elapsed time, rendered through `formatDuration`.
 * @returns The colored completion message.
 */
export function formatCompletionMessage(
  context: ExecutionContext,
  description: string,
  turnCount: number,
  duration: number
): string {
  let text: string;

  if (context.isParallelExecution) {
    const tag = getAgentPrefix(description);
    text = `${tag} Complete (${turnCount} turns, ${formatDuration(duration)})`;
  } else if (context.useCleanOutput) {
    // Capitalize the first letter of the agent type for the headline.
    const kind = context.agentType;
    const capitalized = kind.charAt(0).toUpperCase() + kind.slice(1);
    text = `${capitalized} complete! (${turnCount} turns, ${formatDuration(duration)})`;
  } else {
    text = ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
  }

  return chalk.green(text);
}
|
||||||
|
|
||||||
|
/**
 * Builds the console lines announcing a tool invocation.
 *
 * @param toolName - Name of the tool being invoked.
 * @param input - Tool parameters; printed as pretty JSON only when the object
 *   has at least one key.
 * @returns The heading line, optionally followed by an input line.
 */
export function formatToolUseOutput(
  toolName: string,
  input: Record<string, unknown> | undefined
): string[] {
  const heading = chalk.yellow(`\n Using Tool: ${toolName}`);

  const hasInput = Boolean(input) && Object.keys(input as object).length > 0;
  if (!hasInput) {
    return [heading];
  }

  return [heading, chalk.gray(` Input: ${JSON.stringify(input, null, 2)}`)];
}
|
||||||
|
|
||||||
|
/**
 * Builds the console lines for a tool result.
 *
 * @param displayContent - Display-ready result text.
 * @returns The heading line, followed by the content line when the content is
 *   non-empty.
 */
export function formatToolResultOutput(displayContent: string): string[] {
  const heading = chalk.green(` Tool Result:`);
  return displayContent ? [heading, chalk.gray(` ${displayContent}`)] : [heading];
}
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
// Null Object pattern for progress indicator - callers never check for null
|
||||||
|
|
||||||
|
import { ProgressIndicator } from '../progress-indicator.js';
|
||||||
|
import { extractAgentType } from '../utils/formatting.js';
|
||||||
|
|
||||||
|
/** Inputs used by `createProgressManager` to choose and label an indicator. */
export interface ProgressContext {
  /** Agent description; the agent type shown in the message is derived from it. */
  description: string;
  /** A real progress indicator is created only when this is true. */
  useCleanOutput: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Contract shared by the real and no-op progress managers, so callers never
 * need to null-check the indicator.
 */
export interface ProgressManager {
  /** Begins (or resumes) showing progress. */
  start(): void;
  /** Stops showing progress. */
  stop(): void;
  /** Ends progress with a final message. */
  finish(message: string): void;
  /** Reports whether progress is currently marked as running. */
  isActive(): boolean;
}
|
||||||
|
|
||||||
|
/**
 * Progress manager backed by a `ProgressIndicator`. It forwards every call to
 * the indicator and tracks whether the indicator is currently running.
 */
class RealProgressManager implements ProgressManager {
  private indicator: ProgressIndicator;
  private active = false;

  /** @param message - Text handed to the underlying indicator. */
  constructor(message: string) {
    this.indicator = new ProgressIndicator(message);
  }

  /** Starts the indicator and marks the manager active. */
  start(): void {
    this.indicator.start();
    this.setActive(true);
  }

  /** Stops the indicator and marks the manager inactive. */
  stop(): void {
    this.indicator.stop();
    this.setActive(false);
  }

  /** Finishes the indicator with `message` and marks the manager inactive. */
  finish(message: string): void {
    this.indicator.finish(message);
    this.setActive(false);
  }

  /** @returns True between a `start()` and the next `stop()`/`finish()`. */
  isActive(): boolean {
    return this.active;
  }

  /** Records the running state after the indicator call has returned. */
  private setActive(running: boolean): void {
    this.active = running;
  }
}
|
||||||
|
|
||||||
|
/**
 * Null Object implementation of {@link ProgressManager}: every method is a
 * safe no-op and the manager always reports itself as inactive.
 */
class NullProgressManager implements ProgressManager {
  /** Does nothing. */
  start(): void {
    return;
  }

  /** Does nothing. */
  stop(): void {
    return;
  }

  /** Does nothing; the message is ignored. */
  finish(_message: string): void {
    return;
  }

  /** @returns Always false. */
  isActive(): boolean {
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Creates the progress manager for an agent run.
 *
 * @param context - Agent description and clean-output flag.
 * @param disableLoader - When true, forces the no-op manager.
 * @returns A real manager labelled with the agent type when clean output is
 *   enabled and the loader is not disabled; otherwise a no-op manager.
 */
export function createProgressManager(
  context: ProgressContext,
  disableLoader: boolean
): ProgressManager {
  const loaderEnabled = context.useCleanOutput && !disableLoader;
  if (!loaderEnabled) {
    return new NullProgressManager();
  }

  const label = `Running ${extractAgentType(context.description)}...`;
  return new RealProgressManager(label);
}
|
||||||
+134
@@ -0,0 +1,134 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
// Type definitions for Claude executor message processing pipeline
|
||||||
|
|
||||||
|
/** Output-mode flags and identifiers derived from an agent description. */
export interface ExecutionContext {
  /** True for agents whose output is printed in the compact, prefixed form. */
  isParallelExecution: boolean;
  /** True when the agent uses the clean output mode. */
  useCleanOutput: boolean;
  /** Agent type label used in console messages. */
  agentType: string;
  /** Key under which this agent's cost is recorded. */
  agentKey: string;
}
|
||||||
|
|
||||||
|
/**
 * State carried while a message stream is being processed.
 * NOTE(review): the code that maintains these fields is outside this file;
 * the field notes below follow the names and should be confirmed there.
 */
export interface ProcessingState {
  /** Number of turns seen so far. */
  turnCount: number;
  /** Final result text, or null if none. */
  result: string | null;
  /** Whether an API error has been detected. */
  apiErrorDetected: boolean;
  /** Total cost figure. */
  totalCost: number;
  /** Partial cost figure. */
  partialCost: number;
  /** Last heartbeat value. NOTE(review): unit not visible here; confirm. */
  lastHeartbeat: number;
}
|
||||||
|
|
||||||
|
/**
 * Summary of a processed message stream.
 * NOTE(review): the producer is outside this file; confirm field meanings there.
 */
export interface ProcessingResult {
  /** Final result text, or null if none. */
  result: string | null;
  /** Number of turns. */
  turnCount: number;
  /** Whether an API error was detected. */
  apiErrorDetected: boolean;
  /** Total cost figure. */
  totalCost: number;
}
|
||||||
|
|
||||||
|
/** Result of processing a single assistant message. */
export interface AssistantResult {
  /** Text content of the assistant message. */
  content: string;
  /** Content prepared for display. */
  cleanedContent: string;
  /** Whether the content was flagged as containing an API error. */
  apiErrorDetected: boolean;
  /** Error the caller should raise; absent when nothing needs to be thrown. */
  shouldThrow?: Error;
  /** Payload for the audit log entry of this turn. */
  logData: {
    turn: number;
    content: string;
    timestamp: string;
  };
}
|
||||||
|
|
||||||
|
/** Normalized view of a final result message. */
export interface ResultData {
  /** Result text, or null when there is none. */
  result: string | null;
  /** Cost of the query (from the message's `total_cost_usd`). */
  cost: number;
  /** Duration in milliseconds. */
  duration_ms: number;
  /** Result subtype, e.g. `error_max_turns`; absent when not reported. */
  subtype?: string;
  /** Number of permission denials reported. */
  permissionDenials: number;
}
|
||||||
|
|
||||||
|
/** Details of a tool invocation, ready for display and audit logging. */
export interface ToolUseData {
  /** Name of the invoked tool. */
  toolName: string;
  /** Parameters passed to the tool. */
  parameters: Record<string, unknown>;
  /** Formatted time at which the record was created. */
  timestamp: string;
}
|
||||||
|
|
||||||
|
/** A tool result in both raw and display-ready form. */
export interface ToolResultData {
  /** Raw tool output, kept as received. */
  content: unknown;
  /** String rendering of the output intended for console display. */
  displayContent: string;
  /** Formatted time at which the record was created. */
  timestamp: string;
}
|
||||||
|
|
||||||
|
/** One element of an assistant message's content array. */
export interface ContentBlock {
  /** Block kind, when provided. */
  type?: string;
  /** Text payload, when provided. */
  text?: string;
}
|
||||||
|
|
||||||
|
/** SDK stream message carrying an assistant response. */
export interface AssistantMessage {
  type: 'assistant';
  message: {
    /** Either a plain string or a list of content blocks. */
    content: ContentBlock[] | string;
  };
}
|
||||||
|
|
||||||
|
/** SDK stream message that ends a query; every payload field is optional. */
export interface ResultMessage {
  type: 'result';
  /** Final result text. */
  result?: string;
  /** Total cost in USD. */
  total_cost_usd?: number;
  /** Duration in milliseconds. */
  duration_ms?: number;
  /** Result subtype, e.g. an error classification. */
  subtype?: string;
  /** Permission denials; only the count is used by the handlers. */
  permission_denials?: unknown[];
}
|
||||||
|
|
||||||
|
/** SDK stream message announcing a tool invocation. */
export interface ToolUseMessage {
  type: 'tool_use';
  /** Name of the tool. */
  name: string;
  /** Tool input parameters, when present. */
  input?: Record<string, unknown>;
}
|
||||||
|
|
||||||
|
/** SDK stream message carrying a tool's output. */
export interface ToolResultMessage {
  type: 'tool_result';
  /** Tool output of any shape; may be absent. */
  content?: unknown;
}
|
||||||
|
|
||||||
|
/** Outcome of scanning assistant content for API errors. */
export interface ApiErrorDetection {
  /** True when an error pattern was found. */
  detected: boolean;
  /** Error to raise for this detection; absent when the run may continue. */
  shouldThrow?: Error;
}
|
||||||
|
|
||||||
|
/** Union of the message shapes that can arrive from the SDK stream. */
export type SdkMessage =
  | AssistantMessage
  | ResultMessage
  | ToolUseMessage
  | ToolResultMessage
  | SystemInitMessage
  | UserMessage;
|
||||||
|
|
||||||
|
/** System message emitted with subtype `init`. */
export interface SystemInitMessage {
  type: 'system';
  subtype: 'init';
  /** Model name, when reported. */
  model?: string;
  /** Permission mode, when reported. */
  permissionMode?: string;
  /** MCP servers with their reported status. */
  mcp_servers?: { name: string; status: string }[];
}
|
||||||
|
|
||||||
|
/** SDK stream message of type `user`; no payload fields are declared. */
export interface UserMessage {
  type: 'user';
}
|
||||||
|
|
||||||
|
/**
 * Dispatch outcome for message processing: keep going, stop with the final
 * result and cost, or raise an error.
 */
export type MessageDispatchResult =
  | { action: 'continue' }
  | { action: 'break'; result: string | null; cost: number }
  | { action: 'throw'; error: Error };
|
||||||
|
|
||||||
|
/** Per-message context made available to a dispatcher. */
export interface MessageDispatchContext {
  /** Current turn number. */
  turnCount: number;
  /** Output-mode flags and agent identifiers. */
  execContext: ExecutionContext;
  /** Agent description. */
  description: string;
  /** Function that colors a piece of text. */
  colorFn: (text: string) => string;
  /** Whether clean output mode is in effect. */
  useCleanOutput: boolean;
}
|
||||||
@@ -1,206 +0,0 @@
|
|||||||
/**
|
|
||||||
* Audit Session - Main Facade
|
|
||||||
*
|
|
||||||
* Coordinates logger, metrics tracker, and concurrency control for comprehensive
|
|
||||||
* crash-safe audit logging.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { AgentLogger } from './logger.js';
|
|
||||||
import { MetricsTracker } from './metrics-tracker.js';
|
|
||||||
import { initializeAuditStructure, formatTimestamp } from './utils.js';
|
|
||||||
import { SessionMutex } from '../utils/concurrency.js';
|
|
||||||
|
|
||||||
// Global mutex instance
|
|
||||||
const sessionMutex = new SessionMutex();
|
|
||||||
|
|
||||||
/**
 * AuditSession - Main audit system facade
 *
 * Owns the metrics tracker and the logger of the agent attempt currently in
 * progress. Writes that update session metrics are serialized through the
 * module-level session mutex, keyed by session id.
 */
export class AuditSession {
  /**
   * @param {Object} sessionMetadata - Session metadata from Shannon store
   * @param {string} sessionMetadata.id - Session UUID
   * @param {string} sessionMetadata.webUrl - Target web URL
   * @param {string} [sessionMetadata.repoPath] - Target repository path
   * @throws {Error} If `id` or `webUrl` is missing.
   */
  constructor(sessionMetadata) {
    this.sessionMetadata = sessionMetadata;
    this.sessionId = sessionMetadata.id;

    // Validate required fields
    if (!this.sessionId) {
      throw new Error('sessionMetadata.id is required');
    }
    if (!this.sessionMetadata.webUrl) {
      throw new Error('sessionMetadata.webUrl is required');
    }

    // Components
    this.metricsTracker = new MetricsTracker(sessionMetadata);

    // Active logger (one at a time per agent attempt)
    this.currentLogger = null;

    // Initialization state. The in-flight promise is shared so concurrent
    // callers wait for one initialization instead of each starting their own.
    this.initialized = false;
    this.initPromise = null;
  }

  /**
   * Initialize audit session (creates directories, session.json).
   * Idempotent and race-safe: concurrent calls share one initialization, and
   * a failed initialization may be retried by a later call.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) {
      return; // Already initialized
    }

    if (!this.initPromise) {
      this.initPromise = (async () => {
        // Create directory structure
        await initializeAuditStructure(this.sessionMetadata);

        // Initialize metrics tracker (loads or creates session.json)
        await this.metricsTracker.initialize();

        this.initialized = true;
      })().catch((error) => {
        // Drop the failed promise so the next call starts over.
        this.initPromise = null;
        throw error;
      });
    }

    await this.initPromise;
  }

  /**
   * Ensure initialized (helper for lazy initialization)
   * @private
   * @returns {Promise<void>}
   */
  async ensureInitialized() {
    if (!this.initialized) {
      await this.initialize();
    }
  }

  /**
   * Start agent execution
   * @param {string} agentName - Agent name
   * @param {string} promptContent - Full prompt content
   * @param {number} [attemptNumber=1] - Attempt number
   * @returns {Promise<void>}
   */
  async startAgent(agentName, promptContent, attemptNumber = 1) {
    await this.ensureInitialized();

    // Save prompt snapshot (only on first attempt)
    if (attemptNumber === 1) {
      await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
    }

    // Create and initialize logger for this attempt
    this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
    await this.currentLogger.initialize();

    // Start metrics tracking
    this.metricsTracker.startAgent(agentName, attemptNumber);

    // Log start event
    await this.currentLogger.logEvent('agent_start', {
      agentName,
      attemptNumber,
      timestamp: formatTimestamp()
    });
  }

  /**
   * Log event during agent execution
   * @param {string} eventType - Event type (tool_start, tool_end, llm_response, etc.)
   * @param {Object} eventData - Event data
   * @returns {Promise<void>}
   * @throws {Error} If no agent attempt is active.
   */
  async logEvent(eventType, eventData) {
    if (!this.currentLogger) {
      throw new Error('No active logger. Call startAgent() first.');
    }

    await this.currentLogger.logEvent(eventType, eventData);
  }

  /**
   * End agent execution (mutex-protected)
   * @param {string} agentName - Agent name
   * @param {Object} result - Execution result
   * @param {number} result.attemptNumber - Attempt number
   * @param {number} result.duration_ms - Duration in milliseconds
   * @param {number} result.cost_usd - Cost in USD
   * @param {boolean} result.success - Whether attempt succeeded
   * @param {string} [result.error] - Error message (if failed)
   * @param {string} [result.checkpoint] - Git checkpoint hash (if succeeded)
   * @param {boolean} [result.isFinalAttempt=false] - Whether this is the final attempt
   * @returns {Promise<void>}
   */
  async endAgent(agentName, result) {
    const logger = this.currentLogger;
    if (logger) {
      // Detach first so the session never keeps pointing at a logger that is
      // being (or has been) closed.
      this.currentLogger = null;
      try {
        // Log end event
        await logger.logEvent('agent_end', {
          agentName,
          success: result.success,
          duration_ms: result.duration_ms,
          cost_usd: result.cost_usd,
          timestamp: formatTimestamp()
        });
      } finally {
        // Close the logger even when writing the end event failed.
        await logger.close();
      }
    }

    // Mutex-protected update to session.json
    const unlock = await sessionMutex.lock(this.sessionId);
    try {
      // Reload metrics (in case of parallel updates)
      await this.metricsTracker.reload();

      // Update metrics
      await this.metricsTracker.endAgent(agentName, result);
    } finally {
      unlock();
    }
  }

  /**
   * Mark multiple agents as rolled back
   * @param {string[]} agentNames - Array of agent names
   * @returns {Promise<void>}
   */
  async markMultipleRolledBack(agentNames) {
    await this.ensureInitialized();

    const unlock = await sessionMutex.lock(this.sessionId);
    try {
      await this.metricsTracker.reload();
      await this.metricsTracker.markMultipleRolledBack(agentNames);
    } finally {
      unlock();
    }
  }

  /**
   * Update session status
   * @param {string} status - New status (in-progress, completed, failed)
   * @returns {Promise<void>}
   */
  async updateSessionStatus(status) {
    await this.ensureInitialized();

    const unlock = await sessionMutex.lock(this.sessionId);
    try {
      await this.metricsTracker.reload();
      await this.metricsTracker.updateSessionStatus(status);
    } finally {
      unlock();
    }
  }

  /**
   * Get current metrics (read-only)
   * @returns {Promise<Object>} Current metrics
   */
  async getMetrics() {
    await this.ensureInitialized();
    return this.metricsTracker.getMetrics();
  }
}
|
|
||||||
@@ -0,0 +1,254 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Audit Session - Main Facade
|
||||||
|
*
|
||||||
|
* Coordinates logger, metrics tracker, and concurrency control for comprehensive
|
||||||
|
* crash-safe audit logging.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { AgentLogger } from './logger.js';
|
||||||
|
import { WorkflowLogger, type AgentLogDetails, type WorkflowSummary } from './workflow-logger.js';
|
||||||
|
import { MetricsTracker } from './metrics-tracker.js';
|
||||||
|
import { initializeAuditStructure, type SessionMetadata } from './utils.js';
|
||||||
|
import { formatTimestamp } from '../utils/formatting.js';
|
||||||
|
import { SessionMutex } from '../utils/concurrency.js';
|
||||||
|
|
||||||
|
// Global mutex instance
|
||||||
|
const sessionMutex = new SessionMutex();
|
||||||
|
|
||||||
|
/** Outcome of one agent attempt, as passed to `AuditSession.endAgent`. */
interface AgentEndResult {
  /** Attempt number. */
  attemptNumber: number;
  /** Duration in milliseconds. */
  duration_ms: number;
  /** Cost in USD. */
  cost_usd: number;
  /** Whether the attempt succeeded. */
  success: boolean;
  /** Error message (if failed). */
  error?: string;
  /** Git checkpoint hash (if succeeded). */
  checkpoint?: string;
  /** Whether this is the final attempt. */
  isFinalAttempt?: boolean;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AuditSession - Main audit system facade
|
||||||
|
*/
|
||||||
|
export class AuditSession {
|
||||||
|
private sessionMetadata: SessionMetadata;
|
||||||
|
private sessionId: string;
|
||||||
|
private metricsTracker: MetricsTracker;
|
||||||
|
private workflowLogger: WorkflowLogger;
|
||||||
|
private currentLogger: AgentLogger | null = null;
|
||||||
|
private currentAgentName: string | null = null;
|
||||||
|
private initialized: boolean = false;
|
||||||
|
|
||||||
|
constructor(sessionMetadata: SessionMetadata) {
|
||||||
|
this.sessionMetadata = sessionMetadata;
|
||||||
|
this.sessionId = sessionMetadata.id;
|
||||||
|
|
||||||
|
// Validate required fields
|
||||||
|
if (!this.sessionId) {
|
||||||
|
throw new Error('sessionMetadata.id is required');
|
||||||
|
}
|
||||||
|
if (!this.sessionMetadata.webUrl) {
|
||||||
|
throw new Error('sessionMetadata.webUrl is required');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Components
|
||||||
|
this.metricsTracker = new MetricsTracker(sessionMetadata);
|
||||||
|
this.workflowLogger = new WorkflowLogger(sessionMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize audit session (creates directories, session.json)
|
||||||
|
* Idempotent and race-safe
|
||||||
|
*/
|
||||||
|
async initialize(): Promise<void> {
|
||||||
|
if (this.initialized) {
|
||||||
|
return; // Already initialized
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create directory structure
|
||||||
|
await initializeAuditStructure(this.sessionMetadata);
|
||||||
|
|
||||||
|
// Initialize metrics tracker (loads or creates session.json)
|
||||||
|
await this.metricsTracker.initialize();
|
||||||
|
|
||||||
|
// Initialize workflow logger
|
||||||
|
await this.workflowLogger.initialize();
|
||||||
|
|
||||||
|
this.initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure initialized (helper for lazy initialization)
|
||||||
|
*/
|
||||||
|
private async ensureInitialized(): Promise<void> {
|
||||||
|
if (!this.initialized) {
|
||||||
|
await this.initialize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start agent execution
|
||||||
|
*/
|
||||||
|
async startAgent(
|
||||||
|
agentName: string,
|
||||||
|
promptContent: string,
|
||||||
|
attemptNumber: number = 1
|
||||||
|
): Promise<void> {
|
||||||
|
await this.ensureInitialized();
|
||||||
|
|
||||||
|
// Save prompt snapshot (only on first attempt)
|
||||||
|
if (attemptNumber === 1) {
|
||||||
|
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track current agent name for workflow logging
|
||||||
|
this.currentAgentName = agentName;
|
||||||
|
|
||||||
|
// Create and initialize logger for this attempt
|
||||||
|
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
|
||||||
|
await this.currentLogger.initialize();
|
||||||
|
|
||||||
|
// Start metrics tracking
|
||||||
|
this.metricsTracker.startAgent(agentName, attemptNumber);
|
||||||
|
|
||||||
|
// Log start event
|
||||||
|
await this.currentLogger.logEvent('agent_start', {
|
||||||
|
agentName,
|
||||||
|
attemptNumber,
|
||||||
|
timestamp: formatTimestamp(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Log to unified workflow log
|
||||||
|
await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Log an event during agent execution.
 *
 * The event is always written to the agent-specific log (JSON format).
 * `tool_start` and `llm_response` events are additionally mirrored to the
 * unified workflow log (human-readable format).
 *
 * @param eventType - Event type label (e.g. 'tool_start', 'llm_response')
 * @param eventData - Arbitrary event payload; fields are only read from it
 *   when it is a non-null object
 * @throws Error when no logger is active (startAgent() has not been called)
 */
async logEvent(eventType: string, eventData: unknown): Promise<void> {
  if (!this.currentLogger) {
    throw new Error('No active logger. Call startAgent() first.');
  }

  // Log to agent-specific log file (JSON format)
  await this.currentLogger.logEvent(eventType, eventData);

  // eventData is `unknown`, so narrow before reading fields. A null or
  // undefined payload falls back to an empty bag instead of throwing a
  // TypeError after the agent log entry has already been written.
  const data: Record<string, unknown> =
    typeof eventData === 'object' && eventData !== null
      ? (eventData as Record<string, unknown>)
      : {};
  const agentName = this.currentAgentName || 'unknown';

  // Also log to unified workflow log (human-readable format)
  switch (eventType) {
    case 'tool_start':
      await this.workflowLogger.logToolStart(
        agentName,
        String(data.toolName || ''),
        data.parameters
      );
      break;
    case 'llm_response':
      await this.workflowLogger.logLlmResponse(
        agentName,
        Number(data.turn || 0),
        String(data.content || '')
      );
      break;
    // tool_end and error events are intentionally not logged to workflow log
    // to reduce noise - the agent completion message captures the outcome
  }
}
|
||||||
|
|
||||||
|
/**
 * End agent execution.
 *
 * Writes the `agent_end` event to the agent log and closes that logger,
 * records the outcome in the unified workflow log, then updates
 * session.json while holding the session mutex.
 *
 * @param agentName - Name of the agent that finished
 * @param result - Outcome of the attempt (success flag, duration, cost, optional error)
 */
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
  const logger = this.currentLogger;
  if (logger) {
    try {
      // Log end event
      await logger.logEvent('agent_end', {
        agentName,
        success: result.success,
        duration_ms: result.duration_ms,
        cost_usd: result.cost_usd,
        timestamp: formatTimestamp(),
      });
    } finally {
      // Close the logger even when writing the end event fails, so the
      // underlying stream is not leaked and no stale logger stays active.
      await logger.close();
      this.currentLogger = null;
    }
  }

  // Reset current agent name
  this.currentAgentName = null;

  // Log to unified workflow log; `error` is only included when one was reported
  const agentLogDetails: AgentLogDetails = {
    attemptNumber: result.attemptNumber,
    duration_ms: result.duration_ms,
    cost_usd: result.cost_usd,
    success: result.success,
    ...(result.error !== undefined && { error: result.error }),
  };
  await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);

  // Mutex-protected update to session.json
  const unlock = await sessionMutex.lock(this.sessionId);
  try {
    // Reload inside mutex to prevent lost updates during parallel exploitation phase
    await this.metricsTracker.reload();

    // Update metrics
    await this.metricsTracker.endAgent(agentName, result);
  } finally {
    unlock();
  }
}
|
||||||
|
|
||||||
|
/**
 * Persist a new session status.
 *
 * Holds the session mutex while reloading the metrics from disk and writing
 * the new status; the mutex is released even if either step throws.
 *
 * @param status - New session status
 */
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed'): Promise<void> {
  await this.ensureInitialized();

  const release = await sessionMutex.lock(this.sessionId);
  try {
    // Reload first so the status change is applied on top of the latest on-disk state
    await this.metricsTracker.reload();
    await this.metricsTracker.updateSessionStatus(status);
  } finally {
    release();
  }
}
|
||||||
|
|
||||||
|
/**
 * Return the metrics tracker's current metrics (read-only access),
 * initializing the session first if needed.
 */
async getMetrics(): Promise<unknown> {
  await this.ensureInitialized();
  const snapshot: unknown = this.metricsTracker.getMetrics();
  return snapshot;
}
|
||||||
|
|
||||||
|
/**
 * Record the start of a phase in the unified workflow log.
 *
 * @param phase - Name of the phase that is starting
 */
async logPhaseStart(phase: string): Promise<void> {
  // The workflow logger is only usable after initialization
  await this.ensureInitialized();

  await this.workflowLogger.logPhase(phase, 'start');
}
|
||||||
|
|
||||||
|
/**
 * Record the completion of a phase in the unified workflow log.
 *
 * @param phase - Name of the phase that has completed
 */
async logPhaseComplete(phase: string): Promise<void> {
  // The workflow logger is only usable after initialization
  await this.ensureInitialized();

  await this.workflowLogger.logPhase(phase, 'complete');
}
|
||||||
|
|
||||||
|
/**
 * Record the end-of-workflow summary in the unified workflow log.
 *
 * @param summary - Workflow summary, passed through to the workflow logger
 */
async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
  // The workflow logger is only usable after initialization
  await this.ensureInitialized();

  await this.workflowLogger.logWorkflowComplete(summary);
}
|
||||||
|
}
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unified Audit & Metrics System
|
* Unified Audit & Metrics System
|
||||||
*
|
*
|
||||||
@@ -12,5 +18,6 @@
|
|||||||
|
|
||||||
export { AuditSession } from './audit-session.js';
|
export { AuditSession } from './audit-session.js';
|
||||||
export { AgentLogger } from './logger.js';
|
export { AgentLogger } from './logger.js';
|
||||||
|
export { WorkflowLogger } from './workflow-logger.js';
|
||||||
export { MetricsTracker } from './metrics-tracker.js';
|
export { MetricsTracker } from './metrics-tracker.js';
|
||||||
export * as AuditUtils from './utils.js';
|
export * as AuditUtils from './utils.js';
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append-Only Agent Logger
|
* Append-Only Agent Logger
|
||||||
*
|
*
|
||||||
@@ -6,18 +12,33 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import { generateLogPath, generatePromptPath, atomicWrite, formatTimestamp } from './utils.js';
|
import {
|
||||||
|
generateLogPath,
|
||||||
|
generatePromptPath,
|
||||||
|
type SessionMetadata,
|
||||||
|
} from './utils.js';
|
||||||
|
import { atomicWrite } from '../utils/file-io.js';
|
||||||
|
import { formatTimestamp } from '../utils/formatting.js';
|
||||||
|
|
||||||
|
interface LogEvent {
|
||||||
|
type: string;
|
||||||
|
timestamp: string;
|
||||||
|
data: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AgentLogger - Manages append-only logging for a single agent execution
|
* AgentLogger - Manages append-only logging for a single agent execution
|
||||||
*/
|
*/
|
||||||
export class AgentLogger {
|
export class AgentLogger {
|
||||||
/**
|
private sessionMetadata: SessionMetadata;
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
private agentName: string;
|
||||||
* @param {string} agentName - Name of the agent
|
private attemptNumber: number;
|
||||||
* @param {number} attemptNumber - Attempt number (1, 2, 3, ...)
|
private timestamp: number;
|
||||||
*/
|
private logPath: string;
|
||||||
constructor(sessionMetadata, agentName, attemptNumber) {
|
private stream: fs.WriteStream | null = null;
|
||||||
|
private isOpen: boolean = false;
|
||||||
|
|
||||||
|
constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
|
||||||
this.sessionMetadata = sessionMetadata;
|
this.sessionMetadata = sessionMetadata;
|
||||||
this.agentName = agentName;
|
this.agentName = agentName;
|
||||||
this.attemptNumber = attemptNumber;
|
this.attemptNumber = attemptNumber;
|
||||||
@@ -25,17 +46,12 @@ export class AgentLogger {
|
|||||||
|
|
||||||
// Generate log file path
|
// Generate log file path
|
||||||
this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
|
this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
|
||||||
|
|
||||||
// Create write stream (append mode)
|
|
||||||
this.stream = null;
|
|
||||||
this.isOpen = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the log stream (creates file and opens stream)
|
* Initialize the log stream (creates file and opens stream)
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
async initialize() {
|
async initialize(): Promise<void> {
|
||||||
if (this.isOpen) {
|
if (this.isOpen) {
|
||||||
return; // Already initialized
|
return; // Already initialized
|
||||||
}
|
}
|
||||||
@@ -44,7 +60,7 @@ export class AgentLogger {
|
|||||||
this.stream = fs.createWriteStream(this.logPath, {
|
this.stream = fs.createWriteStream(this.logPath, {
|
||||||
flags: 'a', // Append mode
|
flags: 'a', // Append mode
|
||||||
encoding: 'utf8',
|
encoding: 'utf8',
|
||||||
autoClose: true
|
autoClose: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
this.isOpen = true;
|
this.isOpen = true;
|
||||||
@@ -55,10 +71,8 @@ export class AgentLogger {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Write header to log file
|
* Write header to log file
|
||||||
* @private
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
async writeHeader() {
|
private async writeHeader(): Promise<void> {
|
||||||
const header = [
|
const header = [
|
||||||
`========================================`,
|
`========================================`,
|
||||||
`Agent: ${this.agentName}`,
|
`Agent: ${this.agentName}`,
|
||||||
@@ -66,7 +80,7 @@ export class AgentLogger {
|
|||||||
`Started: ${formatTimestamp(this.timestamp)}`,
|
`Started: ${formatTimestamp(this.timestamp)}`,
|
||||||
`Session: ${this.sessionMetadata.id}`,
|
`Session: ${this.sessionMetadata.id}`,
|
||||||
`Web URL: ${this.sessionMetadata.webUrl}`,
|
`Web URL: ${this.sessionMetadata.webUrl}`,
|
||||||
`========================================\n`
|
`========================================\n`,
|
||||||
].join('\n');
|
].join('\n');
|
||||||
|
|
||||||
return this.writeRaw(header);
|
return this.writeRaw(header);
|
||||||
@@ -74,33 +88,21 @@ export class AgentLogger {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Write raw text to log file with immediate flush
|
* Write raw text to log file with immediate flush
|
||||||
* @private
|
|
||||||
* @param {string} text - Text to write
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
writeRaw(text) {
|
private writeRaw(text: string): Promise<void> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
if (!this.isOpen || !this.stream) {
|
if (!this.isOpen || !this.stream) {
|
||||||
reject(new Error('Logger not initialized'));
|
reject(new Error('Logger not initialized'));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write and flush immediately (crash-safe)
|
|
||||||
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
|
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
|
||||||
if (error) {
|
if (error) reject(error);
|
||||||
reject(error);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (needsDrain) {
|
if (needsDrain) {
|
||||||
// Buffer is full, wait for drain
|
this.stream.once('drain', resolve);
|
||||||
const drainHandler = () => {
|
|
||||||
this.stream.removeListener('drain', drainHandler);
|
|
||||||
resolve();
|
|
||||||
};
|
|
||||||
this.stream.once('drain', drainHandler);
|
|
||||||
} else {
|
} else {
|
||||||
// Buffer has space, resolve immediately
|
|
||||||
resolve();
|
resolve();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -109,15 +111,12 @@ export class AgentLogger {
|
|||||||
/**
|
/**
|
||||||
* Log an event (tool_start, tool_end, llm_response, etc.)
|
* Log an event (tool_start, tool_end, llm_response, etc.)
|
||||||
* Events are logged as JSON for parseability
|
* Events are logged as JSON for parseability
|
||||||
* @param {string} eventType - Type of event
|
|
||||||
* @param {Object} eventData - Event data
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
async logEvent(eventType, eventData) {
|
async logEvent(eventType: string, eventData: unknown): Promise<void> {
|
||||||
const event = {
|
const event: LogEvent = {
|
||||||
type: eventType,
|
type: eventType,
|
||||||
timestamp: formatTimestamp(),
|
timestamp: formatTimestamp(),
|
||||||
data: eventData
|
data: eventData,
|
||||||
};
|
};
|
||||||
|
|
||||||
const eventLine = `${JSON.stringify(event)}\n`;
|
const eventLine = `${JSON.stringify(event)}\n`;
|
||||||
@@ -126,15 +125,14 @@ export class AgentLogger {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Close the log stream
|
* Close the log stream
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
async close() {
|
async close(): Promise<void> {
|
||||||
if (!this.isOpen || !this.stream) {
|
if (!this.isOpen || !this.stream) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
this.stream.end(() => {
|
this.stream!.end(() => {
|
||||||
this.isOpen = false;
|
this.isOpen = false;
|
||||||
resolve();
|
resolve();
|
||||||
});
|
});
|
||||||
@@ -144,12 +142,12 @@ export class AgentLogger {
|
|||||||
/**
|
/**
|
||||||
* Save prompt snapshot to prompts directory
|
* Save prompt snapshot to prompts directory
|
||||||
* Static method - doesn't require logger instance
|
* Static method - doesn't require logger instance
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
|
||||||
* @param {string} agentName - Agent name
|
|
||||||
* @param {string} promptContent - Full prompt content
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
static async savePrompt(sessionMetadata, agentName, promptContent) {
|
static async savePrompt(
|
||||||
|
sessionMetadata: SessionMetadata,
|
||||||
|
agentName: string,
|
||||||
|
promptContent: string
|
||||||
|
): Promise<void> {
|
||||||
const promptPath = generatePromptPath(sessionMetadata, agentName);
|
const promptPath = generatePromptPath(sessionMetadata, agentName);
|
||||||
|
|
||||||
// Create header with metadata
|
// Create header with metadata
|
||||||
@@ -161,7 +159,7 @@ export class AgentLogger {
|
|||||||
`**Saved:** ${formatTimestamp()}`,
|
`**Saved:** ${formatTimestamp()}`,
|
||||||
``,
|
``,
|
||||||
`---`,
|
`---`,
|
||||||
``
|
``,
|
||||||
].join('\n');
|
].join('\n');
|
||||||
|
|
||||||
const fullContent = header + promptContent;
|
const fullContent = header + promptContent;
|
||||||
@@ -1,331 +0,0 @@
|
|||||||
/**
|
|
||||||
* Metrics Tracker
|
|
||||||
*
|
|
||||||
* Manages session.json with comprehensive timing, cost, and validation metrics.
|
|
||||||
* Tracks attempt-level data for complete forensic trail.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import {
|
|
||||||
generateSessionJsonPath,
|
|
||||||
atomicWrite,
|
|
||||||
readJson,
|
|
||||||
fileExists,
|
|
||||||
formatTimestamp,
|
|
||||||
calculatePercentage
|
|
||||||
} from './utils.js';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* MetricsTracker - Manages metrics for a session
|
|
||||||
*/
|
|
||||||
export class MetricsTracker {
|
|
||||||
/**
|
|
||||||
* @param {Object} sessionMetadata - Session metadata from Shannon store
|
|
||||||
*/
|
|
||||||
constructor(sessionMetadata) {
|
|
||||||
this.sessionMetadata = sessionMetadata;
|
|
||||||
this.sessionJsonPath = generateSessionJsonPath(sessionMetadata);
|
|
||||||
|
|
||||||
// In-memory state (loaded from/synced to session.json)
|
|
||||||
this.data = null;
|
|
||||||
|
|
||||||
// Active timers (agent name -> start time)
|
|
||||||
this.activeTimers = new Map();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize session.json (idempotent)
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async initialize() {
|
|
||||||
// Check if session.json already exists
|
|
||||||
const exists = await fileExists(this.sessionJsonPath);
|
|
||||||
|
|
||||||
if (exists) {
|
|
||||||
// Load existing data
|
|
||||||
this.data = await readJson(this.sessionJsonPath);
|
|
||||||
} else {
|
|
||||||
// Create new session.json
|
|
||||||
this.data = this.createInitialData();
|
|
||||||
await this.save();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create initial session.json structure
|
|
||||||
* @private
|
|
||||||
* @returns {Object} Initial session data
|
|
||||||
*/
|
|
||||||
createInitialData() {
|
|
||||||
return {
|
|
||||||
session: {
|
|
||||||
id: this.sessionMetadata.id,
|
|
||||||
webUrl: this.sessionMetadata.webUrl,
|
|
||||||
repoPath: this.sessionMetadata.repoPath,
|
|
||||||
status: 'in-progress',
|
|
||||||
createdAt: this.sessionMetadata.createdAt || formatTimestamp()
|
|
||||||
},
|
|
||||||
metrics: {
|
|
||||||
total_duration_ms: 0,
|
|
||||||
total_cost_usd: 0,
|
|
||||||
phases: {}, // Phase-level aggregations: { duration_ms, duration_percentage, cost_usd, agent_count }
|
|
||||||
agents: {} // Agent-level metrics: { status, attempts[], final_duration_ms, total_cost_usd, checkpoint }
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start tracking an agent execution
|
|
||||||
* @param {string} agentName - Agent name
|
|
||||||
* @param {number} attemptNumber - Attempt number
|
|
||||||
* @returns {void}
|
|
||||||
*/
|
|
||||||
startAgent(agentName, attemptNumber) {
|
|
||||||
this.activeTimers.set(agentName, {
|
|
||||||
startTime: Date.now(),
|
|
||||||
attemptNumber
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* End agent execution and update metrics
|
|
||||||
* @param {string} agentName - Agent name
|
|
||||||
* @param {Object} result - Agent execution result
|
|
||||||
* @param {number} result.attemptNumber - Attempt number
|
|
||||||
* @param {number} result.duration_ms - Duration in milliseconds
|
|
||||||
* @param {number} result.cost_usd - Cost in USD
|
|
||||||
* @param {boolean} result.success - Whether attempt succeeded
|
|
||||||
* @param {string} [result.error] - Error message (if failed)
|
|
||||||
* @param {string} [result.checkpoint] - Git checkpoint hash (if succeeded)
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async endAgent(agentName, result) {
|
|
||||||
// Initialize agent metrics if not exists
|
|
||||||
if (!this.data.metrics.agents[agentName]) {
|
|
||||||
this.data.metrics.agents[agentName] = {
|
|
||||||
status: 'in-progress',
|
|
||||||
attempts: [],
|
|
||||||
final_duration_ms: 0,
|
|
||||||
total_cost_usd: 0 // Total cost across all attempts (including retries)
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const agent = this.data.metrics.agents[agentName];
|
|
||||||
|
|
||||||
// Add attempt to array
|
|
||||||
const attempt = {
|
|
||||||
attempt_number: result.attemptNumber,
|
|
||||||
duration_ms: result.duration_ms,
|
|
||||||
cost_usd: result.cost_usd,
|
|
||||||
success: result.success,
|
|
||||||
timestamp: formatTimestamp()
|
|
||||||
};
|
|
||||||
|
|
||||||
if (result.error) {
|
|
||||||
attempt.error = result.error;
|
|
||||||
}
|
|
||||||
|
|
||||||
agent.attempts.push(attempt);
|
|
||||||
|
|
||||||
// Update total cost (includes failed attempts)
|
|
||||||
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
|
|
||||||
|
|
||||||
// If successful, update final metrics and status
|
|
||||||
if (result.success) {
|
|
||||||
agent.status = 'success';
|
|
||||||
agent.final_duration_ms = result.duration_ms;
|
|
||||||
|
|
||||||
if (result.checkpoint) {
|
|
||||||
agent.checkpoint = result.checkpoint;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// If this was the last attempt, mark as failed
|
|
||||||
if (result.isFinalAttempt) {
|
|
||||||
agent.status = 'failed';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear active timer
|
|
||||||
this.activeTimers.delete(agentName);
|
|
||||||
|
|
||||||
// Recalculate aggregations
|
|
||||||
this.recalculateAggregations();
|
|
||||||
|
|
||||||
// Save to disk
|
|
||||||
await this.save();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Mark agent as rolled back
|
|
||||||
* @param {string} agentName - Agent name
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async markRolledBack(agentName) {
|
|
||||||
if (!this.data.metrics.agents[agentName]) {
|
|
||||||
return; // Agent not tracked
|
|
||||||
}
|
|
||||||
|
|
||||||
const agent = this.data.metrics.agents[agentName];
|
|
||||||
agent.status = 'rolled-back';
|
|
||||||
agent.rolled_back_at = formatTimestamp();
|
|
||||||
|
|
||||||
// Recalculate aggregations (exclude rolled-back agents)
|
|
||||||
this.recalculateAggregations();
|
|
||||||
|
|
||||||
await this.save();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Mark multiple agents as rolled back
|
|
||||||
* @param {string[]} agentNames - Array of agent names
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async markMultipleRolledBack(agentNames) {
|
|
||||||
for (const agentName of agentNames) {
|
|
||||||
if (this.data.metrics.agents[agentName]) {
|
|
||||||
const agent = this.data.metrics.agents[agentName];
|
|
||||||
agent.status = 'rolled-back';
|
|
||||||
agent.rolled_back_at = formatTimestamp();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
this.recalculateAggregations();
|
|
||||||
await this.save();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Update session status
|
|
||||||
* @param {string} status - New status (in-progress, completed, failed)
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async updateSessionStatus(status) {
|
|
||||||
this.data.session.status = status;
|
|
||||||
|
|
||||||
if (status === 'completed' || status === 'failed') {
|
|
||||||
this.data.session.completedAt = formatTimestamp();
|
|
||||||
}
|
|
||||||
|
|
||||||
await this.save();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Recalculate aggregations (total duration, total cost, phases)
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
recalculateAggregations() {
|
|
||||||
const agents = this.data.metrics.agents;
|
|
||||||
|
|
||||||
// Only count successful agents (not rolled-back or failed)
|
|
||||||
const successfulAgents = Object.entries(agents)
|
|
||||||
.filter(([_, data]) => data.status === 'success');
|
|
||||||
|
|
||||||
// Calculate total duration and cost
|
|
||||||
const totalDuration = successfulAgents.reduce(
|
|
||||||
(sum, [_, data]) => sum + data.final_duration_ms,
|
|
||||||
0
|
|
||||||
);
|
|
||||||
|
|
||||||
const totalCost = successfulAgents.reduce(
|
|
||||||
(sum, [_, data]) => sum + data.total_cost_usd,
|
|
||||||
0
|
|
||||||
);
|
|
||||||
|
|
||||||
this.data.metrics.total_duration_ms = totalDuration;
|
|
||||||
this.data.metrics.total_cost_usd = totalCost;
|
|
||||||
|
|
||||||
// Calculate phase-level metrics
|
|
||||||
this.data.metrics.phases = this.calculatePhaseMetrics(successfulAgents);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Calculate phase-level metrics
|
|
||||||
* @private
|
|
||||||
* @param {Array} successfulAgents - Array of [agentName, agentData] tuples
|
|
||||||
* @returns {Object} Phase metrics
|
|
||||||
*/
|
|
||||||
calculatePhaseMetrics(successfulAgents) {
|
|
||||||
const phases = {
|
|
||||||
'pre-recon': [],
|
|
||||||
'recon': [],
|
|
||||||
'vulnerability-analysis': [],
|
|
||||||
'exploitation': [],
|
|
||||||
'reporting': []
|
|
||||||
};
|
|
||||||
|
|
||||||
// Map agents to phases
|
|
||||||
const agentPhaseMap = {
|
|
||||||
'pre-recon': 'pre-recon',
|
|
||||||
'recon': 'recon',
|
|
||||||
'injection-vuln': 'vulnerability-analysis',
|
|
||||||
'xss-vuln': 'vulnerability-analysis',
|
|
||||||
'auth-vuln': 'vulnerability-analysis',
|
|
||||||
'authz-vuln': 'vulnerability-analysis',
|
|
||||||
'ssrf-vuln': 'vulnerability-analysis',
|
|
||||||
'injection-exploit': 'exploitation',
|
|
||||||
'xss-exploit': 'exploitation',
|
|
||||||
'auth-exploit': 'exploitation',
|
|
||||||
'authz-exploit': 'exploitation',
|
|
||||||
'ssrf-exploit': 'exploitation',
|
|
||||||
'report': 'reporting'
|
|
||||||
};
|
|
||||||
|
|
||||||
// Group agents by phase
|
|
||||||
for (const [agentName, agentData] of successfulAgents) {
|
|
||||||
const phase = agentPhaseMap[agentName];
|
|
||||||
if (phase) {
|
|
||||||
phases[phase].push(agentData);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate metrics per phase
|
|
||||||
const phaseMetrics = {};
|
|
||||||
const totalDuration = this.data.metrics.total_duration_ms;
|
|
||||||
|
|
||||||
for (const [phaseName, agentList] of Object.entries(phases)) {
|
|
||||||
if (agentList.length === 0) continue;
|
|
||||||
|
|
||||||
const phaseDuration = agentList.reduce(
|
|
||||||
(sum, agent) => sum + agent.final_duration_ms,
|
|
||||||
0
|
|
||||||
);
|
|
||||||
|
|
||||||
const phaseCost = agentList.reduce(
|
|
||||||
(sum, agent) => sum + agent.total_cost_usd,
|
|
||||||
0
|
|
||||||
);
|
|
||||||
|
|
||||||
phaseMetrics[phaseName] = {
|
|
||||||
duration_ms: phaseDuration,
|
|
||||||
duration_percentage: calculatePercentage(phaseDuration, totalDuration),
|
|
||||||
cost_usd: phaseCost,
|
|
||||||
agent_count: agentList.length
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return phaseMetrics;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get current metrics
|
|
||||||
* @returns {Object} Current metrics data
|
|
||||||
*/
|
|
||||||
getMetrics() {
|
|
||||||
return JSON.parse(JSON.stringify(this.data));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Save metrics to session.json (atomic write)
|
|
||||||
* @private
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async save() {
|
|
||||||
await atomicWrite(this.sessionJsonPath, this.data);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reload metrics from disk
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
|
||||||
async reload() {
|
|
||||||
this.data = await readJson(this.sessionJsonPath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,313 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Metrics Tracker
|
||||||
|
*
|
||||||
|
* Manages session.json with comprehensive timing, cost, and validation metrics.
|
||||||
|
* Tracks attempt-level data for complete forensic trail.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
generateSessionJsonPath,
|
||||||
|
type SessionMetadata,
|
||||||
|
} from './utils.js';
|
||||||
|
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
|
||||||
|
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
|
||||||
|
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
|
||||||
|
import type { AgentName } from '../types/index.js';
|
||||||
|
|
||||||
|
/**
 * One recorded attempt of an agent, as stored in an agent's `attempts` array.
 */
interface AttemptData {
  /** Attempt counter copied from the end-of-agent result. */
  attempt_number: number;
  /** Duration of this attempt in milliseconds. */
  duration_ms: number;
  /** Cost of this attempt in USD. */
  cost_usd: number;
  /** Whether this attempt succeeded. */
  success: boolean;
  /** Formatted timestamp taken when the attempt was recorded. */
  timestamp: string;
  /** Error message; only present when the result carried a non-empty error. */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Per-agent metrics stored under `metrics.agents[agentName]` in session.json.
 */
interface AgentMetrics {
  /** Lifecycle state; starts as 'in-progress' and is updated when an attempt ends. */
  status: 'in-progress' | 'success' | 'failed';
  /** Every recorded attempt, in the order they ended. */
  attempts: AttemptData[];
  /** Duration (ms) of the successful attempt; stays 0 until one succeeds. */
  final_duration_ms: number;
  /** Sum of `cost_usd` over all attempts, failed ones included. */
  total_cost_usd: number;
  /** Checkpoint taken from a successful result, when it supplied one. */
  checkpoint?: string;
}
|
||||||
|
|
||||||
|
/**
 * Aggregated metrics for one workflow phase, stored under `metrics.phases`.
 */
interface PhaseMetrics {
  /** Phase duration in milliseconds. */
  duration_ms: number;
  /** NOTE(review): presumably this phase's share of the session's total duration - confirm against the aggregation code. */
  duration_percentage: number;
  /** Phase cost in USD. */
  cost_usd: number;
  /** Number of agents counted towards this phase. */
  agent_count: number;
}
|
||||||
|
|
||||||
|
/**
 * Shape of the session.json document managed by MetricsTracker.
 */
interface SessionData {
  /** Identity and lifecycle information for the session. */
  session: {
    /** Session identifier copied from the session metadata. */
    id: string;
    /** Web URL copied from the session metadata. */
    webUrl: string;
    /** Repository path; only written when the metadata provides one. */
    repoPath?: string;
    /** Session lifecycle state; new sessions start as 'in-progress'. */
    status: 'in-progress' | 'completed' | 'failed';
    /** Creation timestamp (from the metadata when available, otherwise generated). */
    createdAt: string;
    /** Set when the status becomes 'completed' or 'failed'. */
    completedAt?: string;
  };
  /** Timing and cost aggregates. */
  metrics: {
    /** Aggregated duration in milliseconds. */
    total_duration_ms: number;
    /** Aggregated cost in USD. */
    total_cost_usd: number;
    /** Phase-level aggregations, keyed by phase name. */
    phases: Record<string, PhaseMetrics>;
    /** Agent-level metrics, keyed by agent name. */
    agents: Record<string, AgentMetrics>;
  };
}
|
||||||
|
|
||||||
|
/**
 * Outcome of a single agent attempt, as passed to MetricsTracker.endAgent().
 */
interface AgentEndResult {
  /** Which attempt this result belongs to. */
  attemptNumber: number;
  /** Duration of the attempt in milliseconds. */
  duration_ms: number;
  /** Cost of the attempt in USD. */
  cost_usd: number;
  /** Whether the attempt succeeded. */
  success: boolean;
  /** Error message for a failed attempt. */
  error?: string;
  /** Checkpoint to store on the agent when the attempt succeeded. */
  checkpoint?: string;
  /** When set on a failed attempt, the agent is marked 'failed'. */
  isFinalAttempt?: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Bookkeeping entry for an agent attempt that has started but not yet ended.
 */
interface ActiveTimer {
  /** `Date.now()` value captured when the attempt started. */
  startTime: number;
  /** Attempt counter the timer belongs to. */
  attemptNumber: number;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MetricsTracker - Manages metrics for a session
|
||||||
|
*/
|
||||||
|
export class MetricsTracker {
|
||||||
|
private sessionMetadata: SessionMetadata;
|
||||||
|
private sessionJsonPath: string;
|
||||||
|
private data: SessionData | null = null;
|
||||||
|
private activeTimers: Map<string, ActiveTimer> = new Map();
|
||||||
|
|
||||||
|
/**
 * Store the session metadata and derive the session.json path from it.
 * No file I/O happens here; call initialize() to load or create the file.
 */
constructor(sessionMetadata: SessionMetadata) {
  this.sessionJsonPath = generateSessionJsonPath(sessionMetadata);
  this.sessionMetadata = sessionMetadata;
}
|
||||||
|
|
||||||
|
/**
 * Load session.json when it already exists; otherwise build the initial
 * structure and write it to disk.
 */
async initialize(): Promise<void> {
  const alreadyOnDisk = await fileExists(this.sessionJsonPath);

  if (!alreadyOnDisk) {
    // First run for this session: create and persist a fresh document
    this.data = this.createInitialData();
    await this.save();
    return;
  }

  // Existing session: load its previously saved state
  this.data = await readJson<SessionData>(this.sessionJsonPath);
}
|
||||||
|
|
||||||
|
/**
 * Build the initial session.json structure: an 'in-progress' session with
 * zeroed totals and empty phase/agent maps.
 */
private createInitialData(): SessionData {
  const { id, webUrl, repoPath } = this.sessionMetadata;

  // Use createdAt from the metadata when it has one, otherwise stamp the current time
  const createdAt =
    (this.sessionMetadata as { createdAt?: string }).createdAt || formatTimestamp();

  return {
    session: {
      id,
      webUrl,
      status: 'in-progress',
      createdAt,
      // repoPath is only included when the metadata supplies a non-empty value
      ...(repoPath ? { repoPath } : {}),
    },
    metrics: {
      total_duration_ms: 0,
      total_cost_usd: 0,
      phases: {}, // Phase-level aggregations
      agents: {}, // Agent-level metrics
    },
  };
}
|
||||||
|
|
||||||
|
/**
 * Register an active timer for an agent attempt, replacing any timer
 * already stored under the same agent name.
 *
 * @param agentName - Agent whose attempt is starting
 * @param attemptNumber - Attempt counter to remember alongside the start time
 */
startAgent(agentName: string, attemptNumber: number): void {
  const timer: ActiveTimer = {
    startTime: Date.now(),
    attemptNumber,
  };
  this.activeTimers.set(agentName, timer);
}
|
||||||
|
|
||||||
|
/**
 * Record the outcome of an agent attempt and persist the updated metrics.
 *
 * Appends the attempt to the agent's history, refreshes the agent's total
 * cost and status, clears its active timer, recalculates the aggregations
 * and saves session.json.
 *
 * @param agentName - Agent whose attempt ended
 * @param result - Outcome of the attempt
 * @throws Error when no session data has been loaded yet
 */
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
  if (!this.data) {
    throw new Error('MetricsTracker not initialized');
  }

  // Fetch the agent's record, creating an empty one on first sight
  const agents = this.data.metrics.agents;
  const agent: AgentMetrics = agents[agentName] ?? {
    status: 'in-progress',
    attempts: [],
    final_duration_ms: 0,
    total_cost_usd: 0,
  };
  agents[agentName] = agent;

  // Append this attempt; `error` is only stored when the result has a non-empty one
  const attempt: AttemptData = {
    attempt_number: result.attemptNumber,
    duration_ms: result.duration_ms,
    cost_usd: result.cost_usd,
    success: result.success,
    timestamp: formatTimestamp(),
    ...(result.error ? { error: result.error } : {}),
  };
  agent.attempts.push(attempt);

  // Total cost covers every attempt, failed ones included
  let costSoFar = 0;
  for (const recorded of agent.attempts) {
    costSoFar += recorded.cost_usd;
  }
  agent.total_cost_usd = costSoFar;

  if (!result.success) {
    // A failure only becomes the agent's status when it was the last attempt
    if (result.isFinalAttempt) {
      agent.status = 'failed';
    }
  } else {
    agent.status = 'success';
    agent.final_duration_ms = result.duration_ms;
    if (result.checkpoint) {
      agent.checkpoint = result.checkpoint;
    }
  }

  // The attempt is over, so drop its timer
  this.activeTimers.delete(agentName);

  // Refresh aggregations, then persist
  this.recalculateAggregations();
  await this.save();
}
|
||||||
|
|
||||||
|
/**
 * Set the session status and persist it.
 *
 * Does nothing when no session data is loaded. Moving to 'completed' or
 * 'failed' also stamps `completedAt` with the current time.
 *
 * @param status - New session status.
 */
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed'): Promise<void> {
  const data = this.data;
  if (!data) {
    return;
  }

  const { session } = data;
  session.status = status;

  const isTerminal = status === 'completed' || status === 'failed';
  if (isTerminal) {
    session.completedAt = formatTimestamp();
  }

  await this.save();
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the session-level totals and the per-phase breakdown.
 *
 * Only agents whose status is 'success' contribute: their final durations and
 * total costs are summed into `total_duration_ms` / `total_cost_usd`, and the
 * same set of agents feeds the phase metrics. No-op when no data is loaded.
 */
private recalculateAggregations(): void {
  if (!this.data) {
    return;
  }

  const { metrics } = this.data;

  const successfulAgents: Array<[string, AgentMetrics]> = [];
  let totalDuration = 0;
  let totalCost = 0;

  for (const entry of Object.entries(metrics.agents)) {
    const agentData = entry[1];
    if (agentData.status !== 'success') {
      continue;
    }
    successfulAgents.push(entry);
    totalDuration += agentData.final_duration_ms;
    totalCost += agentData.total_cost_usd;
  }

  metrics.total_duration_ms = totalDuration;
  metrics.total_cost_usd = totalCost;

  // Must run after total_duration_ms is stored: the phase percentages read it.
  metrics.phases = this.calculatePhaseMetrics(successfulAgents);
}
|
||||||
|
|
||||||
|
/**
 * Aggregate successful agents into per-phase metrics.
 *
 * Agents are grouped by the phase that AGENT_PHASE_MAP assigns to their name;
 * agents without a mapping are ignored. Phases with no agents are omitted
 * from the result. Each phase's duration percentage is taken relative to the
 * session's stored `total_duration_ms`.
 *
 * @param successfulAgents - `[agentName, metrics]` pairs to aggregate.
 * @returns Metrics keyed by phase name.
 */
private calculatePhaseMetrics(
  successfulAgents: Array<[string, AgentMetrics]>
): Record<string, PhaseMetrics> {
  const byPhase: Record<PhaseName, AgentMetrics[]> = {
    'pre-recon': [],
    'recon': [],
    'vulnerability-analysis': [],
    'exploitation': [],
    'reporting': [],
  };

  successfulAgents.forEach(([agentName, agentData]) => {
    const phase = AGENT_PHASE_MAP[agentName as AgentName];
    if (phase) {
      byPhase[phase].push(agentData);
    }
  });

  const result: Record<string, PhaseMetrics> = {};
  const sessionDuration = this.data!.metrics.total_duration_ms;

  for (const [phaseName, members] of Object.entries(byPhase)) {
    if (members.length > 0) {
      let phaseDuration = 0;
      for (const member of members) {
        phaseDuration += member.final_duration_ms;
      }

      let phaseCost = 0;
      for (const member of members) {
        phaseCost += member.total_cost_usd;
      }

      result[phaseName] = {
        duration_ms: phaseDuration,
        duration_percentage: calculatePercentage(phaseDuration, sessionDuration),
        cost_usd: phaseCost,
        agent_count: members.length,
      };
    }
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Return a snapshot of the current session data.
 *
 * The snapshot is produced by a JSON round-trip, so it is a separate object
 * graph: mutating it does not touch the tracker's own state.
 *
 * NOTE(review): there is no guard for unset session data here — confirm
 * callers only use this after the tracker has been initialized.
 */
getMetrics(): SessionData {
  const serialized = JSON.stringify(this.data);
  return JSON.parse(serialized) as SessionData;
}
|
||||||
|
|
||||||
|
/**
 * Persist the in-memory session data to `sessionJsonPath` via `atomicWrite`.
 * Skipped entirely when no session data is loaded.
 */
private async save(): Promise<void> {
  if (this.data) {
    await atomicWrite(this.sessionJsonPath, this.data);
  }
}
|
||||||
|
|
||||||
|
/**
 * Replace the in-memory session data with the contents of `sessionJsonPath`.
 * Any read or parse failure from `readJson` propagates to the caller.
 */
async reload(): Promise<void> {
  const fromDisk = await readJson<SessionData>(this.sessionJsonPath);
  this.data = fromDisk;
}
|
||||||
|
}
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Audit System Utilities
|
* Audit System Utilities
|
||||||
*
|
*
|
||||||
@@ -16,38 +22,48 @@ const __dirname = path.dirname(__filename);
|
|||||||
export const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
|
export const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
|
||||||
export const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
|
export const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
|
||||||
|
|
||||||
|
export interface SessionMetadata {
|
||||||
|
id: string;
|
||||||
|
webUrl: string;
|
||||||
|
repoPath?: string;
|
||||||
|
outputPath?: string;
|
||||||
|
[key: string]: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate standardized session identifier: {hostname}_{sessionId}
|
* Extract and sanitize hostname from URL for use in identifiers
|
||||||
* @param {Object} sessionMetadata - Session metadata from Shannon store
|
|
||||||
* @param {string} sessionMetadata.id - UUID session ID
|
|
||||||
* @param {string} sessionMetadata.webUrl - Target web URL
|
|
||||||
* @returns {string} Formatted session identifier
|
|
||||||
*/
|
*/
|
||||||
export function generateSessionIdentifier(sessionMetadata) {
|
export function sanitizeHostname(url: string): string {
|
||||||
const { id, webUrl } = sessionMetadata;
|
return new URL(url).hostname.replace(/[^a-zA-Z0-9-]/g, '-');
|
||||||
const hostname = new URL(webUrl).hostname.replace(/[^a-zA-Z0-9-]/g, '-');
|
}
|
||||||
return `${hostname}_${id}`;
|
|
||||||
|
/**
|
||||||
|
* Generate standardized session identifier from workflow ID
|
||||||
|
* Workflow IDs already contain hostname, so we use them directly
|
||||||
|
*/
|
||||||
|
export function generateSessionIdentifier(sessionMetadata: SessionMetadata): string {
|
||||||
|
return sessionMetadata.id;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate path to audit log directory for a session
|
* Generate path to audit log directory for a session
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
* Uses custom outputPath if provided, otherwise defaults to AUDIT_LOGS_DIR
|
||||||
* @returns {string} Absolute path to session audit directory
|
|
||||||
*/
|
*/
|
||||||
export function generateAuditPath(sessionMetadata) {
|
export function generateAuditPath(sessionMetadata: SessionMetadata): string {
|
||||||
const sessionIdentifier = generateSessionIdentifier(sessionMetadata);
|
const sessionIdentifier = generateSessionIdentifier(sessionMetadata);
|
||||||
return path.join(AUDIT_LOGS_DIR, sessionIdentifier);
|
const baseDir = sessionMetadata.outputPath || AUDIT_LOGS_DIR;
|
||||||
|
return path.join(baseDir, sessionIdentifier);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate path to agent log file
|
* Generate path to agent log file
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
|
||||||
* @param {string} agentName - Name of the agent
|
|
||||||
* @param {number} timestamp - Timestamp (ms since epoch)
|
|
||||||
* @param {number} attemptNumber - Attempt number (1, 2, 3, ...)
|
|
||||||
* @returns {string} Absolute path to agent log file
|
|
||||||
*/
|
*/
|
||||||
export function generateLogPath(sessionMetadata, agentName, timestamp, attemptNumber) {
|
export function generateLogPath(
|
||||||
|
sessionMetadata: SessionMetadata,
|
||||||
|
agentName: string,
|
||||||
|
timestamp: number,
|
||||||
|
attemptNumber: number
|
||||||
|
): string {
|
||||||
const auditPath = generateAuditPath(sessionMetadata);
|
const auditPath = generateAuditPath(sessionMetadata);
|
||||||
const filename = `${timestamp}_${agentName}_attempt-${attemptNumber}.log`;
|
const filename = `${timestamp}_${agentName}_attempt-${attemptNumber}.log`;
|
||||||
return path.join(auditPath, 'agents', filename);
|
return path.join(auditPath, 'agents', filename);
|
||||||
@@ -55,36 +71,37 @@ export function generateLogPath(sessionMetadata, agentName, timestamp, attemptNu
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate path to prompt snapshot file
|
* Generate path to prompt snapshot file
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
|
||||||
* @param {string} agentName - Name of the agent
|
|
||||||
* @returns {string} Absolute path to prompt file
|
|
||||||
*/
|
*/
|
||||||
export function generatePromptPath(sessionMetadata, agentName) {
|
export function generatePromptPath(sessionMetadata: SessionMetadata, agentName: string): string {
|
||||||
const auditPath = generateAuditPath(sessionMetadata);
|
const auditPath = generateAuditPath(sessionMetadata);
|
||||||
return path.join(auditPath, 'prompts', `${agentName}.md`);
|
return path.join(auditPath, 'prompts', `${agentName}.md`);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate path to session.json file
|
* Generate path to session.json file
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
|
||||||
* @returns {string} Absolute path to session.json
|
|
||||||
*/
|
*/
|
||||||
export function generateSessionJsonPath(sessionMetadata) {
|
export function generateSessionJsonPath(sessionMetadata: SessionMetadata): string {
|
||||||
const auditPath = generateAuditPath(sessionMetadata);
|
const auditPath = generateAuditPath(sessionMetadata);
|
||||||
return path.join(auditPath, 'session.json');
|
return path.join(auditPath, 'session.json');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure directory exists (idempotent, race-safe)
|
* Generate path to workflow.log file
|
||||||
* @param {string} dirPath - Directory path to create
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
export async function ensureDirectory(dirPath) {
|
export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): string {
|
||||||
|
const auditPath = generateAuditPath(sessionMetadata);
|
||||||
|
return path.join(auditPath, 'workflow.log');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure directory exists (idempotent, race-safe)
|
||||||
|
*/
|
||||||
|
export async function ensureDirectory(dirPath: string): Promise<void> {
|
||||||
try {
|
try {
|
||||||
await fs.mkdir(dirPath, { recursive: true });
|
await fs.mkdir(dirPath, { recursive: true });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Ignore EEXIST errors (race condition safe)
|
// Ignore EEXIST errors (race condition safe)
|
||||||
if (error.code !== 'EEXIST') {
|
if ((error as NodeJS.ErrnoException).code !== 'EEXIST') {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -93,11 +110,8 @@ export async function ensureDirectory(dirPath) {
|
|||||||
/**
|
/**
|
||||||
* Atomic write using temp file + rename pattern
|
* Atomic write using temp file + rename pattern
|
||||||
* Guarantees no partial writes or corruption on crash
|
* Guarantees no partial writes or corruption on crash
|
||||||
* @param {string} filePath - Target file path
|
|
||||||
* @param {Object|string} data - Data to write (will be JSON.stringified if object)
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
export async function atomicWrite(filePath, data) {
|
export async function atomicWrite(filePath: string, data: object | string): Promise<void> {
|
||||||
const tempPath = `${filePath}.tmp`;
|
const tempPath = `${filePath}.tmp`;
|
||||||
const content = typeof data === 'string' ? data : JSON.stringify(data, null, 2);
|
const content = typeof data === 'string' ? data : JSON.stringify(data, null, 2);
|
||||||
|
|
||||||
@@ -111,7 +125,7 @@ export async function atomicWrite(filePath, data) {
|
|||||||
// Clean up temp file on failure
|
// Clean up temp file on failure
|
||||||
try {
|
try {
|
||||||
await fs.unlink(tempPath);
|
await fs.unlink(tempPath);
|
||||||
} catch (cleanupError) {
|
} catch {
|
||||||
// Ignore cleanup errors
|
// Ignore cleanup errors
|
||||||
}
|
}
|
||||||
throw error;
|
throw error;
|
||||||
@@ -120,10 +134,8 @@ export async function atomicWrite(filePath, data) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Format duration in milliseconds to human-readable string
|
* Format duration in milliseconds to human-readable string
|
||||||
* @param {number} ms - Duration in milliseconds
|
|
||||||
* @returns {string} Formatted duration (e.g., "2m 34s", "45s", "1.2s")
|
|
||||||
*/
|
*/
|
||||||
export function formatDuration(ms) {
|
export function formatDuration(ms: number): string {
|
||||||
if (ms < 1000) {
|
if (ms < 1000) {
|
||||||
return `${ms}ms`;
|
return `${ms}ms`;
|
||||||
}
|
}
|
||||||
@@ -140,40 +152,31 @@ export function formatDuration(ms) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Format timestamp to ISO 8601 string
|
* Format timestamp to ISO 8601 string
|
||||||
* @param {number} [timestamp] - Unix timestamp in ms (defaults to now)
|
|
||||||
* @returns {string} ISO 8601 formatted string
|
|
||||||
*/
|
*/
|
||||||
export function formatTimestamp(timestamp = Date.now()) {
|
export function formatTimestamp(timestamp: number = Date.now()): string {
|
||||||
return new Date(timestamp).toISOString();
|
return new Date(timestamp).toISOString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate percentage
|
* Calculate percentage
|
||||||
* @param {number} part - Part value
|
|
||||||
* @param {number} total - Total value
|
|
||||||
* @returns {number} Percentage (0-100)
|
|
||||||
*/
|
*/
|
||||||
export function calculatePercentage(part, total) {
|
export function calculatePercentage(part: number, total: number): number {
|
||||||
if (total === 0) return 0;
|
if (total === 0) return 0;
|
||||||
return (part / total) * 100;
|
return (part / total) * 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read and parse JSON file
|
* Read and parse JSON file
|
||||||
* @param {string} filePath - Path to JSON file
|
|
||||||
* @returns {Promise<Object>} Parsed JSON data
|
|
||||||
*/
|
*/
|
||||||
export async function readJson(filePath) {
|
export async function readJson<T = unknown>(filePath: string): Promise<T> {
|
||||||
const content = await fs.readFile(filePath, 'utf8');
|
const content = await fs.readFile(filePath, 'utf8');
|
||||||
return JSON.parse(content);
|
return JSON.parse(content) as T;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if file exists
|
* Check if file exists
|
||||||
* @param {string} filePath - Path to check
|
|
||||||
* @returns {Promise<boolean>} True if file exists
|
|
||||||
*/
|
*/
|
||||||
export async function fileExists(filePath) {
|
export async function fileExists(filePath: string): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
await fs.access(filePath);
|
await fs.access(filePath);
|
||||||
return true;
|
return true;
|
||||||
@@ -185,10 +188,8 @@ export async function fileExists(filePath) {
|
|||||||
/**
|
/**
|
||||||
* Initialize audit directory structure for a session
|
* Initialize audit directory structure for a session
|
||||||
* Creates: audit-logs/{sessionId}/, agents/, prompts/
|
* Creates: audit-logs/{sessionId}/, agents/, prompts/
|
||||||
* @param {Object} sessionMetadata - Session metadata
|
|
||||||
* @returns {Promise<void>}
|
|
||||||
*/
|
*/
|
||||||
export async function initializeAuditStructure(sessionMetadata) {
|
export async function initializeAuditStructure(sessionMetadata: SessionMetadata): Promise<void> {
|
||||||
const auditPath = generateAuditPath(sessionMetadata);
|
const auditPath = generateAuditPath(sessionMetadata);
|
||||||
const agentsPath = path.join(auditPath, 'agents');
|
const agentsPath = path.join(auditPath, 'agents');
|
||||||
const promptsPath = path.join(auditPath, 'prompts');
|
const promptsPath = path.join(auditPath, 'prompts');
|
||||||
@@ -0,0 +1,382 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Workflow Logger
|
||||||
|
*
|
||||||
|
* Provides a unified, human-readable log file per workflow.
|
||||||
|
* Optimized for `tail -f` viewing during concurrent workflow execution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import fs from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
|
||||||
|
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
|
||||||
|
|
||||||
|
/**
 * Optional details attached to an agent log line (see `WorkflowLogger.logAgent`).
 */
export interface AgentLogDetails {
  /** Attempt number shown on 'start' lines; the logger prints 1 when omitted. */
  attemptNumber?: number;
  /** Attempt duration in milliseconds; shown on 'end' lines when present. */
  duration_ms?: number;
  /** Attempt cost in USD; shown next to the duration when both are present. */
  cost_usd?: number;
  /** Only an explicit `false` makes the 'end' line read "Failed". */
  success?: boolean;
  /** Failure description appended to "Failed" lines. */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Per-agent figures printed in the workflow completion summary.
 */
export interface AgentMetricsSummary {
  /** Agent duration in milliseconds. */
  durationMs: number;
  /** Agent cost in USD; `null` is printed as "N/A". */
  costUsd: number | null;
}
|
||||||
|
|
||||||
|
/**
 * Final workflow outcome written by `WorkflowLogger.logWorkflowComplete`.
 */
export interface WorkflowSummary {
  /** Terminal state of the workflow. */
  status: 'completed' | 'failed';
  /** Total workflow duration in milliseconds. */
  totalDurationMs: number;
  /** Total workflow cost in USD. */
  totalCostUsd: number;
  /** Names of the agents listed in the breakdown, in print order. */
  completedAgents: string[];
  /** Per-agent figures keyed by agent name; agents without an entry are listed by name only. */
  agentMetrics: Record<string, AgentMetricsSummary>;
  /** Error description; printed only when set. */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * WorkflowLogger - Manages the unified workflow log file.
 *
 * Appends human-readable, single-line records to one log file per workflow.
 * The file is opened lazily on the first log call (or explicitly through
 * `initialize()`), and a header block is written when the file is new or
 * empty. Every log method resolves only after its text has been handed to the
 * underlying stream and the stream has reported the write as done.
 */
export class WorkflowLogger {
  private sessionMetadata: SessionMetadata;
  private logPath: string;
  private stream: fs.WriteStream | null = null;
  private initialized: boolean = false;
  /** In-flight initialization, shared by callers that arrive while it runs. */
  private initPromise: Promise<void> | null = null;

  /**
   * @param sessionMetadata - Session whose id / target URL appear in the log
   *   and from which the log file path is derived.
   */
  constructor(sessionMetadata: SessionMetadata) {
    this.sessionMetadata = sessionMetadata;
    this.logPath = generateWorkflowLogPath(sessionMetadata);
  }

  /**
   * Initialize the log stream (creates file and writes header).
   *
   * Safe to call repeatedly and concurrently: callers that arrive while an
   * initialization is in progress wait for that same initialization instead
   * of opening a second stream. A failed initialization can be retried.
   */
  async initialize(): Promise<void> {
    if (this.initialized) {
      return;
    }

    if (!this.initPromise) {
      this.initPromise = this.openLog().finally(() => {
        this.initPromise = null;
      });
    }

    await this.initPromise;
  }

  /**
   * Create the log directory, open the append stream and write the header
   * when the file is new or empty. Marks the logger initialized only once the
   * header (if any) has been written, so no log line can precede it.
   */
  private async openLog(): Promise<void> {
    await ensureDirectory(path.dirname(this.logPath));

    // Inspect the file before opening it: a missing or zero-length file gets a header.
    const stats = await fs.promises.stat(this.logPath).catch(() => null);
    const needsHeader = !stats || stats.size === 0;

    const stream = fs.createWriteStream(this.logPath, {
      flags: 'a',
      encoding: 'utf8',
      autoClose: true,
    });
    this.stream = stream;

    try {
      if (needsHeader) {
        await this.writeHeader();
      }
    } catch (error) {
      // Do not keep a half-initialized stream around; a retry opens a new one.
      stream.destroy();
      this.stream = null;
      throw error;
    }

    this.initialized = true;
  }

  /**
   * Write header to log file
   */
  private async writeHeader(): Promise<void> {
    const header = [
      `================================================================================`,
      `Shannon Pentest - Workflow Log`,
      `================================================================================`,
      `Workflow ID: ${this.sessionMetadata.id}`,
      `Target URL: ${this.sessionMetadata.webUrl}`,
      `Started: ${formatTimestamp()}`,
      `================================================================================`,
      ``,
    ].join('\n');

    return this.writeRaw(header);
  }

  /**
   * Write raw text to the log file.
   *
   * The returned promise settles from the stream's write callback: it resolves
   * once the chunk has been written and rejects with the write error
   * otherwise. Waiting for the callback also covers backpressure, so no
   * separate 'drain' handling is needed.
   *
   * @throws (rejects) Error 'WorkflowLogger not initialized' when no stream is open.
   */
  private writeRaw(text: string): Promise<void> {
    const stream = this.stream;
    if (!stream) {
      return Promise.reject(new Error('WorkflowLogger not initialized'));
    }

    return new Promise<void>((resolve, reject) => {
      stream.write(text, 'utf8', (error) => {
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      });
    });
  }

  /**
   * Format the current time for a log line as `YYYY-MM-DD HH:MM:SS`.
   * The value is derived from `toISOString()`, so it is in UTC, not local time.
   */
  private formatLogTime(): string {
    const now = new Date();
    return now.toISOString().replace('T', ' ').slice(0, 19);
  }

  /**
   * Log a phase transition event.
   *
   * A 'start' event is preceded by a blank line for readability; the blank
   * line and the record are written together so nothing can land between them.
   */
  async logPhase(phase: string, event: 'start' | 'complete'): Promise<void> {
    await this.ensureInitialized();

    const action = event === 'start' ? 'Starting' : 'Completed';
    const line = `[${this.formatLogTime()}] [PHASE] ${action}: ${phase}\n`;

    await this.writeRaw(event === 'start' ? `\n${line}` : line);
  }

  /**
   * Log an agent event.
   *
   * 'start' lines read `<agent>: Starting (attempt N)` (N defaults to 1).
   * 'end' lines read `<agent>: Completed` or `<agent>: Failed - <error>`,
   * followed by `(<duration>)` or `(<duration> $<cost>)` when a duration is
   * supplied.
   */
  async logAgent(
    agentName: string,
    event: 'start' | 'end',
    details?: AgentLogDetails
  ): Promise<void> {
    await this.ensureInitialized();

    let message: string;

    if (event === 'start') {
      const attempt = details?.attemptNumber ?? 1;
      message = `${agentName}: Starting (attempt ${attempt})`;
    } else {
      const parts: string[] = [agentName + ':'];

      if (details?.success === false) {
        parts.push('Failed');
        if (details?.error) {
          parts.push(`- ${details.error}`);
        }
      } else {
        parts.push('Completed');
      }

      if (details?.duration_ms !== undefined) {
        // Build the parenthesised group in one piece so a missing cost does
        // not leave a stray space before the closing parenthesis.
        const duration = formatDuration(details.duration_ms);
        const cost = details.cost_usd !== undefined ? ` $${details.cost_usd.toFixed(2)}` : '';
        parts.push(`(${duration}${cost})`);
      }

      message = parts.join(' ');
    }

    const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
    await this.writeRaw(line);
  }

  /**
   * Log a general event; the event type is upper-cased for the tag.
   */
  async logEvent(eventType: string, message: string): Promise<void> {
    await this.ensureInitialized();

    const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
    await this.writeRaw(line);
  }

  /**
   * Log an error message, with an optional context suffix in parentheses.
   */
  async logError(error: Error, context?: string): Promise<void> {
    await this.ensureInitialized();

    const contextStr = context ? ` (${context})` : '';
    const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
    await this.writeRaw(line);
  }

  /**
   * Truncate string to max length with ellipsis (the ellipsis counts toward maxLen).
   */
  private truncate(str: string, maxLen: number): string {
    if (str.length <= maxLen) return str;
    return str.slice(0, maxLen - 3) + '...';
  }

  /**
   * Format tool parameters for human-readable display.
   *
   * Known tools show their most informative parameter; any other tool (or a
   * known tool missing that parameter) falls back to the first non-empty
   * string-valued parameter as `key=value`. Returns '' when nothing suitable
   * is found or `params` is not an object.
   */
  private formatToolParams(toolName: string, params: unknown): string {
    if (!params || typeof params !== 'object') {
      return '';
    }

    const p = params as Record<string, unknown>;

    // Tool-specific formatting for common tools
    switch (toolName) {
      case 'Bash':
        if (p.command) {
          return this.truncate(String(p.command).replace(/\n/g, ' '), 100);
        }
        break;
      case 'Read':
      case 'Write':
      case 'Edit':
        if (p.file_path) {
          return String(p.file_path);
        }
        break;
      case 'Glob':
        if (p.pattern) {
          return String(p.pattern);
        }
        break;
      case 'Grep':
        if (p.pattern) {
          // Named `location` to avoid shadowing the imported `path` module.
          const location = p.path ? ` in ${String(p.path)}` : '';
          return `"${this.truncate(String(p.pattern), 50)}"${location}`;
        }
        break;
      case 'WebFetch':
      case 'mcp__playwright__browser_navigate':
        if (p.url) {
          return String(p.url);
        }
        break;
      case 'mcp__playwright__browser_click':
        if (p.selector) {
          return this.truncate(String(p.selector), 60);
        }
        break;
      case 'mcp__playwright__browser_type':
        if (p.selector) {
          const text = p.text ? `: "${this.truncate(String(p.text), 30)}"` : '';
          return `${this.truncate(String(p.selector), 40)}${text}`;
        }
        break;
    }

    // Default: show first string-valued param truncated
    for (const [key, val] of Object.entries(p)) {
      if (typeof val === 'string' && val.length > 0) {
        return `${key}=${this.truncate(val, 60)}`;
      }
    }

    return '';
  }

  /**
   * Log tool start event
   */
  async logToolStart(agentName: string, toolName: string, parameters: unknown): Promise<void> {
    await this.ensureInitialized();

    const params = this.formatToolParams(toolName, parameters);
    const paramStr = params ? `: ${params}` : '';
    const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
    await this.writeRaw(line);
  }

  /**
   * Log LLM response.
   *
   * The full content is kept, with line breaks escaped (`\n`, `\r`) so the
   * record stays on a single line.
   */
  async logLlmResponse(agentName: string, turn: number, content: string): Promise<void> {
    await this.ensureInitialized();

    // Escape carriage returns as well: a raw CR would rewind the cursor in a terminal.
    const escaped = content.replace(/\r/g, '\\r').replace(/\n/g, '\\n');
    const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
    await this.writeRaw(line);
  }

  /**
   * Log workflow completion with full summary.
   *
   * The whole summary block is assembled first and written in one call so
   * records from other log calls cannot be interleaved with it.
   */
  async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
    await this.ensureInitialized();

    const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';

    const lines: string[] = [
      '\n',
      `================================================================================\n`,
      `Workflow ${status}\n`,
      `────────────────────────────────────────\n`,
      `Workflow ID: ${this.sessionMetadata.id}\n`,
      `Status: ${summary.status}\n`,
      `Duration: ${formatDuration(summary.totalDurationMs)}\n`,
      `Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`,
      `Agents: ${summary.completedAgents.length} completed\n`,
    ];

    if (summary.error) {
      lines.push(`Error: ${summary.error}\n`);
    }

    lines.push(`\n`, `Agent Breakdown:\n`);

    for (const agentName of summary.completedAgents) {
      const metrics = summary.agentMetrics[agentName];
      if (metrics) {
        const duration = formatDuration(metrics.durationMs);
        const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
        lines.push(` - ${agentName} (${duration}, ${cost})\n`);
      } else {
        lines.push(` - ${agentName}\n`);
      }
    }

    lines.push(`================================================================================\n`);

    await this.writeRaw(lines.join(''));
  }

  /**
   * Ensure initialized (helper for lazy initialization)
   */
  private async ensureInitialized(): Promise<void> {
    if (!this.initialized) {
      await this.initialize();
    }
  }

  /**
   * Close the log stream.
   *
   * The logger is marked closed before the stream finishes ending, so a log
   * call made in the meantime reopens the file instead of writing to a stream
   * that is shutting down. No-op when the logger is not initialized.
   */
  async close(): Promise<void> {
    const stream = this.stream;
    if (!this.initialized || !stream) {
      return;
    }

    this.initialized = false;
    this.stream = null;

    await new Promise<void>((resolve) => {
      stream.end(() => {
        resolve();
      });
    });
  }
}
|
||||||
@@ -1,903 +0,0 @@
|
|||||||
import { fs, path, $ } from 'zx';
|
|
||||||
import chalk from 'chalk';
|
|
||||||
import { PentestError } from './error-handling.js';
|
|
||||||
import { parseConfig, distributeConfig } from './config-parser.js';
|
|
||||||
import { executeGitCommandWithRetry } from './utils/git-manager.js';
|
|
||||||
import { formatDuration } from './audit/utils.js';
|
|
||||||
import {
|
|
||||||
AGENTS,
|
|
||||||
PHASES,
|
|
||||||
selectSession,
|
|
||||||
validateAgent,
|
|
||||||
validateAgentRange,
|
|
||||||
validatePhase,
|
|
||||||
checkPrerequisites,
|
|
||||||
getNextAgent,
|
|
||||||
markAgentCompleted,
|
|
||||||
markAgentFailed,
|
|
||||||
getSessionStatus,
|
|
||||||
rollbackToAgent,
|
|
||||||
updateSession
|
|
||||||
} from './session-manager.js';
|
|
||||||
|
|
||||||
/**
 * Verify that the target path exists and contains a `.git` entry.
 *
 * Throws a PentestError ('filesystem') when the path is empty or missing, and
 * a PentestError ('validation') when it has no `.git` entry.
 *
 * @returns {Promise<boolean>} Always `true` when both checks pass.
 */
const validateTargetRepo = async (targetRepo) => {
  // Short-circuits: the filesystem is only consulted for a non-empty path.
  const repoPresent = Boolean(targetRepo) && (await fs.pathExists(targetRepo));
  if (!repoPresent) {
    throw new PentestError(
      `Target repository '${targetRepo}' not found or not accessible`,
      'filesystem',
      false,
      { targetRepo }
    );
  }

  const hasGitDir = await fs.pathExists(path.join(targetRepo, '.git'));
  if (!hasGitDir) {
    throw new PentestError(
      `Target repository '${targetRepo}' is not a git repository`,
      'validation',
      false,
      { targetRepo }
    );
  }

  return true;
};
|
|
||||||
|
|
||||||
/**
 * Resolve the current HEAD commit of the target repository.
 *
 * Runs `git rev-parse HEAD` through `executeGitCommandWithRetry` and returns
 * the trimmed output. Any failure is rethrown as a PentestError ('git') that
 * carries the original error message.
 */
export const getGitCommitHash = async (targetRepo) => {
  try {
    const { stdout } = await executeGitCommandWithRetry(
      ['git', 'rev-parse', 'HEAD'],
      targetRepo,
      'getting commit hash'
    );
    return stdout.trim();
  } catch (error) {
    const reason = error.message;
    throw new PentestError(
      `Failed to get git commit hash: ${reason}`,
      'git',
      false,
      { targetRepo, originalError: reason }
    );
  }
};
|
|
||||||
|
|
||||||
/**
 * Resets the repository working tree to `commitHash` and removes untracked
 * files and directories (`git reset --hard` followed by `git clean -fd`).
 *
 * @param targetRepo - Path of the repository to roll back.
 * @param commitHash - Commit to reset to.
 * @throws PentestError ('git') when either git command, or the success log, fails.
 */
const rollbackGitToCommit = async (targetRepo, commitHash) => {
  try {
    // Executed strictly in order: reset first, then clean.
    const gitSteps = [
      { argv: ['git', 'reset', '--hard', commitHash], label: 'rollback to commit' },
      { argv: ['git', 'clean', '-fd'], label: 'cleaning after rollback' }
    ];
    for (const { argv, label } of gitSteps) {
      await executeGitCommandWithRetry(argv, targetRepo, label);
    }

    const shortHash = commitHash.substring(0, 8);
    console.log(chalk.green(`✅ Git workspace rolled back to commit ${shortHash}`));
  } catch (cause) {
    throw new PentestError(
      `Failed to rollback git workspace: ${cause.message}`,
      'git',
      false,
      { targetRepo, commitHash, originalError: cause.message }
    );
  }
};
|
|
||||||
|
|
||||||
/**
 * Runs one agent end to end: validates it against the freshly reloaded
 * session, optionally cleans the git workspace, loads the agent's prompt,
 * executes it through `runClaudePromptWithRetry`, and records the outcome
 * (completed with a commit-hash checkpoint, or failed) in the session store.
 *
 * @param agentName - Name of the agent to run; validated via `validateAgent`.
 * @param session - Session object; only `session.id` is trusted, the rest is
 *   reloaded with `getSession` before any check.
 * @param pipelineTestingMode - Forwarded unchanged to `loadPrompt`.
 * @param runClaudePromptWithRetry - Callback that executes the prompt and
 *   resolves to an object with at least `success`, `error`, `retryable`,
 *   `duration` and `cost` (the fields read here).
 * @param loadPrompt - Callback that builds the prompt text.
 * @param allowRerun - When true, an already-completed agent may run again and
 *   the pre-run workspace clean is skipped.
 * @param skipWorkspaceClean - When true (used by the parallel runners), the
 *   pre-run workspace clean and the per-agent success/failure lines are skipped.
 * @returns A frozen result object (`success: true`, timing, cost, checkpoint, ...).
 * @throws PentestError ('validation') when the session is missing or the agent
 *   is already completed; PentestError ('agent') wrapping any execution failure.
 */
const runSingleAgent = async (agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, allowRerun = false, skipWorkspaceClean = false) => {
  // Validate agent first
  const agent = validateAgent(agentName);

  console.log(chalk.cyan(`\n🤖 Running agent: ${agent.displayName}`));

  // Reload session to get latest state (important for agent ranges)
  const { getSession } = await import('./session-manager.js');
  const freshSession = await getSession(session.id);
  if (!freshSession) {
    throw new PentestError(`Session ${session.id} not found`, 'validation', false);
  }

  // Use fresh session for all subsequent checks
  session = freshSession;

  // Warn if session is completed
  if (session.status === 'completed') {
    console.log(chalk.yellow('⚠️ This session is already completed. Re-running will modify completed results.'));
  }

  // Block re-running completed agents unless explicitly allowed - use --rerun for explicit rollback and re-run
  if (!allowRerun && session.completedAgents.includes(agentName)) {
    throw new PentestError(
      `Agent '${agentName}' has already been completed. Use --rerun ${agentName} for explicit rollback and re-execution.`,
      'validation',
      false,
      {
        agentName,
        suggestion: `--rerun ${agentName}`,
        completedAgents: session.completedAgents
      }
    );
  }

  const targetRepo = session.targetRepo;
  await validateTargetRepo(targetRepo);

  // Check prerequisites
  checkPrerequisites(session, agentName);

  // Additional safety check: if this agent is not completed but we have uncommitted changes,
  // it might be from a previous interrupted run. Clean the workspace to be safe.
  // Skip workspace cleaning during parallel execution to avoid agents interfering with each other
  if (!session.completedAgents.includes(agentName) && !allowRerun && !skipWorkspaceClean) {
    try {
      const status = await executeGitCommandWithRetry(['git', 'status', '--porcelain'], targetRepo, 'checking workspace status');
      const hasUncommittedChanges = status.stdout.trim().length > 0;

      if (hasUncommittedChanges) {
        console.log(chalk.yellow(` ⚠️ Detected uncommitted changes before running ${agentName}`));
        console.log(chalk.yellow(` 🧹 Cleaning workspace to ensure clean agent execution`));
        await executeGitCommandWithRetry(['git', 'reset', '--hard', 'HEAD'], targetRepo, 'cleaning workspace');
        await executeGitCommandWithRetry(['git', 'clean', '-fd'], targetRepo, 'removing untracked files');
        console.log(chalk.green(` ✅ Workspace cleaned successfully`));
      }
    } catch (error) {
      // Best effort only: a failed clean is reported but does not stop the agent.
      console.log(chalk.yellow(` ⚠️ Could not check/clean workspace: ${error.message}`));
    }
  }

  // Template variables handed to loadPrompt
  const variables = {
    webUrl: session.webUrl,
    repoPath: session.repoPath,
    sourceDir: targetRepo
  };

  // Handle relative config paths - prepend configs/ if needed
  let configPath = null;
  if (session.configFile) {
    configPath = session.configFile.startsWith('configs/')
      ? session.configFile
      : path.join('configs', session.configFile);
  }

  const config = configPath ? await parseConfig(configPath) : null;
  const distributedConfig = config ? distributeConfig(config) : null;

  // Console colour per agent family; anything unlisted falls back to cyan.
  const colorByAgent = {
    'injection-vuln': chalk.red,
    'injection-exploit': chalk.red,
    'xss-vuln': chalk.yellow,
    'xss-exploit': chalk.yellow,
    'auth-vuln': chalk.blue,
    'auth-exploit': chalk.blue,
    'ssrf-vuln': chalk.magenta,
    'ssrf-exploit': chalk.magenta,
    'authz-vuln': chalk.green,
    'authz-exploit': chalk.green
  };
  const agentColor = colorByAgent[agentName] || chalk.cyan;

  // Initialize variables that will be used in both try and catch blocks.
  // promptName lives out here so the catch block can report it without
  // calling getPromptName a second time (which could throw again).
  let promptName = null;
  let validationData = null;
  let timingData = null;
  let costData = null;

  try {
    // Load and run the appropriate prompt
    promptName = getPromptName(agentName);
    const prompt = await loadPrompt(promptName, variables, distributedConfig, pipelineTestingMode);

    const result = await runClaudePromptWithRetry(
      prompt,
      targetRepo,
      '*',
      '',
      AGENTS[agentName].displayName,
      agentName, // Pass agent name for snapshot creation
      agentColor, // Pass color function for this agent
      { id: session.id, webUrl: session.webUrl, repoPath: session.repoPath } // Session metadata for audit logging
    );

    if (!result.success) {
      throw new PentestError(
        `Agent execution failed: ${result.error}`,
        'agent',
        result.retryable || false,
        { agentName, result }
      );
    }

    // Get commit hash for checkpoint
    const commitHash = await getGitCommitHash(targetRepo);

    // Extract timing and cost data from result if available
    timingData = result.duration;
    costData = result.cost || 0;

    if (agentName.includes('-vuln')) {
      // Extract vulnerability type from agent name (e.g., 'injection-vuln' -> 'injection')
      const vulnType = agentName.replace('-vuln', '');
      try {
        const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');
        const validation = await safeValidateQueueAndDeliverable(vulnType, targetRepo);

        if (validation.success) {
          // Log validation result (don't store - will be re-validated during exploitation phase)
          console.log(chalk.blue(`📋 Validation: ${validation.data.shouldExploit ? `Ready for exploitation (${validation.data.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
          validationData = {
            shouldExploit: validation.data.shouldExploit,
            vulnerabilityCount: validation.data.vulnerabilityCount
          };
        } else {
          console.log(chalk.yellow(`⚠️ Validation failed: ${validation.error.message}`));
        }
      } catch (validationError) {
        // Validation is informational here; its failure does not fail the agent.
        console.log(chalk.yellow(`⚠️ Could not validate ${vulnType}: ${validationError.message}`));
      }
    }

    // Mark agent as completed (validation not stored - will be re-checked during exploitation)
    await markAgentCompleted(session.id, agentName, commitHash);

    // Only show completion message for sequential execution
    if (!skipWorkspaceClean) {
      console.log(chalk.green(`✅ Agent '${agentName}' completed successfully`));
    }

    // Return immutable result object with enhanced metadata
    return Object.freeze({
      success: true,
      agentName,
      result,
      validation: validationData,
      timing: timingData,
      cost: costData,
      checkpoint: commitHash,
      completedAt: new Date().toISOString()
    });
  } catch (error) {
    // Mark agent as failed. A failure of the bookkeeping itself is only
    // reported, so it can never replace the real cause thrown below.
    try {
      await markAgentFailed(session.id, agentName);
    } catch (markError) {
      console.log(chalk.yellow(`⚠️ Could not record failure for agent '${agentName}': ${markError?.message ?? markError}`));
    }

    // Only show failure message for sequential execution
    if (!skipWorkspaceClean) {
      console.log(chalk.red(`❌ Agent '${agentName}' failed: ${error.message}`));
    }

    // Immutable error object with enhanced context
    const errorResult = Object.freeze({
      success: false,
      agentName,
      error: {
        message: error.message,
        type: error.constructor.name,
        retryable: error.retryable || false,
        originalError: error
      },
      validation: validationData,
      timing: timingData,
      failedAt: new Date().toISOString(),
      context: {
        targetRepo,
        promptName,
        sessionId: session.id
      }
    });

    // Throw enhanced error with preserved context
    throw new PentestError(
      `Agent '${agentName}' execution failed: ${error.message}`,
      'agent',
      error.retryable || false,
      {
        agentName,
        sessionId: session.id,
        originalError: error.message,
        errorResult
      }
    );
  }
};
|
|
||||||
|
|
||||||
/**
 * Runs every agent between `startAgent` and `endAgent` one after another,
 * in the order returned by `validateAgentRange`.
 *
 * Agents already listed in `session.completedAgents` are skipped. The first
 * failure is logged and rethrown, which stops the remaining agents.
 *
 * @param startAgent - First agent of the range.
 * @param endAgent - Last agent of the range.
 * @param session - Session whose `completedAgents` drives the skip check.
 * @param pipelineTestingMode - Forwarded to `runSingleAgent`.
 * @param runClaudePromptWithRetry - Forwarded to `runSingleAgent`.
 * @param loadPrompt - Forwarded to `runSingleAgent`.
 */
const runAgentRange = async (startAgent, endAgent, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const rangeAgents = validateAgentRange(startAgent, endAgent);
  const rangeLabel = `${startAgent} to ${endAgent}`;

  console.log(chalk.cyan(`\n🔄 Running agent range: ${rangeLabel} (${rangeAgents.length} agents)`));

  for (const { name } of rangeAgents) {
    const alreadyDone = session.completedAgents.includes(name);
    if (alreadyDone) {
      console.log(chalk.gray(`⏭️ Agent '${name}' already completed, skipping`));
      continue;
    }

    try {
      await runSingleAgent(name, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);
    } catch (failure) {
      console.log(chalk.red(`❌ Agent range execution stopped at '${name}' due to failure`));
      throw failure;
    }
  }

  console.log(chalk.green(`✅ Agent range ${rangeLabel} completed successfully`));
};
|
|
||||||
|
|
||||||
/**
 * Runs every not-yet-completed vulnerability-analysis agent concurrently and
 * prints a summary table.
 *
 * Agent starts are staggered by 2 seconds each, and every agent gets up to
 * three attempts with a 5 second pause between them.
 *
 * @param session - Session whose `completedAgents` selects the agents to run.
 * @param pipelineTestingMode - Forwarded to `runSingleAgent`.
 * @param runClaudePromptWithRetry - Forwarded to `runSingleAgent`.
 * @param loadPrompt - Forwarded to `runSingleAgent`.
 * @returns `{ completed, failed }` where `completed` lists agent names and
 *   `failed` lists `{ agent, error }` records (error is the message text).
 */
const runParallelVuln = async (session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const vulnAgents = ['injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln'];
  // Single source of truth for the retry budget, also shown in the table.
  const maxAttempts = 3;
  const activeAgents = vulnAgents.filter(agent => !session.completedAgents.includes(agent));

  if (activeAgents.length === 0) {
    console.log(chalk.gray('⏭️ All vulnerability agents already completed'));
    return { completed: vulnAgents, failed: [] };
  }

  console.log(chalk.cyan(`\n🚀 Starting ${activeAgents.length} vulnerability analysis specialists in parallel...`));
  console.log(chalk.gray(' Specialists: ' + activeAgents.join(', ')));
  console.log();

  const startTime = Date.now();

  // Collect all results without logging individual completions
  const results = await Promise.allSettled(
    activeAgents.map(async (agentName, index) => {
      // Add 2-second stagger to prevent API overwhelm
      await new Promise(resolve => setTimeout(resolve, index * 2000));

      let lastError;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
          const result = await runSingleAgent(agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, false, true);
          return { agentName, ...result, attempts: attempt };
        } catch (error) {
          lastError = error;
          if (attempt < maxAttempts) {
            console.log(chalk.yellow(`⚠️ ${agentName} failed attempt ${attempt}/${maxAttempts}, retrying...`));
            await new Promise(resolve => setTimeout(resolve, 5000));
          }
        }
      }

      // Reject with a real Error (so it always has a message) that still
      // carries the fields the summary below reads.
      throw Object.assign(
        new Error(`${agentName} failed after ${maxAttempts} attempts`),
        { agentName, error: lastError, attempts: maxAttempts }
      );
    })
  );

  const totalDuration = Date.now() - startTime;

  // Process and display results in a nice table
  console.log(chalk.cyan('\n📊 Vulnerability Analysis Results'));
  console.log(chalk.gray('─'.repeat(80)));

  // Table header
  console.log(chalk.bold('Agent Status Vulns Attempt Duration Cost'));
  console.log(chalk.gray('─'.repeat(80)));

  const completed = [];
  const failed = [];

  results.forEach((result, index) => {
    const agentName = activeAgents[index];
    const agentDisplay = agentName.padEnd(22);

    if (result.status === 'fulfilled') {
      const data = result.value;
      completed.push(agentName);

      const vulnCount = data.validation?.vulnerabilityCount || 0;
      const duration = formatDuration(data.timing || 0);
      const cost = `$${(data.cost || 0).toFixed(4)}`;

      console.log(
        `${chalk.green(agentDisplay)} ${chalk.green('✓ Success')} ${vulnCount.toString().padStart(5)} ` +
        `${data.attempts}/${maxAttempts} ${duration.padEnd(11)} ${cost}`
      );

      // Show log file path for detailed review. runSingleAgent nests the
      // runner's own result under `result`, so look there as well.
      // NOTE(review): assumes the runner result may expose `logFile` - confirm.
      const logFile = data.logFile ?? data.result?.logFile;
      if (logFile) {
        const relativePath = path.relative(process.cwd(), logFile);
        console.log(chalk.gray(` └─ Detailed log: ${relativePath}`));
      }
    } else {
      const error = result.reason?.error || result.reason;
      // Tolerate rejections that are not Error instances.
      const message = String(error?.message ?? error);
      failed.push({ agent: agentName, error: message });

      const attempts = result.reason?.attempts ?? maxAttempts;

      console.log(
        `${chalk.red(agentDisplay)} ${chalk.red('✗ Failed ')} - ` +
        `${attempts}/${maxAttempts} - -`
      );
      // Only add an ellipsis when the message was actually cut.
      const preview = message.length > 60 ? `${message.substring(0, 60)}...` : message;
      console.log(chalk.gray(` └─ ${preview}`));
    }
  });

  console.log(chalk.gray('─'.repeat(80)));
  console.log(chalk.cyan(`Summary: ${completed.length}/${activeAgents.length} succeeded in ${formatDuration(totalDuration)}`));

  return { completed, failed };
};
|
|
||||||
|
|
||||||
/**
 * Runs the exploitation agents concurrently and prints a summary table.
 *
 * An exploit agent is eligible only when its `-vuln` counterpart is listed in
 * the reloaded session's `completedAgents` and queue validation reports
 * `shouldExploit`. Eligible agents that are not yet completed are started
 * with a 2 second stagger and up to three attempts each (5 second pause
 * between attempts).
 *
 * @param session - Session object; reloaded by id before use.
 * @param pipelineTestingMode - Forwarded to `runSingleAgent`.
 * @param runClaudePromptWithRetry - Forwarded to `runSingleAgent`.
 * @param loadPrompt - Forwarded to `runSingleAgent`.
 * @returns `{ completed, failed }` where `completed` lists agent names and
 *   `failed` lists `{ agent, error }` records (error is the message text).
 * @throws PentestError ('validation') when the session can no longer be loaded.
 */
const runParallelExploit = async (session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const exploitAgents = ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'];
  // Single source of truth for the retry budget, also shown in the table.
  const maxAttempts = 3;

  // Get fresh session data to ensure we have the latest vulnerability analysis results
  // This prevents race conditions where parallel vuln agents haven't updated session state yet
  const { getSession } = await import('./session-manager.js');
  const freshSession = await getSession(session.id);
  if (!freshSession) {
    // Same guard runSingleAgent applies; without it the next line would
    // fail with an opaque TypeError.
    throw new PentestError(`Session ${session.id} not found`, 'validation', false);
  }

  // Load validation module
  const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');

  // Only run exploit agents whose vuln counterparts completed successfully AND found vulnerabilities
  const eligibilityChecks = await Promise.all(
    exploitAgents.map(async (agentName) => {
      const vulnAgentName = agentName.replace('-exploit', '-vuln');

      // Must have completed the vulnerability analysis
      if (!freshSession.completedAgents.includes(vulnAgentName)) {
        return { agentName, eligible: false };
      }

      // Check if vulnerabilities were found by validating the queue file
      const vulnType = vulnAgentName.replace('-vuln', ''); // "injection-vuln" -> "injection"
      const validation = await safeValidateQueueAndDeliverable(vulnType, freshSession.targetRepo);

      if (!validation.success || !validation.data.shouldExploit) {
        console.log(chalk.gray(`⏭️ Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
        return { agentName, eligible: false };
      }

      console.log(chalk.blue(`✓ ${agentName} eligible (${validation.data.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
      return { agentName, eligible: true };
    })
  );

  const eligibleAgents = eligibilityChecks
    .filter(check => check.eligible)
    .map(check => check.agentName);

  const activeAgents = eligibleAgents.filter(agent => !freshSession.completedAgents.includes(agent));

  if (activeAgents.length === 0) {
    if (eligibleAgents.length === 0) {
      console.log(chalk.gray('⏭️ No exploitation agents eligible (no vulnerabilities found)'));
    } else {
      console.log(chalk.gray('⏭️ All eligible exploitation agents already completed'));
    }
    return { completed: eligibleAgents, failed: [] };
  }

  console.log(chalk.cyan(`\n🎯 Starting ${activeAgents.length} exploitation specialists in parallel...`));
  console.log(chalk.gray(' Specialists: ' + activeAgents.join(', ')));
  console.log();

  const startTime = Date.now();

  // Collect all results without logging individual completions
  const results = await Promise.allSettled(
    activeAgents.map(async (agentName, index) => {
      // Add 2-second stagger to prevent API overwhelm
      await new Promise(resolve => setTimeout(resolve, index * 2000));

      let lastError;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
          const result = await runSingleAgent(agentName, freshSession, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, false, true);
          return { agentName, ...result, attempts: attempt };
        } catch (error) {
          lastError = error;
          if (attempt < maxAttempts) {
            console.log(chalk.yellow(`⚠️ ${agentName} failed attempt ${attempt}/${maxAttempts}, retrying...`));
            await new Promise(resolve => setTimeout(resolve, 5000));
          }
        }
      }

      // Reject with a real Error (so it always has a message) that still
      // carries the fields the summary below reads.
      throw Object.assign(
        new Error(`${agentName} failed after ${maxAttempts} attempts`),
        { agentName, error: lastError, attempts: maxAttempts }
      );
    })
  );

  const totalDuration = Date.now() - startTime;

  // Process and display results in a nice table
  console.log(chalk.cyan('\n🎯 Exploitation Results'));
  console.log(chalk.gray('─'.repeat(80)));

  // Table header
  console.log(chalk.bold('Agent Status Result Attempt Duration Cost'));
  console.log(chalk.gray('─'.repeat(80)));

  const completed = [];
  const failed = [];

  results.forEach((result, index) => {
    const agentName = activeAgents[index];
    const agentDisplay = agentName.padEnd(22);

    if (result.status === 'fulfilled') {
      const data = result.value;
      completed.push(agentName);

      const exploitResult = 'Success'; // Could be enhanced to show actual exploitation result
      const duration = formatDuration(data.timing || 0);
      const cost = `$${(data.cost || 0).toFixed(4)}`;

      console.log(
        `${chalk.green(agentDisplay)} ${chalk.green('✓ Success')} ${exploitResult.padEnd(6)} ` +
        `${data.attempts}/${maxAttempts} ${duration.padEnd(11)} ${cost}`
      );

      // Show log file path for detailed review. runSingleAgent nests the
      // runner's own result under `result`, so look there as well.
      // NOTE(review): assumes the runner result may expose `logFile` - confirm.
      const logFile = data.logFile ?? data.result?.logFile;
      if (logFile) {
        const relativePath = path.relative(process.cwd(), logFile);
        console.log(chalk.gray(` └─ Detailed log: ${relativePath}`));
      }
    } else {
      const error = result.reason?.error || result.reason;
      // Tolerate rejections that are not Error instances.
      const message = String(error?.message ?? error);
      failed.push({ agent: agentName, error: message });

      const attempts = result.reason?.attempts ?? maxAttempts;

      console.log(
        `${chalk.red(agentDisplay)} ${chalk.red('✗ Failed ')} - ` +
        `${attempts}/${maxAttempts} - -`
      );
      // Only add an ellipsis when the message was actually cut.
      const preview = message.length > 60 ? `${message.substring(0, 60)}...` : message;
      console.log(chalk.gray(` └─ ${preview}`));
    }
  });

  console.log(chalk.gray('─'.repeat(80)));
  console.log(chalk.cyan(`Summary: ${completed.length}/${activeAgents.length} succeeded in ${formatDuration(totalDuration)}`));

  return { completed, failed };
};
|
|
||||||
|
|
||||||
/**
 * Runs every agent belonging to one phase.
 *
 * The 'vulnerability-analysis' and 'exploitation' phases are delegated to
 * their parallel runners; individual agent failures there are listed but do
 * not fail the phase. Any other phase must resolve (via `validatePhase`) to
 * exactly one agent, which is run directly unless already completed.
 *
 * @param phaseName - Name of the phase to run.
 * @param session - Current session.
 * @param pipelineTestingMode - Forwarded to the agent runners.
 * @param runClaudePromptWithRetry - Forwarded to the agent runners.
 * @param loadPrompt - Forwarded to the agent runners.
 * @throws PentestError ('validation') when a non-parallel phase has more or
 *   fewer than one agent.
 */
export const runPhase = async (phaseName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  console.log(chalk.cyan(`\n📋 Running phase: ${phaseName} (parallel execution)`));

  // Phases that have a dedicated parallel runner, keyed by exact phase name.
  const parallelPhases = new Map([
    ['vulnerability-analysis', {
      banner: '🚀 Using parallel execution for 5x faster vulnerability analysis',
      runner: runParallelVuln
    }],
    ['exploitation', {
      banner: '🎯 Using parallel execution for 5x faster exploitation',
      runner: runParallelExploit
    }]
  ]);

  const parallelPhase = parallelPhases.get(phaseName);
  if (parallelPhase !== undefined) {
    console.log(chalk.cyan(parallelPhase.banner));
    const outcome = await parallelPhase.runner(session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);

    if (outcome.failed.length > 0) {
      console.log(chalk.yellow(`⚠️ ${outcome.failed.length} agents failed, but phase continues`));
      for (const failure of outcome.failed) {
        console.log(chalk.red(` - ${failure.agent}: ${failure.error}`));
      }
    }

    console.log(chalk.green(`✅ Phase '${phaseName}' completed: ${outcome.completed.length} succeeded, ${outcome.failed.length} failed`));
    return;
  }

  // Remaining phases (pre-reconnaissance, reconnaissance, reporting) hold a single agent.
  const phaseAgents = validatePhase(phaseName);
  if (phaseAgents.length !== 1) {
    throw new PentestError(`Phase '${phaseName}' has multiple agents but no parallel execution defined`, 'validation', false);
  }

  const [onlyAgent] = phaseAgents;
  if (session.completedAgents.includes(onlyAgent.name)) {
    console.log(chalk.gray(`⏭️ Agent '${onlyAgent.name}' already completed, skipping`));
    return;
  }

  await runSingleAgent(onlyAgent.name, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);
  console.log(chalk.green(`✅ Phase '${phaseName}' completed successfully`));
};
|
|
||||||
|
|
||||||
/**
 * Rolls the workspace and the session back to the checkpoint recorded for
 * `targetAgent`.
 *
 * Steps, in order: validate the repository and the agent name, look up the
 * agent's checkpoint commit, hard-reset and clean the git workspace, update
 * the session through `rollbackToAgent`, then (best effort) mark every agent
 * ordered after the target as rolled back in the audit logs.
 *
 * @param targetAgent - Agent whose checkpoint should be restored.
 * @param session - Session holding `targetRepo`, `id` and `checkpoints`.
 * @throws PentestError ('validation') when no checkpoint exists for the agent,
 *   including when the session carries no checkpoint map at all.
 */
export const rollbackTo = async (targetAgent, session) => {
  console.log(chalk.yellow(`🔄 Rolling back to agent: ${targetAgent}`));

  await validateTargetRepo(session.targetRepo);
  validateAgent(targetAgent);

  // A session without any checkpoint map is reported as "no checkpoint"
  // instead of crashing with a TypeError on the property lookup.
  const checkpoints = session.checkpoints ?? {};
  const commitHash = checkpoints[targetAgent];

  if (!commitHash) {
    throw new PentestError(
      `No checkpoint found for agent '${targetAgent}' in session history`,
      'validation',
      false,
      { targetAgent, availableCheckpoints: Object.keys(checkpoints) }
    );
  }

  // Rollback git workspace
  await rollbackGitToCommit(session.targetRepo, commitHash);

  // Update session state (removes agents from completedAgents)
  await rollbackToAgent(session.id, targetAgent);

  // Mark rolled-back agents in audit system (for forensic trail)
  try {
    const { AuditSession } = await import('./audit/index.js');
    const auditSession = new AuditSession(session);
    await auditSession.initialize();

    // Find agents that were rolled back (agents after targetAgent)
    const targetOrder = AGENTS[targetAgent].order;
    const rolledBackAgents = Object.values(AGENTS)
      .filter(agent => agent.order > targetOrder)
      .map(agent => agent.name);

    // Mark them as rolled-back in audit system
    if (rolledBackAgents.length > 0) {
      await auditSession.markMultipleRolledBack(rolledBackAgents);
      console.log(chalk.gray(` Marked ${rolledBackAgents.length} agents as rolled-back in audit logs`));
    }
  } catch (error) {
    // Non-critical: rollback succeeded even if audit update failed
    console.log(chalk.yellow(` ⚠️ Failed to update audit logs: ${error.message}`));
  }

  console.log(chalk.green(`✅ Successfully rolled back to agent '${targetAgent}'`));
};
|
|
||||||
|
|
||||||
/**
 * Re-executes one agent after restoring the state it originally started from.
 *
 * The rollback target is the completed prerequisite with the highest
 * `order`. When there is none and the agent is 'pre-recon', the workspace is
 * reset to the first commit reported by `git log --reverse`; a failure of
 * that lookup is only logged. The agent is then run with reruns allowed.
 *
 * @param agentName - Agent to run again.
 * @param session - Current session.
 * @param pipelineTestingMode - Forwarded to `runSingleAgent`.
 * @param runClaudePromptWithRetry - Forwarded to `runSingleAgent`.
 * @param loadPrompt - Forwarded to `runSingleAgent`.
 */
export const rerunAgent = async (agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  console.log(chalk.cyan(`🔁 Rerunning agent: ${agentName}`));

  const agent = validateAgent(agentName);

  // Pick the latest (highest order) prerequisite that has already completed.
  let rollbackTarget = null;
  for (const prereq of agent.prerequisites) {
    if (!session.completedAgents.includes(prereq)) {
      continue;
    }
    if (rollbackTarget === null || AGENTS[prereq].order > AGENTS[rollbackTarget].order) {
      rollbackTarget = prereq;
    }
  }

  if (rollbackTarget) {
    console.log(chalk.blue(`📍 Rolling back to prerequisite: ${rollbackTarget}`));
    await rollbackTo(rollbackTarget, session);
  } else if (agent.name === 'pre-recon') {
    // Special case: rollback to initial clone
    console.log(chalk.blue(`📍 Rolling back to initial repository state`));
    try {
      const history = await executeGitCommandWithRetry(
        ['git', 'log', '--reverse', '--format=%H'],
        session.targetRepo,
        'finding initial commit'
      );
      const [firstCommit] = history.stdout.trim().split('\n');
      await rollbackGitToCommit(session.targetRepo, firstCommit);
    } catch (cause) {
      console.log(chalk.yellow(`⚠️ Could not find initial commit, using HEAD: ${cause.message}`));
    }
  }

  // Run the target agent (allow rerun since we've explicitly rolled back)
  const allowRerun = true;
  await runSingleAgent(agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, allowRerun);

  console.log(chalk.green(`✅ Agent '${agentName}' rerun completed successfully`));
};
|
|
||||||
|
|
||||||
/**
 * Runs, one after another, every agent in `AGENTS` that is not yet listed in
 * `session.completedAgents`, in `Object.keys(AGENTS)` order.
 *
 * A failure in any agent propagates to the caller and stops the sequence.
 *
 * @param session - Current session.
 * @param pipelineTestingMode - Forwarded to `runSingleAgent`.
 * @param runClaudePromptWithRetry - Forwarded to `runSingleAgent`.
 * @param loadPrompt - Forwarded to `runSingleAgent`.
 */
export const runAll = async (session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const everyAgent = Object.keys(AGENTS);

  console.log(chalk.cyan(`\n🚀 Running all remaining agents to completion`));
  console.log(chalk.gray(`Current progress: ${session.completedAgents.length}/${everyAgent.length} agents completed`));

  // Collect the agents that still have to run, preserving declaration order.
  const pending = [];
  for (const candidate of everyAgent) {
    if (!session.completedAgents.includes(candidate)) {
      pending.push(candidate);
    }
  }

  if (pending.length === 0) {
    console.log(chalk.green('✅ All agents already completed!'));
    return;
  }

  console.log(chalk.blue(`📋 Remaining agents: ${pending.join(', ')}`));
  console.log();

  // Strictly sequential: each agent finishes before the next one starts.
  for (const nextAgent of pending) {
    await runSingleAgent(nextAgent, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);
  }

  console.log(chalk.green(`\n🎉 All agents completed successfully! Session marked as completed.`));
};
|
|
||||||
|
|
||||||
// Display session status
|
|
||||||
/**
 * Print a human-readable status report for a session to stdout.
 *
 * Sections, in order: session identity, the final report path (only when the
 * file exists), overall status, optional timing/cost metrics, one line per
 * agent, and a hint about the next action.
 *
 * @param {object} session - Session record. Reads `webUrl`, `repoPath`, `id`,
 *   `targetRepo`, `lastActivity`, `configFile`, `timingBreakdown`,
 *   `costBreakdown`, `completedAgents` and `failedAgents`.
 * @returns {Promise<void>}
 */
export const displayStatus = async (session) => {
  const summary = getSessionStatus(session);
  const lastActivityLabel = getTimeAgo(session.lastActivity);

  // --- Session identity ---
  const targetHost = new URL(session.webUrl).hostname;
  const repoName = path.basename(session.repoPath);
  console.log(chalk.cyan(`Session: ${targetHost} + ${repoName}`));
  console.log(chalk.gray(`Session ID: ${session.id}`));
  console.log(chalk.gray(`Source Directory: ${session.targetRepo}`));

  // --- Final deliverable (shown only when the report file is present) ---
  if (session.targetRepo) {
    const reportFile = path.join(session.targetRepo, 'deliverables', 'comprehensive_security_assessment_report.md');
    try {
      const reportExists = await fs.pathExists(reportFile);
      if (reportExists) {
        console.log(chalk.gray(`Final Deliverable Available: ${reportFile}`));
      }
    } catch (error) {
      // Best effort: an unreadable path simply means nothing is printed.
    }
  }

  // --- Overall status ---
  let paintStatus = chalk.blue;
  if (summary.status === 'completed') {
    paintStatus = chalk.green;
  } else if (summary.status === 'failed') {
    paintStatus = chalk.red;
  }
  console.log(paintStatus(`Status: ${summary.status} (${summary.completedCount}/${summary.totalAgents} agents completed)`));
  console.log(chalk.gray(`Last Activity: ${lastActivityLabel}`));

  if (session.configFile) {
    console.log(chalk.gray(`Config: ${session.configFile}`));
  }

  // --- Timing and cost metrics, when the session recorded any ---
  const timing = session.timingBreakdown;
  const costs = session.costBreakdown;
  if (costs || timing) {
    console.log(); // Blank line before metrics

    if (timing) {
      console.log(chalk.blue('⏱️ Timing Breakdown:'));
      console.log(chalk.gray(` Total Execution: ${formatDuration(timing.total || 0)}`));

      if (timing.phases) {
        for (const [phase, duration] of Object.entries(timing.phases)) {
          console.log(chalk.gray(` ${phase}: ${formatDuration(duration)}`));
        }
      }

      if (timing.agents) {
        console.log(chalk.gray(' Per Agent:'));
        for (const [agent, duration] of Object.entries(timing.agents)) {
          console.log(chalk.gray(` ${agent}: ${formatDuration(duration)}`));
        }
      }
    }

    if (costs) {
      console.log(chalk.blue('💰 Cost Breakdown:'));
      console.log(chalk.gray(` Total Cost: $${(costs.total || 0).toFixed(4)}`));

      if (costs.agents) {
        console.log(chalk.gray(' Per Agent:'));
        for (const [agent, cost] of Object.entries(costs.agents)) {
          console.log(chalk.gray(` ${agent}: $${cost.toFixed(4)}`));
        }
      }
    }
  }

  console.log(); // Blank line

  // --- Per-agent status, in pipeline order ---
  const orderedAgents = Object.values(AGENTS).sort((left, right) => left.order - right.order);

  for (const { name } of orderedAgents) {
    let icon;
    let note;
    let paint;

    if (session.completedAgents.includes(name)) {
      icon = '✅';
      note = `completed ${getTimeAgoForAgent(session, name)}`;
      paint = chalk.green;
    } else if (session.failedAgents.includes(name)) {
      icon = '❌';
      note = `failed ${getTimeAgoForAgent(session, name)}`;
      paint = chalk.red;
    } else {
      icon = '⏸️';
      note = 'pending';
      paint = chalk.gray;
    }

    const label = name.replace(/-/g, ' ').padEnd(20);
    console.log(`${icon} ${paint(label)} (${note})`);
  }

  // --- Next-action hint ---
  const upcoming = getNextAgent(session);
  if (upcoming) {
    console.log(chalk.cyan(`\nNext: Run --run-agent ${upcoming.name}`));
  } else if (summary.failedCount > 0) {
    const firstFailure = session.failedAgents[0];
    console.log(chalk.yellow(`\nNext: Fix ${firstFailure} failure or run --rerun ${firstFailure}`));
  } else if (summary.status === 'completed') {
    console.log(chalk.green('\nAll agents completed successfully! 🎉'));
  }
};
|
|
||||||
|
|
||||||
// List all available agents
|
|
||||||
/**
 * Print every phase in PHASES with the agents that belong to it.
 *
 * Phase keys are shown in title case with hyphens turned into spaces
 * (e.g. "pre-recon" is printed as "Pre Recon"), numbered from 1 in key order.
 */
export const listAgents = () => {
  console.log(chalk.cyan('Available Agents:'));

  Object.entries(PHASES).forEach(([phaseName, phaseAgents], phaseIndex) => {
    const title = phaseName
      .split('-')
      .map((word) => `${word.charAt(0).toUpperCase()}${word.slice(1)}`)
      .join(' ');

    console.log(chalk.yellow(`\nPhase ${phaseIndex + 1} - ${title}:`));

    phaseAgents.forEach((agentName) => {
      const entry = AGENTS[agentName];
      const paddedName = entry.name.padEnd(18);
      console.log(chalk.white(` ${paddedName} ${entry.displayName}`));
    });
  });
};
|
|
||||||
|
|
||||||
// Helper function to get prompt name from agent name
|
|
||||||
/**
 * Translate an agent name into the name of the prompt it uses.
 *
 * @param {string} agentName - Agent identifier such as 'xss-vuln'.
 * @returns {string} The mapped prompt name, or `agentName` unchanged when the
 *   agent has no entry in the table.
 */
const getPromptName = (agentName) => {
  const mappings = {
    'pre-recon': 'pre-recon-code',
    'recon': 'recon',
    'injection-vuln': 'vuln-injection',
    'xss-vuln': 'vuln-xss',
    'auth-vuln': 'vuln-auth',
    'ssrf-vuln': 'vuln-ssrf',
    'authz-vuln': 'vuln-authz',
    'injection-exploit': 'exploit-injection',
    'xss-exploit': 'exploit-xss',
    'auth-exploit': 'exploit-auth',
    'ssrf-exploit': 'exploit-ssrf',
    'authz-exploit': 'exploit-authz',
    'report': 'report-executive'
  };

  // Only consult the table's own keys: a plain property read would also find
  // members inherited from Object.prototype ('toString', 'constructor', ...)
  // and hand back a function instead of a prompt name.
  if (Object.prototype.hasOwnProperty.call(mappings, agentName)) {
    return mappings[agentName];
  }
  return agentName;
};
|
|
||||||
|
|
||||||
// Helper function to get time ago for specific agent
|
|
||||||
/**
 * Relative-time label for a single agent.
 *
 * Per-agent checkpoint timestamps are not consulted yet, so `agentName` is
 * currently ignored and the label is derived from the session's last activity.
 *
 * @param {object} session - Session record; only `lastActivity` is read.
 * @param {string} agentName - Agent identifier (unused for now).
 * @returns {string} Whatever getTimeAgo returns for `session.lastActivity`.
 */
const getTimeAgoForAgent = (session, agentName) => {
  const { lastActivity } = session;
  return getTimeAgo(lastActivity);
};
|
|
||||||
|
|
||||||
// Helper function for time ago calculation
|
|
||||||
/**
 * Format how long ago a timestamp was, as a short label.
 *
 * @param {string|number|Date} timestamp - Anything the Date constructor accepts.
 * @returns {string} `"<n>m ago"` below one hour, `"<n>h ago"` below one day,
 *   `"<n>d ago"` otherwise; `"unknown"` when the timestamp cannot be parsed.
 *   Timestamps in the future are reported as `"0m ago"`.
 */
const getTimeAgo = (timestamp) => {
  const pastMs = new Date(timestamp).getTime();

  // An unparseable timestamp yields NaN, which would otherwise fall through
  // every comparison below and print "NaNd ago".
  if (Number.isNaN(pastMs)) {
    return 'unknown';
  }

  // Clamp so clock skew or a future timestamp never produces a negative age.
  const elapsedMs = Math.max(0, Date.now() - pastMs);

  const MS_PER_MINUTE = 1000 * 60;
  const minutes = Math.floor(elapsedMs / MS_PER_MINUTE);
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(elapsedMs / (MS_PER_MINUTE * 60));
  if (hours < 24) {
    return `${hours}h ago`;
  }

  const days = Math.floor(elapsedMs / (MS_PER_MINUTE * 60 * 24));
  return `${days}d ago`;
};
|
|
||||||
|
|
||||||
@@ -1,137 +0,0 @@
|
|||||||
import chalk from 'chalk';
|
|
||||||
import {
|
|
||||||
selectSession, deleteSession, deleteAllSessions,
|
|
||||||
validateAgent, validatePhase, reconcileSession
|
|
||||||
} from '../session-manager.js';
|
|
||||||
import {
|
|
||||||
runPhase, runAll, rollbackTo, rerunAgent, displayStatus, listAgents
|
|
||||||
} from '../checkpoint-manager.js';
|
|
||||||
import { logError, PentestError } from '../error-handling.js';
|
|
||||||
import { promptConfirmation } from './prompts.js';
|
|
||||||
|
|
||||||
// Developer command handlers
|
|
||||||
/**
 * Entry point for developer-mode CLI commands.
 *
 * `--list-agents` and `--cleanup` are handled without a session. Every other
 * command first validates its argument (where it takes one), selects a
 * session, reconciles it against the audit logs, and then dispatches.
 * Any error that reaches the outer handler is printed and followed by
 * `process.exit(1)`.
 *
 * @param {string} command - Developer flag, e.g. '--status' or '--rerun'.
 * @param {string[]} args - Positional arguments following the flag.
 * @param {*} pipelineTestingMode - Forwarded unchanged to the run helpers.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to the run helpers.
 * @param {Function} loadPrompt - Forwarded unchanged to the run helpers.
 * @returns {Promise<void>}
 */
export async function handleDeveloperCommand(command, args, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) {
  try {
    let session;

    // ---- Commands that work without a session ----
    if (command === '--list-agents') {
      listAgents();
      return;
    }

    if (command === '--cleanup') {
      const sessionId = args[0];

      if (sessionId) {
        // A session id was given: delete just that session.
        const removed = await deleteSession(sessionId);
        console.log(chalk.green(`✅ Deleted session ${sessionId} (${new URL(removed.webUrl).hostname})`));
        return;
      }

      // No id: wipe everything, but only after an explicit confirmation.
      const confirmed = await promptConfirmation(chalk.yellow('⚠️ This will delete all pentest sessions. Are you sure? (y/N):'));
      if (!confirmed) {
        console.log(chalk.gray('Cleanup cancelled'));
        return;
      }

      const anyDeleted = await deleteAllSessions();
      if (anyDeleted) {
        console.log(chalk.green('✅ All sessions deleted'));
      } else {
        console.log(chalk.yellow('⚠️ No sessions found to delete'));
      }
      return;
    }

    // ---- Validate arguments early, before the user has to pick a session ----
    if (command === '--run-phase') {
      if (!args[0]) {
        console.log(chalk.red('❌ --run-phase requires a phase name'));
        console.log(chalk.gray('Usage: ./shannon.mjs --run-phase <phase-name>'));
        process.exit(1);
      }
      validatePhase(args[0]); // Throws PentestError when the phase is unknown
    }

    const takesAgentName = command === '--rollback-to' || command === '--rerun';
    if (takesAgentName) {
      if (!args[0]) {
        console.log(chalk.red(`❌ ${command} requires an agent name`));
        console.log(chalk.gray(`Usage: ./shannon.mjs ${command} <agent-name>`));
        process.exit(1);
      }
      validateAgent(args[0]); // Throws PentestError when the agent is unknown
    }

    // ---- Pick the session the command operates on ----
    try {
      session = await selectSession();
    } catch (error) {
      console.log(chalk.red(`❌ ${error.message}`));
      process.exit(1);
    }

    // ---- Self-healing: bring the session store in line with the audit logs ----
    // Keeps the stored session consistent with audit data, even after crash recovery.
    try {
      const { promotions, demotions, failures } = await reconcileSession(session.id);

      if (promotions.length > 0) {
        console.log(chalk.blue(`🔄 Reconciled: Added ${promotions.length} completed agents from audit logs`));
      }
      if (demotions.length > 0) {
        console.log(chalk.yellow(`🔄 Reconciled: Removed ${demotions.length} rolled-back agents`));
      }
      if (failures.length > 0) {
        console.log(chalk.yellow(`🔄 Reconciled: Marked ${failures.length} failed agents`));
      }

      // Re-read the session so the command sees the reconciled state.
      const { getSession } = await import('../session-manager.js');
      session = await getSession(session.id);
    } catch (error) {
      // Reconciliation is not critical: warn and carry on with the session as selected.
      console.log(chalk.yellow(`⚠️ Failed to reconcile session with audit logs: ${error.message}`));
    }

    // ---- Dispatch ----
    const actions = new Map([
      ['--run-phase', () => runPhase(args[0], session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt)],
      ['--run-all', () => runAll(session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt)],
      ['--rollback-to', () => rollbackTo(args[0], session)],
      ['--rerun', () => rerunAgent(args[0], session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt)],
      ['--status', () => displayStatus(session)]
    ]);

    const action = actions.get(command);
    if (action) {
      await action();
    } else {
      console.log(chalk.red(`❌ Unknown developer command: ${command}`));
      console.log(chalk.gray('Use --help to see available commands'));
      process.exit(1);
    }
  } catch (error) {
    const isPentestError = error instanceof PentestError;

    if (isPentestError) {
      await logError(error, `Developer command ${command}`);
      console.log(chalk.red.bold(`\n🚨 Command failed: ${error.message}`));
    } else {
      console.log(chalk.red.bold(`\n🚨 Unexpected error: ${error.message}`));
      if (process.env.DEBUG) {
        console.log(chalk.gray(error.stack));
      }
    }

    process.exit(1);
  }
}
|
|
||||||
@@ -1,7 +1,19 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { fs, path } from 'zx';
|
import { fs, path } from 'zx';
|
||||||
|
|
||||||
|
interface ValidationResult {
|
||||||
|
valid: boolean;
|
||||||
|
error?: string;
|
||||||
|
path?: string;
|
||||||
|
}
|
||||||
|
|
||||||
// Helper function: Validate web URL
|
// Helper function: Validate web URL
|
||||||
export function validateWebUrl(url) {
|
export function validateWebUrl(url: string): ValidationResult {
|
||||||
try {
|
try {
|
||||||
const parsed = new URL(url);
|
const parsed = new URL(url);
|
||||||
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
||||||
@@ -11,16 +23,16 @@ export function validateWebUrl(url) {
|
|||||||
return { valid: false, error: 'Web URL must have a valid hostname' };
|
return { valid: false, error: 'Web URL must have a valid hostname' };
|
||||||
}
|
}
|
||||||
return { valid: true };
|
return { valid: true };
|
||||||
} catch (error) {
|
} catch {
|
||||||
return { valid: false, error: 'Invalid web URL format' };
|
return { valid: false, error: 'Invalid web URL format' };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function: Validate local repository path
|
// Helper function: Validate local repository path
|
||||||
export async function validateRepoPath(repoPath) {
|
export async function validateRepoPath(repoPath: string): Promise<ValidationResult> {
|
||||||
try {
|
try {
|
||||||
// Check if path exists
|
// Check if path exists
|
||||||
if (!await fs.pathExists(repoPath)) {
|
if (!(await fs.pathExists(repoPath))) {
|
||||||
return { valid: false, error: 'Repository path does not exist' };
|
return { valid: false, error: 'Repository path does not exist' };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -33,7 +45,7 @@ export async function validateRepoPath(repoPath) {
|
|||||||
// Check if it's readable
|
// Check if it's readable
|
||||||
try {
|
try {
|
||||||
await fs.access(repoPath, fs.constants.R_OK);
|
await fs.access(repoPath, fs.constants.R_OK);
|
||||||
} catch (error) {
|
} catch {
|
||||||
return { valid: false, error: 'Repository path is not readable' };
|
return { valid: false, error: 'Repository path is not readable' };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,6 +53,7 @@ export async function validateRepoPath(repoPath) {
|
|||||||
const absolutePath = path.resolve(repoPath);
|
const absolutePath = path.resolve(repoPath);
|
||||||
return { valid: true, path: absolutePath };
|
return { valid: true, path: absolutePath };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return { valid: false, error: `Invalid repository path: ${error.message}` };
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
|
return { valid: false, error: `Invalid repository path: ${errMsg}` };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
import { createInterface } from 'readline';
|
|
||||||
import { PentestError } from '../error-handling.js';
|
|
||||||
|
|
||||||
/**
 * Prompt user for yes/no confirmation
 *
 * The answer is compared case-insensitively after surrounding whitespace is
 * removed; only "y" and "yes" count as confirmation.
 *
 * @param {string} message - Question to display
 * @returns {Promise<boolean>} true if confirmed, false otherwise
 */
export async function promptConfirmation(message) {
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise((resolve) => {
    rl.question(message + ' ', (answer) => {
      rl.close();
      // Trim first so a stray space or tab around "y" is not read as "no".
      const normalized = answer.trim().toLowerCase();
      resolve(normalized === 'y' || normalized === 'yes');
    });
  });
}
|
|
||||||
|
|
||||||
/**
 * Prompt user to select from numbered list
 *
 * The answer must consist solely of decimal digits (surrounding whitespace is
 * ignored) and name a position between 1 and `items.length`.
 *
 * @param {string} message - Selection prompt
 * @param {Array} items - Items to choose from
 * @returns {Promise<any>} Selected item
 * @throws {PentestError} If `items` is empty or missing; the returned promise
 *   rejects with a PentestError if the selection is invalid
 */
export async function promptSelection(message, items) {
  if (!items || items.length === 0) {
    throw new PentestError(
      'No items available for selection',
      'validation',
      false
    );
  }

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise((resolve, reject) => {
    rl.question(message + ' ', (answer) => {
      rl.close();

      // Parse strictly: parseInt would accept "2abc" or "1.9" as a valid pick.
      const trimmed = answer.trim();
      const choice = /^\d+$/.test(trimmed) ? Number(trimmed) : NaN;

      if (Number.isNaN(choice) || choice < 1 || choice > items.length) {
        reject(new PentestError(
          `Invalid selection. Please enter a number between 1 and ${items.length}`,
          'validation',
          false,
          { choice: answer }
        ));
        return;
      }

      resolve(items[choice - 1]);
    });
  });
}
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
import chalk from 'chalk';
|
|
||||||
import { displaySplashScreen } from '../splash-screen.js';
|
|
||||||
|
|
||||||
// Helper function: Display help information
|
|
||||||
/**
 * Print the command-line help text to stdout, one console.log call per line.
 */
export function showHelp() {
  const heading = (text) => chalk.yellow.bold(text);

  const lines = [
    chalk.cyan.bold('AI Penetration Testing Agent'),
    chalk.gray('Automated security assessment tool\n'),

    heading('NORMAL MODE (Creates Sessions):'),
    ' ./shannon.mjs <WEB_URL> <REPO_PATH> [--config config.yaml] [--pipeline-testing]',
    ' ./shannon.mjs <WEB_URL> <REPO_PATH> --setup-only # Setup local repo and create session only\n',

    heading('DEVELOPER MODE (Operates on Existing Sessions):'),
    ' ./shannon.mjs --run-phase <phase-name> [--pipeline-testing]',
    ' ./shannon.mjs --run-all [--pipeline-testing]',
    ' ./shannon.mjs --rollback-to <agent-name>',
    ' ./shannon.mjs --rerun <agent-name> [--pipeline-testing]',
    ' ./shannon.mjs --status',
    ' ./shannon.mjs --list-agents',
    ' ./shannon.mjs --cleanup [session-id] # Delete sessions\n',

    heading('OPTIONS:'),
    ' --config <file> YAML configuration file for authentication and testing parameters',
    ' --pipeline-testing Use minimal prompts for fast pipeline testing (creates minimal deliverables)\n',

    heading('DEVELOPER COMMANDS:'),
    ' --run-phase Run all agents in a phase (parallel execution for 5x speedup)',
    ' --run-all Run all remaining agents to completion (parallel execution)',
    ' --rollback-to Rollback git workspace to agent checkpoint',
    ' --rerun Rollback and rerun specific agent',
    ' --status Show current session status and progress',
    ' --list-agents List all available agents and phases',
    ' --cleanup Delete all sessions or specific session by ID\n',

    heading('EXAMPLES:'),
    ' # Normal mode - create new session',
    ' ./shannon.mjs "https://example.com" "/path/to/local/repo"',
    ' ./shannon.mjs "https://example.com" "/path/to/local/repo" --config auth.yaml',
    ' ./shannon.mjs "https://example.com" "/path/to/local/repo" --setup-only # Setup only\n',

    ' # Developer mode - operate on existing session',
    ' ./shannon.mjs --status # Show session status',
    ' ./shannon.mjs --run-phase exploitation # Run entire phase',
    ' ./shannon.mjs --run-all # Run all remaining agents',
    ' ./shannon.mjs --rerun xss-vuln # Fix and rerun failed agent',
    ' ./shannon.mjs --cleanup # Delete all sessions',
    ' ./shannon.mjs --cleanup <session-id> # Delete specific session\n',

    heading('REQUIREMENTS:'),
    ' • WEB_URL must start with http:// or https://',
    ' • REPO_PATH must be an accessible local directory',
    ' • Only test systems you own or have permission to test',
    ' • Developer mode requires existing pentest session\n',

    heading('ENVIRONMENT VARIABLES:'),
    ' PENTEST_MAX_RETRIES Number of retries for AI agents (default: 3)'
  ];

  for (const line of lines) {
    console.log(line);
  }
}
|
|
||||||
|
|
||||||
// Export the splash screen function for use in main
|
|
||||||
export { displaySplashScreen };
|
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import { displaySplashScreen } from '../splash-screen.js';
|
||||||
|
|
||||||
|
// Helper function: Display help information
|
||||||
|
/**
 * Print the command-line help text to stdout, one console.log call per line.
 */
export function showHelp(): void {
  const heading = (text: string): string => chalk.yellow.bold(text);

  const lines: string[] = [
    chalk.cyan.bold('AI Penetration Testing Agent'),
    chalk.gray('Automated security assessment tool\n'),

    heading('USAGE:'),
    ' shannon <WEB_URL> <REPO_PATH> [--config config.yaml] [--output /path/to/reports]\n',

    heading('OPTIONS:'),
    ' --config <file> YAML configuration file for authentication and testing parameters',
    ' --output <path> Custom output directory for session folder (default: ./audit-logs/)',
    ' --pipeline-testing Use minimal prompts for fast pipeline testing (creates minimal deliverables)',
    ' --disable-loader Disable the animated progress loader (useful when logs interfere with spinner)',
    ' --help Show this help message\n',

    heading('EXAMPLES:'),
    ' shannon "https://example.com" "/path/to/local/repo"',
    ' shannon "https://example.com" "/path/to/local/repo" --config auth.yaml',
    ' shannon "https://example.com" "/path/to/local/repo" --output /path/to/reports',
    ' shannon "https://example.com" "/path/to/local/repo" --pipeline-testing\n',

    heading('REQUIREMENTS:'),
    ' • WEB_URL must start with http:// or https://',
    ' • REPO_PATH must be an accessible local directory',
    ' • Only test systems you own or have permission to test\n',

    heading('ENVIRONMENT VARIABLES:'),
    ' PENTEST_MAX_RETRIES Number of retries for AI agents (default: 3)',
  ];

  for (const line of lines) {
    console.log(line);
  }
}
|
||||||
|
|
||||||
|
// Export the splash screen function for use in main
|
||||||
|
export { displaySplashScreen };
|
||||||
@@ -1,45 +1,64 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import { createRequire } from 'module';
|
||||||
import { fs } from 'zx';
|
import { fs } from 'zx';
|
||||||
import yaml from 'js-yaml';
|
import yaml from 'js-yaml';
|
||||||
import Ajv from 'ajv';
|
import { Ajv, type ValidateFunction } from 'ajv';
|
||||||
import addFormats from 'ajv-formats';
|
import type { FormatsPlugin } from 'ajv-formats';
|
||||||
import { PentestError } from './error-handling.js';
|
import { PentestError } from './error-handling.js';
|
||||||
|
import type {
|
||||||
|
Config,
|
||||||
|
Rule,
|
||||||
|
Rules,
|
||||||
|
Authentication,
|
||||||
|
DistributedConfig,
|
||||||
|
} from './types/config.js';
|
||||||
|
|
||||||
|
// Handle ESM/CJS interop for ajv-formats using require
|
||||||
|
const require = createRequire(import.meta.url);
|
||||||
|
const addFormats: FormatsPlugin = require('ajv-formats');
|
||||||
|
|
||||||
// Initialize AJV with formats
|
// Initialize AJV with formats
|
||||||
const ajv = new Ajv({ allErrors: true, verbose: true });
|
const ajv = new Ajv({ allErrors: true, verbose: true });
|
||||||
addFormats(ajv);
|
addFormats(ajv);
|
||||||
|
|
||||||
// Load JSON Schema
|
// Load JSON Schema
|
||||||
let configSchema;
|
let configSchema: object;
|
||||||
|
let validateSchema: ValidateFunction;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const schemaPath = new URL('../configs/config-schema.json', import.meta.url);
|
const schemaPath = new URL('../configs/config-schema.json', import.meta.url);
|
||||||
const schemaContent = await fs.readFile(schemaPath, 'utf8');
|
const schemaContent = await fs.readFile(schemaPath, 'utf8');
|
||||||
configSchema = JSON.parse(schemaContent);
|
configSchema = JSON.parse(schemaContent) as object;
|
||||||
|
validateSchema = ajv.compile(configSchema);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
throw new PentestError(
|
throw new PentestError(
|
||||||
`Failed to load configuration schema: ${error.message}`,
|
`Failed to load configuration schema: ${errMsg}`,
|
||||||
'config',
|
'config',
|
||||||
false,
|
false,
|
||||||
{ schemaPath: '../configs/config-schema.json', originalError: error.message }
|
{ schemaPath: '../configs/config-schema.json', originalError: errMsg }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compile the schema validator
|
|
||||||
const validateSchema = ajv.compile(configSchema);
|
|
||||||
|
|
||||||
// Security patterns to block
|
// Security patterns to block
|
||||||
const DANGEROUS_PATTERNS = [
|
const DANGEROUS_PATTERNS: RegExp[] = [
|
||||||
/\.\.\//, // Path traversal
|
/\.\.\//, // Path traversal
|
||||||
/[<>]/, // HTML/XML injection
|
/[<>]/, // HTML/XML injection
|
||||||
/javascript:/i, // JavaScript URLs
|
/javascript:/i, // JavaScript URLs
|
||||||
/data:/i, // Data URLs
|
/data:/i, // Data URLs
|
||||||
/file:/i // File URLs
|
/file:/i, // File URLs
|
||||||
];
|
];
|
||||||
|
|
||||||
// Parse and load YAML configuration file with enhanced safety
|
// Parse and load YAML configuration file with enhanced safety
|
||||||
export const parseConfig = async (configPath) => {
|
export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||||
try {
|
try {
|
||||||
// File existence check
|
// File existence check
|
||||||
if (!await fs.pathExists(configPath)) {
|
if (!(await fs.pathExists(configPath))) {
|
||||||
throw new Error(`Configuration file not found: ${configPath}`);
|
throw new Error(`Configuration file not found: ${configPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,27 +66,30 @@ export const parseConfig = async (configPath) => {
|
|||||||
const stats = await fs.stat(configPath);
|
const stats = await fs.stat(configPath);
|
||||||
const maxFileSize = 1024 * 1024; // 1MB
|
const maxFileSize = 1024 * 1024; // 1MB
|
||||||
if (stats.size > maxFileSize) {
|
if (stats.size > maxFileSize) {
|
||||||
throw new Error(`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`);
|
throw new Error(
|
||||||
|
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read file content
|
// Read file content
|
||||||
const configContent = await fs.readFile(configPath, 'utf8');
|
const configContent = await fs.readFile(configPath, 'utf8');
|
||||||
|
|
||||||
// Basic content validation
|
// Basic content validation
|
||||||
if (!configContent.trim()) {
|
if (!configContent.trim()) {
|
||||||
throw new Error('Configuration file is empty');
|
throw new Error('Configuration file is empty');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse YAML with safety options
|
// Parse YAML with safety options
|
||||||
let config;
|
let config: unknown;
|
||||||
try {
|
try {
|
||||||
config = yaml.load(configContent, {
|
config = yaml.load(configContent, {
|
||||||
schema: yaml.FAILSAFE_SCHEMA, // Only basic YAML types, no JS evaluation
|
schema: yaml.FAILSAFE_SCHEMA, // Only basic YAML types, no JS evaluation
|
||||||
json: false, // Don't allow JSON-specific syntax
|
json: false, // Don't allow JSON-specific syntax
|
||||||
filename: configPath
|
filename: configPath,
|
||||||
});
|
});
|
||||||
} catch (yamlError) {
|
} catch (yamlError) {
|
||||||
throw new Error(`YAML parsing failed: ${yamlError.message}`);
|
const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
|
||||||
|
throw new Error(`YAML parsing failed: ${errMsg}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Additional safety check
|
// Additional safety check
|
||||||
@@ -76,26 +98,29 @@ export const parseConfig = async (configPath) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate the configuration structure and content
|
// Validate the configuration structure and content
|
||||||
validateConfig(config);
|
validateConfig(config as Config);
|
||||||
|
|
||||||
return config;
|
return config as Config;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
// Enhance error message with context
|
// Enhance error message with context
|
||||||
if (error.message.startsWith('Configuration file not found') ||
|
if (
|
||||||
error.message.startsWith('YAML parsing failed') ||
|
errMsg.startsWith('Configuration file not found') ||
|
||||||
error.message.includes('must be') ||
|
errMsg.startsWith('YAML parsing failed') ||
|
||||||
error.message.includes('exceeds maximum')) {
|
errMsg.includes('must be') ||
|
||||||
|
errMsg.includes('exceeds maximum')
|
||||||
|
) {
|
||||||
// These are already well-formatted errors, re-throw as-is
|
// These are already well-formatted errors, re-throw as-is
|
||||||
throw error;
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
// Wrap other errors with context
|
// Wrap other errors with context
|
||||||
throw new Error(`Failed to parse configuration file '${configPath}': ${error.message}`);
|
throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Validate overall configuration structure using JSON Schema
|
// Validate overall configuration structure using JSON Schema
|
||||||
const validateConfig = (config) => {
|
const validateConfig = (config: Config): void => {
|
||||||
// Basic structure validation
|
// Basic structure validation
|
||||||
if (!config || typeof config !== 'object') {
|
if (!config || typeof config !== 'object') {
|
||||||
throw new Error('Configuration must be a valid object');
|
throw new Error('Configuration must be a valid object');
|
||||||
@@ -109,7 +134,7 @@ const validateConfig = (config) => {
|
|||||||
const isValid = validateSchema(config);
|
const isValid = validateSchema(config);
|
||||||
if (!isValid) {
|
if (!isValid) {
|
||||||
const errors = validateSchema.errors || [];
|
const errors = validateSchema.errors || [];
|
||||||
const errorMessages = errors.map(err => {
|
const errorMessages = errors.map((err) => {
|
||||||
const path = err.instancePath || 'root';
|
const path = err.instancePath || 'root';
|
||||||
return `${path}: ${err.message}`;
|
return `${path}: ${err.message}`;
|
||||||
});
|
});
|
||||||
@@ -126,48 +151,57 @@ const validateConfig = (config) => {
|
|||||||
|
|
||||||
// Ensure at least some configuration is provided
|
// Ensure at least some configuration is provided
|
||||||
if (!config.rules && !config.authentication) {
|
if (!config.rules && !config.authentication) {
|
||||||
console.warn('⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.');
|
console.warn(
|
||||||
|
'⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.'
|
||||||
|
);
|
||||||
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
|
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
|
||||||
console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
|
console.warn(
|
||||||
|
'⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.'
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Perform additional security validation beyond JSON Schema
|
// Perform additional security validation beyond JSON Schema
|
||||||
const performSecurityValidation = (config) => {
|
const performSecurityValidation = (config: Config): void => {
|
||||||
// Validate authentication section for security issues
|
// Validate authentication section for security issues
|
||||||
if (config.authentication) {
|
if (config.authentication) {
|
||||||
const auth = config.authentication;
|
const auth = config.authentication;
|
||||||
|
|
||||||
// Check for dangerous patterns in credentials
|
// Check for dangerous patterns in credentials
|
||||||
if (auth.credentials) {
|
if (auth.credentials) {
|
||||||
for (const pattern of DANGEROUS_PATTERNS) {
|
for (const pattern of DANGEROUS_PATTERNS) {
|
||||||
if (pattern.test(auth.credentials.username)) {
|
if (pattern.test(auth.credentials.username)) {
|
||||||
throw new Error('authentication.credentials.username contains potentially dangerous pattern');
|
throw new Error(
|
||||||
|
'authentication.credentials.username contains potentially dangerous pattern'
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (pattern.test(auth.credentials.password)) {
|
if (pattern.test(auth.credentials.password)) {
|
||||||
throw new Error('authentication.credentials.password contains potentially dangerous pattern');
|
throw new Error(
|
||||||
|
'authentication.credentials.password contains potentially dangerous pattern'
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check login flow for dangerous patterns
|
// Check login flow for dangerous patterns
|
||||||
if (auth.login_flow) {
|
if (auth.login_flow) {
|
||||||
auth.login_flow.forEach((step, index) => {
|
auth.login_flow.forEach((step, index) => {
|
||||||
for (const pattern of DANGEROUS_PATTERNS) {
|
for (const pattern of DANGEROUS_PATTERNS) {
|
||||||
if (pattern.test(step)) {
|
if (pattern.test(step)) {
|
||||||
throw new Error(`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`);
|
throw new Error(
|
||||||
|
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate rules section for security issues
|
// Validate rules section for security issues
|
||||||
if (config.rules) {
|
if (config.rules) {
|
||||||
validateRulesSecurity(config.rules.avoid, 'avoid');
|
validateRulesSecurity(config.rules.avoid, 'avoid');
|
||||||
validateRulesSecurity(config.rules.focus, 'focus');
|
validateRulesSecurity(config.rules.focus, 'focus');
|
||||||
|
|
||||||
// Check for duplicate and conflicting rules
|
// Check for duplicate and conflicting rules
|
||||||
checkForDuplicates(config.rules.avoid || [], 'avoid');
|
checkForDuplicates(config.rules.avoid || [], 'avoid');
|
||||||
checkForDuplicates(config.rules.focus || [], 'focus');
|
checkForDuplicates(config.rules.focus || [], 'focus');
|
||||||
@@ -176,132 +210,148 @@ const performSecurityValidation = (config) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Validate rules for security issues
|
// Validate rules for security issues
|
||||||
const validateRulesSecurity = (rules, ruleType) => {
|
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
|
||||||
if (!rules) return;
|
if (!rules) return;
|
||||||
|
|
||||||
rules.forEach((rule, index) => {
|
rules.forEach((rule, index) => {
|
||||||
// Security validation
|
// Security validation
|
||||||
for (const pattern of DANGEROUS_PATTERNS) {
|
for (const pattern of DANGEROUS_PATTERNS) {
|
||||||
if (pattern.test(rule.url_path)) {
|
if (pattern.test(rule.url_path)) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (pattern.test(rule.description)) {
|
if (pattern.test(rule.description)) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Type-specific validation
|
// Type-specific validation
|
||||||
validateRuleTypeSpecific(rule, ruleType, index);
|
validateRuleTypeSpecific(rule, ruleType, index);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// Validate rule based on its specific type
|
// Validate rule based on its specific type
|
||||||
const validateRuleTypeSpecific = (rule, ruleType, index) => {
|
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
|
||||||
switch (rule.type) {
|
switch (rule.type) {
|
||||||
case 'path':
|
case 'path':
|
||||||
if (!rule.url_path.startsWith('/')) {
|
if (!rule.url_path.startsWith('/')) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
|
throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'subdomain':
|
case 'subdomain':
|
||||||
case 'domain':
|
case 'domain':
|
||||||
// Basic domain validation - no slashes allowed
|
// Basic domain validation - no slashes allowed
|
||||||
if (rule.url_path.includes('/')) {
|
if (rule.url_path.includes('/')) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// Must contain at least one dot for domains
|
// Must contain at least one dot for domains
|
||||||
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
|
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'method':
|
case 'method': {
|
||||||
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
|
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
|
||||||
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
|
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case 'header':
|
case 'header':
|
||||||
// Header name validation (basic)
|
// Header name validation (basic)
|
||||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'parameter':
|
case 'parameter':
|
||||||
// Parameter name validation (basic)
|
// Parameter name validation (basic)
|
||||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||||
throw new Error(`rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`);
|
throw new Error(
|
||||||
|
`rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check for duplicate rules
|
// Check for duplicate rules
|
||||||
const checkForDuplicates = (rules, ruleType) => {
|
const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
|
||||||
const seen = new Set();
|
const seen = new Set<string>();
|
||||||
rules.forEach((rule, index) => {
|
rules.forEach((rule, index) => {
|
||||||
const key = `${rule.type}:${rule.url_path}`;
|
const key = `${rule.type}:${rule.url_path}`;
|
||||||
if (seen.has(key)) {
|
if (seen.has(key)) {
|
||||||
throw new Error(`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`);
|
throw new Error(
|
||||||
|
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
seen.add(key);
|
seen.add(key);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check for conflicting rules between avoid and focus
|
// Check for conflicting rules between avoid and focus
|
||||||
const checkForConflicts = (avoidRules = [], focusRules = []) => {
|
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
|
||||||
const avoidSet = new Set(avoidRules.map(rule => `${rule.type}:${rule.url_path}`));
|
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
|
||||||
|
|
||||||
focusRules.forEach((rule, index) => {
|
focusRules.forEach((rule, index) => {
|
||||||
const key = `${rule.type}:${rule.url_path}`;
|
const key = `${rule.type}:${rule.url_path}`;
|
||||||
if (avoidSet.has(key)) {
|
if (avoidSet.has(key)) {
|
||||||
throw new Error(`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`);
|
throw new Error(
|
||||||
|
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sanitize and normalize rule values
|
// Sanitize and normalize rule values
|
||||||
const sanitizeRule = (rule) => {
|
const sanitizeRule = (rule: Rule): Rule => {
|
||||||
return {
|
return {
|
||||||
description: rule.description.trim(),
|
description: rule.description.trim(),
|
||||||
type: rule.type.toLowerCase().trim(),
|
type: rule.type.toLowerCase().trim() as Rule['type'],
|
||||||
url_path: rule.url_path.trim()
|
url_path: rule.url_path.trim(),
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Distribute configuration sections to different agents with sanitization
|
// Distribute configuration sections to different agents with sanitization
|
||||||
export const distributeConfig = (config) => {
|
export const distributeConfig = (config: Config | null): DistributedConfig => {
|
||||||
const avoid = config?.rules?.avoid || [];
|
const avoid = config?.rules?.avoid || [];
|
||||||
const focus = config?.rules?.focus || [];
|
const focus = config?.rules?.focus || [];
|
||||||
const authentication = config?.authentication || null;
|
const authentication = config?.authentication || null;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
avoid: avoid.map(sanitizeRule),
|
avoid: avoid.map(sanitizeRule),
|
||||||
focus: focus.map(sanitizeRule),
|
focus: focus.map(sanitizeRule),
|
||||||
authentication: authentication ? sanitizeAuthentication(authentication) : null
|
authentication: authentication ? sanitizeAuthentication(authentication) : null,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sanitize and normalize authentication values
|
// Sanitize and normalize authentication values
|
||||||
const sanitizeAuthentication = (auth) => {
|
const sanitizeAuthentication = (auth: Authentication): Authentication => {
|
||||||
return {
|
return {
|
||||||
login_type: auth.login_type.toLowerCase().trim(),
|
login_type: auth.login_type.toLowerCase().trim() as Authentication['login_type'],
|
||||||
login_url: auth.login_url.trim(),
|
login_url: auth.login_url.trim(),
|
||||||
credentials: {
|
credentials: {
|
||||||
username: auth.credentials.username.trim(),
|
username: auth.credentials.username.trim(),
|
||||||
password: auth.credentials.password,
|
password: auth.credentials.password,
|
||||||
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() })
|
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
|
||||||
},
|
},
|
||||||
login_flow: auth.login_flow.map(step => step.trim()),
|
login_flow: auth.login_flow.map((step) => step.trim()),
|
||||||
success_condition: {
|
success_condition: {
|
||||||
type: auth.success_condition.type.toLowerCase().trim(),
|
type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
|
||||||
value: auth.success_condition.value.trim()
|
value: auth.success_condition.value.trim(),
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Additional validation functions are already exported above
|
|
||||||
|
|
||||||
@@ -1,37 +1,45 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { path, fs } from 'zx';
|
import { path, fs } from 'zx';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
import { validateQueueAndDeliverable } from './queue-validation.js';
|
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
|
||||||
|
import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
|
||||||
|
|
||||||
// Factory function for vulnerability queue validators
|
// Factory function for vulnerability queue validators
|
||||||
function createVulnValidator(vulnType) {
|
function createVulnValidator(vulnType: VulnType): AgentValidator {
|
||||||
return async (sourceDir) => {
|
return async (sourceDir: string): Promise<boolean> => {
|
||||||
try {
|
try {
|
||||||
await validateQueueAndDeliverable(vulnType, sourceDir);
|
await validateQueueAndDeliverable(vulnType, sourceDir);
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(chalk.yellow(` Queue validation failed for ${vulnType}: ${error.message}`));
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
|
console.log(chalk.yellow(` Queue validation failed for ${vulnType}: ${errMsg}`));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Factory function for exploit deliverable validators
|
// Factory function for exploit deliverable validators
|
||||||
function createExploitValidator(vulnType) {
|
function createExploitValidator(vulnType: VulnType): AgentValidator {
|
||||||
return async (sourceDir) => {
|
return async (sourceDir: string): Promise<boolean> => {
|
||||||
const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`);
|
const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`);
|
||||||
return await fs.pathExists(evidenceFile);
|
return await fs.pathExists(evidenceFile);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts
|
// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts
|
||||||
export const MCP_AGENT_MAPPING = Object.freeze({
|
export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
|
||||||
// Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
|
// Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
|
||||||
// NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
|
// NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
|
||||||
// but assigning MCP server anyway for consistency and future extensibility
|
// but assigning MCP server anyway for consistency and future extensibility
|
||||||
'pre-recon-code': 'playwright-agent1',
|
'pre-recon-code': 'playwright-agent1',
|
||||||
|
|
||||||
// Phase 2: Reconnaissance (actual prompt name is 'recon')
|
// Phase 2: Reconnaissance (actual prompt name is 'recon')
|
||||||
'recon': 'playwright-agent2',
|
recon: 'playwright-agent2',
|
||||||
|
|
||||||
// Phase 3: Vulnerability Analysis (5 parallel agents)
|
// Phase 3: Vulnerability Analysis (5 parallel agents)
|
||||||
'vuln-injection': 'playwright-agent1',
|
'vuln-injection': 'playwright-agent1',
|
||||||
@@ -50,19 +58,19 @@ export const MCP_AGENT_MAPPING = Object.freeze({
|
|||||||
// Phase 5: Reporting (actual prompt name is 'report-executive')
|
// Phase 5: Reporting (actual prompt name is 'report-executive')
|
||||||
// NOTE: Report generation is typically text-based and doesn't use browser automation,
|
// NOTE: Report generation is typically text-based and doesn't use browser automation,
|
||||||
// but assigning MCP server anyway for potential screenshot inclusion or future needs
|
// but assigning MCP server anyway for potential screenshot inclusion or future needs
|
||||||
'report-executive': 'playwright-agent3'
|
'report-executive': 'playwright-agent3',
|
||||||
});
|
});
|
||||||
|
|
||||||
// Direct agent-to-validator mapping - much simpler than pattern matching
|
// Direct agent-to-validator mapping - much simpler than pattern matching
|
||||||
export const AGENT_VALIDATORS = Object.freeze({
|
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
|
||||||
// Pre-reconnaissance agent - validates the code analysis deliverable created by the agent
|
// Pre-reconnaissance agent - validates the code analysis deliverable created by the agent
|
||||||
'pre-recon': async (sourceDir) => {
|
'pre-recon': async (sourceDir: string): Promise<boolean> => {
|
||||||
const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
|
const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
|
||||||
return await fs.pathExists(codeAnalysisFile);
|
return await fs.pathExists(codeAnalysisFile);
|
||||||
},
|
},
|
||||||
|
|
||||||
// Reconnaissance agent
|
// Reconnaissance agent
|
||||||
'recon': async (sourceDir) => {
|
recon: async (sourceDir: string): Promise<boolean> => {
|
||||||
const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md');
|
const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md');
|
||||||
return await fs.pathExists(reconFile);
|
return await fs.pathExists(reconFile);
|
||||||
},
|
},
|
||||||
@@ -82,15 +90,21 @@ export const AGENT_VALIDATORS = Object.freeze({
|
|||||||
'authz-exploit': createExploitValidator('authz'),
|
'authz-exploit': createExploitValidator('authz'),
|
||||||
|
|
||||||
// Executive report agent
|
// Executive report agent
|
||||||
'report': async (sourceDir) => {
|
report: async (sourceDir: string): Promise<boolean> => {
|
||||||
const reportFile = path.join(sourceDir, 'deliverables', 'comprehensive_security_assessment_report.md');
|
const reportFile = path.join(
|
||||||
|
sourceDir,
|
||||||
|
'deliverables',
|
||||||
|
'comprehensive_security_assessment_report.md'
|
||||||
|
);
|
||||||
|
|
||||||
const reportExists = await fs.pathExists(reportFile);
|
const reportExists = await fs.pathExists(reportFile);
|
||||||
|
|
||||||
if (!reportExists) {
|
if (!reportExists) {
|
||||||
console.log(chalk.red(` ❌ Missing required deliverable: comprehensive_security_assessment_report.md`));
|
console.log(
|
||||||
|
chalk.red(` ❌ Missing required deliverable: comprehensive_security_assessment_report.md`)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return reportExists;
|
return reportExists;
|
||||||
}
|
},
|
||||||
});
|
});
|
||||||
@@ -1,159 +0,0 @@
|
|||||||
import chalk from 'chalk';
|
|
||||||
import { fs, path } from 'zx';
|
|
||||||
|
|
||||||
// Custom error class for pentest operations
|
|
||||||
export class PentestError extends Error {
|
|
||||||
constructor(message, type, retryable = false, context = {}) {
|
|
||||||
super(message);
|
|
||||||
this.name = 'PentestError';
|
|
||||||
this.type = type; // 'config', 'network', 'tool', 'prompt', 'filesystem', 'validation'
|
|
||||||
this.retryable = retryable;
|
|
||||||
this.context = context;
|
|
||||||
this.timestamp = new Date().toISOString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Centralized error logging function
|
|
||||||
export const logError = async (error, contextMsg, sourceDir = null) => {
|
|
||||||
const timestamp = new Date().toISOString();
|
|
||||||
const logEntry = {
|
|
||||||
timestamp,
|
|
||||||
context: contextMsg,
|
|
||||||
error: {
|
|
||||||
name: error.name || error.constructor.name,
|
|
||||||
message: error.message,
|
|
||||||
type: error.type || 'unknown',
|
|
||||||
retryable: error.retryable || false,
|
|
||||||
stack: error.stack
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Console logging with color
|
|
||||||
const prefix = error.retryable ? '⚠️' : '❌';
|
|
||||||
const color = error.retryable ? chalk.yellow : chalk.red;
|
|
||||||
console.log(color(`${prefix} ${contextMsg}:`));
|
|
||||||
console.log(color(` ${error.message}`));
|
|
||||||
|
|
||||||
if (error.context && Object.keys(error.context).length > 0) {
|
|
||||||
console.log(chalk.gray(` Context: ${JSON.stringify(error.context)}`));
|
|
||||||
}
|
|
||||||
|
|
||||||
// File logging (if source directory available)
|
|
||||||
if (sourceDir) {
|
|
||||||
try {
|
|
||||||
const logPath = path.join(sourceDir, 'error.log');
|
|
||||||
await fs.appendFile(logPath, JSON.stringify(logEntry) + '\n');
|
|
||||||
} catch (logErr) {
|
|
||||||
console.log(chalk.gray(` (Failed to write error log: ${logErr.message})`));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return logEntry;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Handle tool execution errors
|
|
||||||
export const handleToolError = (toolName, error) => {
|
|
||||||
const isRetryable = error.code === 'ECONNRESET' || error.code === 'ETIMEDOUT' || error.code === 'ENOTFOUND';
|
|
||||||
|
|
||||||
return {
|
|
||||||
tool: toolName,
|
|
||||||
output: `Error: ${error.message}`,
|
|
||||||
status: 'error',
|
|
||||||
duration: 0,
|
|
||||||
success: false,
|
|
||||||
error: new PentestError(
|
|
||||||
`${toolName} execution failed: ${error.message}`,
|
|
||||||
'tool',
|
|
||||||
isRetryable,
|
|
||||||
{ toolName, originalError: error.message, errorCode: error.code }
|
|
||||||
)
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
// Handle prompt loading errors
|
|
||||||
export const handlePromptError = (promptName, error) => {
|
|
||||||
return {
|
|
||||||
success: false,
|
|
||||||
error: new PentestError(
|
|
||||||
`Failed to load prompt '${promptName}': ${error.message}`,
|
|
||||||
'prompt',
|
|
||||||
false,
|
|
||||||
{ promptName, originalError: error.message }
|
|
||||||
)
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// Check if an error should trigger a retry for Claude agents
|
|
||||||
export const isRetryableError = (error) => {
|
|
||||||
const message = error.message.toLowerCase();
|
|
||||||
|
|
||||||
// Network and connection errors - always retryable
|
|
||||||
if (message.includes('network') ||
|
|
||||||
message.includes('connection') ||
|
|
||||||
message.includes('timeout') ||
|
|
||||||
message.includes('econnreset') ||
|
|
||||||
message.includes('enotfound') ||
|
|
||||||
message.includes('econnrefused')) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rate limiting - retryable with longer backoff
|
|
||||||
if (message.includes('rate limit') ||
|
|
||||||
message.includes('429') ||
|
|
||||||
message.includes('too many requests')) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Server errors - retryable
|
|
||||||
if (message.includes('server error') ||
|
|
||||||
message.includes('5xx') ||
|
|
||||||
message.includes('internal server error') ||
|
|
||||||
message.includes('service unavailable') ||
|
|
||||||
message.includes('bad gateway')) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Claude API specific errors - retryable
|
|
||||||
if (message.includes('mcp server') ||
|
|
||||||
message.includes('model unavailable') ||
|
|
||||||
message.includes('service temporarily unavailable') ||
|
|
||||||
message.includes('api error') ||
|
|
||||||
message.includes('terminated')) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Max turns without completion - retryable once
|
|
||||||
if (message.includes('max turns') ||
|
|
||||||
message.includes('maximum turns')) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Non-retryable errors
|
|
||||||
if (message.includes('authentication') ||
|
|
||||||
message.includes('invalid prompt') ||
|
|
||||||
message.includes('out of memory') ||
|
|
||||||
message.includes('permission denied') ||
|
|
||||||
message.includes('session limit reached') ||
|
|
||||||
message.includes('invalid api key')) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default to non-retryable for unknown errors
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Get retry delay based on error type and attempt number
|
|
||||||
export const getRetryDelay = (error, attempt) => {
|
|
||||||
const message = error.message.toLowerCase();
|
|
||||||
|
|
||||||
// Rate limiting gets longer delays
|
|
||||||
if (message.includes('rate limit') || message.includes('429')) {
|
|
||||||
return Math.min(30000 + (attempt * 10000), 120000); // 30s, 40s, 50s, max 2min
|
|
||||||
}
|
|
||||||
|
|
||||||
// Exponential backoff with jitter for other retryable errors
|
|
||||||
const baseDelay = Math.pow(2, attempt) * 1000; // 2s, 4s, 8s
|
|
||||||
const jitter = Math.random() * 1000; // 0-1s random
|
|
||||||
return Math.min(baseDelay + jitter, 30000); // Max 30s
|
|
||||||
};
|
|
||||||
@@ -0,0 +1,319 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import { fs, path } from 'zx';
|
||||||
|
import type {
|
||||||
|
PentestErrorType,
|
||||||
|
PentestErrorContext,
|
||||||
|
LogEntry,
|
||||||
|
ToolErrorResult,
|
||||||
|
PromptErrorResult,
|
||||||
|
} from './types/errors.js';
|
||||||
|
|
||||||
|
// Temporal error classification for ApplicationFailure wrapping
|
||||||
|
export interface TemporalErrorClassification {
|
||||||
|
type: string;
|
||||||
|
retryable: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Custom error class for pentest operations
|
||||||
|
export class PentestError extends Error {
|
||||||
|
name = 'PentestError' as const;
|
||||||
|
type: PentestErrorType;
|
||||||
|
retryable: boolean;
|
||||||
|
context: PentestErrorContext;
|
||||||
|
timestamp: string;
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
message: string,
|
||||||
|
type: PentestErrorType,
|
||||||
|
retryable: boolean = false,
|
||||||
|
context: PentestErrorContext = {}
|
||||||
|
) {
|
||||||
|
super(message);
|
||||||
|
this.type = type;
|
||||||
|
this.retryable = retryable;
|
||||||
|
this.context = context;
|
||||||
|
this.timestamp = new Date().toISOString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Centralized error logging function
|
||||||
|
export async function logError(
|
||||||
|
error: Error & { type?: PentestErrorType; retryable?: boolean; context?: PentestErrorContext },
|
||||||
|
contextMsg: string,
|
||||||
|
sourceDir: string | null = null
|
||||||
|
): Promise<LogEntry> {
|
||||||
|
const timestamp = new Date().toISOString();
|
||||||
|
const logEntry: LogEntry = {
|
||||||
|
timestamp,
|
||||||
|
context: contextMsg,
|
||||||
|
error: {
|
||||||
|
name: error.name || error.constructor.name,
|
||||||
|
message: error.message,
|
||||||
|
type: error.type || 'unknown',
|
||||||
|
retryable: error.retryable || false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
// Only add stack if it exists
|
||||||
|
if (error.stack) {
|
||||||
|
logEntry.error.stack = error.stack;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Console logging with color
|
||||||
|
const prefix = error.retryable ? '⚠️' : '❌';
|
||||||
|
const color = error.retryable ? chalk.yellow : chalk.red;
|
||||||
|
console.log(color(`${prefix} ${contextMsg}:`));
|
||||||
|
console.log(color(` ${error.message}`));
|
||||||
|
|
||||||
|
if (error.context && Object.keys(error.context).length > 0) {
|
||||||
|
console.log(chalk.gray(` Context: ${JSON.stringify(error.context)}`));
|
||||||
|
}
|
||||||
|
|
||||||
|
// File logging (if source directory available)
|
||||||
|
if (sourceDir) {
|
||||||
|
try {
|
||||||
|
const logPath = path.join(sourceDir, 'error.log');
|
||||||
|
await fs.appendFile(logPath, JSON.stringify(logEntry) + '\n');
|
||||||
|
} catch (logErr) {
|
||||||
|
const errMsg = logErr instanceof Error ? logErr.message : String(logErr);
|
||||||
|
console.log(chalk.gray(` (Failed to write error log: ${errMsg})`));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return logEntry;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle tool execution errors
|
||||||
|
export function handleToolError(
|
||||||
|
toolName: string,
|
||||||
|
error: Error & { code?: string }
|
||||||
|
): ToolErrorResult {
|
||||||
|
const isRetryable =
|
||||||
|
error.code === 'ECONNRESET' ||
|
||||||
|
error.code === 'ETIMEDOUT' ||
|
||||||
|
error.code === 'ENOTFOUND';
|
||||||
|
|
||||||
|
return {
|
||||||
|
tool: toolName,
|
||||||
|
output: `Error: ${error.message}`,
|
||||||
|
status: 'error',
|
||||||
|
duration: 0,
|
||||||
|
success: false,
|
||||||
|
error: new PentestError(
|
||||||
|
`${toolName} execution failed: ${error.message}`,
|
||||||
|
'tool',
|
||||||
|
isRetryable,
|
||||||
|
{ toolName, originalError: error.message, errorCode: error.code }
|
||||||
|
),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Converts a prompt-loading failure into a failed `PromptErrorResult`.
 *
 * The wrapped `PentestError` is always created with its retry flag set to
 * `false` and records the prompt name and the original message.
 *
 * @param promptName - Name of the prompt that could not be loaded.
 * @param error - The underlying error.
 * @returns A result with `success: false` and the wrapped error.
 */
export function handlePromptError(
  promptName: string,
  error: Error
): PromptErrorResult {
  const wrapped = new PentestError(
    `Failed to load prompt '${promptName}': ${error.message}`,
    'prompt',
    false,
    { promptName, originalError: error.message }
  );

  return { success: false, error: wrapped };
}
|
||||||
|
|
||||||
|
// Lower-case substrings that mark an error message as retryable.
const RETRYABLE_PATTERNS = [
  // Network and connection errors
  'network', 'connection', 'timeout', 'econnreset', 'enotfound', 'econnrefused',
  // Rate limiting
  'rate limit', '429', 'too many requests',
  // Server errors
  'server error', '5xx', 'internal server error', 'service unavailable', 'bad gateway',
  // Claude API errors
  'mcp server', 'model unavailable', 'service temporarily unavailable', 'api error', 'terminated',
  // Max turns
  'max turns', 'maximum turns',
];

// Lower-case substrings that veto a retry; these are consulted before RETRYABLE_PATTERNS.
const NON_RETRYABLE_PATTERNS = [
  'authentication',
  'invalid prompt',
  'out of memory',
  'permission denied',
  'session limit reached',
  'invalid api key',
];

/**
 * Decides whether an error is worth retrying, based on its message text.
 *
 * Matching is a case-insensitive substring search. A non-retryable pattern
 * always wins over a retryable one, and a message that matches neither list
 * is treated as non-retryable (conservative default).
 *
 * @param error - The error to classify.
 * @returns `true` only when a retryable pattern matches and no non-retryable pattern does.
 */
export function isRetryableError(error: Error): boolean {
  const haystack = error.message.toLowerCase();
  const containsAny = (needles: readonly string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  if (containsAny(NON_RETRYABLE_PATTERNS)) {
    return false;
  }
  return containsAny(RETRYABLE_PATTERNS);
}
|
||||||
|
|
||||||
|
/**
 * Computes how long to wait, in milliseconds, before the next retry.
 *
 * - Messages containing "rate limit" or "429" (case-insensitive) use a linear
 *   schedule: 30s plus 10s per attempt, capped at 2 minutes.
 * - Every other message uses exponential backoff: `2^attempt` seconds plus
 *   up to 1s of random jitter, capped at 30s.
 *
 * @param error - The error that triggered the retry; only its message is inspected.
 * @param attempt - Attempt counter used as the multiplier / exponent.
 * @returns Delay in milliseconds.
 */
export function getRetryDelay(error: Error, attempt: number): number {
  const text = error.message.toLowerCase();
  const rateLimited = text.includes('rate limit') || text.includes('429');

  if (rateLimited) {
    const linearDelay = 30000 + attempt * 10000;
    return Math.min(linearDelay, 120000);
  }

  // The jitter is added before the cap, so a capped delay is exactly 30s.
  const exponentialDelay = 2 ** attempt * 1000;
  const randomOffset = Math.random() * 1000;
  return Math.min(exponentialDelay + randomOffset, 30000);
}
|
||||||
|
|
||||||
|
/**
 * Classifies errors for Temporal workflow retry behavior.
 * Returns the error type and whether Temporal should retry.
 *
 * Used by activities to wrap errors in ApplicationFailure:
 * - Retryable errors: Temporal retries with configured backoff
 * - Non-retryable errors: Temporal fails immediately
 *
 * Classification is a case-insensitive substring search over the error
 * message (or `String(error)` for non-Error values). Rules are evaluated
 * top to bottom and the first match wins, so the ORDER of the checks below
 * is part of the contract. Anything that matches no rule is treated as a
 * transient, retryable failure.
 *
 * @param error - Any thrown value.
 * @returns The classification label and its retry decision.
 */
export function classifyErrorForTemporal(error: unknown): TemporalErrorClassification {
  const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
  const mentions = (...needles: string[]): boolean =>
    needles.some((needle) => message.includes(needle));

  // === BILLING ERRORS (Retryable with long backoff) ===
  // Anthropic returns billing as 400 invalid_request_error, so this must run
  // before the invalid-request rule.
  // Human can add credits OR wait for spending cap to reset (5-30 min backoff)
  if (
    mentions(
      'billing_error',
      'credit balance is too low',
      'insufficient credits',
      'usage is blocked due to insufficient credits',
      'please visit plans & billing',
      'please visit plans and billing',
      'usage limit reached',
      'quota exceeded',
      'daily rate limit',
      'limit will reset',
      // Claude Code spending cap patterns (returns short message instead of error)
      'spending cap',
      'spending limit',
      'cap reached',
      'budget exceeded',
      'billing limit reached'
    )
  ) {
    return { type: 'BillingError', retryable: true };
  }

  // === PERMANENT ERRORS (Non-retryable) ===

  // Authentication (401) - bad API key won't fix itself
  if (mentions('authentication', 'api key', '401', 'authentication_error')) {
    return { type: 'AuthenticationError', retryable: false };
  }

  // Permission (403) - access won't be granted
  if (mentions('permission', 'forbidden', '403')) {
    return { type: 'PermissionError', retryable: false };
  }

  // === OUTPUT VALIDATION ERRORS (Retryable) ===
  // Agent didn't produce expected deliverables - retry may succeed
  // IMPORTANT: Must come BEFORE generic 'validation' check below
  if (mentions('failed output validation', 'output validation failed')) {
    return { type: 'OutputValidationError', retryable: true };
  }

  // Invalid target URL - bad URL format won't fix itself
  // IMPORTANT: Must come BEFORE the generic 'malformed' / 'validation' check
  // below; otherwise 'malformed url' can never reach this rule.
  if (mentions('invalid url', 'invalid target', 'malformed url', 'invalid uri')) {
    return { type: 'InvalidTargetError', retryable: false };
  }

  // Invalid Request (400) - malformed request is permanent
  // Note: Checked AFTER billing, output validation and invalid target
  if (mentions('invalid_request_error', 'malformed', 'validation')) {
    return { type: 'InvalidRequestError', retryable: false };
  }

  // Request Too Large (413) - won't fit no matter how many retries
  if (mentions('request_too_large', 'too large', '413')) {
    return { type: 'RequestTooLargeError', retryable: false };
  }

  // Configuration errors - missing files need manual fix
  if (mentions('enoent', 'no such file', 'cli not installed')) {
    return { type: 'ConfigurationError', retryable: false };
  }

  // Execution limits - max turns/budget reached
  // ('budget exceeded' never reaches here; the billing rule claims it first.)
  if (mentions('max turns', 'budget', 'execution limit', 'error_max_turns', 'error_max_budget')) {
    return { type: 'ExecutionLimitError', retryable: false };
  }

  // === TRANSIENT ERRORS (Retryable) ===
  // Rate limits (429), server errors (5xx), network issues
  // Let Temporal retry with configured backoff
  return { type: 'TransientError', retryable: true };
}
|
||||||
@@ -1,52 +1,108 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { $, fs, path } from 'zx';
|
import { $, fs, path } from 'zx';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
import { Timer, timingResults } from '../utils/metrics.js';
|
import { Timer } from '../utils/metrics.js';
|
||||||
import { formatDuration } from '../audit/utils.js';
|
import { formatDuration } from '../utils/formatting.js';
|
||||||
import { handleToolError, PentestError } from '../error-handling.js';
|
import { handleToolError, PentestError } from '../error-handling.js';
|
||||||
import { AGENTS } from '../session-manager.js';
|
import { AGENTS } from '../session-manager.js';
|
||||||
import { runClaudePromptWithRetry } from '../ai/claude-executor.js';
|
import { runClaudePromptWithRetry } from '../ai/claude-executor.js';
|
||||||
import { loadPrompt } from '../prompts/prompt-manager.js';
|
import { loadPrompt } from '../prompts/prompt-manager.js';
|
||||||
|
import type { ToolAvailability } from '../tool-checker.js';
|
||||||
|
import type { DistributedConfig } from '../types/config.js';
|
||||||
|
|
||||||
// Pure function: Run terminal scanning tools
|
interface AgentResult {
|
||||||
async function runTerminalScan(tool, target, sourceDir = null) {
|
success: boolean;
|
||||||
|
duration: number;
|
||||||
|
cost?: number;
|
||||||
|
error?: string;
|
||||||
|
retryable?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
|
||||||
|
type ToolStatus = 'success' | 'skipped' | 'error';
|
||||||
|
|
||||||
|
interface TerminalScanResult {
|
||||||
|
tool: ToolName;
|
||||||
|
output: string;
|
||||||
|
status: ToolStatus;
|
||||||
|
duration: number;
|
||||||
|
success?: boolean;
|
||||||
|
error?: Error;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PromptVariables {
|
||||||
|
webUrl: string;
|
||||||
|
repoPath: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Discriminated union for Wave1 tool results - clearer than loose union types
|
||||||
|
type Wave1ToolResult =
|
||||||
|
| { kind: 'scan'; result: TerminalScanResult }
|
||||||
|
| { kind: 'skipped'; message: string }
|
||||||
|
| { kind: 'agent'; result: AgentResult };
|
||||||
|
|
||||||
|
interface Wave1Results {
|
||||||
|
nmap: Wave1ToolResult;
|
||||||
|
subfinder: Wave1ToolResult;
|
||||||
|
whatweb: Wave1ToolResult;
|
||||||
|
naabu?: Wave1ToolResult;
|
||||||
|
codeAnalysis: AgentResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Wave2Results {
|
||||||
|
schemathesis: TerminalScanResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PreReconResult {
|
||||||
|
duration: number;
|
||||||
|
report: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs external security tools (nmap, whatweb, etc). Schemathesis requires schemas from code analysis.
|
||||||
|
async function runTerminalScan(tool: ToolName, target: string, sourceDir: string | null = null): Promise<TerminalScanResult> {
|
||||||
const timer = new Timer(`command-${tool}`);
|
const timer = new Timer(`command-${tool}`);
|
||||||
try {
|
try {
|
||||||
let command, result;
|
let result;
|
||||||
switch (tool) {
|
switch (tool) {
|
||||||
case 'nmap':
|
case 'nmap': {
|
||||||
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
||||||
const nmapHostname = new URL(target).hostname;
|
const nmapHostname = new URL(target).hostname;
|
||||||
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`nmap -sV -sC ${nmapHostname}`;
|
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`nmap -sV -sC ${nmapHostname}`;
|
||||||
const duration = timer.stop();
|
const duration = timer.stop();
|
||||||
timingResults.commands[tool] = duration;
|
|
||||||
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(duration)}`));
|
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(duration)}`));
|
||||||
return { tool: 'nmap', output: result.stdout, status: 'success', duration };
|
return { tool: 'nmap', output: result.stdout, status: 'success', duration };
|
||||||
case 'subfinder':
|
}
|
||||||
|
case 'subfinder': {
|
||||||
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
||||||
const hostname = new URL(target).hostname;
|
const hostname = new URL(target).hostname;
|
||||||
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`subfinder -d ${hostname}`;
|
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`subfinder -d ${hostname}`;
|
||||||
const subfinderDuration = timer.stop();
|
const subfinderDuration = timer.stop();
|
||||||
timingResults.commands[tool] = subfinderDuration;
|
|
||||||
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(subfinderDuration)}`));
|
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(subfinderDuration)}`));
|
||||||
return { tool: 'subfinder', output: result.stdout, status: 'success', duration: subfinderDuration };
|
return { tool: 'subfinder', output: result.stdout, status: 'success', duration: subfinderDuration };
|
||||||
case 'whatweb':
|
}
|
||||||
|
case 'whatweb': {
|
||||||
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
||||||
command = `whatweb --open-timeout 30 --read-timeout 60 ${target}`;
|
const command = `whatweb --open-timeout 30 --read-timeout 60 ${target}`;
|
||||||
console.log(chalk.gray(` Command: ${command}`));
|
console.log(chalk.gray(` Command: ${command}`));
|
||||||
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`whatweb --open-timeout 30 --read-timeout 60 ${target}`;
|
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`whatweb --open-timeout 30 --read-timeout 60 ${target}`;
|
||||||
const whatwebDuration = timer.stop();
|
const whatwebDuration = timer.stop();
|
||||||
timingResults.commands[tool] = whatwebDuration;
|
|
||||||
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(whatwebDuration)}`));
|
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(whatwebDuration)}`));
|
||||||
return { tool: 'whatweb', output: result.stdout, status: 'success', duration: whatwebDuration };
|
return { tool: 'whatweb', output: result.stdout, status: 'success', duration: whatwebDuration };
|
||||||
case 'schemathesis':
|
}
|
||||||
// Only run if API schemas found
|
case 'schemathesis': {
|
||||||
|
// Schemathesis depends on code analysis output - skip if no schemas found
|
||||||
const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas');
|
const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas');
|
||||||
if (await fs.pathExists(schemasDir)) {
|
if (await fs.pathExists(schemasDir)) {
|
||||||
const schemaFiles = await fs.readdir(schemasDir);
|
const schemaFiles = await fs.readdir(schemasDir) as string[];
|
||||||
const apiSchemas = schemaFiles.filter(f => f.endsWith('.json') || f.endsWith('.yml') || f.endsWith('.yaml'));
|
const apiSchemas = schemaFiles.filter((f: string) => f.endsWith('.json') || f.endsWith('.yml') || f.endsWith('.yaml'));
|
||||||
if (apiSchemas.length > 0) {
|
if (apiSchemas.length > 0) {
|
||||||
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
|
||||||
let allResults = [];
|
const allResults: string[] = [];
|
||||||
|
|
||||||
// Run schemathesis on each schema file
|
// Run schemathesis on each schema file
|
||||||
for (const schemaFile of apiSchemas) {
|
for (const schemaFile of apiSchemas) {
|
||||||
@@ -55,12 +111,12 @@ async function runTerminalScan(tool, target, sourceDir = null) {
|
|||||||
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`schemathesis run ${schemaPath} -u ${target} --max-failures=5`;
|
result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`schemathesis run ${schemaPath} -u ${target} --max-failures=5`;
|
||||||
allResults.push(`Schema: ${schemaFile}\n${result.stdout}`);
|
allResults.push(`Schema: ${schemaFile}\n${result.stdout}`);
|
||||||
} catch (schemaError) {
|
} catch (schemaError) {
|
||||||
allResults.push(`Schema: ${schemaFile}\nError: ${schemaError.stdout || schemaError.message}`);
|
const err = schemaError as { stdout?: string; message?: string };
|
||||||
|
allResults.push(`Schema: ${schemaFile}\nError: ${err.stdout || err.message}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const schemaDuration = timer.stop();
|
const schemaDuration = timer.stop();
|
||||||
timingResults.commands[tool] = schemaDuration;
|
|
||||||
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(schemaDuration)}`));
|
console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(schemaDuration)}`));
|
||||||
return { tool: 'schemathesis', output: allResults.join('\n\n'), status: 'success', duration: schemaDuration };
|
return { tool: 'schemathesis', output: allResults.join('\n\n'), status: 'success', duration: schemaDuration };
|
||||||
} else {
|
} else {
|
||||||
@@ -71,22 +127,32 @@ async function runTerminalScan(tool, target, sourceDir = null) {
|
|||||||
console.log(chalk.gray(` ⏭️ ${tool} - schemas directory not found`));
|
console.log(chalk.gray(` ⏭️ ${tool} - schemas directory not found`));
|
||||||
return { tool: 'schemathesis', output: 'Schemas directory not found', status: 'skipped', duration: timer.stop() };
|
return { tool: 'schemathesis', output: 'Schemas directory not found', status: 'skipped', duration: timer.stop() };
|
||||||
}
|
}
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw new Error(`Unknown tool: ${tool}`);
|
throw new Error(`Unknown tool: ${tool}`);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const duration = timer.stop();
|
const duration = timer.stop();
|
||||||
timingResults.commands[tool] = duration;
|
|
||||||
console.log(chalk.red(` ❌ ${tool} failed in ${formatDuration(duration)}`));
|
console.log(chalk.red(` ❌ ${tool} failed in ${formatDuration(duration)}`));
|
||||||
return handleToolError(tool, error);
|
return handleToolError(tool, error as Error & { code?: string }) as TerminalScanResult;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wave 1: Initial footprinting + authentication
|
// Wave 1: Initial footprinting + authentication
|
||||||
async function runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTestingMode = false, sessionId = null) {
|
async function runPreReconWave1(
|
||||||
|
webUrl: string,
|
||||||
|
sourceDir: string,
|
||||||
|
variables: PromptVariables,
|
||||||
|
config: DistributedConfig | null,
|
||||||
|
pipelineTestingMode: boolean = false,
|
||||||
|
sessionId: string | null = null,
|
||||||
|
outputPath: string | null = null
|
||||||
|
): Promise<Wave1Results> {
|
||||||
console.log(chalk.blue(' → Launching Wave 1 operations in parallel...'));
|
console.log(chalk.blue(' → Launching Wave 1 operations in parallel...'));
|
||||||
|
|
||||||
const operations = [];
|
const operations: Promise<TerminalScanResult | AgentResult>[] = [];
|
||||||
|
|
||||||
|
const skippedResult = (message: string): Wave1ToolResult => ({ kind: 'skipped', message });
|
||||||
|
|
||||||
// Skip external commands in pipeline testing mode
|
// Skip external commands in pipeline testing mode
|
||||||
if (pipelineTestingMode) {
|
if (pipelineTestingMode) {
|
||||||
@@ -100,16 +166,15 @@ async function runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTe
|
|||||||
AGENTS['pre-recon'].displayName,
|
AGENTS['pre-recon'].displayName,
|
||||||
'pre-recon', // Agent name for snapshot creation
|
'pre-recon', // Agent name for snapshot creation
|
||||||
chalk.cyan,
|
chalk.cyan,
|
||||||
{ id: sessionId, webUrl } // Session metadata for audit logging (STANDARD: use 'id' field)
|
{ id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) } // Session metadata for audit logging (STANDARD: use 'id' field)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
const [codeAnalysis] = await Promise.all(operations);
|
const [codeAnalysis] = await Promise.all(operations);
|
||||||
return {
|
return {
|
||||||
nmap: 'Skipped (pipeline testing mode)',
|
nmap: skippedResult('Skipped (pipeline testing mode)'),
|
||||||
subfinder: 'Skipped (pipeline testing mode)',
|
subfinder: skippedResult('Skipped (pipeline testing mode)'),
|
||||||
whatweb: 'Skipped (pipeline testing mode)',
|
whatweb: skippedResult('Skipped (pipeline testing mode)'),
|
||||||
|
codeAnalysis: codeAnalysis as AgentResult
|
||||||
codeAnalysis
|
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
operations.push(
|
operations.push(
|
||||||
@@ -124,7 +189,7 @@ async function runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTe
|
|||||||
AGENTS['pre-recon'].displayName,
|
AGENTS['pre-recon'].displayName,
|
||||||
'pre-recon', // Agent name for snapshot creation
|
'pre-recon', // Agent name for snapshot creation
|
||||||
chalk.cyan,
|
chalk.cyan,
|
||||||
{ id: sessionId, webUrl } // Session metadata for audit logging (STANDARD: use 'id' field)
|
{ id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) } // Session metadata for audit logging (STANDARD: use 'id' field)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -132,13 +197,23 @@ async function runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTe
|
|||||||
// Check if authentication config is provided for login instructions injection
|
// Check if authentication config is provided for login instructions injection
|
||||||
console.log(chalk.gray(` → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`));
|
console.log(chalk.gray(` → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`));
|
||||||
|
|
||||||
const [nmap, subfinder, whatweb, naabu, codeAnalysis] = await Promise.all(operations);
|
const [nmap, subfinder, whatweb, codeAnalysis] = await Promise.all(operations);
|
||||||
|
|
||||||
return { nmap, subfinder, whatweb, naabu, codeAnalysis };
|
return {
|
||||||
|
nmap: { kind: 'scan', result: nmap as TerminalScanResult },
|
||||||
|
subfinder: { kind: 'scan', result: subfinder as TerminalScanResult },
|
||||||
|
whatweb: { kind: 'scan', result: whatweb as TerminalScanResult },
|
||||||
|
codeAnalysis: codeAnalysis as AgentResult
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wave 2: Additional scanning
|
// Wave 2: Additional scanning
|
||||||
async function runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode = false) {
|
async function runPreReconWave2(
|
||||||
|
webUrl: string,
|
||||||
|
sourceDir: string,
|
||||||
|
toolAvailability: ToolAvailability,
|
||||||
|
pipelineTestingMode: boolean = false
|
||||||
|
): Promise<Wave2Results> {
|
||||||
console.log(chalk.blue(' → Running Wave 2 additional scans in parallel...'));
|
console.log(chalk.blue(' → Running Wave 2 additional scans in parallel...'));
|
||||||
|
|
||||||
// Skip external commands in pipeline testing mode
|
// Skip external commands in pipeline testing mode
|
||||||
@@ -149,7 +224,7 @@ async function runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTes
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const operations = [];
|
const operations: Promise<TerminalScanResult>[] = [];
|
||||||
|
|
||||||
// Parallel additional scans (only run if tools are available)
|
// Parallel additional scans (only run if tools are available)
|
||||||
|
|
||||||
@@ -169,68 +244,82 @@ async function runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTes
|
|||||||
const results = await Promise.all(operations);
|
const results = await Promise.all(operations);
|
||||||
|
|
||||||
// Map results back to named properties
|
// Map results back to named properties
|
||||||
const response = {};
|
const response: Wave2Results = {
|
||||||
|
schemathesis: { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 }
|
||||||
|
};
|
||||||
let resultIndex = 0;
|
let resultIndex = 0;
|
||||||
|
|
||||||
if (toolAvailability.schemathesis) {
|
if (toolAvailability.schemathesis) {
|
||||||
response.schemathesis = results[resultIndex++];
|
response.schemathesis = results[resultIndex++]!;
|
||||||
} else {
|
} else {
|
||||||
console.log(chalk.gray(' ⏭️ schemathesis - tool not available'));
|
console.log(chalk.gray(' ⏭️ schemathesis - tool not available'));
|
||||||
response.schemathesis = { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pure function: Stitch together pre-recon outputs and save to file
|
// Extracts status and output from a Wave1 tool result
|
||||||
async function stitchPreReconOutputs(outputs, sourceDir) {
|
function extractResult(r: Wave1ToolResult | undefined): { status: string; output: string } {
|
||||||
const [nmap, subfinder, whatweb, naabu, codeAnalysis, ...additionalScans] = outputs;
|
if (!r) return { status: 'Skipped', output: 'No output' };
|
||||||
|
switch (r.kind) {
|
||||||
|
case 'scan':
|
||||||
|
return { status: r.result.status || 'Skipped', output: r.result.output || 'No output' };
|
||||||
|
case 'skipped':
|
||||||
|
return { status: 'Skipped', output: r.message };
|
||||||
|
case 'agent':
|
||||||
|
return { status: r.result.success ? 'success' : 'error', output: 'See agent output' };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combines tool outputs into single deliverable. Falls back to reference if file missing.
|
||||||
|
async function stitchPreReconOutputs(wave1: Wave1Results, additionalScans: TerminalScanResult[], sourceDir: string): Promise<string> {
|
||||||
// Try to read the code analysis deliverable file
|
// Try to read the code analysis deliverable file
|
||||||
let codeAnalysisContent = 'No analysis available';
|
let codeAnalysisContent = 'No analysis available';
|
||||||
try {
|
try {
|
||||||
const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
|
const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
|
||||||
codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8');
|
codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${error.message}`));
|
const err = error as Error;
|
||||||
// Fallback message if file doesn't exist
|
console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${err.message}`));
|
||||||
codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md';
|
codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Build additional scans section
|
// Build additional scans section
|
||||||
let additionalSection = '';
|
let additionalSection = '';
|
||||||
if (additionalScans && additionalScans.length > 0) {
|
if (additionalScans.length > 0) {
|
||||||
additionalSection = '\n## Authenticated Scans\n';
|
additionalSection = '\n## Authenticated Scans\n';
|
||||||
additionalScans.forEach(scan => {
|
for (const scan of additionalScans) {
|
||||||
if (scan && scan.tool) {
|
additionalSection += `
|
||||||
additionalSection += `
|
|
||||||
### ${scan.tool.toUpperCase()}
|
### ${scan.tool.toUpperCase()}
|
||||||
Status: ${scan.status}
|
Status: ${scan.status}
|
||||||
${scan.output}
|
${scan.output}
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const nmap = extractResult(wave1.nmap);
|
||||||
|
const subfinder = extractResult(wave1.subfinder);
|
||||||
|
const whatweb = extractResult(wave1.whatweb);
|
||||||
|
const naabu = extractResult(wave1.naabu);
|
||||||
|
|
||||||
const report = `
|
const report = `
|
||||||
# Pre-Reconnaissance Report
|
# Pre-Reconnaissance Report
|
||||||
|
|
||||||
## Port Discovery (naabu)
|
## Port Discovery (naabu)
|
||||||
Status: ${naabu?.status || 'Skipped'}
|
Status: ${naabu.status}
|
||||||
${naabu?.output || naabu || 'No output'}
|
${naabu.output}
|
||||||
|
|
||||||
## Network Scanning (nmap)
|
## Network Scanning (nmap)
|
||||||
Status: ${nmap?.status || 'Skipped'}
|
Status: ${nmap.status}
|
||||||
${nmap?.output || nmap || 'No output'}
|
${nmap.output}
|
||||||
|
|
||||||
## Subdomain Discovery (subfinder)
|
## Subdomain Discovery (subfinder)
|
||||||
Status: ${subfinder?.status || 'Skipped'}
|
Status: ${subfinder.status}
|
||||||
${subfinder?.output || subfinder || 'No output'}
|
${subfinder.output}
|
||||||
|
|
||||||
## Technology Detection (whatweb)
|
## Technology Detection (whatweb)
|
||||||
Status: ${whatweb?.status || 'Skipped'}
|
Status: ${whatweb.status}
|
||||||
${whatweb?.output || whatweb || 'No output'}
|
${whatweb.output}
|
||||||
## Code Analysis
|
## Code Analysis
|
||||||
${codeAnalysisContent}
|
${codeAnalysisContent}
|
||||||
${additionalSection}
|
${additionalSection}
|
||||||
@@ -246,11 +335,12 @@ Report generated at: ${new Date().toISOString()}
|
|||||||
// Write to file in the cloned repository
|
// Write to file in the cloned repository
|
||||||
await fs.writeFile(deliverablePath, report);
|
await fs.writeFile(deliverablePath, report);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
const err = error as Error;
|
||||||
throw new PentestError(
|
throw new PentestError(
|
||||||
`Failed to write pre-recon report: ${error.message}`,
|
`Failed to write pre-recon report: ${err.message}`,
|
||||||
'filesystem',
|
'filesystem',
|
||||||
false,
|
false,
|
||||||
{ sourceDir, originalError: error.message }
|
{ sourceDir, originalError: err.message }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -258,12 +348,21 @@ Report generated at: ${new Date().toISOString()}
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Main pre-recon phase execution function
|
// Main pre-recon phase execution function
|
||||||
export async function executePreReconPhase(webUrl, sourceDir, variables, config, toolAvailability, pipelineTestingMode, sessionId = null) {
|
export async function executePreReconPhase(
|
||||||
|
webUrl: string,
|
||||||
|
sourceDir: string,
|
||||||
|
variables: PromptVariables,
|
||||||
|
config: DistributedConfig | null,
|
||||||
|
toolAvailability: ToolAvailability,
|
||||||
|
pipelineTestingMode: boolean,
|
||||||
|
sessionId: string | null = null,
|
||||||
|
outputPath: string | null = null
|
||||||
|
): Promise<PreReconResult> {
|
||||||
console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE'));
|
console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE'));
|
||||||
const timer = new Timer('phase-1-pre-recon');
|
const timer = new Timer('phase-1-pre-recon');
|
||||||
|
|
||||||
console.log(chalk.yellow('Wave 1: Initial footprinting...'));
|
console.log(chalk.yellow('Wave 1: Initial footprinting...'));
|
||||||
const wave1Results = await runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTestingMode, sessionId);
|
const wave1Results = await runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTestingMode, sessionId, outputPath);
|
||||||
console.log(chalk.green(' ✅ Wave 1 operations completed'));
|
console.log(chalk.green(' ✅ Wave 1 operations completed'));
|
||||||
|
|
||||||
console.log(chalk.yellow('Wave 2: Additional scanning...'));
|
console.log(chalk.yellow('Wave 2: Additional scanning...'));
|
||||||
@@ -271,20 +370,12 @@ export async function executePreReconPhase(webUrl, sourceDir, variables, config,
|
|||||||
console.log(chalk.green(' ✅ Wave 2 operations completed'));
|
console.log(chalk.green(' ✅ Wave 2 operations completed'));
|
||||||
|
|
||||||
console.log(chalk.blue('📝 Stitching pre-recon outputs...'));
|
console.log(chalk.blue('📝 Stitching pre-recon outputs...'));
|
||||||
// Combine wave 1 and wave 2 results for stitching
|
const additionalScans = wave2Results.schemathesis ? [wave2Results.schemathesis] : [];
|
||||||
const allResults = [
|
const preReconReport = await stitchPreReconOutputs(wave1Results, additionalScans, sourceDir);
|
||||||
wave1Results.nmap,
|
|
||||||
wave1Results.subfinder,
|
|
||||||
wave1Results.whatweb,
|
|
||||||
wave1Results.naabu,
|
|
||||||
wave1Results.codeAnalysis,
|
|
||||||
...(wave2Results.schemathesis ? [wave2Results.schemathesis] : [])
|
|
||||||
];
|
|
||||||
const preReconReport = await stitchPreReconOutputs(allResults, sourceDir);
|
|
||||||
const duration = timer.stop();
|
const duration = timer.stop();
|
||||||
|
|
||||||
console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(duration)}`));
|
console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(duration)}`));
|
||||||
console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`));
|
console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`));
|
||||||
|
|
||||||
return { duration, report: preReconReport };
|
return { duration, report: preReconReport };
|
||||||
}
|
}
|
||||||
@@ -1,10 +1,22 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { fs, path } from 'zx';
|
import { fs, path } from 'zx';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
import { PentestError } from '../error-handling.js';
|
import { PentestError } from '../error-handling.js';
|
||||||
|
|
||||||
|
/**
 * Describes one specialist deliverable that may be folded into the final report.
 */
interface DeliverableFile {
  /** Human-readable label for the deliverable (e.g. 'Injection', 'XSS'). */
  name: string;
  /** File name of the deliverable, resolved under `<sourceDir>/deliverables`. */
  path: string;
  /** When true, a failure to read this file is rethrown instead of being logged as a warning. */
  required: boolean;
}
|
||||||
|
|
||||||
// Pure function: Assemble final report from specialist deliverables
|
// Pure function: Assemble final report from specialist deliverables
|
||||||
export async function assembleFinalReport(sourceDir) {
|
export async function assembleFinalReport(sourceDir: string): Promise<string> {
|
||||||
const deliverableFiles = [
|
const deliverableFiles: DeliverableFile[] = [
|
||||||
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
|
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
|
||||||
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
|
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
|
||||||
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
|
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
|
||||||
@@ -12,7 +24,7 @@ export async function assembleFinalReport(sourceDir) {
|
|||||||
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }
|
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }
|
||||||
];
|
];
|
||||||
|
|
||||||
const sections = [];
|
const sections: string[] = [];
|
||||||
|
|
||||||
for (const file of deliverableFiles) {
|
for (const file of deliverableFiles) {
|
||||||
const filePath = path.join(sourceDir, 'deliverables', file.path);
|
const filePath = path.join(sourceDir, 'deliverables', file.path);
|
||||||
@@ -30,24 +42,29 @@ export async function assembleFinalReport(sourceDir) {
|
|||||||
if (file.required) {
|
if (file.required) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${error.message}`));
|
const err = error as Error;
|
||||||
|
console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${err.message}`));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const finalContent = sections.join('\n\n');
|
const finalContent = sections.join('\n\n');
|
||||||
const finalReportPath = path.join(sourceDir, 'deliverables', 'comprehensive_security_assessment_report.md');
|
const deliverablesDir = path.join(sourceDir, 'deliverables');
|
||||||
|
const finalReportPath = path.join(deliverablesDir, 'comprehensive_security_assessment_report.md');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Ensure deliverables directory exists
|
||||||
|
await fs.ensureDir(deliverablesDir);
|
||||||
await fs.writeFile(finalReportPath, finalContent);
|
await fs.writeFile(finalReportPath, finalContent);
|
||||||
console.log(chalk.green(`✅ Final report assembled at ${finalReportPath}`));
|
console.log(chalk.green(`✅ Final report assembled at ${finalReportPath}`));
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
const err = error as Error;
|
||||||
throw new PentestError(
|
throw new PentestError(
|
||||||
`Failed to write final report: ${error.message}`,
|
`Failed to write final report: ${err.message}`,
|
||||||
'filesystem',
|
'filesystem',
|
||||||
false,
|
false,
|
||||||
{ finalReportPath, originalError: error.message }
|
{ finalReportPath, originalError: err.message }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return finalContent;
|
return finalContent;
|
||||||
}
|
}
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
import chalk from 'chalk';
|
|
||||||
|
|
||||||
/**
 * Terminal spinner that redraws a single line on stdout while work is in progress.
 *
 * Call `start()` to begin animating, `stop()` to erase the spinner line, or
 * `finish()` to stop and print a success line.
 */
export class ProgressIndicator {
  /**
   * @param message Text shown to the right of the spinner glyph.
   */
  constructor(message = 'Working...') {
    this.message = message;
    this.frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
    this.frameIndex = 0;
    this.interval = null;
    this.isRunning = false;
  }

  /** Begin animating; does nothing if the spinner is already running. */
  start() {
    if (this.isRunning) {
      return;
    }

    this.isRunning = true;
    this.frameIndex = 0;

    // Redraw every 100 ms: carriage-return to column 0, then glyph + message.
    const drawNextFrame = () => {
      const glyph = chalk.cyan(this.frames[this.frameIndex]);
      const label = chalk.dim(this.message);
      process.stdout.write(`\r${glyph} ${label}`);
      this.frameIndex = (this.frameIndex + 1) % this.frames.length;
    };
    this.interval = setInterval(drawNextFrame, 100);
  }

  /** Stop animating and blank out the spinner line; does nothing if not running. */
  stop() {
    if (!this.isRunning) {
      return;
    }

    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }

    // Overwrite the line with spaces (message length plus padding), then return to column 0.
    const blanks = ' '.repeat(this.message.length + 5);
    process.stdout.write(`\r${blanks}\r`);
    this.isRunning = false;
  }

  /**
   * Stop the spinner and print a green check-marked completion line.
   * @param successMessage Text printed after the check mark.
   */
  finish(successMessage = 'Complete') {
    this.stop();
    console.log(chalk.green(`✓ ${successMessage}`));
  }
}
|
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import chalk from 'chalk';
|
||||||
|
|
||||||
|
/**
 * Terminal spinner that redraws a single line on stdout while work is in progress.
 *
 * `start()` begins the animation, `stop()` erases the spinner line, and
 * `finish()` stops the spinner and prints a success line.
 */
export class ProgressIndicator {
  private message: string;
  private frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
  private frameIndex = 0;
  private interval: ReturnType<typeof setInterval> | null = null;
  private isRunning = false;

  /**
   * @param message Text shown to the right of the spinner glyph.
   */
  constructor(message: string = 'Working...') {
    this.message = message;
  }

  /** Begin animating; does nothing if the spinner is already running. */
  start(): void {
    if (this.isRunning) {
      return;
    }

    this.isRunning = true;
    this.frameIndex = 0;

    // Redraw every 100 ms: carriage-return to column 0, then glyph + message.
    const drawNextFrame = (): void => {
      const glyph = chalk.cyan(this.frames[this.frameIndex]);
      const label = chalk.dim(this.message);
      process.stdout.write(`\r${glyph} ${label}`);
      this.frameIndex = (this.frameIndex + 1) % this.frames.length;
    };
    this.interval = setInterval(drawNextFrame, 100);
  }

  /** Stop animating and blank out the spinner line; does nothing if not running. */
  stop(): void {
    if (!this.isRunning) {
      return;
    }

    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }

    // Overwrite the line with spaces (message length plus padding), then return to column 0.
    const blanks = ' '.repeat(this.message.length + 5);
    process.stdout.write(`\r${blanks}\r`);
    this.isRunning = false;
  }

  /**
   * Stop the spinner and print a green check-marked completion line.
   * @param successMessage Text printed after the check mark.
   */
  finish(successMessage: string = 'Complete'): void {
    this.stop();
    console.log(chalk.green(`✓ ${successMessage}`));
  }
}
|
||||||
@@ -1,10 +1,28 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { fs, path } from 'zx';
|
import { fs, path } from 'zx';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
import { PentestError, handlePromptError } from '../error-handling.js';
|
import { PentestError, handlePromptError } from '../error-handling.js';
|
||||||
import { MCP_AGENT_MAPPING } from '../constants.js';
|
import { MCP_AGENT_MAPPING } from '../constants.js';
|
||||||
|
import type { Authentication, DistributedConfig } from '../types/config.js';
|
||||||
|
|
||||||
|
/**
 * Values substituted into a prompt template.
 */
interface PromptVariables {
  /** URL of the web application under test. */
  webUrl: string;
  /** Path to the repository being analysed. */
  repoPath: string;
  /** MCP server assigned to the agent; filled in by `loadPrompt` from `MCP_AGENT_MAPPING`. */
  MCP_SERVER?: string;
}
|
||||||
|
|
||||||
|
/**
 * One resolved `@include(...)` directive produced by `processIncludes`.
 */
interface IncludeReplacement {
  /** The full directive text as it appears in the template (the regex match). */
  placeholder: string;
  /** Text that replaces the placeholder — presumably the included file's contents; confirm in `processIncludes`. */
  content: string;
}
|
||||||
|
|
||||||
// Pure function: Build complete login instructions from config
|
// Pure function: Build complete login instructions from config
|
||||||
async function buildLoginInstructions(authentication) {
|
async function buildLoginInstructions(authentication: Authentication): Promise<string> {
|
||||||
try {
|
try {
|
||||||
// Load the login instructions template
|
// Load the login instructions template
|
||||||
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
|
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
|
||||||
@@ -21,10 +39,10 @@ async function buildLoginInstructions(authentication) {
|
|||||||
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
|
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
|
||||||
|
|
||||||
// Helper function to extract sections based on markers
|
// Helper function to extract sections based on markers
|
||||||
const getSection = (content, sectionName) => {
|
const getSection = (content: string, sectionName: string): string => {
|
||||||
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
|
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
|
||||||
const match = regex.exec(content);
|
const match = regex.exec(content);
|
||||||
return match ? match[1].trim() : '';
|
return match ? match[1]!.trim() : '';
|
||||||
};
|
};
|
||||||
|
|
||||||
// Extract sections based on login type
|
// Extract sections based on login type
|
||||||
@@ -33,7 +51,7 @@ async function buildLoginInstructions(authentication) {
|
|||||||
|
|
||||||
// Build instructions with only relevant sections
|
// Build instructions with only relevant sections
|
||||||
const commonSection = getSection(fullTemplate, 'COMMON');
|
const commonSection = getSection(fullTemplate, 'COMMON');
|
||||||
const authSection = getSection(fullTemplate, loginType); // FORM or SSO
|
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
|
||||||
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
|
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
|
||||||
|
|
||||||
// Fallback to full template if markers are missing (backward compatibility)
|
// Fallback to full template if markers are missing (backward compatibility)
|
||||||
@@ -48,7 +66,7 @@ async function buildLoginInstructions(authentication) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Replace the user instructions placeholder with the login flow from config
|
// Replace the user instructions placeholder with the login flow from config
|
||||||
let userInstructions = authentication.login_flow.join('\n');
|
let userInstructions = (authentication.login_flow ?? []).join('\n');
|
||||||
|
|
||||||
// Replace credential placeholders within the user instructions
|
// Replace credential placeholders within the user instructions
|
||||||
if (authentication.credentials) {
|
if (authentication.credentials) {
|
||||||
@@ -75,22 +93,23 @@ async function buildLoginInstructions(authentication) {
|
|||||||
if (error instanceof PentestError) {
|
if (error instanceof PentestError) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
throw new PentestError(
|
throw new PentestError(
|
||||||
`Failed to build login instructions: ${error.message}`,
|
`Failed to build login instructions: ${errMsg}`,
|
||||||
'config',
|
'config',
|
||||||
false,
|
false,
|
||||||
{ authentication, originalError: error.message }
|
{ authentication, originalError: errMsg }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pure function: Process @include() directives
|
// Pure function: Process @include() directives
|
||||||
async function processIncludes(content, baseDir) {
|
async function processIncludes(content: string, baseDir: string): Promise<string> {
|
||||||
const includeRegex = /@include\(([^)]+)\)/g;
|
const includeRegex = /@include\(([^)]+)\)/g;
|
||||||
// Use a Promise.all to handle all includes concurrently
|
// Use a Promise.all to handle all includes concurrently
|
||||||
const replacements = await Promise.all(
|
const replacements: IncludeReplacement[] = await Promise.all(
|
||||||
Array.from(content.matchAll(includeRegex)).map(async (match) => {
|
Array.from(content.matchAll(includeRegex)).map(async (match) => {
|
||||||
const includePath = path.join(baseDir, match[1]);
|
const includePath = path.join(baseDir, match[1]!);
|
||||||
const sharedContent = await fs.readFile(includePath, 'utf8');
|
const sharedContent = await fs.readFile(includePath, 'utf8');
|
||||||
return {
|
return {
|
||||||
placeholder: match[0],
|
placeholder: match[0],
|
||||||
@@ -106,7 +125,11 @@ async function processIncludes(content, baseDir) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pure function: Variable interpolation
|
// Pure function: Variable interpolation
|
||||||
async function interpolateVariables(template, variables, config = null) {
|
async function interpolateVariables(
|
||||||
|
template: string,
|
||||||
|
variables: PromptVariables,
|
||||||
|
config: DistributedConfig | null = null
|
||||||
|
): Promise<string> {
|
||||||
try {
|
try {
|
||||||
if (!template || typeof template !== 'string') {
|
if (!template || typeof template !== 'string') {
|
||||||
throw new PentestError(
|
throw new PentestError(
|
||||||
@@ -141,8 +164,8 @@ async function interpolateVariables(template, variables, config = null) {
|
|||||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||||
} else {
|
} else {
|
||||||
const avoidRules = hasAvoidRules ? config.avoid.map(r => `- ${r.description}`).join('\n') : 'None';
|
const avoidRules = hasAvoidRules ? config.avoid!.map(r => `- ${r.description}`).join('\n') : 'None';
|
||||||
const focusRules = hasFocusRules ? config.focus.map(r => `- ${r.description}`).join('\n') : 'None';
|
const focusRules = hasFocusRules ? config.focus!.map(r => `- ${r.description}`).join('\n') : 'None';
|
||||||
|
|
||||||
result = result
|
result = result
|
||||||
.replace(/{{RULES_AVOID}}/g, avoidRules)
|
.replace(/{{RULES_AVOID}}/g, avoidRules)
|
||||||
@@ -174,17 +197,23 @@ async function interpolateVariables(template, variables, config = null) {
|
|||||||
if (error instanceof PentestError) {
|
if (error instanceof PentestError) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
throw new PentestError(
|
throw new PentestError(
|
||||||
`Variable interpolation failed: ${error.message}`,
|
`Variable interpolation failed: ${errMsg}`,
|
||||||
'prompt',
|
'prompt',
|
||||||
false,
|
false,
|
||||||
{ originalError: error.message }
|
{ originalError: errMsg }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pure function: Load and interpolate prompt template
|
// Pure function: Load and interpolate prompt template
|
||||||
export async function loadPrompt(promptName, variables, config = null, pipelineTestingMode = false) {
|
export async function loadPrompt(
|
||||||
|
promptName: string,
|
||||||
|
variables: PromptVariables,
|
||||||
|
config: DistributedConfig | null = null,
|
||||||
|
pipelineTestingMode: boolean = false
|
||||||
|
): Promise<string> {
|
||||||
try {
|
try {
|
||||||
// Use pipeline testing prompts if pipeline testing mode is enabled
|
// Use pipeline testing prompts if pipeline testing mode is enabled
|
||||||
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
|
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
|
||||||
@@ -207,11 +236,12 @@ export async function loadPrompt(promptName, variables, config = null, pipelineT
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add MCP server assignment to variables
|
// Add MCP server assignment to variables
|
||||||
const enhancedVariables = { ...variables };
|
const enhancedVariables: PromptVariables = { ...variables };
|
||||||
|
|
||||||
// Assign MCP server based on prompt name (agent name)
|
// Assign MCP server based on prompt name (agent name)
|
||||||
if (MCP_AGENT_MAPPING[promptName]) {
|
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
|
||||||
enhancedVariables.MCP_SERVER = MCP_AGENT_MAPPING[promptName];
|
if (mcpServer) {
|
||||||
|
enhancedVariables.MCP_SERVER = mcpServer;
|
||||||
console.log(chalk.gray(` 🎭 Assigned ${promptName} → ${enhancedVariables.MCP_SERVER}`));
|
console.log(chalk.gray(` 🎭 Assigned ${promptName} → ${enhancedVariables.MCP_SERVER}`));
|
||||||
} else {
|
} else {
|
||||||
// Fallback for unknown agents
|
// Fallback for unknown agents
|
||||||
@@ -229,7 +259,7 @@ export async function loadPrompt(promptName, variables, config = null, pipelineT
|
|||||||
if (error instanceof PentestError) {
|
if (error instanceof PentestError) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
const promptError = handlePromptError(promptName, error);
|
const promptError = handlePromptError(promptName, error as Error);
|
||||||
throw promptError.error;
|
throw promptError.error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,223 +0,0 @@
|
|||||||
import { fs, path } from 'zx';
|
|
||||||
import { PentestError } from './error-handling.js';
|
|
||||||
|
|
||||||
// Immutable lookup table: vulnerability type -> names of the analysis deliverable
// and exploitation queue files. Both the table and each entry are frozen.
const VULN_TYPE_CONFIG = (() => {
  const filesFor = (deliverable, queue) => Object.freeze({ deliverable, queue });

  return Object.freeze({
    injection: filesFor('injection_analysis_deliverable.md', 'injection_exploitation_queue.json'),
    xss: filesFor('xss_analysis_deliverable.md', 'xss_exploitation_queue.json'),
    auth: filesFor('auth_analysis_deliverable.md', 'auth_exploitation_queue.json'),
    ssrf: filesFor('ssrf_analysis_deliverable.md', 'ssrf_exploitation_queue.json'),
    authz: filesFor('authz_analysis_deliverable.md', 'authz_exploitation_queue.json')
  });
})();
|
|
||||||
|
|
||||||
/**
 * Compose functions left-to-right into one asynchronous pipeline.
 *
 * Each stage receives the resolved result of the previous stage, so stages may be
 * synchronous or return promises. The composed function always returns a Promise,
 * including when no stages are supplied (the seed value is simply resolved), and a
 * throw or rejection in any stage rejects the returned Promise.
 *
 * @param fns Stages to run in order.
 * @returns A function taking the seed value and returning a Promise of the final result.
 */
const pipe = (...fns) => (x) =>
  fns.reduce((pending, stage) => pending.then(stage), Promise.resolve(x));
|
|
||||||
|
|
||||||
/**
 * Build an immutable validation-rule record.
 *
 * @param predicate Function evaluated against the file-existence state.
 * @param errorMessage Message reported when the rule fails.
 * @param retryable Whether a failure of this rule may be retried (defaults to true).
 * @returns A frozen `{ predicate, errorMessage, retryable }` object.
 */
const createValidationRule = (predicate, errorMessage, retryable = true) => {
  const rule = { predicate, errorMessage, retryable };
  return Object.freeze(rule);
};
|
|
||||||
|
|
||||||
// File-existence rules (following QUEUE_VALIDATION_FLOW.md).
// A rule FAILS when its predicate returns a falsy value; rules are checked in order
// and the first failing rule supplies the error message.
const fileExistenceRules = Object.freeze([
  // Rule 1: fails when neither the deliverable nor the queue exists
  createValidationRule(
    (existence) => existence.deliverableExists || existence.queueExists,
    'Analysis failed: Neither deliverable nor queue file exists. Analysis agent must create both files.'
  ),
  // Rule 2: fails when the deliverable exists but the queue does not
  createValidationRule(
    (existence) => existence.queueExists || !existence.deliverableExists,
    'Analysis incomplete: Deliverable exists but queue file missing. Analysis agent must create both files.'
  ),
  // Rule 3: fails when the queue exists but the deliverable does not
  createValidationRule(
    (existence) => existence.deliverableExists || !existence.queueExists,
    'Analysis incomplete: Queue exists but deliverable file missing. Analysis agent must create both files.'
  )
]);
|
|
||||||
|
|
||||||
/**
 * Resolve the deliverable and queue file paths for a vulnerability type.
 *
 * The type is looked up as an OWN property of VULN_TYPE_CONFIG, so inherited keys
 * such as 'constructor' or 'toString' are treated as unknown types rather than
 * slipping through and failing later inside path.join.
 *
 * @param vulnType Vulnerability type key (e.g. 'injection', 'xss').
 * @param sourceDir Directory that contains the `deliverables` folder.
 * @returns A frozen `{ vulnType, deliverable, queue, sourceDir }` object, or
 *          `{ error }` holding a non-retryable PentestError when the type is unknown.
 */
const createPaths = (vulnType, sourceDir) => {
  const isKnownType = Object.prototype.hasOwnProperty.call(VULN_TYPE_CONFIG, vulnType);
  const config = isKnownType ? VULN_TYPE_CONFIG[vulnType] : undefined;

  if (!config) {
    return {
      error: new PentestError(
        `Unknown vulnerability type: ${vulnType}`,
        'validation',
        false,
        { vulnType }
      )
    };
  }

  return Object.freeze({
    vulnType,
    deliverable: path.join(sourceDir, 'deliverables', config.deliverable),
    queue: path.join(sourceDir, 'deliverables', config.queue),
    sourceDir
  });
};
|
|
||||||
|
|
||||||
/**
 * Check on disk whether the deliverable and queue files exist.
 *
 * An input that already carries an `error` is passed through untouched so the
 * failure can travel down the pipeline. Otherwise both paths are probed
 * concurrently and the result is attached as a frozen `existence` record.
 *
 * @param paths Output of createPaths.
 * @returns The input (on error) or a frozen copy of it with `existence` added.
 */
const checkFileExistence = async (paths) => {
  if (paths.error) {
    return paths;
  }

  const probes = [paths.deliverable, paths.queue].map((filePath) => fs.pathExists(filePath));
  const [deliverableExists, queueExists] = await Promise.all(probes);
  const existence = Object.freeze({ deliverableExists, queueExists });

  return Object.freeze({ ...paths, existence });
};
|
|
||||||
|
|
||||||
/**
 * Apply the file-existence rules to the result of checkFileExistence.
 *
 * An input that already carries an `error` is passed through untouched. Otherwise
 * the first rule whose predicate is not satisfied produces a PentestError
 * (category 'validation', retryable per the rule) that is attached as `error`.
 *
 * @param pathsWithExistence Paths plus their `existence` record.
 * @returns The input unchanged when every rule passes, or a copy with `error` set.
 */
const validateExistenceRules = (pathsWithExistence) => {
  if (pathsWithExistence.error) {
    return pathsWithExistence;
  }

  const { existence, vulnType } = pathsWithExistence;
  const firstFailure = fileExistenceRules.find((rule) => !rule.predicate(existence));

  if (!firstFailure) {
    return pathsWithExistence;
  }

  const error = new PentestError(
    `${firstFailure.errorMessage} (${vulnType})`,
    'validation',
    firstFailure.retryable,
    {
      vulnType,
      deliverablePath: pathsWithExistence.deliverable,
      queuePath: pathsWithExistence.queue,
      existence
    }
  );

  return { ...pathsWithExistence, error };
};
|
|
||||||
|
|
||||||
/**
 * Parse queue file content and check that it has the expected shape.
 *
 * The content is valid only when it is JSON for an object whose `vulnerabilities`
 * property is an array. `error` is set only when JSON.parse itself fails;
 * well-formed JSON with the wrong shape (including `null` or a primitive) yields
 * `valid: false` with `error: null`, so callers can tell "not JSON" apart from
 * "JSON with the wrong structure".
 *
 * @param content Raw text of the queue file.
 * @returns A frozen `{ valid, data, error }` record: `valid` is always a boolean,
 *          `data` is the parsed value (null on parse failure), and `error` is the
 *          parse error message or null.
 */
const validateQueueStructure = (content) => {
  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch (parseError) {
    return Object.freeze({
      valid: false,
      data: null,
      error: parseError instanceof Error ? parseError.message : String(parseError)
    });
  }

  // Guard against null/primitives before reading a property off the parsed value.
  const valid =
    parsed !== null &&
    typeof parsed === 'object' &&
    Array.isArray(parsed.vulnerabilities);

  return Object.freeze({
    valid,
    data: parsed,
    error: null
  });
};
|
|
||||||
|
|
||||||
/**
 * Read the queue file from disk and validate its structure.
 *
 * An input that already carries an `error` is passed through untouched. On success
 * the parsed queue is attached as `queueData` on a frozen copy. When the queue is
 * invalid (Rule 6: both files exist, queue invalid) a retryable 'validation'
 * PentestError is attached; when reading or processing throws, a non-retryable
 * 'filesystem' PentestError is attached instead.
 *
 * @param pathsWithExistence Paths that passed the existence rules.
 * @returns The input (on prior error), a frozen copy with `queueData`, or a copy with `error`.
 */
const validateQueueContent = async (pathsWithExistence) => {
  if (pathsWithExistence.error) {
    return pathsWithExistence;
  }

  try {
    const rawQueue = await fs.readFile(pathsWithExistence.queue, 'utf8');
    const structure = validateQueueStructure(rawQueue);

    if (structure.valid) {
      return Object.freeze({
        ...pathsWithExistence,
        queueData: structure.data
      });
    }

    // Rule 6: Both exist, queue invalid
    const reason = structure.error
      ? `Queue validation failed for ${pathsWithExistence.vulnType}: Invalid JSON structure. Analysis agent must fix queue format.`
      : `Queue validation failed for ${pathsWithExistence.vulnType}: Missing or invalid 'vulnerabilities' array. Analysis agent must fix queue structure.`;

    const details = {
      vulnType: pathsWithExistence.vulnType,
      queuePath: pathsWithExistence.queue,
      originalError: structure.error,
      queueStructure: structure.data ? Object.keys(structure.data) : []
    };

    return {
      ...pathsWithExistence,
      error: new PentestError(reason, 'validation', true, details) // retryable
    };
  } catch (readError) {
    const details = {
      vulnType: pathsWithExistence.vulnType,
      queuePath: pathsWithExistence.queue,
      originalError: readError.message
    };

    return {
      ...pathsWithExistence,
      error: new PentestError(
        `Failed to read queue file for ${pathsWithExistence.vulnType}: ${readError.message}`,
        'filesystem',
        false,
        details
      )
    };
  }
};
|
|
||||||
|
|
||||||
/**
 * Turn validated queue data into the final exploitation decision.
 *
 * Throws the accumulated pipeline error if one is present. Otherwise:
 *   Rule 4 - both files exist, queue valid and populated -> shouldExploit is true
 *   Rule 5 - both files exist, queue valid but empty     -> shouldExploit is false
 *
 * @param validatedData Output of the validation pipeline.
 * @returns A frozen `{ shouldExploit, shouldRetry, vulnerabilityCount, vulnType }` record.
 * @throws The `error` carried by `validatedData`, when set.
 */
const determineExploitationDecision = (validatedData) => {
  const pipelineError = validatedData.error;
  if (pipelineError) {
    throw pipelineError;
  }

  const vulnerabilityCount = validatedData.queueData.vulnerabilities.length;

  return Object.freeze({
    shouldExploit: vulnerabilityCount > 0,
    shouldRetry: false,
    vulnerabilityCount,
    vulnType: validatedData.vulnType
  });
};
|
|
||||||
|
|
||||||
/**
 * Run the full queue/deliverable validation pipeline for one vulnerability type.
 *
 * Stages, in order: resolve paths, check file existence, apply existence rules,
 * read and validate the queue, then derive the exploitation decision.
 *
 * @param vulnType Vulnerability type key.
 * @param sourceDir Directory that contains the `deliverables` folder.
 * @returns A promise of the exploitation decision; it rejects with the pipeline's
 *          error when any stage recorded one.
 */
export const validateQueueAndDeliverable = async (vulnType, sourceDir) => {
  const runValidation = pipe(
    () => createPaths(vulnType, sourceDir),
    checkFileExistence,
    validateExistenceRules,
    validateQueueContent,
    determineExploitationDecision
  );

  return runValidation();
};
|
|
||||||
|
|
||||||
/**
 * Non-throwing wrapper around validateQueueAndDeliverable.
 *
 * @param vulnType Vulnerability type key.
 * @param sourceDir Directory that contains the `deliverables` folder.
 * @returns `{ success: true, data }` when validation succeeds, or
 *          `{ success: false, error }` with whatever the pipeline threw.
 */
export const safeValidateQueueAndDeliverable = async (vulnType, sourceDir) => {
  let outcome;

  try {
    const data = await validateQueueAndDeliverable(vulnType, sourceDir);
    outcome = { success: true, data };
  } catch (error) {
    outcome = { success: false, error };
  }

  return outcome;
};
|
|
||||||
@@ -0,0 +1,325 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import { fs, path } from 'zx';
|
||||||
|
import { PentestError } from './error-handling.js';
|
||||||
|
import { asyncPipe } from './utils/functional.js';
|
||||||
|
|
||||||
|
/** Vulnerability categories that have an analysis deliverable and an exploitation queue. */
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';

/** File names produced by the analysis phase for one vulnerability type. */
interface VulnTypeConfigItem {
  /** Name of the analysis deliverable (Markdown) file. */
  deliverable: string;
  /** Name of the exploitation queue (JSON) file. */
  queue: string;
}

/** Lookup table from every vulnerability type to its file names. */
type VulnTypeConfig = Record<VulnType, VulnTypeConfigItem>;

/** Either a fixed error message or a function that derives one from the existence state. */
type ErrorMessageResolver = string | ((existence: FileExistence) => string);

/** A single check applied to the file-existence state. */
interface ValidationRule {
  /** Test evaluated against the existence state. */
  predicate: (existence: FileExistence) => boolean;
  /** Message (or message factory) associated with the rule. */
  errorMessage: ErrorMessageResolver;
  /** Whether a failure of this rule is flagged as retryable. */
  retryable: boolean;
}

/** Which of the two analysis output files are present. */
interface FileExistence {
  deliverableExists: boolean;
  queueExists: boolean;
}

/** Resolved file locations for one vulnerability type. */
interface PathsBase {
  vulnType: VulnType;
  /** Path to the analysis deliverable file. */
  deliverable: string;
  /** Path to the exploitation queue file. */
  queue: string;
  sourceDir: string;
}

/** Paths together with the result of checking whether each file exists. */
interface PathsWithExistence extends PathsBase {
  existence: FileExistence;
}

/** Paths, existence state, and the parsed queue contents. */
interface PathsWithQueue extends PathsWithExistence {
  queueData: QueueData;
}

/** Carrier for a validation failure. */
interface PathsWithError {
  error: PentestError;
}

/** Parsed queue file: a `vulnerabilities` array plus any additional keys. */
interface QueueData {
  vulnerabilities: unknown[];
  [key: string]: unknown;
}

/** Outcome of checking the structure of a queue file's content. */
interface QueueValidationResult {
  valid: boolean;
  /** Parsed queue, or null when unavailable. */
  data: QueueData | null;
  /** Error message, or null when there is none. */
  error: string | null;
}

/** Final verdict of queue/deliverable validation for one vulnerability type. */
export interface ExploitationDecision {
  shouldExploit: boolean;
  shouldRetry: boolean;
  /** Number of entries in the queue's `vulnerabilities` array. */
  vulnerabilityCount: number;
  vulnType: VulnType;
}

/** Result shape for the non-throwing validation entry point. */
export interface SafeValidationResult {
  success: boolean;
  /** Decision, when validation produced one. */
  data?: ExploitationDecision;
  /** Failure, when validation did not succeed. */
  error?: PentestError;
}
|
||||||
|
|
||||||
|
// Immutable lookup table: vulnerability type -> names of the analysis deliverable
// and exploitation queue files. Both the table and each entry are frozen.
const VULN_TYPE_CONFIG: VulnTypeConfig = (() => {
  const filesFor = (deliverable: string, queue: string): VulnTypeConfigItem =>
    Object.freeze({ deliverable, queue });

  return Object.freeze({
    injection: filesFor('injection_analysis_deliverable.md', 'injection_exploitation_queue.json'),
    xss: filesFor('xss_analysis_deliverable.md', 'xss_exploitation_queue.json'),
    auth: filesFor('auth_analysis_deliverable.md', 'auth_exploitation_queue.json'),
    ssrf: filesFor('ssrf_analysis_deliverable.md', 'ssrf_exploitation_queue.json'),
    authz: filesFor('authz_analysis_deliverable.md', 'authz_exploitation_queue.json'),
  });
})();
|
||||||
|
|
||||||
|
/**
 * Build an immutable validation-rule record.
 *
 * @param predicate Test evaluated against the file-existence state.
 * @param errorMessage Fixed message, or a function deriving one from the existence state.
 * @param retryable Whether a failure of this rule may be retried (defaults to true).
 * @returns A frozen rule holding the three values as given.
 */
function createValidationRule(
  predicate: (existence: FileExistence) => boolean,
  errorMessage: ErrorMessageResolver,
  retryable: boolean = true
): ValidationRule {
  const rule: ValidationRule = { predicate, errorMessage, retryable };
  return Object.freeze(rule);
}
|
||||||
|
|
||||||
|
/**
 * Ordered, frozen list of existence rules. The single rule requires the
 * deliverable and the queue to be present together, so a partially written
 * analysis can never trigger exploitation.
 */
const fileExistenceRules: readonly ValidationRule[] = Object.freeze([
  createValidationRule(
    (existence) => existence.deliverableExists && existence.queueExists,
    getExistenceErrorMessage
  ),
]);
|
||||||
|
|
||||||
|
/**
 * Picks the error text describing which of the two analysis files is missing.
 *
 * @param existence - Presence flags for the deliverable and queue files.
 * @returns The message for "neither", "queue missing" or "deliverable missing".
 */
function getExistenceErrorMessage(existence: FileExistence): string {
  if (existence.queueExists) {
    return 'Analysis incomplete: Queue exists but deliverable file missing. Analysis agent must create both files.';
  }

  // Queue is absent from here on; the deliverable flag selects the wording.
  return existence.deliverableExists
    ? 'Analysis incomplete: Deliverable exists but queue file missing. Analysis agent must create both files.'
    : 'Analysis failed: Neither deliverable nor queue file exists. Analysis agent must create both files.';
}
|
||||||
|
|
||||||
|
/**
 * Builds the deliverable and queue file paths for a vulnerability type.
 *
 * @param vulnType - Key into `VULN_TYPE_CONFIG`.
 * @param sourceDir - Directory that contains the `deliverables` folder.
 * @returns A frozen paths object, or `{ error }` holding a non-retryable
 *   validation `PentestError` when the type has no configuration entry.
 */
const createPaths = (
  vulnType: VulnType,
  sourceDir: string
): PathsBase | PathsWithError => {
  // Own-property check: a plain index lookup also resolves inherited keys such
  // as "constructor" to a truthy value, which would slip past the guard and
  // make path.join throw a TypeError instead of producing a PentestError.
  const isConfigured = Object.prototype.hasOwnProperty.call(VULN_TYPE_CONFIG, vulnType);
  const config = isConfigured ? VULN_TYPE_CONFIG[vulnType] : undefined;

  if (!config) {
    return {
      error: new PentestError(
        `Unknown vulnerability type: ${vulnType}`,
        'validation',
        false,
        { vulnType }
      ),
    };
  }

  return Object.freeze({
    vulnType,
    deliverable: path.join(sourceDir, 'deliverables', config.deliverable),
    queue: path.join(sourceDir, 'deliverables', config.queue),
    sourceDir,
  });
};
|
||||||
|
|
||||||
|
/**
 * Probes the filesystem for the deliverable and queue files.
 *
 * @param paths - Output of `createPaths`; an error value is forwarded untouched.
 * @returns The input paths extended with a frozen `existence` record.
 */
const checkFileExistence = async (
  paths: PathsBase | PathsWithError
): Promise<PathsWithExistence | PathsWithError> => {
  if ('error' in paths) {
    return paths;
  }

  // Both probes are started before either is awaited.
  const probes = [fs.pathExists(paths.deliverable), fs.pathExists(paths.queue)];
  const [deliverableExists, queueExists] = await Promise.all(probes);
  const existence = Object.freeze({ deliverableExists, queueExists });

  return Object.freeze({ ...paths, existence });
};
|
||||||
|
|
||||||
|
/**
 * Applies `fileExistenceRules` to the existence flags. With the current rule
 * set this passes only when BOTH the deliverable and the queue exist.
 *
 * @param pathsWithExistence - Output of `checkFileExistence`; errors pass through.
 * @returns The input unchanged when every rule holds, otherwise `{ error }` with
 *   a validation `PentestError` built from the first failing rule.
 */
const validateExistenceRules = (
  pathsWithExistence: PathsWithExistence | PathsWithError
): PathsWithExistence | PathsWithError => {
  if ('error' in pathsWithExistence) return pathsWithExistence;

  const { existence, vulnType, deliverable, queue } = pathsWithExistence;

  const violated = fileExistenceRules.find((rule) => !rule.predicate(existence));
  if (!violated) {
    return pathsWithExistence;
  }

  // A rule may carry either a fixed message or a resolver over the flags.
  const { errorMessage, retryable } = violated;
  const detail = typeof errorMessage === 'function' ? errorMessage(existence) : errorMessage;

  return {
    error: new PentestError(`${detail} (${vulnType})`, 'validation', retryable, {
      vulnType,
      deliverablePath: deliverable,
      queuePath: queue,
      existence,
    }),
  };
};
|
||||||
|
|
||||||
|
/**
 * Parses queue file content and checks that it is an object with a
 * `vulnerabilities` array.
 *
 * @param content - Raw text of the queue file.
 * @returns A frozen result. `error` is set only for JSON parse failures; a
 *   well-formed document with the wrong shape yields `valid: false, error: null`.
 */
const validateQueueStructure = (content: string): QueueValidationResult => {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content) as unknown;
  } catch (parseError) {
    const reason = parseError instanceof Error ? parseError.message : String(parseError);
    return Object.freeze({ valid: false, data: null, error: reason });
  }

  const hasQueueShape =
    parsed !== null &&
    typeof parsed === 'object' &&
    'vulnerabilities' in parsed &&
    Array.isArray((parsed as QueueData).vulnerabilities);

  if (!hasQueueShape) {
    return Object.freeze({ valid: false, data: null, error: null });
  }

  return Object.freeze({ valid: true, data: parsed as QueueData, error: null });
};
|
||||||
|
|
||||||
|
/**
 * Reads the queue file and validates its structure.
 *
 * Structural problems are reported as retryable validation errors (the agent can
 * rewrite the queue on retry); a failure to read the file is reported as a
 * non-retryable filesystem error.
 *
 * @param pathsWithExistence - Output of `validateExistenceRules`; errors pass through.
 * @returns The input extended with `queueData`, or `{ error }`.
 */
const validateQueueContent = async (
  pathsWithExistence: PathsWithExistence | PathsWithError
): Promise<PathsWithQueue | PathsWithError> => {
  if ('error' in pathsWithExistence) return pathsWithExistence;

  const { vulnType, queue: queuePath } = pathsWithExistence;

  try {
    const rawQueue = await fs.readFile(queuePath, 'utf8');
    const { valid, data, error: parseFailure } = validateQueueStructure(rawQueue);

    if (valid) {
      return Object.freeze({
        ...pathsWithExistence,
        queueData: data!,
      });
    }

    // Rule 6: Both exist, queue invalid
    const reason = parseFailure
      ? `Queue validation failed for ${vulnType}: Invalid JSON structure. Analysis agent must fix queue format.`
      : `Queue validation failed for ${vulnType}: Missing or invalid 'vulnerabilities' array. Analysis agent must fix queue structure.`;

    return {
      error: new PentestError(
        reason,
        'validation',
        true, // retryable
        {
          vulnType,
          queuePath,
          originalError: parseFailure,
          queueStructure: data ? Object.keys(data) : [],
        }
      ),
    };
  } catch (readError) {
    const detail = readError instanceof Error ? readError.message : String(readError);
    return {
      error: new PentestError(
        `Failed to read queue file for ${vulnType}: ${detail}`,
        'filesystem',
        false,
        { vulnType, queuePath, originalError: detail }
      ),
    };
  }
};
|
||||||
|
|
||||||
|
/**
 * Last pipeline stage: turns validated queue data into a decision.
 *
 * @param validatedData - Output of `validateQueueContent`.
 * @returns A frozen decision; `shouldExploit` is true iff the queue is non-empty.
 * @throws The carried `PentestError` when an earlier stage produced an error.
 */
const determineExploitationDecision = (
  validatedData: PathsWithQueue | PathsWithError
): ExploitationDecision => {
  if ('error' in validatedData) {
    throw validatedData.error;
  }

  const { vulnType, queueData } = validatedData;
  const vulnerabilityCount = queueData.vulnerabilities.length;

  // Rule 4: Both exist, queue valid and populated -> exploit
  // Rule 5: Both exist, queue valid but empty -> skip
  return Object.freeze({
    shouldExploit: vulnerabilityCount > 0,
    shouldRetry: false,
    vulnerabilityCount,
    vulnType,
  });
};
|
||||||
|
|
||||||
|
/**
 * Runs the full validation pipeline for one vulnerability type: build paths,
 * check existence, apply existence rules, validate queue content, decide.
 *
 * @param vulnType - Vulnerability type to validate.
 * @param sourceDir - Directory that contains the `deliverables` folder.
 * @returns The exploitation decision.
 * @throws PentestError raised by the final stage when any stage reported an error.
 */
export async function validateQueueAndDeliverable(
  vulnType: VulnType,
  sourceDir: string
): Promise<ExploitationDecision> {
  const initialPaths = createPaths(vulnType, sourceDir);

  return asyncPipe<ExploitationDecision>(
    initialPaths,
    checkFileExistence,
    validateExistenceRules,
    validateQueueContent,
    determineExploitationDecision
  );
}
|
||||||
|
|
||||||
|
/**
 * Non-throwing wrapper around `validateQueueAndDeliverable`.
 *
 * @param vulnType - Vulnerability type to validate.
 * @param sourceDir - Directory that contains the `deliverables` folder.
 * @returns `{ success: true, data }` on success, `{ success: false, error }` when
 *   validation threw. The thrown value is passed through as-is.
 */
export const safeValidateQueueAndDeliverable = async (
  vulnType: VulnType,
  sourceDir: string
): Promise<SafeValidationResult> => {
  let decision: ExploitationDecision;
  try {
    decision = await validateQueueAndDeliverable(vulnType, sourceDir);
  } catch (error) {
    return { success: false, error: error as PentestError };
  }
  return { success: true, data: decision };
};
|
||||||
@@ -1,701 +0,0 @@
|
|||||||
import { fs, path } from 'zx';
|
|
||||||
import chalk from 'chalk';
|
|
||||||
import crypto from 'crypto';
|
|
||||||
import { PentestError } from './error-handling.js';
|
|
||||||
import { SessionMutex } from './utils/concurrency.js';
|
|
||||||
import { promptSelection } from './cli/prompts.js';
|
|
||||||
|
|
||||||
/**
 * Builds the per-session log folder path: <cwd>/agent-logs/{hostname}_{sessionId}.
 * The hostname is sanitised so that only letters, digits and '-' remain; the
 * session ID is appended unmodified.
 *
 * @param webUrl - Target URL; must be parseable by `new URL`.
 * @param sessionId - Session identifier.
 * @returns Absolute path of the session log folder.
 */
export const generateSessionLogPath = (webUrl, sessionId) => {
  const { hostname } = new URL(webUrl);
  const safeHostname = hostname.replace(/[^a-zA-Z0-9-]/g, '-');
  return path.join(process.cwd(), 'agent-logs', `${safeHostname}_${sessionId}`);
};
|
|
||||||
|
|
||||||
// Module-wide mutex instance. markAgentCompleted acquires it per session ID
// around its read-modify-write of the session store.
const sessionMutex = new SessionMutex();
|
|
||||||
|
|
||||||
// Builds one agent record; the property order matches the serialized form.
const defineAgent = (name, displayName, phase, order, prerequisites) => ({
  name,
  displayName,
  phase,
  order,
  prerequisites,
});

/**
 * Agent definitions according to PRD, keyed by agent name.
 * `order` gives the position in the overall sequence and `prerequisites`
 * lists the agents that must be completed first. Only the outer table is frozen.
 */
export const AGENTS = Object.freeze({
  // Phase 1 - Pre-reconnaissance
  'pre-recon': defineAgent('pre-recon', 'Pre-recon agent', 'pre-reconnaissance', 1, []),

  // Phase 2 - Reconnaissance
  'recon': defineAgent('recon', 'Recon agent', 'reconnaissance', 2, ['pre-recon']),

  // Phase 3 - Vulnerability Analysis
  'injection-vuln': defineAgent('injection-vuln', 'Injection vuln agent', 'vulnerability-analysis', 3, ['recon']),
  'xss-vuln': defineAgent('xss-vuln', 'XSS vuln agent', 'vulnerability-analysis', 4, ['recon']),
  'auth-vuln': defineAgent('auth-vuln', 'Auth vuln agent', 'vulnerability-analysis', 5, ['recon']),
  'ssrf-vuln': defineAgent('ssrf-vuln', 'SSRF vuln agent', 'vulnerability-analysis', 6, ['recon']),
  'authz-vuln': defineAgent('authz-vuln', 'Authz vuln agent', 'vulnerability-analysis', 7, ['recon']),

  // Phase 4 - Exploitation
  'injection-exploit': defineAgent('injection-exploit', 'Injection exploit agent', 'exploitation', 8, ['injection-vuln']),
  'xss-exploit': defineAgent('xss-exploit', 'XSS exploit agent', 'exploitation', 9, ['xss-vuln']),
  'auth-exploit': defineAgent('auth-exploit', 'Auth exploit agent', 'exploitation', 10, ['auth-vuln']),
  'ssrf-exploit': defineAgent('ssrf-exploit', 'SSRF exploit agent', 'exploitation', 11, ['ssrf-vuln']),
  'authz-exploit': defineAgent('authz-exploit', 'Authz exploit agent', 'exploitation', 12, ['authz-vuln']),

  // Phase 5 - Reporting
  'report': defineAgent('report', 'Report agent', 'reporting', 13, ['authz-exploit']),
});
|
|
||||||
|
|
||||||
/**
 * Phase definitions: each phase name maps to the agent names that belong to it,
 * listed in execution order. Only the outer table is frozen.
 */
export const PHASES = Object.freeze({
  'pre-reconnaissance': ['pre-recon'],
  'reconnaissance': ['recon'],
  'vulnerability-analysis': [
    'injection-vuln',
    'xss-vuln',
    'auth-vuln',
    'ssrf-vuln',
    'authz-vuln',
  ],
  'exploitation': [
    'injection-exploit',
    'xss-exploit',
    'auth-exploit',
    'ssrf-exploit',
    'authz-exploit',
  ],
  'reporting': ['report'],
});
|
|
||||||
|
|
||||||
// Session store file path
|
|
||||||
// Resolved against process.cwd() once, when this module is evaluated.
// Read by loadSessions and replaced by saveSessions.
const STORE_FILE = path.join(process.cwd(), '.shannon-store.json');
|
|
||||||
|
|
||||||
/**
 * Loads the session store from STORE_FILE.
 *
 * Never throws: a missing file, unreadable/invalid JSON, or a document without
 * a `sessions` member all yield a fresh empty store (with a warning printed for
 * the latter two cases).
 *
 * @returns {Promise<{sessions: Object}>} The parsed store or an empty one.
 */
const loadSessions = async () => {
  const emptyStore = () => ({ sessions: {} });

  try {
    const storeExists = await fs.pathExists(STORE_FILE);
    if (!storeExists) {
      return emptyStore();
    }

    const parsedStore = JSON.parse(await fs.readFile(STORE_FILE, 'utf8'));

    // Validate store structure
    const looksValid = parsedStore && typeof parsedStore === 'object' && parsedStore.sessions;
    if (looksValid) {
      return parsedStore;
    }

    console.log(chalk.yellow('⚠️ Invalid session store format, creating new store'));
    return emptyStore();
  } catch (error) {
    console.log(chalk.yellow(`⚠️ Failed to load session store: ${error.message}, creating new store`));
    return emptyStore();
  }
};
|
|
||||||
|
|
||||||
/**
 * Persists the session store: the JSON is written to `${STORE_FILE}.tmp` and
 * then moved over STORE_FILE, so readers never see a half-written store.
 *
 * @param {Object} store - Complete store object to persist.
 * @throws {PentestError} Non-retryable 'filesystem' error when writing or moving fails.
 */
const saveSessions = async (store) => {
  const stagingFile = `${STORE_FILE}.tmp`;

  try {
    await fs.writeJSON(stagingFile, store, { spaces: 2 });
    await fs.move(stagingFile, STORE_FILE, { overwrite: true });
  } catch (error) {
    const context = { storeFile: STORE_FILE, originalError: error.message };
    throw new PentestError(
      `Failed to save session store: ${error.message}`,
      'filesystem',
      false,
      context
    );
  }
};
|
|
||||||
|
|
||||||
/**
 * Finds a stored session for the same web URL and repository path.
 *
 * When several sessions match, one that is not yet completed is preferred so it
 * can be resumed; returning simply the first match would hide an in-progress
 * session behind an older completed one and cause a new session to be created
 * on every run. If every match is completed, the first match is returned.
 *
 * @param {string} webUrl - Target URL, compared by exact string equality.
 * @param {string} targetRepo - Repository path, compared after path.resolve.
 * @returns {Promise<Object|undefined>} The matching session, or undefined.
 */
const findExistingSession = async (webUrl, targetRepo) => {
  const store = await loadSessions();

  // Normalize paths for comparison
  const normalizedTargetRepo = path.resolve(targetRepo);

  const matches = Object.values(store.sessions).filter((session) => {
    const sessionRepo = session.targetRepo || session.repoPath;
    // A record without any repo path cannot match; path.resolve(undefined) would throw.
    if (!sessionRepo) return false;
    return session.webUrl === webUrl && path.resolve(sessionRepo) === normalizedTargetRepo;
  });

  const resumable = matches.find((session) => session.status !== 'completed');
  return resumable || matches[0];
};
|
|
||||||
|
|
||||||
/**
 * Produces a session ID. Every call yields a new random UUID; nothing about the
 * target is encoded in it.
 *
 * @returns {string} A random UUID.
 */
const generateSessionId = () => crypto.randomUUID();
|
|
||||||
|
|
||||||
/**
 * Creates a new session, or resumes the existing unfinished session for the
 * same web URL and target repository.
 *
 * @param {string} webUrl - Target URL.
 * @param {string} repoPath - Repository path.
 * @param {string|null} [configFile=null] - Optional config file path stored on the session.
 * @param {string|null} [targetRepo=null] - Repository used for matching; falls back to repoPath.
 * @returns {Promise<Object>} The resumed session as persisted, or the newly created one.
 */
export const createSession = async (webUrl, repoPath, configFile = null, targetRepo = null) => {
  // Use targetRepo if provided, otherwise use repoPath
  const resolvedTargetRepo = targetRepo || repoPath;

  // Check for existing session first
  const existingSession = await findExistingSession(webUrl, resolvedTargetRepo);

  if (existingSession) {
    // If session is not completed, reuse it
    if (existingSession.status !== 'completed') {
      console.log(chalk.blue(`📝 Reusing existing session: ${existingSession.id.substring(0, 8)}...`));
      console.log(chalk.gray(` Progress: ${existingSession.completedAgents.length}/${Object.keys(AGENTS).length} agents completed`));

      // Refresh the activity timestamp and hand back the record exactly as it
      // was persisted, rather than the snapshot read before the update.
      return await updateSession(existingSession.id, { lastActivity: new Date().toISOString() });
    }

    // If completed, create a new session (allows re-running after completion)
    console.log(chalk.gray(`Previous session was completed, creating new session...`));
  }

  const sessionId = generateSessionId();
  const createdAt = new Date().toISOString();

  // STANDARD: All sessions use 'id' field (NOT 'sessionId')
  // This is the canonical session structure used throughout the codebase
  const session = {
    id: sessionId,
    webUrl,
    repoPath,
    configFile,
    targetRepo: resolvedTargetRepo,
    status: 'in-progress',
    completedAgents: [],
    failedAgents: [],
    checkpoints: {},
    createdAt,
    lastActivity: createdAt
  };

  const store = await loadSessions();
  store.sessions[sessionId] = session;
  await saveSessions(store);

  return session;
};
|
|
||||||
|
|
||||||
/**
 * Looks up a session in the store.
 *
 * @param {string} sessionId - Session ID to look up.
 * @returns {Promise<Object|null>} The session, or null when no entry exists.
 */
export const getSession = async (sessionId) => {
  const { sessions } = await loadSessions();
  const session = sessions[sessionId];
  return session || null;
};
|
|
||||||
|
|
||||||
/**
 * Shallow-merges `updates` into a stored session and persists the store.
 * `lastActivity` is always set to the current time, overriding any value in `updates`.
 *
 * @param {string} sessionId - ID of the session to update.
 * @param {Object} updates - Fields to overwrite on the session.
 * @returns {Promise<Object>} The merged session.
 * @throws {PentestError} Non-retryable 'validation' error when the session does not exist.
 */
export const updateSession = async (sessionId, updates) => {
  const store = await loadSessions();
  const current = store.sessions[sessionId];

  if (!current) {
    throw new PentestError(
      `Session ${sessionId} not found`,
      'validation',
      false,
      { sessionId }
    );
  }

  const merged = {
    ...current,
    ...updates,
    lastActivity: new Date().toISOString()
  };
  store.sessions[sessionId] = merged;

  await saveSessions(store);
  return merged;
};
|
|
||||||
|
|
||||||
/**
 * Returns every session in the store, in the store's own key order.
 *
 * @returns {Promise<Object[]>} All stored sessions.
 */
const listSessions = async () => {
  const { sessions } = await loadSessions();
  return Object.values(sessions);
};
|
|
||||||
|
|
||||||
/**
 * Interactive session selection.
 *
 * With exactly one stored session it is returned directly. With several, a
 * summary of each is printed and the user is prompted to choose.
 *
 * @returns {Promise<Object>} The chosen session.
 * @throws {PentestError} Non-retryable 'validation' error when no sessions exist.
 */
export const selectSession = async () => {
  const sessions = await listSessions();

  if (sessions.length === 0) {
    throw new PentestError(
      'No pentest sessions found. Run a normal pentest first to create a session.',
      'validation',
      false
    );
  }

  if (sessions.length === 1) {
    return sessions[0];
  }

  // Display session options
  console.log(chalk.cyan('\nMultiple pentest sessions found:\n'));

  const totalAgents = Object.keys(AGENTS).length;

  sessions.forEach((session, index) => {
    const completedCount = session.completedAgents.length;
    const timeAgo = getTimeAgo(session.lastActivity);

    // Use dynamic status calculation instead of stored status
    const { status } = getSessionStatus(session);
    const statusColor = status === 'completed' ? chalk.green : chalk.blue;

    console.log(statusColor(`${index + 1}) ${new URL(session.webUrl).hostname} + ${path.basename(session.repoPath)} [${status}]`));
    console.log(chalk.gray(` Last activity: ${timeAgo}, Completed: ${completedCount}/${totalAgents} agents`));
    console.log(chalk.gray(` Session ID: ${session.id}`));

    if (session.configFile) {
      console.log(chalk.gray(` Config: ${session.configFile}`));
    }

    console.log(); // Empty line between sessions
  });

  // Get user selection
  return await promptSelection(
    chalk.cyan(`Select session (1-${sessions.length}):`),
    sessions
  );
};
|
|
||||||
|
|
||||||
/**
 * Validates an agent name and returns its definition.
 *
 * @param {string} agentName - Name to look up in AGENTS.
 * @returns {Object} The agent definition.
 * @throws {PentestError} Non-retryable 'validation' error for unknown names.
 */
export const validateAgent = (agentName) => {
  // Own-property check: a plain index lookup would accept inherited keys such
  // as "constructor" or "toString" and return a function instead of an agent.
  const isKnownAgent = Object.prototype.hasOwnProperty.call(AGENTS, agentName);

  if (!isKnownAgent || !AGENTS[agentName]) {
    throw new PentestError(
      `Agent '${agentName}' not recognized. Use --list-agents to see valid names.`,
      'validation',
      false,
      { agentName, validAgents: Object.keys(AGENTS) }
    );
  }

  return AGENTS[agentName];
};
|
|
||||||
|
|
||||||
/**
 * Validates a start/end agent pair and returns every agent whose order lies in
 * the inclusive range, sorted by order.
 *
 * @param {string} startAgent - Name of the first agent in the range.
 * @param {string} endAgent - Name of the last agent; must come strictly after the start.
 * @returns {Object[]} Agent definitions in execution order.
 * @throws {PentestError} When either name is unknown or the end is not after the start.
 */
export const validateAgentRange = (startAgent, endAgent) => {
  const { order: startOrder } = validateAgent(startAgent);
  const { order: endOrder } = validateAgent(endAgent);

  if (startOrder >= endOrder) {
    throw new PentestError(
      `End agent '${endAgent}' must come after start agent '${startAgent}' in sequence.`,
      'validation',
      false,
      { startAgent, endAgent, startOrder, endOrder }
    );
  }

  // Get all agents in range
  const agentsInRange = [];
  for (const agent of Object.values(AGENTS)) {
    if (agent.order >= startOrder && agent.order <= endOrder) {
      agentsInRange.push(agent);
    }
  }
  agentsInRange.sort((left, right) => left.order - right.order);

  return agentsInRange;
};
|
|
||||||
|
|
||||||
/**
 * Validates a phase name and returns the definitions of its agents.
 *
 * @param {string} phaseName - Name to look up in PHASES.
 * @returns {Object[]} Agent definitions of the phase, in the phase's listed order.
 * @throws {PentestError} Non-retryable 'validation' error for unknown names.
 */
export const validatePhase = (phaseName) => {
  // Own-property check: inherited keys such as "constructor" are truthy on a
  // plain lookup and would crash on .map with a TypeError instead of producing
  // the validation error below.
  const isKnownPhase = Object.prototype.hasOwnProperty.call(PHASES, phaseName);

  if (!isKnownPhase || !PHASES[phaseName]) {
    throw new PentestError(
      `Phase '${phaseName}' not recognized. Valid phases: ${Object.keys(PHASES).join(', ')}`,
      'validation',
      false,
      { phaseName, validPhases: Object.keys(PHASES) }
    );
  }

  return PHASES[phaseName].map(agentName => AGENTS[agentName]);
};
|
|
||||||
|
|
||||||
/**
 * Verifies that every prerequisite of an agent is listed in the session's
 * completed agents.
 *
 * @param {Object} session - Session whose `completedAgents` is consulted.
 * @param {string} agentName - Agent about to run.
 * @returns {boolean} Always true when the check passes.
 * @throws {PentestError} When the agent is unknown or a prerequisite is missing.
 */
export const checkPrerequisites = (session, agentName) => {
  const { prerequisites } = validateAgent(agentName);

  const missingPrereqs = prerequisites.filter(
    (prereq) => !session.completedAgents.includes(prereq)
  );

  if (missingPrereqs.length === 0) {
    return true;
  }

  throw new PentestError(
    `Cannot run '${agentName}': prerequisite agent(s) not completed: ${missingPrereqs.join(', ')}`,
    'validation',
    false,
    { agentName, missingPrerequisites: missingPrereqs, completedAgents: session.completedAgents }
  );
};
|
|
||||||
|
|
||||||
/**
 * Suggests the next agent to run: the lowest-order agent that is not yet
 * completed and whose prerequisites are all completed. Previously failed agents
 * are not excluded, so they are suggested again.
 *
 * @param {Object} session - Session whose `completedAgents` is consulted.
 * @returns {Object|undefined} The agent definition, or undefined when none qualifies.
 */
export const getNextAgent = (session) => {
  const completed = new Set(session.completedAgents);

  return Object.values(AGENTS)
    .sort((a, b) => a.order - b.order)
    .find(agent =>
      !completed.has(agent.name) &&
      agent.prerequisites.every(prereq => completed.has(prereq))
    );
};
|
|
||||||
|
|
||||||
/**
 * Records an agent as completed and stores its checkpoint commit.
 *
 * The agent is added to `completedAgents` (deduplicated), removed from
 * `failedAgents`, and the session status becomes 'completed' once the number of
 * completed agents equals the number of defined agents. The whole
 * read-modify-write runs under the per-session mutex to prevent lost updates
 * during parallel agent execution.
 *
 * NOTE: Timing, cost, and validation data are managed by AuditSession
 * (audit-logs/session.json); the Shannon store holds ONLY orchestration state.
 *
 * @param {string} sessionId - Session to update.
 * @param {string} agentName - Agent that finished.
 * @param {*} checkpointCommit - Checkpoint value stored under the agent's name.
 * @returns {Promise<Object>} The updated session.
 * @throws {PentestError} When the session or the agent name is unknown.
 */
export const markAgentCompleted = async (sessionId, agentName, checkpointCommit) => {
  const release = await sessionMutex.lock(sessionId);

  try {
    // Read the session only after the lock is held so the data is fresh.
    const current = await getSession(sessionId);
    if (!current) {
      throw new PentestError(`Session ${sessionId} not found`, 'validation', false);
    }

    validateAgent(agentName);

    const completedAgents = Array.from(new Set([...current.completedAgents, agentName]));
    const failedAgents = current.failedAgents.filter(name => name !== agentName);
    const checkpoints = { ...current.checkpoints, [agentName]: checkpointCommit };

    // Flip the session status once every defined agent has completed.
    const everyAgentDone = completedAgents.length === Object.keys(AGENTS).length;
    const updates = everyAgentDone
      ? { completedAgents, failedAgents, checkpoints, status: 'completed' }
      : { completedAgents, failedAgents, checkpoints };

    return await updateSession(sessionId, updates);
  } finally {
    // Release the lock on every exit path, including errors.
    release();
  }
};
|
|
||||||
|
|
||||||
/**
 * Records an agent as failed: it is added to `failedAgents` (deduplicated) and
 * removed from `completedAgents`.
 *
 * The read-modify-write runs under the same per-session mutex that
 * markAgentCompleted uses, so a failure reported while other agents finish in
 * parallel cannot overwrite (or be overwritten by) their updates.
 *
 * @param {string} sessionId - Session to update.
 * @param {string} agentName - Agent that failed.
 * @returns {Promise<Object>} The updated session.
 * @throws {PentestError} When the session or the agent name is unknown.
 */
export const markAgentFailed = async (sessionId, agentName) => {
  const unlock = await sessionMutex.lock(sessionId);

  try {
    // Get fresh session data under lock
    const session = await getSession(sessionId);
    if (!session) {
      throw new PentestError(`Session ${sessionId} not found`, 'validation', false);
    }

    validateAgent(agentName);

    const updates = {
      failedAgents: [...new Set([...session.failedAgents, agentName])],
      completedAgents: session.completedAgents.filter(agent => agent !== agentName)
    };

    return await updateSession(sessionId, updates);
  } finally {
    // Always release the lock, even if an error occurs
    unlock();
  }
};
|
|
||||||
|
|
||||||
/**
 * Formats how long ago a timestamp was, as "<n>m ago", "<n>h ago" or "<n>d ago".
 *
 * @param {string|number|Date} timestamp - Anything accepted by `new Date`.
 * @returns {string} Relative age; 'unknown' when the timestamp cannot be parsed.
 *   Timestamps in the future (e.g. clock skew) are reported as "0m ago".
 */
const getTimeAgo = (timestamp) => {
  const MS_PER_MINUTE = 1000 * 60;
  const MS_PER_HOUR = MS_PER_MINUTE * 60;
  const MS_PER_DAY = MS_PER_HOUR * 24;

  const pastMs = new Date(timestamp).getTime();
  if (Number.isNaN(pastMs)) {
    // Without this guard the result would be the nonsensical "NaNd ago".
    return 'unknown';
  }

  // Clamp so a timestamp slightly ahead of the local clock never yields a negative age.
  const diffMs = Math.max(0, Date.now() - pastMs);

  const diffMins = Math.floor(diffMs / MS_PER_MINUTE);
  if (diffMins < 60) {
    return `${diffMins}m ago`;
  }

  const diffHours = Math.floor(diffMs / MS_PER_HOUR);
  if (diffHours < 24) {
    return `${diffHours}h ago`;
  }

  return `${Math.floor(diffMs / MS_PER_DAY)}d ago`;
};
|
|
||||||
|
|
||||||
/**
 * Derives a status summary from a session's agent lists (the stored `status`
 * field is not consulted).
 *
 * Status is 'completed' when the completed count equals the number of defined
 * agents, otherwise 'failed' when any agent is in `failedAgents`, otherwise
 * 'in-progress'.
 *
 * @param {Object} session - Session with `completedAgents` and `failedAgents`.
 * @returns {{status: string, completedCount: number, totalAgents: number,
 *   failedCount: number, completionPercentage: number}} Summary; the percentage is rounded.
 */
export const getSessionStatus = (session) => {
  const totalAgents = Object.keys(AGENTS).length;
  const completedCount = session.completedAgents.length;
  const failedCount = session.failedAgents.length;

  const status =
    completedCount === totalAgents
      ? 'completed'
      : failedCount > 0
        ? 'failed'
        : 'in-progress';

  const completionPercentage = Math.round((completedCount / totalAgents) * 100);

  return { status, completedCount, totalAgents, failedCount, completionPercentage };
};
|
|
||||||
|
|
||||||
/**
 * Summarises which vulnerability-analysis agents a session has completed.
 * Only completion is reported; actual vulnerability counts live in the queue
 * files and are not read here.
 *
 * @param {Object} session - Session whose `completedAgents` is consulted.
 * @returns {{totalAnalyses: number, completedAgents: string[]}} Frozen summary.
 */
export const calculateVulnerabilityAnalysisSummary = (session) => {
  const analysisAgentNames = new Set(PHASES['vulnerability-analysis']);
  const finishedAnalyses = session.completedAgents.filter(name => analysisAgentNames.has(name));

  return Object.freeze({
    totalAnalyses: finishedAnalyses.length,
    completedAgents: finishedAnalyses
  });
};
|
|
||||||
|
|
||||||
/**
 * Summarises which exploitation agents a session has completed.
 * Only completion is reported; eligibility depends on the queue files and is
 * not evaluated here.
 *
 * @param {Object} session - Session whose `completedAgents` is consulted.
 * @returns {{totalAttempts: number, completedAgents: string[]}} Frozen summary.
 */
export const calculateExploitationSummary = (session) => {
  const exploitAgentNames = new Set(PHASES['exploitation']);
  const finishedExploits = session.completedAgents.filter(name => exploitAgentNames.has(name));

  return Object.freeze({
    totalAttempts: finishedExploits.length,
    completedAgents: finishedExploits
  });
};
|
|
||||||
|
|
||||||
/**
 * Rolls a session back to the checkpoint of `targetAgent`: every agent ordered
 * after the target is dropped from `completedAgents`, `failedAgents` and
 * `checkpoints`.
 *
 * If the session was stored as 'completed' and the rollback leaves fewer
 * completed agents than are defined, the stored status is reset to
 * 'in-progress' so the session is treated as resumable again.
 *
 * NOTE: Timing and cost data are managed in audit-logs/session.json; this
 * function only rewrites the orchestration state.
 *
 * @param {string} sessionId - Session to roll back.
 * @param {string} targetAgent - Agent whose checkpoint becomes the new head.
 * @returns {Promise<Object>} The updated session.
 * @throws {PentestError} When the session or agent is unknown, or the agent has no checkpoint.
 */
export const rollbackToAgent = async (sessionId, targetAgent) => {
  const session = await getSession(sessionId);
  if (!session) {
    throw new PentestError(`Session ${sessionId} not found`, 'validation', false);
  }

  validateAgent(targetAgent);

  if (!session.checkpoints[targetAgent]) {
    throw new PentestError(
      `No checkpoint found for agent '${targetAgent}' in session history`,
      'validation',
      false,
      { targetAgent, availableCheckpoints: Object.keys(session.checkpoints) }
    );
  }

  // Find agents that need to be removed (those after the target agent)
  const targetOrder = AGENTS[targetAgent].order;
  const agentsToRemove = new Set(
    Object.values(AGENTS)
      .filter(agent => agent.order > targetOrder)
      .map(agent => agent.name)
  );
  const isKept = (agentName) => !agentsToRemove.has(agentName);

  const updates = {
    completedAgents: session.completedAgents.filter(isKept),
    failedAgents: session.failedAgents.filter(isKept),
    checkpoints: Object.fromEntries(
      Object.entries(session.checkpoints).filter(([agentName]) => isKept(agentName))
    )
  };

  // A finished session that loses completed agents is no longer finished.
  const totalAgents = Object.keys(AGENTS).length;
  if (session.status === 'completed' && updates.completedAgents.length < totalAgents) {
    updates.status = 'in-progress';
  }

  return await updateSession(sessionId, updates);
};
|
|
||||||
|
|
||||||
/**
 * Reconcile Shannon store with audit logs (self-healing)
 *
 * This function ensures the Shannon store (.shannon-store.json) is consistent with
 * the audit logs (audit-logs/session.json) by syncing agent completion status.
 *
 * Three-part reconciliation:
 *   1. PROMOTIONS: Agents with audit status 'success' that are missing from the
 *      store's completedAgents → marked completed (with their audit checkpoint)
 *   2. DEMOTIONS: Agents with audit status 'rolled-back' that are still in the
 *      store's completedAgents → removed, together with their checkpoints
 *   3. FAILURES: Agents with audit status 'failed' that are missing from the
 *      store's failedAgents → marked failed
 *
 * Critical for crash recovery, especially crash during rollback operations.
 * If the audit data carries no per-agent metrics, nothing is reconciled and an
 * empty report is returned.
 *
 * @param {string} sessionId - Session ID to reconcile
 * @returns {Promise<{promotions: string[], demotions: string[], failures: string[]}>}
 *   Reconciliation report listing the agents changed by each part
 * @throws {PentestError} If the session cannot be found in the Shannon store
 */
export const reconcileSession = async (sessionId) => {
  const { AuditSession } = await import('./audit/index.js');

  const sessionNotFound = () =>
    new PentestError(`Session ${sessionId} not found in Shannon store`, 'validation', false);

  // Get Shannon store session
  const shannonSession = await getSession(sessionId);
  if (!shannonSession) {
    throw sessionNotFound();
  }

  // Get audit session data
  const auditSession = new AuditSession(shannonSession);
  await auditSession.initialize();
  const auditData = await auditSession.getMetrics();

  // The audit file may lack agent metrics (e.g. after a crash); treat as empty
  // instead of failing with a TypeError in the middle of recovery.
  const auditAgents = auditData?.metrics?.agents ?? {};
  const agentsWithStatus = (status) =>
    Object.entries(auditAgents)
      .filter(([, agentData]) => agentData?.status === status)
      .map(([agentName]) => agentName);

  const report = {
    promotions: [],
    demotions: [],
    failures: []
  };

  // PART 1: PROMOTIONS (Additive)
  // Agents completed in audit but not in Shannon store
  const missing = agentsWithStatus('success').filter(
    (agent) => !shannonSession.completedAgents.includes(agent)
  );

  for (const agentName of missing) {
    const checkpoint = auditAgents[agentName].checkpoint || null;
    await markAgentCompleted(sessionId, agentName, checkpoint);
    report.promotions.push(agentName);
  }

  // PART 2: DEMOTIONS (Subtractive) - CRITICAL FOR ROLLBACK RECOVERY
  // Agents rolled-back in audit but still in Shannon store
  const rolledBack = new Set(agentsWithStatus('rolled-back'));
  const toRemove = shannonSession.completedAgents.filter((agent) => rolledBack.has(agent));

  if (toRemove.length > 0) {
    // Reload session to get fresh state (Part 1 may have modified the store)
    const freshSession = await getSession(sessionId);
    if (!freshSession) {
      throw sessionNotFound();
    }

    const removed = new Set(toRemove);
    const updates = {
      completedAgents: freshSession.completedAgents.filter((agent) => !removed.has(agent)),
      checkpoints: Object.fromEntries(
        Object.entries(freshSession.checkpoints).filter(([agent]) => !removed.has(agent))
      )
    };

    await updateSession(sessionId, updates);
    report.demotions.push(...toRemove);
  }

  // PART 3: FAILURES
  // Agents failed in audit but not marked failed in Shannon store
  const failedToAdd = agentsWithStatus('failed').filter(
    (agent) => !shannonSession.failedAgents.includes(agent)
  );

  for (const agentName of failedToAdd) {
    await markAgentFailed(sessionId, agentName);
    report.failures.push(agentName);
  }

  return report;
};
|
|
||||||
|
|
||||||
/**
 * Delete a specific session by ID.
 *
 * Loads the session store, removes the entry for `sessionId`, and saves the
 * store back.
 *
 * @param {string} sessionId - ID of the session to delete
 * @returns {Promise<Object>} The session record that was removed
 * @throws {PentestError} 'validation' error when no session has that ID
 */
export const deleteSession = async (sessionId) => {
  const store = await loadSessions();
  const target = store.sessions[sessionId];

  if (target) {
    delete store.sessions[sessionId];
    await saveSessions(store);
    return target;
  }

  throw new PentestError(`Session ${sessionId} not found`, 'validation', false, { sessionId });
};
|
|
||||||
|
|
||||||
/**
 * Delete all sessions by removing the entire storage file.
 *
 * @returns {Promise<boolean>} true when the store file existed and was removed,
 *   false when there was no store file to remove
 * @throws {PentestError} 'filesystem' error when the existence check or the
 *   removal fails
 */
export const deleteAllSessions = async () => {
  let storeExisted;

  try {
    storeExisted = await fs.pathExists(STORE_FILE);
    if (storeExisted) {
      await fs.remove(STORE_FILE);
    }
  } catch (error) {
    const context = { storeFile: STORE_FILE, originalError: error.message };
    throw new PentestError(
      `Failed to delete session storage: ${error.message}`,
      'filesystem',
      false,
      context
    );
  }

  return storeExisted;
};
|
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import { path } from 'zx';
|
||||||
|
import type { AgentName } from './types/index.js';
|
||||||
|
|
||||||
|
/**
 * Agent definition interface.
 *
 * Static description of a single agent as stored in the AGENTS registry.
 */
export interface AgentDefinition {
  /** Agent identifier; in AGENTS it matches the key the entry is stored under. */
  name: AgentName;
  /** Human-readable label for the agent. */
  displayName: string;
  /** Names of the agents listed as prerequisites of this one. */
  prerequisites: AgentName[];
}
|
||||||
|
|
||||||
|
/**
 * Agent definitions according to PRD.
 *
 * One entry per agent, keyed by agent name. The outer object is frozen;
 * Object.freeze is shallow, so the nested definitions are protected by the
 * type system only, not at runtime.
 */
export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
  // Reconnaissance
  'pre-recon': { name: 'pre-recon', displayName: 'Pre-recon agent', prerequisites: [] },
  'recon': { name: 'recon', displayName: 'Recon agent', prerequisites: ['pre-recon'] },

  // Vulnerability analysis (each depends on recon)
  'injection-vuln': { name: 'injection-vuln', displayName: 'Injection vuln agent', prerequisites: ['recon'] },
  'xss-vuln': { name: 'xss-vuln', displayName: 'XSS vuln agent', prerequisites: ['recon'] },
  'auth-vuln': { name: 'auth-vuln', displayName: 'Auth vuln agent', prerequisites: ['recon'] },
  'ssrf-vuln': { name: 'ssrf-vuln', displayName: 'SSRF vuln agent', prerequisites: ['recon'] },
  'authz-vuln': { name: 'authz-vuln', displayName: 'Authz vuln agent', prerequisites: ['recon'] },

  // Exploitation (each depends on its matching vuln agent)
  'injection-exploit': {
    name: 'injection-exploit',
    displayName: 'Injection exploit agent',
    prerequisites: ['injection-vuln'],
  },
  'xss-exploit': { name: 'xss-exploit', displayName: 'XSS exploit agent', prerequisites: ['xss-vuln'] },
  'auth-exploit': { name: 'auth-exploit', displayName: 'Auth exploit agent', prerequisites: ['auth-vuln'] },
  'ssrf-exploit': { name: 'ssrf-exploit', displayName: 'SSRF exploit agent', prerequisites: ['ssrf-vuln'] },
  'authz-exploit': { name: 'authz-exploit', displayName: 'Authz exploit agent', prerequisites: ['authz-vuln'] },

  // Reporting (depends on every exploit agent)
  'report': {
    name: 'report',
    displayName: 'Report agent',
    prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
  },
});
|
||||||
|
|
||||||
|
/**
 * Agent execution order.
 *
 * Frozen list of every agent name, from first to last.
 */
export const AGENT_ORDER: readonly AgentName[] = Object.freeze([
  // Reconnaissance
  'pre-recon',
  'recon',
  // Vulnerability analysis
  'injection-vuln',
  'xss-vuln',
  'auth-vuln',
  'ssrf-vuln',
  'authz-vuln',
  // Exploitation
  'injection-exploit',
  'xss-exploit',
  'auth-exploit',
  'ssrf-exploit',
  'authz-exploit',
  // Reporting
  'report',
] as const);
|
||||||
|
|
||||||
|
/**
 * Parallel execution groups.
 *
 * Builds a fresh frozen object on every call; the freeze is shallow, so the
 * two arrays themselves are ordinary arrays.
 *
 * @returns The vuln-agent group and the exploit-agent group
 */
export const getParallelGroups = (): Readonly<{ vuln: AgentName[]; exploit: AgentName[] }> => {
  const vuln: AgentName[] = ['injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln'];
  const exploit: AgentName[] = [
    'injection-exploit',
    'xss-exploit',
    'auth-exploit',
    'ssrf-exploit',
    'authz-exploit',
  ];

  return Object.freeze({ vuln, exploit });
};
|
||||||
|
|
||||||
|
/** Phase names for metrics aggregation. */
export type PhaseName =
  | 'pre-recon'
  | 'recon'
  | 'vulnerability-analysis'
  | 'exploitation'
  | 'reporting';

/**
 * Map agents to their corresponding phases (single source of truth).
 *
 * Every agent name appears exactly once; the object is frozen.
 */
export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.freeze({
  'pre-recon': 'pre-recon',
  'recon': 'recon',

  // *-vuln agents
  'injection-vuln': 'vulnerability-analysis',
  'xss-vuln': 'vulnerability-analysis',
  'auth-vuln': 'vulnerability-analysis',
  'authz-vuln': 'vulnerability-analysis',
  'ssrf-vuln': 'vulnerability-analysis',

  // *-exploit agents
  'injection-exploit': 'exploitation',
  'xss-exploit': 'exploitation',
  'auth-exploit': 'exploitation',
  'authz-exploit': 'exploitation',
  'ssrf-exploit': 'exploitation',

  'report': 'reporting',
});
|
||||||
|
|
||||||
|
|
||||||
@@ -1,9 +1,15 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { $, fs, path } from 'zx';
|
import { $, fs, path } from 'zx';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
import { PentestError } from '../error-handling.js';
|
import { PentestError } from '../error-handling.js';
|
||||||
|
|
||||||
// Pure function: Setup local repository for testing
|
// Pure function: Setup local repository for testing
|
||||||
export async function setupLocalRepo(repoPath) {
|
export async function setupLocalRepo(repoPath: string): Promise<string> {
|
||||||
try {
|
try {
|
||||||
const sourceDir = path.resolve(repoPath);
|
const sourceDir = path.resolve(repoPath);
|
||||||
|
|
||||||
@@ -28,7 +34,8 @@ export async function setupLocalRepo(repoPath) {
|
|||||||
await $`cd ${sourceDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`;
|
await $`cd ${sourceDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`;
|
||||||
console.log(chalk.green('✅ Initial checkpoint created'));
|
console.log(chalk.green('✅ Initial checkpoint created'));
|
||||||
} catch (gitError) {
|
} catch (gitError) {
|
||||||
console.log(chalk.yellow(`⚠️ Git setup warning: ${gitError.message}`));
|
const errMsg = gitError instanceof Error ? gitError.message : String(gitError);
|
||||||
|
console.log(chalk.yellow(`⚠️ Git setup warning: ${errMsg}`));
|
||||||
// Non-fatal - continue without Git setup
|
// Non-fatal - continue without Git setup
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,11 +47,10 @@ export async function setupLocalRepo(repoPath) {
|
|||||||
if (error instanceof PentestError) {
|
if (error instanceof PentestError) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
throw new PentestError(
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
`Local repository setup failed: ${error.message}`,
|
throw new PentestError(`Local repository setup failed: ${errMsg}`, 'filesystem', false, {
|
||||||
'filesystem',
|
repoPath,
|
||||||
false,
|
originalError: errMsg,
|
||||||
{ repoPath, originalError: error.message }
|
});
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,21 +1,27 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import figlet from 'figlet';
|
import figlet from 'figlet';
|
||||||
import gradient from 'gradient-string';
|
import gradient from 'gradient-string';
|
||||||
import boxen from 'boxen';
|
import boxen from 'boxen';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
import { fs, path } from 'zx';
|
import { fs, path } from 'zx';
|
||||||
|
|
||||||
export const displaySplashScreen = async () => {
|
export const displaySplashScreen = async (): Promise<void> => {
|
||||||
try {
|
try {
|
||||||
// Get version info from package.json
|
// Get version info from package.json
|
||||||
const packagePath = path.join(import.meta.dirname, '..', 'package.json');
|
const packagePath = path.join(import.meta.dirname, '..', 'package.json');
|
||||||
const packageJson = await fs.readJSON(packagePath);
|
const packageJson = (await fs.readJSON(packagePath)) as { version?: string };
|
||||||
const version = packageJson.version || '1.0.0';
|
const version = packageJson.version || '1.0.0';
|
||||||
|
|
||||||
// Create the main SHANNON ASCII art
|
// Create the main SHANNON ASCII art
|
||||||
const shannonText = figlet.textSync('SHANNON', {
|
const shannonText = figlet.textSync('SHANNON', {
|
||||||
font: 'ANSI Shadow',
|
font: 'ANSI Shadow',
|
||||||
horizontalLayout: 'default',
|
horizontalLayout: 'default',
|
||||||
verticalLayout: 'default'
|
verticalLayout: 'default',
|
||||||
});
|
});
|
||||||
|
|
||||||
// Apply golden gradient to SHANNON
|
// Apply golden gradient to SHANNON
|
||||||
@@ -36,7 +42,7 @@ export const displaySplashScreen = async () => {
|
|||||||
` ${versionInfo}`,
|
` ${versionInfo}`,
|
||||||
'',
|
'',
|
||||||
chalk.bold.yellow(' 🔐 DEFENSIVE SECURITY ONLY 🔐'),
|
chalk.bold.yellow(' 🔐 DEFENSIVE SECURITY ONLY 🔐'),
|
||||||
''
|
'',
|
||||||
].join('\n');
|
].join('\n');
|
||||||
|
|
||||||
// Create boxed output with minimal styling
|
// Create boxed output with minimal styling
|
||||||
@@ -45,7 +51,7 @@ export const displaySplashScreen = async () => {
|
|||||||
margin: 1,
|
margin: 1,
|
||||||
borderStyle: 'double',
|
borderStyle: 'double',
|
||||||
borderColor: 'cyan',
|
borderColor: 'cyan',
|
||||||
dimBorder: false
|
dimBorder: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Clear screen and display splash
|
// Clear screen and display splash
|
||||||
@@ -58,7 +64,9 @@ export const displaySplashScreen = async () => {
|
|||||||
|
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const loadingInterval = setInterval(() => {
|
const loadingInterval = setInterval(() => {
|
||||||
process.stdout.write(`\r${chalk.cyan(loadingFrames[frameIndex])} ${chalk.dim('Initializing systems...')}`);
|
process.stdout.write(
|
||||||
|
`\r${chalk.cyan(loadingFrames[frameIndex])} ${chalk.dim('Initializing systems...')}`
|
||||||
|
);
|
||||||
frameIndex = (frameIndex + 1) % loadingFrames.length;
|
frameIndex = (frameIndex + 1) % loadingFrames.length;
|
||||||
}, 100);
|
}, 100);
|
||||||
|
|
||||||
@@ -68,11 +76,11 @@ export const displaySplashScreen = async () => {
|
|||||||
resolve();
|
resolve();
|
||||||
}, 2000);
|
}, 2000);
|
||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Fallback to simple splash if anything fails
|
// Fallback to simple splash if anything fails
|
||||||
|
const errMsg = error instanceof Error ? error.message : String(error);
|
||||||
console.log(chalk.cyan.bold('\n🚀 SHANNON - AI Penetration Testing Framework\n'));
|
console.log(chalk.cyan.bold('\n🚀 SHANNON - AI Penetration Testing Framework\n'));
|
||||||
console.log(chalk.yellow('⚠️ Could not load full splash screen:', error.message));
|
console.log(chalk.yellow('⚠️ Could not load full splash screen:', errMsg));
|
||||||
console.log('');
|
console.log('');
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Telemetry Module - Public API
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* import { telemetry, TelemetryEvent } from '../telemetry/index.js';
|
||||||
|
*
|
||||||
|
* telemetry.initialize();
|
||||||
|
* telemetry.track(TelemetryEvent.WORKFLOW_START, { has_config: true });
|
||||||
|
* await telemetry.shutdown();
|
||||||
|
*/
|
||||||
|
|
||||||
|
export { telemetry, hashTargetUrl } from './telemetry-manager.js';
|
||||||
|
export { TelemetryEvent } from './telemetry-events.js';
|
||||||
|
export { getInstallationId } from './installation-id.js';
|
||||||
|
export type {
|
||||||
|
BaseTelemetryProperties,
|
||||||
|
AgentEventProperties,
|
||||||
|
WorkflowEventProperties,
|
||||||
|
} from './telemetry-events.js';
|
||||||
|
export { loadTelemetryConfig } from './telemetry-config.js';
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Installation ID - Persistent anonymous identifier for telemetry.
|
||||||
|
*
|
||||||
|
* Generates a UUID and persists it to ~/.shannon/telemetry-id
|
||||||
|
* On subsequent runs, reads the existing ID from the file.
|
||||||
|
* Handles errors gracefully by returning a random UUID.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { randomUUID } from 'crypto';
|
||||||
|
import { readFile, writeFile, mkdir } from 'fs/promises';
|
||||||
|
import { join } from 'path';
|
||||||
|
import { homedir } from 'os';
|
||||||
|
|
||||||
|
const SHANNON_DIR = '.shannon';
|
||||||
|
const TELEMETRY_ID_FILE = 'telemetry-id';
|
||||||
|
|
||||||
|
/**
 * Resolve the location of the telemetry ID file inside the user's home
 * directory: ~/.shannon/telemetry-id
 */
function getTelemetryIdPath(): string {
  const segments = [homedir(), SHANNON_DIR, TELEMETRY_ID_FILE];
  return join(...segments);
}
|
||||||
|
|
||||||
|
/**
 * Resolve the Shannon config directory inside the user's home directory:
 * ~/.shannon
 */
function getShannonDir(): string {
  const home = homedir();
  return join(home, SHANNON_DIR);
}
|
||||||
|
|
||||||
|
// Textual UUID form (8-4-4-4-12 hex digits). Dashes are optional so that an ID
// stored without them is still accepted.
const INSTALLATION_ID_PATTERN =
  /^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$/i;

/**
 * Get or create a persistent installation ID.
 *
 * - If ~/.shannon/telemetry-id exists and holds a UUID, reads and returns the ID
 * - If the file is missing, unreadable, or its content is not a UUID,
 *   generates a new UUID, persists it, and returns it
 * - If persisting fails, the new UUID is still returned (it just won't be
 *   stable across runs)
 *
 * Never throws.
 *
 * @returns Promise<string> - The installation ID (UUID format)
 */
export async function getInstallationId(): Promise<string> {
  const filePath = getTelemetryIdPath();

  try {
    // Try to read existing ID
    const existingId = await readFile(filePath, 'utf-8');
    const trimmedId = existingId.trim();

    // Only reuse content that really is a UUID: anything else (corrupted or
    // hand-edited file) must not end up being sent as the telemetry identifier.
    if (INSTALLATION_ID_PATTERN.test(trimmedId)) {
      return trimmedId;
    }
  } catch {
    // File doesn't exist or can't be read - will create new ID
  }

  // Generate new ID
  const newId = randomUUID();

  try {
    // Ensure ~/.shannon directory exists
    await mkdir(getShannonDir(), { recursive: true });

    // Persist the new ID
    await writeFile(filePath, newId, 'utf-8');
  } catch {
    // Failed to persist - return the ID anyway (won't be persistent)
  }

  return newId;
}
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Telemetry configuration with opt-out support.
|
||||||
|
*
|
||||||
|
* Telemetry is enabled by default. Users can disable via:
|
||||||
|
* - DO_NOT_TRACK=1 (standard convention: https://consoledonottrack.com/)
|
||||||
|
* - SHANNON_TELEMETRY=off|false|0
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Resolved telemetry settings. */
export interface TelemetryConfig {
  /** Whether telemetry may be sent at all. */
  enabled: boolean;
  /** PostHog project API key. */
  apiKey: string;
  /** Base URL of the PostHog host that receives events. */
  host: string;
}
|
||||||
|
|
||||||
|
// PostHog project configuration
|
||||||
|
// This is a write-only key - safe to publish, users cannot read analytics
|
||||||
|
const POSTHOG_API_KEY = 'phc_9EF2G6mm83rfLef5WmVLiNSyGQ4x0p8NzTRKiEAgvD4';
|
||||||
|
const POSTHOG_HOST = 'https://us.i.posthog.com';
|
||||||
|
|
||||||
|
/**
 * Check if telemetry is enabled based on environment variables.
 *
 * Telemetry is on unless one of the opt-outs is set:
 * - DO_NOT_TRACK = 1 | true
 * - SHANNON_TELEMETRY = off | false | 0
 *
 * Values are compared case-insensitively and with surrounding whitespace
 * removed, so an opt-out such as "1 " (easy to produce with
 * `set DO_NOT_TRACK=1 && ...` on Windows) is still honoured.
 *
 * @returns false when the user has opted out, true otherwise
 */
function isTelemetryEnabled(): boolean {
  const normalize = (raw: string | undefined): string => (raw ?? '').trim().toLowerCase();

  // Standard opt-out: DO_NOT_TRACK
  const doNotTrack = normalize(process.env.DO_NOT_TRACK);
  if (doNotTrack === '1' || doNotTrack === 'true') {
    return false;
  }

  // Shannon-specific opt-out
  const shannonTelemetry = normalize(process.env.SHANNON_TELEMETRY);
  if (shannonTelemetry === 'off' || shannonTelemetry === 'false' || shannonTelemetry === '0') {
    return false;
  }

  return true;
}
|
||||||
|
|
||||||
|
/**
 * Load telemetry configuration from environment.
 *
 * Never throws: if evaluating the opt-out settings fails, the returned
 * configuration has telemetry disabled. The API key and host are always the
 * built-in PostHog values.
 */
export function loadTelemetryConfig(): TelemetryConfig {
  let enabled: boolean;

  try {
    enabled = isTelemetryEnabled();
  } catch {
    // Config loading should never fail - return disabled
    enabled = false;
  }

  return { enabled, apiKey: POSTHOG_API_KEY, host: POSTHOG_HOST };
}
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Telemetry event definitions for Shannon.
|
||||||
|
*
|
||||||
|
* All PostHog event names are defined here for consistency and type safety.
|
||||||
|
* These events are anonymous - no PII or sensitive data is ever sent.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Telemetry event names.
 *
 * A string enum keeps the event names consistent across the codebase; each
 * member's value is the name sent with the event.
 */
export enum TelemetryEvent {
  /** Workflow lifecycle (emitted from client.ts). */
  WORKFLOW_START = 'workflow_start',

  /* Agent lifecycle (emitted from activities.ts) */
  AGENT_START = 'agent_start',
  AGENT_COMPLETE = 'agent_complete',
  AGENT_FAILED = 'agent_failed',
  AGENT_RETRY = 'agent_retry',

  /* Pipeline completion (emitted from report agent in activities.ts) */
  WORKFLOW_COMPLETE = 'workflow_complete',
  WORKFLOW_FAILED = 'workflow_failed',
}
|
||||||
|
|
||||||
|
/**
 * Base properties included with every telemetry event.
 */
export interface BaseTelemetryProperties {
  /** Operating-system platform identifier. */
  os_platform: string;
  /** Node.js version string. */
  node_version: string;
}
|
||||||
|
|
||||||
|
/**
 * Properties for agent-level events.
 *
 * Only `agent_name` and `attempt_number` are required.
 */
export interface AgentEventProperties {
  agent_name: string;
  attempt_number: number;
  /** Optional duration; presumably milliseconds, per the `_ms` suffix. */
  duration_ms?: number;
  /** Optional cost; presumably US dollars, per the `_usd` suffix. */
  cost_usd?: number;
  /** Only error classification, never the actual message. */
  error_type?: string;
}
|
||||||
|
|
||||||
|
/**
 * Properties for workflow-level events.
 *
 * Every field is optional.
 */
export interface WorkflowEventProperties {
  has_config?: boolean;
  /** Presumably milliseconds, per the `_ms` suffix. */
  total_duration_ms?: number;
  /** Presumably US dollars, per the `_usd` suffix. */
  total_cost_usd?: number;
  /** Only error classification, never the actual message. */
  error_type?: string;
}
|
||||||
@@ -0,0 +1,241 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Telemetry Manager - PostHog integration with safety guarantees.
|
||||||
|
*
|
||||||
|
* CRITICAL: All public methods are wrapped in try-catch to ensure
|
||||||
|
* telemetry NEVER interferes with workflow execution. Failures are
|
||||||
|
* silently swallowed - telemetry is optional, not critical.
|
||||||
|
*
|
||||||
|
* Features:
|
||||||
|
* - Safe initialization (never throws)
|
||||||
|
* - Auto-redaction of sensitive data before sending
|
||||||
|
* - Fire-and-forget tracking (non-blocking)
|
||||||
|
* - Graceful shutdown with timeout (never blocks)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { PostHog } from 'posthog-node';
|
||||||
|
import crypto from 'crypto';
|
||||||
|
import { loadTelemetryConfig, type TelemetryConfig } from './telemetry-config.js';
|
||||||
|
import { TelemetryEvent, type BaseTelemetryProperties } from './telemetry-events.js';
|
||||||
|
|
||||||
|
// Shutdown timeout - don't block workflow completion
|
||||||
|
const SHUTDOWN_TIMEOUT_MS = 2000;
|
||||||
|
|
||||||
|
// Sensitive keys to redact from properties (case-insensitive matching)
|
||||||
|
const SENSITIVE_KEYS = [
|
||||||
|
'weburl',
|
||||||
|
'repopath',
|
||||||
|
'configpath',
|
||||||
|
'outputpath',
|
||||||
|
'targeturl',
|
||||||
|
'url',
|
||||||
|
'path',
|
||||||
|
'error',
|
||||||
|
'message',
|
||||||
|
'stack',
|
||||||
|
'findings',
|
||||||
|
'vulnerabilities',
|
||||||
|
'credentials',
|
||||||
|
'password',
|
||||||
|
'secret',
|
||||||
|
'token',
|
||||||
|
'apikey',
|
||||||
|
'key',
|
||||||
|
];
|
||||||
|
|
||||||
|
/**
 * Produce a fresh random UUID to use as the anonymous distinct ID.
 */
function generateDistinctId(): string {
  const distinctId = crypto.randomUUID();
  return distinctId;
}
|
||||||
|
|
||||||
|
/**
 * Hash a URL's hostname using SHA-256.
 *
 * Only the hostname is hashed; scheme, port, path and query do not affect
 * the result.
 *
 * @param url - URL string to parse
 * @returns Hex-encoded SHA-256 digest of the hostname, or undefined when the
 *   URL cannot be parsed
 */
export function hashTargetUrl(url: string): string | undefined {
  try {
    const { hostname } = new URL(url);
    const hasher = crypto.createHash('sha256');
    hasher.update(hostname);
    return hasher.digest('hex');
  } catch {
    return undefined;
  }
}
|
||||||
|
|
||||||
|
// Keys that contain a sensitive substring (e.g. "error") but are defined to
// carry only a non-sensitive classification, so they must survive redaction.
const REDACTION_EXEMPT_KEYS: ReadonlySet<string> = new Set(['error_type']);

/**
 * Check if a key name contains sensitive information.
 *
 * A key is sensitive when its lower-cased name contains any entry of
 * SENSITIVE_KEYS as a substring. Exact matches of REDACTION_EXEMPT_KEYS are
 * never sensitive: without this exemption the substring "error" would drop
 * the `error_type` event property.
 *
 * @param key - Property name to classify
 * @returns true when the property must be dropped before sending
 */
function isSensitiveKey(key: string): boolean {
  const keyLower = key.toLowerCase();

  if (REDACTION_EXEMPT_KEYS.has(keyLower)) {
    return false;
  }

  return SENSITIVE_KEYS.some((sensitive) => keyLower.includes(sensitive));
}
|
||||||
|
|
||||||
|
// Marker returned by redactValue for values that must be omitted entirely.
const REDACTED_VALUE = Symbol('redacted');

/** True when a string looks like a filesystem path or a URL. */
function looksLikePathOrUrl(text: string): boolean {
  return (
    text.startsWith('/') ||
    text.startsWith('http') ||
    text.includes('://') ||
    text.startsWith('~/') ||
    text.startsWith('~\\') ||
    text.startsWith('\\\\') || // UNC path
    /^[a-zA-Z]:[\\/]/.test(text) // Windows drive path
  );
}

/** Redact one value; returns REDACTED_VALUE when the value must be dropped. */
function redactValue(value: unknown): unknown {
  if (typeof value === 'string') {
    return looksLikePathOrUrl(value) ? REDACTED_VALUE : value;
  }
  if (Array.isArray(value)) {
    // Arrays can hold nested objects or path/URL strings too
    return value.map(redactValue).filter((item) => item !== REDACTED_VALUE);
  }
  if (value && typeof value === 'object') {
    return redactSensitiveData(value as Record<string, unknown>);
  }
  return value;
}

/**
 * Redact sensitive values from properties object.
 *
 * Returns a new object (the input is not modified) in which:
 * - properties whose key is sensitive are removed;
 * - string values that look like paths or URLs are removed;
 * - nested objects are redacted recursively;
 * - arrays are redacted element by element (offending elements are removed);
 * - every other value (numbers, booleans, null, ...) is kept as is.
 *
 * @param properties - Raw event properties
 * @returns Redacted copy of the properties
 */
function redactSensitiveData(
  properties: Record<string, unknown>
): Record<string, unknown> {
  const redacted: Record<string, unknown> = {};

  for (const [key, value] of Object.entries(properties)) {
    // Skip sensitive keys entirely
    if (isSensitiveKey(key)) {
      continue;
    }

    const cleaned = redactValue(value);
    if (cleaned !== REDACTED_VALUE) {
      redacted[key] = cleaned;
    }
  }

  return redacted;
}
|
||||||
|
|
||||||
|
class TelemetryManager {
|
||||||
|
private client: PostHog | null = null;
|
||||||
|
private config: TelemetryConfig;
|
||||||
|
private distinctId: string;
|
||||||
|
private initialized = false;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.config = loadTelemetryConfig();
|
||||||
|
this.distinctId = generateDistinctId();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the distinct ID for all subsequent events.
|
||||||
|
* Call this with workflowId to ensure consistent ID across client/worker.
|
||||||
|
*/
|
||||||
|
setDistinctId(id: string): void {
|
||||||
|
this.distinctId = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize PostHog client.
|
||||||
|
* Safe: never throws, logs warning on failure.
|
||||||
|
*/
|
||||||
|
initialize(): void {
|
||||||
|
try {
|
||||||
|
if (this.initialized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.initialized = true;
|
||||||
|
|
||||||
|
if (!this.config.enabled) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't initialize if API key isn't configured
|
||||||
|
if (this.config.apiKey.includes('REPLACE_WITH')) {
|
||||||
|
this.config.enabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.client = new PostHog(this.config.apiKey, {
|
||||||
|
host: this.config.host,
|
||||||
|
disableGeoip: true,
|
||||||
|
flushAt: 10,
|
||||||
|
flushInterval: 5000,
|
||||||
|
});
|
||||||
|
} catch {
|
||||||
|
// Initialization failure is silent - telemetry is optional
|
||||||
|
this.initialized = true;
|
||||||
|
this.config.enabled = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Track an event with properties.
|
||||||
|
* Safe: never throws, silently fails on error.
|
||||||
|
*
|
||||||
|
* @param event - Event name from TelemetryEvent enum
|
||||||
|
* @param properties - Event properties (sensitive data auto-redacted)
|
||||||
|
*/
|
||||||
|
track(event: TelemetryEvent, properties: Record<string, unknown> = {}): void {
|
||||||
|
try {
|
||||||
|
if (!this.config.enabled || !this.client) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build base properties
|
||||||
|
const baseProps: BaseTelemetryProperties & Record<string, unknown> = {
|
||||||
|
os_platform: process.platform,
|
||||||
|
node_version: process.version,
|
||||||
|
$lib: 'shannon',
|
||||||
|
};
|
||||||
|
|
||||||
|
// Redact sensitive data and merge with base props
|
||||||
|
const safeProps = {
|
||||||
|
...baseProps,
|
||||||
|
...redactSensitiveData(properties),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fire and forget - don't await
|
||||||
|
this.client.capture({
|
||||||
|
distinctId: this.distinctId,
|
||||||
|
event,
|
||||||
|
properties: safeProps,
|
||||||
|
});
|
||||||
|
} catch {
|
||||||
|
// Tracking failure is silent - never interfere with workflow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Shut down the PostHog client, waiting at most SHUTDOWN_TIMEOUT_MS.
 *
 * Safe: never throws. The client's own shutdown is raced against a timer so
 * a hung flush cannot block the caller. The timer is cancelled once the race
 * settles, so a fast shutdown does not leave a pending timer keeping the
 * process alive for the remainder of the timeout.
 *
 * @returns Promise that resolves when shutdown completes, fails, or times out
 */
async shutdown(): Promise<void> {
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  try {
    if (!this.client) {
      return;
    }

    // Race shutdown against timeout to never block workflow
    await Promise.race([
      this.client.shutdown(),
      new Promise<void>((resolve) => {
        timeoutHandle = setTimeout(resolve, SHUTDOWN_TIMEOUT_MS);
      }),
    ]);
  } catch {
    // Shutdown failure is silent
  } finally {
    // Cancel the timer if the client finished (or failed) first
    if (timeoutHandle !== undefined) {
      clearTimeout(timeoutHandle);
    }
    this.client = null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Report whether events can currently be sent.
 *
 * @returns true only when the config flag is on and a client instance exists
 */
isEnabled(): boolean {
  const switchedOn = this.config.enabled;
  const hasClient = this.client !== null;
  return switchedOn && hasClient;
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Shared TelemetryManager instance; other modules import this rather than constructing their own. */
export const telemetry = new TelemetryManager();
|
||||||
@@ -0,0 +1,558 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporal activities for Shannon agent execution.
|
||||||
|
*
|
||||||
|
* Each activity wraps a single agent execution with:
|
||||||
|
* - Heartbeat loop (2s interval) to signal worker liveness
|
||||||
|
* - Git checkpoint/rollback/commit per attempt
|
||||||
|
* - Error classification for Temporal retry behavior
|
||||||
|
* - Audit session logging
|
||||||
|
*
|
||||||
|
* Temporal handles retries based on error classification:
|
||||||
|
* - Retryable: BillingError, TransientError (429, 5xx, network)
|
||||||
|
* - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
|
||||||
|
import chalk from 'chalk';
|
||||||
|
|
||||||
|
// Size caps applied before handing errors to Temporal, to prevent protobuf
// buffer overflow during serialization.
const MAX_ERROR_MESSAGE_LENGTH = 2000;
const MAX_STACK_TRACE_LENGTH = 1000;

// Attempt cap for output validation errors (agent didn't save deliverables).
// Lower than the default of 50 since this is unlikely to self-heal.
const MAX_OUTPUT_VALIDATION_RETRIES = 3;

/**
 * Cap an error message at a size Temporal can serialize safely.
 *
 * @param message - Raw error message
 * @returns The message unchanged when it is within MAX_ERROR_MESSAGE_LENGTH;
 *   otherwise its first (MAX_ERROR_MESSAGE_LENGTH - 20) characters followed
 *   by a "[truncated]" marker on a new line
 */
function truncateErrorMessage(message: string): string {
  const fits = message.length <= MAX_ERROR_MESSAGE_LENGTH;
  if (fits) {
    return message;
  }
  const head = message.slice(0, MAX_ERROR_MESSAGE_LENGTH - 20);
  return `${head}\n[truncated]`;
}
|
||||||
|
|
||||||
|
/**
 * Shorten the stack trace of an ApplicationFailure in place.
 *
 * When the stack is longer than MAX_STACK_TRACE_LENGTH it is cut to that
 * length and a "[stack truncated]" marker is appended on a new line. A
 * missing or short stack is left untouched.
 *
 * @param failure - Failure whose `stack` property may be rewritten
 */
function truncateStackTrace(failure: ApplicationFailure): void {
  const { stack } = failure;
  if (!stack || stack.length <= MAX_STACK_TRACE_LENGTH) {
    return;
  }
  failure.stack = `${stack.slice(0, MAX_STACK_TRACE_LENGTH)}\n[stack truncated]`;
}
|
||||||
|
|
||||||
|
import {
|
||||||
|
runClaudePrompt,
|
||||||
|
validateAgentOutput,
|
||||||
|
type ClaudePromptResult,
|
||||||
|
} from '../ai/claude-executor.js';
|
||||||
|
import { loadPrompt } from '../prompts/prompt-manager.js';
|
||||||
|
import { parseConfig, distributeConfig } from '../config-parser.js';
|
||||||
|
import { classifyErrorForTemporal } from '../error-handling.js';
|
||||||
|
import {
|
||||||
|
safeValidateQueueAndDeliverable,
|
||||||
|
type VulnType,
|
||||||
|
type ExploitationDecision,
|
||||||
|
} from '../queue-validation.js';
|
||||||
|
import {
|
||||||
|
createGitCheckpoint,
|
||||||
|
commitGitSuccess,
|
||||||
|
rollbackGitWorkspace,
|
||||||
|
getGitCommitHash,
|
||||||
|
} from '../utils/git-manager.js';
|
||||||
|
import { assembleFinalReport } from '../phases/reporting.js';
|
||||||
|
import { getPromptNameForAgent } from '../types/agents.js';
|
||||||
|
import { AuditSession } from '../audit/index.js';
|
||||||
|
import { telemetry, TelemetryEvent, hashTargetUrl } from '../telemetry/index.js';
|
||||||
|
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||||
|
import type { AgentName } from '../types/agents.js';
|
||||||
|
import type { AgentMetrics } from './shared.js';
|
||||||
|
import type { DistributedConfig } from '../types/config.js';
|
||||||
|
import type { SessionMetadata } from '../audit/utils.js';
|
||||||
|
|
||||||
|
// Period between activity heartbeats, in milliseconds.
// Must be < heartbeatTimeout (10min production, 5min testing).
const HEARTBEAT_INTERVAL_MS = 2000;
|
||||||
|
|
||||||
|
/**
 * Input shared by every agent activity.
 *
 * Matches PipelineInput, except that workflowId is required so audit records
 * can be correlated with the workflow.
 */
export interface ActivityInput {
  /** Target web URL. */
  webUrl: string;
  /** Path of the repository the agents work in. */
  repoPath: string;
  /** Workflow identifier used for audit correlation. */
  workflowId: string;
  /** Optional configuration file path. */
  configPath?: string;
  /** Optional output path, forwarded into the audit session metadata. */
  outputPath?: string;
  /** Optional flag forwarded to prompt loading. */
  pipelineTestingMode?: boolean;
  /** Epoch ms; used for total workflow duration in telemetry. */
  workflowStartTime?: number;
  /** Persistent anonymous ID for counting unique installations. */
  installationId?: string;
  /** Workflow stats for telemetry (only passed to the report agent). */
  workflowStats?: {
    totalAgents: number;
    agentsSucceeded: number;
    agentsFailed: number;
  };
}
|
||||||
|
|
||||||
|
/**
 * Core activity implementation: run one attempt of a single agent.
 *
 * Steps, in order:
 *  1. Start a heartbeat interval for worker liveness
 *  2. Load config (if configPath provided)
 *  3. Initialize the audit session
 *  4. Load the agent's prompt
 *  5. Create a git checkpoint and mark the agent as started in the audit log
 *  6. Execute the agent (single attempt - Temporal handles retries)
 *  7. Sanity-check for a spending cap reported as a "successful" result
 *  8. Validate output
 *  9. On success: record the result, commit, emit telemetry, return metrics
 * 10. On any error: best-effort rollback, classify, and throw an
 *     ApplicationFailure that tells Temporal whether to retry
 *
 * Every ApplicationFailure created here is built with
 * ApplicationFailure.create so its `details` is always a one-element list
 * `[{ agentName, attemptNumber, elapsed }]`. ApplicationFailure.nonRetryable
 * takes details as rest arguments, so passing it an array produced a nested
 * `[[{...}]]` for non-retryable failures only.
 *
 * @param agentName - Agent to run
 * @param input - Activity input shared by all agents
 * @returns Metrics for the successful attempt
 * @throws ApplicationFailure for every failure (retryable or non-retryable)
 */
async function runAgentActivity(
  agentName: AgentName,
  input: ActivityInput
): Promise<AgentMetrics> {
  const {
    webUrl,
    repoPath,
    configPath,
    outputPath,
    pipelineTestingMode = false,
    workflowId,
    installationId,
  } = input;

  const startTime = Date.now();
  const elapsedMs = (): number => Date.now() - startTime;

  // Get attempt number from Temporal context (tracks retries automatically)
  const attemptNumber = Context.current().info.attempt;

  // Details attached to every ApplicationFailure thrown by this activity
  const failureDetails = () => [{ agentName, attemptNumber, elapsed: elapsedMs() }];

  // Emits AGENT_RETRY or AGENT_FAILED with the common failure payload
  const trackFailure = (retryable: boolean, errorType: unknown): void => {
    telemetry.track(retryable ? TelemetryEvent.AGENT_RETRY : TelemetryEvent.AGENT_FAILED, {
      agent_name: agentName,
      attempt_number: attemptNumber,
      duration_ms: elapsedMs(),
      error_type: errorType,
      workflow_id: workflowId,
    });
  };

  // Set installationId as distinct ID for unique user tracking
  if (installationId) {
    telemetry.setDistinctId(installationId);
  }

  // Track agent start
  telemetry.track(TelemetryEvent.AGENT_START, {
    agent_name: agentName,
    attempt_number: attemptNumber,
    workflow_id: workflowId,
  });

  // Heartbeat loop - signals worker is alive to Temporal server
  const heartbeatInterval = setInterval(() => {
    const elapsed = Math.floor(elapsedMs() / 1000);
    heartbeat({ agent: agentName, elapsedSeconds: elapsed, attempt: attemptNumber });
  }, HEARTBEAT_INTERVAL_MS);

  try {
    // 1. Load config (if provided)
    let distributedConfig: DistributedConfig | null = null;
    if (configPath) {
      try {
        const config = await parseConfig(configPath);
        distributedConfig = distributeConfig(config);
      } catch (err) {
        throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
      }
    }

    // 2. Build session metadata for audit
    const sessionMetadata: SessionMetadata = {
      id: workflowId,
      webUrl,
      repoPath,
      ...(outputPath && { outputPath }),
    };

    // 3. Initialize audit session (idempotent, safe across retries)
    const auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();

    // 4. Load prompt
    const promptName = getPromptNameForAgent(agentName);
    const prompt = await loadPrompt(
      promptName,
      { webUrl, repoPath },
      distributedConfig,
      pipelineTestingMode
    );

    // 5. Create git checkpoint before execution
    await createGitCheckpoint(repoPath, agentName, attemptNumber);
    await auditSession.startAgent(agentName, prompt, attemptNumber);

    // 6. Execute agent (single attempt - Temporal handles retries)
    const result: ClaudePromptResult = await runClaudePrompt(
      prompt,
      repoPath,
      '', // context
      agentName, // description
      agentName,
      chalk.cyan,
      sessionMetadata,
      auditSession,
      attemptNumber
    );

    // Roll back the workspace and close the audit record for a failed attempt
    const recordFailedAttempt = async (
      rollbackReason: string,
      costUsd: number,
      errorText: string
    ): Promise<void> => {
      await rollbackGitWorkspace(repoPath, rollbackReason);
      await auditSession.endAgent(agentName, {
        attemptNumber,
        duration_ms: result.duration,
        cost_usd: costUsd,
        success: false,
        error: errorText,
      });
    };

    // 6.5. Sanity check: Detect spending cap that slipped through all detection layers
    // Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
      const resultText = result.result || '';
      const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);

      if (looksLikeBillingError) {
        const capMessage = `Spending cap likely reached: ${resultText.slice(0, 100)}`;
        await recordFailedAttempt('spending cap detected', 0, capMessage);
        // Thrown as a plain Error; the catch block below classifies it for Temporal
        throw new Error(capMessage);
      }
    }

    // 7. Handle execution failure
    if (!result.success) {
      await recordFailedAttempt(
        'execution failure',
        result.cost || 0,
        result.error || 'Execution failed'
      );
      throw new Error(result.error || 'Agent execution failed');
    }

    // 8. Validate output
    const validationPassed = await validateAgentOutput(result, agentName, repoPath);
    if (!validationPassed) {
      await recordFailedAttempt('validation failure', result.cost || 0, 'Output validation failed');

      // Limit output validation retries (unlikely to self-heal)
      if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
        throw ApplicationFailure.create({
          message: `Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
          type: 'OutputValidationError',
          nonRetryable: true,
          details: failureDetails(),
        });
      }
      // Below the cap: throw a plain Error and let the catch block classify it
      throw new Error(`Agent ${agentName} failed output validation`);
    }

    // 9. Success - log, then commit
    const commitHash = await getGitCommitHash(repoPath);
    await auditSession.endAgent(agentName, {
      attemptNumber,
      duration_ms: result.duration,
      cost_usd: result.cost || 0,
      success: true,
      ...(commitHash && { checkpoint: commitHash }),
    });
    await commitGitSuccess(repoPath, agentName);

    // Track agent completion
    telemetry.track(TelemetryEvent.AGENT_COMPLETE, {
      agent_name: agentName,
      attempt_number: attemptNumber,
      duration_ms: elapsedMs(),
      cost_usd: result.cost ?? undefined,
      workflow_id: workflowId,
    });

    // 10. Return metrics
    return {
      durationMs: elapsedMs(),
      inputTokens: null, // Not currently exposed by SDK wrapper
      outputTokens: null,
      costUsd: result.cost ?? null,
      numTurns: result.turns ?? null,
    };
  } catch (error) {
    // Rollback git workspace before Temporal retry to ensure clean state
    try {
      await rollbackGitWorkspace(repoPath, 'error recovery');
    } catch (rollbackErr) {
      // Log but don't fail - rollback is best-effort
      console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
    }

    // Already an ApplicationFailure (e.g. from the validation retry cap):
    // re-throw it directly without re-classifying
    if (error instanceof ApplicationFailure) {
      trackFailure(!error.nonRetryable, error.type || 'UnknownError');
      throw error;
    }

    // Classify error for Temporal retry behavior
    const classified = classifyErrorForTemporal(error);
    // Truncate message to prevent protobuf buffer overflow
    const rawMessage = error instanceof Error ? error.message : String(error);
    const message = truncateErrorMessage(rawMessage);

    trackFailure(classified.retryable, classified.type);

    // Retryable: Temporal retries with its configured backoff.
    // Non-retryable: the activity fails immediately.
    const failure = ApplicationFailure.create({
      message,
      type: classified.type,
      nonRetryable: !classified.retryable,
      details: failureDetails(),
    });
    truncateStackTrace(failure);
    throw failure;
  } finally {
    clearInterval(heartbeatInterval);
  }
}
|
||||||
|
|
||||||
|
// === Individual Agent Activity Exports ===
// One exported activity per agent. Each simply forwards its input to
// runAgentActivity together with the agent's name.

/** Run the 'pre-recon' agent. */
export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('pre-recon', input);
}

/** Run the 'recon' agent. */
export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('recon', input);
}

/** Run the 'injection-vuln' agent. */
export async function runInjectionVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('injection-vuln', input);
}

/** Run the 'xss-vuln' agent. */
export async function runXssVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('xss-vuln', input);
}

/** Run the 'auth-vuln' agent. */
export async function runAuthVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('auth-vuln', input);
}

/** Run the 'ssrf-vuln' agent. */
export async function runSsrfVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('ssrf-vuln', input);
}

/** Run the 'authz-vuln' agent. */
export async function runAuthzVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('authz-vuln', input);
}

/** Run the 'injection-exploit' agent. */
export async function runInjectionExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('injection-exploit', input);
}

/** Run the 'xss-exploit' agent. */
export async function runXssExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('xss-exploit', input);
}

/** Run the 'auth-exploit' agent. */
export async function runAuthExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('auth-exploit', input);
}

/** Run the 'ssrf-exploit' agent. */
export async function runSsrfExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('ssrf-exploit', input);
}

/** Run the 'authz-exploit' agent. */
export async function runAuthzExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('authz-exploit', input);
}
|
||||||
|
|
||||||
|
/**
 * Run the 'report' agent and emit workflow-level telemetry for its outcome.
 *
 * A successful report run is tracked as WORKFLOW_COMPLETE; a thrown error is
 * tracked as WORKFLOW_FAILED and then re-thrown unchanged.
 *
 * @param input - Activity input; workflowStartTime and workflowStats, when
 *   present, feed the telemetry payload
 * @returns Metrics of the report agent run
 */
export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics> {
  // Total duration is measured from the workflow start when known, else from now
  const workflowStartTime = input.workflowStartTime ?? Date.now();
  const stats = input.workflowStats;
  const targetHash = hashTargetUrl(input.webUrl);
  const workflowId = input.workflowId;

  // Trailing fields common to both workflow-level events
  const workflowProps = () => ({
    total_agents: stats?.totalAgents,
    agents_succeeded: stats?.agentsSucceeded,
    agents_failed: stats?.agentsFailed,
    target_hash: targetHash,
    workflow_id: workflowId,
  });

  let metrics: AgentMetrics;
  try {
    metrics = await runAgentActivity('report', input);
  } catch (error) {
    // Report agent failure = workflow failed
    let errorType;
    if (error instanceof ApplicationFailure) {
      errorType = error.type || 'UnknownError';
    } else {
      errorType = classifyErrorForTemporal(error).type;
    }
    telemetry.track(TelemetryEvent.WORKFLOW_FAILED, {
      total_duration_ms: Date.now() - workflowStartTime,
      error_type: errorType,
      last_agent: 'report',
      ...workflowProps(),
    });
    throw error;
  }

  // Report agent success = workflow complete
  telemetry.track(TelemetryEvent.WORKFLOW_COMPLETE, {
    total_duration_ms: Date.now() - workflowStartTime,
    total_cost_usd: metrics.costUsd ?? undefined,
    ...workflowProps(),
  });
  return metrics;
}
|
||||||
|
|
||||||
|
/**
 * Assemble the final report by concatenating exploitation evidence files.
 * This must be called BEFORE runReportAgent to create the file that the report agent will modify.
 *
 * Failures are logged as a warning and not re-thrown: the report agent can
 * still create content even if no exploitation files exist.
 *
 * @param input - Activity input; only repoPath is used
 */
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
  const { repoPath } = input;
  console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
  try {
    await assembleFinalReport(repoPath);
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // .message so non-Error throws are not logged as "undefined"
    const reason = error instanceof Error ? error.message : String(error);
    console.log(chalk.yellow(`⚠️ Error assembling final report: ${reason}`));
    // Don't throw - the report agent can still create content even if no exploitation files exist
  }
}
|
||||||
|
|
||||||
|
/**
 * Decide whether the exploit agent for a vulnerability type should run.
 *
 * Delegates to safeValidateQueueAndDeliverable and returns its decision when
 * validation succeeds. This lets the workflow skip exploit agents entirely
 * when no vulnerabilities were found, saving API calls and time.
 *
 * Error handling:
 * - Retryable errors (missing files, invalid JSON): re-thrown for Temporal retry
 * - Non-retryable errors: logged, and a "do not exploit" decision is returned
 *
 * @param input - Activity input; only repoPath is used
 * @param vulnType - Vulnerability type whose queue is checked
 * @returns The exploitation decision for vulnType
 */
export async function checkExploitationQueue(
  input: ActivityInput,
  vulnType: VulnType
): Promise<ExploitationDecision> {
  const validation = await safeValidateQueueAndDeliverable(vulnType, input.repoPath);

  if (validation.success && validation.data) {
    const decision = validation.data;
    const outcome = decision.shouldExploit
      ? `${decision.vulnerabilityCount} vulnerabilities found`
      : 'no vulnerabilities, skipping exploitation';
    console.log(chalk.blue(`🔍 ${vulnType}: ${outcome}`));
    return decision;
  }

  // Validation failed - retry or skip depending on the error
  const failure = validation.error;
  if (failure?.retryable) {
    // Re-throw retryable errors so Temporal can retry
    console.log(chalk.yellow(`⚠️ ${vulnType}: ${failure.message} (retrying)`));
    throw failure;
  }

  // Non-retryable error - skip exploitation gracefully
  const reason = failure?.message ?? 'Unknown error';
  console.log(chalk.yellow(`⚠️ ${vulnType}: ${reason}, skipping exploitation`));
  return {
    shouldExploit: false,
    shouldRetry: false,
    vulnerabilityCount: 0,
    vulnType,
  };
}
|
||||||
|
|
||||||
|
/**
 * Write a phase start/complete entry to the workflow's audit session.
 * Called at phase boundaries for per-workflow logging.
 *
 * @param input - Activity input supplying the session metadata
 * @param phase - Phase name to record
 * @param event - 'start' logs a phase start; 'complete' logs a phase completion
 */
export async function logPhaseTransition(
  input: ActivityInput,
  phase: string,
  event: 'start' | 'complete'
): Promise<void> {
  const sessionMetadata: SessionMetadata = {
    id: input.workflowId,
    webUrl: input.webUrl,
    repoPath: input.repoPath,
    // outputPath is only included when it was provided
    ...(input.outputPath ? { outputPath: input.outputPath } : {}),
  };

  const audit = new AuditSession(sessionMetadata);
  await audit.initialize();

  if (event === 'start') {
    await audit.logPhaseStart(phase);
    return;
  }
  await audit.logPhaseComplete(phase);
}
|
||||||
|
|
||||||
|
/**
 * Write the end-of-workflow summary to the workflow's audit session.
 * Called at the end of the workflow.
 *
 * @param input - Activity input supplying the session metadata
 * @param summary - Workflow summary passed through to the audit session
 */
export async function logWorkflowComplete(
  input: ActivityInput,
  summary: WorkflowSummary
): Promise<void> {
  const sessionMetadata: SessionMetadata = {
    id: input.workflowId,
    webUrl: input.webUrl,
    repoPath: input.repoPath,
    // outputPath is only included when it was provided
    ...(input.outputPath ? { outputPath: input.outputPath } : {}),
  };

  const audit = new AuditSession(sessionMetadata);
  await audit.initialize();
  await audit.logWorkflowComplete(summary);
}
|
||||||
@@ -0,0 +1,229 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporal client for starting Shannon pentest pipeline workflows.
|
||||||
|
*
|
||||||
|
* Starts a workflow and optionally waits for completion with progress polling.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npm run temporal:start -- <webUrl> <repoPath> [options]
|
||||||
|
* # or
|
||||||
|
* node dist/temporal/client.js <webUrl> <repoPath> [options]
|
||||||
|
*
|
||||||
|
* Options:
|
||||||
|
* --config <path> Configuration file path
|
||||||
|
* --output <path> Output directory for audit logs
|
||||||
|
* --pipeline-testing Use minimal prompts for fast testing
|
||||||
|
* --workflow-id <id> Custom workflow ID (default: <hostname>_shannon-<timestamp>)
|
||||||
|
* --wait Wait for workflow completion with progress polling
|
||||||
|
*
|
||||||
|
* Environment:
|
||||||
|
* TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Connection, Client } from '@temporalio/client';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import { displaySplashScreen } from '../splash-screen.js';
|
||||||
|
import { sanitizeHostname } from '../audit/utils.js';
|
||||||
|
import { telemetry, TelemetryEvent, hashTargetUrl, getInstallationId } from '../telemetry/index.js';
|
||||||
|
// Import types only - these don't pull in workflow runtime code
|
||||||
|
import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
|
||||||
|
|
||||||
|
// Load variables from a .env file into process.env before anything reads them
dotenv.config();

/** Name of the progress query; must match the one defined in workflows.ts. */
const PROGRESS_QUERY = 'getProgress';
|
||||||
|
|
||||||
|
/**
 * Print command-line help for the Temporal client to stdout.
 *
 * The default shown for --workflow-id matches the ID that startPipeline
 * generates: the sanitized hostname, then "_shannon-", then a timestamp.
 */
function showUsage(): void {
  const lines: string[] = [
    chalk.cyan.bold('\nShannon Temporal Client'),
    chalk.gray('Start a pentest pipeline workflow\n'),
    chalk.yellow('Usage:'),
    ' node dist/temporal/client.js <webUrl> <repoPath> [options]\n',
    chalk.yellow('Options:'),
    ' --config <path> Configuration file path',
    ' --output <path> Output directory for audit logs',
    ' --pipeline-testing Use minimal prompts for fast testing',
    ' --workflow-id <id> Custom workflow ID (default: <hostname>_shannon-<timestamp>)',
    ' --wait Wait for workflow completion with progress polling\n',
    chalk.yellow('Examples:'),
    ' node dist/temporal/client.js https://example.com /path/to/repo',
    ' node dist/temporal/client.js https://example.com /path/to/repo --config config.yaml\n',
  ];
  for (const line of lines) {
    console.log(line);
  }
}
|
||||||
|
|
||||||
|
/** Options recognised on the client command line. */
interface PipelineCliArgs {
  webUrl: string | undefined;
  repoPath: string | undefined;
  configPath: string | undefined;
  outputPath: string | undefined;
  customWorkflowId: string | undefined;
  pipelineTestingMode: boolean;
  waitForCompletion: boolean;
}

/**
 * Parse the client's command-line arguments.
 *
 * The first two arguments that do not start with '-' are taken as webUrl and
 * repoPath; further positionals are ignored. A value option (--config,
 * --output, --workflow-id) is ignored when it is not followed by a value
 * (next argument missing, empty, or starting with '-'). Unknown flags are
 * ignored.
 */
function parsePipelineArgs(args: readonly string[]): PipelineCliArgs {
  const parsed: PipelineCliArgs = {
    webUrl: undefined,
    repoPath: undefined,
    configPath: undefined,
    outputPath: undefined,
    customWorkflowId: undefined,
    pipelineTestingMode: false,
    waitForCompletion: false,
  };

  // Value following the option at `index`, or undefined when there is none
  const valueAfter = (index: number): string | undefined => {
    const candidate = args[index + 1];
    return candidate && !candidate.startsWith('-') ? candidate : undefined;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--config' || arg === '--output' || arg === '--workflow-id') {
      const value = valueAfter(i);
      if (value !== undefined) {
        if (arg === '--config') {
          parsed.configPath = value;
        } else if (arg === '--output') {
          parsed.outputPath = value;
        } else {
          parsed.customWorkflowId = value;
        }
        i++; // the value has been consumed
      }
    } else if (arg === '--pipeline-testing') {
      parsed.pipelineTestingMode = true;
    } else if (arg === '--wait') {
      parsed.waitForCompletion = true;
    } else if (arg && !arg.startsWith('-')) {
      if (!parsed.webUrl) {
        parsed.webUrl = arg;
      } else if (!parsed.repoPath) {
        parsed.repoPath = arg;
      }
    }
  }
  return parsed;
}

/**
 * CLI entry point: start the pentest pipeline workflow and optionally wait
 * for it to finish.
 *
 * Exits with code 0 after printing help, and with code 1 when required
 * arguments are missing or the awaited pipeline fails. On pipeline failure
 * the exit happens only after telemetry has been shut down and the Temporal
 * connection closed; calling process.exit inside the try block would skip
 * that cleanup. The "Workflow started" banner is printed only after the
 * start request has succeeded.
 */
async function startPipeline(): Promise<void> {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h') || args.length === 0) {
    showUsage();
    process.exit(0);
  }

  const {
    webUrl,
    repoPath,
    configPath,
    outputPath,
    customWorkflowId,
    pipelineTestingMode,
    waitForCompletion,
  } = parsePipelineArgs(args);

  if (!webUrl || !repoPath) {
    console.log(chalk.red('Error: webUrl and repoPath are required'));
    showUsage();
    process.exit(1);
  }

  // Display splash screen
  await displaySplashScreen();

  const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
  console.log(chalk.gray(`Connecting to Temporal at ${address}...`));

  const connection = await Connection.connect({ address });
  const client = new Client({ connection });

  // Set when the awaited workflow fails; acted on after cleanup has run
  let pipelineFailed = false;

  try {
    const hostname = sanitizeHostname(webUrl);
    const workflowId = customWorkflowId || `${hostname}_shannon-${Date.now()}`;

    // Get persistent installation ID for unique installation counting
    const installationId = await getInstallationId();

    // Initialize telemetry with installation ID as distinct ID (for unique user tracking)
    telemetry.initialize();
    telemetry.setDistinctId(installationId);

    const input: PipelineInput = {
      webUrl,
      repoPath,
      ...(configPath && { configPath }),
      ...(outputPath && { outputPath }),
      ...(pipelineTestingMode && { pipelineTestingMode }),
      installationId,
    };

    // Start workflow by name (not by importing the function)
    const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
      'pentestPipelineWorkflow',
      {
        taskQueue: 'shannon-pipeline',
        workflowId,
        args: [input],
      }
    );

    console.log(chalk.green.bold(`✓ Workflow started: ${workflowId}`));
    console.log();
    console.log(chalk.white(' Target: ') + chalk.cyan(webUrl));
    console.log(chalk.white(' Repository: ') + chalk.cyan(repoPath));
    if (configPath) {
      console.log(chalk.white(' Config: ') + chalk.cyan(configPath));
    }
    if (pipelineTestingMode) {
      console.log(chalk.white(' Mode: ') + chalk.yellow('Pipeline Testing'));
    }
    console.log();

    // Track workflow start
    telemetry.track(TelemetryEvent.WORKFLOW_START, {
      has_config: !!configPath,
      target_hash: hashTargetUrl(webUrl),
      workflow_id: workflowId,
    });

    if (!waitForCompletion) {
      console.log(chalk.bold('Monitor progress:'));
      console.log(chalk.white(' Web UI: ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
      console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`));
      console.log(chalk.white(' Query: ') + chalk.gray(`./shannon query ID=${workflowId}`));
      console.log();
      return;
    }

    // Poll for progress every 30 seconds
    const progressInterval = setInterval(async () => {
      try {
        const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY);
        const elapsed = Math.floor(progress.elapsedMs / 1000);
        console.log(
          chalk.gray(`[${elapsed}s]`),
          chalk.cyan(`Phase: ${progress.currentPhase || 'unknown'}`),
          chalk.gray(`| Agent: ${progress.currentAgent || 'none'}`),
          chalk.gray(`| Completed: ${progress.completedAgents.length}/13`)
        );
      } catch {
        // Workflow may have completed
      }
    }, 30000);

    try {
      const result = await handle.result();

      console.log(chalk.green.bold('\nPipeline completed successfully!'));
      if (result.summary) {
        console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`));
        console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`));
        console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`));
        console.log(chalk.gray(`Total cost: $${result.summary.totalCostUsd.toFixed(4)}`));
      }
    } catch (error) {
      console.error(chalk.red.bold('\nPipeline failed:'), error);
      pipelineFailed = true;
    } finally {
      clearInterval(progressInterval);
    }
  } finally {
    await telemetry.shutdown();
    await connection.close();
  }

  // Exit non-zero only after telemetry and the connection have been released
  if (pipelineFailed) {
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
// Script entry point. Any rejection that escapes startPipeline() is printed
// and converted into a non-zero exit status.
startPipeline().catch((failure) => {
  console.error(chalk.red('Client error:'), failure);
  process.exit(1);
});
|
||||||
@@ -0,0 +1,155 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporal query tool for inspecting Shannon workflow progress.
|
||||||
|
*
|
||||||
|
* Queries a running or completed workflow and displays its state.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npm run temporal:query -- <workflowId>
|
||||||
|
* # or
|
||||||
|
* node dist/temporal/query.js <workflowId>
|
||||||
|
*
|
||||||
|
* Environment:
|
||||||
|
* TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Connection, Client } from '@temporalio/client';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import chalk from 'chalk';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
// Query name must match the one defined in workflows.ts
|
||||||
|
const PROGRESS_QUERY = 'getProgress';
|
||||||
|
|
||||||
|
// Types duplicated from shared.ts to avoid importing workflow APIs
|
||||||
|
/**
 * Metrics recorded for a single agent run.
 *
 * Only `durationMs` is always present; the other fields are nullable.
 */
interface AgentMetrics {
  /** Run time of the agent in milliseconds. */
  durationMs: number;
  /** Input token count, or null. */
  inputTokens: number | null;
  /** Output token count, or null. */
  outputTokens: number | null;
  /** Cost in US dollars, or null. */
  costUsd: number | null;
  /** Number of turns, or null. */
  numTurns: number | null;
}
|
||||||
|
|
||||||
|
/**
 * Shape of the payload returned by the `getProgress` workflow query, as
 * consumed by this tool.
 *
 * NOTE(review): the workflow-side PipelineState in shared.ts also carries a
 * `summary` field that this local copy does not declare — confirm whether
 * the omission is intentional.
 */
interface PipelineProgress {
  /** Workflow identifier. */
  workflowId: string;
  /** Overall workflow status. */
  status: 'running' | 'completed' | 'failed';
  /** Phase currently executing, or null. */
  currentPhase: string | null;
  /** Agent currently executing, or null. */
  currentAgent: string | null;
  /** Names of agents that have completed. */
  completedAgents: string[];
  /** Agent recorded as failed, or null. */
  failedAgent: string | null;
  /** Error message, or null. */
  error: string | null;
  /** Workflow start timestamp (epoch milliseconds). */
  startTime: number;
  /** Milliseconds elapsed since `startTime` at query time. */
  elapsedMs: number;
  /** Metrics keyed by agent name. */
  agentMetrics: Record<string, AgentMetrics>;
}
|
||||||
|
|
||||||
|
/**
 * Print the command-line help text (title, usage line and one example)
 * to stdout.
 */
function showUsage(): void {
  const helpLines: string[] = [
    chalk.cyan.bold('\nShannon Temporal Query Tool'),
    chalk.gray('Query progress of a running workflow\n'),
    chalk.yellow('Usage:'),
    ' node dist/temporal/query.js <workflowId>\n',
    chalk.yellow('Examples:'),
    ' node dist/temporal/query.js shannon-1704672000000\n',
  ];

  for (const line of helpLines) {
    console.log(line);
  }
}
|
||||||
|
|
||||||
|
/**
 * Colorize a workflow status for terminal output.
 *
 * @param status - Status string to render.
 * @returns `running` in yellow, `completed` in green, `failed` in red;
 *   any other value is returned unchanged.
 */
function getStatusColor(status: string): string {
  if (status === 'running') {
    return chalk.yellow(status);
  }
  if (status === 'completed') {
    return chalk.green(status);
  }
  if (status === 'failed') {
    return chalk.red(status);
  }
  return status;
}
|
||||||
|
|
||||||
|
/**
 * Format a millisecond duration as a compact string.
 *
 * Only the two most significant units are shown: `"<h>h <m>m"` once the
 * duration reaches an hour, `"<m>m <s>s"` once it reaches a minute, and
 * `"<s>s"` otherwise. Fractions are truncated with `Math.floor`.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);

  if (totalHours > 0) {
    return `${totalHours}h ${totalMinutes % 60}m`;
  }

  return totalMinutes > 0
    ? `${totalMinutes}m ${totalSeconds % 60}s`
    : `${totalSeconds}s`;
}
|
||||||
|
|
||||||
|
/**
 * CLI entry point: query the progress of one workflow and print it.
 *
 * Reads the workflow ID from `process.argv[2]`. With no argument, or with
 * `--help` / `-h`, prints usage and exits (status 0 for an explicit help
 * request, 1 when the argument is missing). Otherwise connects to the
 * Temporal server at `TEMPORAL_ADDRESS` (default `localhost:7233`), runs the
 * progress query and prints status, phase, agent, elapsed time, completed
 * agents and any recorded error.
 *
 * A failed query is reported and signalled through `process.exitCode = 1`
 * rather than `process.exit(1)`: `process.exit` would terminate immediately
 * and skip the `finally` block, leaving the connection unclosed.
 */
async function queryWorkflow(): Promise<void> {
  const workflowId = process.argv[2];

  if (!workflowId || workflowId === '--help' || workflowId === '-h') {
    showUsage();
    // Explicit help request succeeds; a missing argument is a usage error.
    process.exit(workflowId ? 0 : 1);
  }

  const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';

  const connection = await Connection.connect({ address });
  const client = new Client({ connection });

  try {
    const handle = client.workflow.getHandle(workflowId);
    const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY);

    console.log(chalk.cyan.bold('\nWorkflow Progress'));
    console.log(chalk.gray('\u2500'.repeat(40)));
    console.log(`${chalk.white('Workflow ID:')} ${progress.workflowId}`);
    console.log(`${chalk.white('Status:')} ${getStatusColor(progress.status)}`);
    console.log(
      `${chalk.white('Current Phase:')} ${progress.currentPhase || 'none'}`
    );
    console.log(
      `${chalk.white('Current Agent:')} ${progress.currentAgent || 'none'}`
    );
    console.log(`${chalk.white('Elapsed:')} ${formatDuration(progress.elapsedMs)}`);
    console.log(
      `${chalk.white('Completed:')} ${progress.completedAgents.length}/13 agents`
    );

    if (progress.completedAgents.length > 0) {
      console.log(chalk.gray('\nCompleted agents:'));
      for (const agent of progress.completedAgents) {
        const metrics = progress.agentMetrics[agent];
        const duration = metrics ? formatDuration(metrics.durationMs) : 'unknown';
        // The cost suffix is omitted when the cost is null or zero.
        const cost = metrics?.costUsd ? `$${metrics.costUsd.toFixed(4)}` : '';
        console.log(
          chalk.green(` - ${agent}`) +
            chalk.gray(` (${duration}${cost ? ', ' + cost : ''})`)
        );
      }
    }

    if (progress.error) {
      console.log(chalk.red(`\nError: ${progress.error}`));
      console.log(chalk.red(`Failed agent: ${progress.failedAgent}`));
    }

    console.log();
  } catch (error) {
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    if (message.includes('not found')) {
      console.log(chalk.red(`Workflow not found: ${workflowId}`));
    } else {
      console.error(chalk.red('Query failed:'), message);
    }
    // Set the exit status without terminating, so the cleanup below still runs.
    process.exitCode = 1;
  } finally {
    await connection.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point. Any rejection that escapes queryWorkflow() is printed
// and converted into a non-zero exit status.
queryWorkflow().catch((failure) => {
  console.error(chalk.red('Query error:'), failure);
  process.exit(1);
});
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import { defineQuery } from '@temporalio/workflow';
|
||||||
|
|
||||||
|
// === Types ===
|
||||||
|
|
||||||
|
/** Arguments passed to the pentest pipeline workflow when it is started. */
export interface PipelineInput {
  /** URL of the target web application. */
  webUrl: string;
  /** Path to the target's source repository. */
  repoPath: string;
  /** Optional configuration file path. */
  configPath?: string;
  /** Optional output path. */
  outputPath?: string;
  /** Optional flag selecting pipeline testing mode. */
  pipelineTestingMode?: boolean;
  /** Added by client, used for audit correlation. */
  workflowId?: string;
  /** Persistent anonymous ID for counting unique installations. */
  installationId?: string;
}
|
||||||
|
|
||||||
|
/**
 * Metrics recorded for a single agent run.
 *
 * Only `durationMs` is always present; the other fields are nullable.
 */
export interface AgentMetrics {
  /** Run time of the agent in milliseconds. */
  durationMs: number;
  /** Input token count, or null. */
  inputTokens: number | null;
  /** Output token count, or null. */
  outputTokens: number | null;
  /** Cost in US dollars, or null. */
  costUsd: number | null;
  /** Number of turns, or null. */
  numTurns: number | null;
}
|
||||||
|
|
||||||
|
/** Aggregated totals for a whole pipeline run. */
export interface PipelineSummary {
  /** Number of agents counted in the summary. */
  agentCount: number;
  /** Total number of turns. */
  totalTurns: number;
  /** Total cost in US dollars. */
  totalCostUsd: number;
  /** Wall-clock time (end - start), in milliseconds. */
  totalDurationMs: number;
}
|
||||||
|
|
||||||
|
/** Mutable state tracked by the pipeline workflow and returned as its result. */
export interface PipelineState {
  /** Overall workflow status. */
  status: 'running' | 'completed' | 'failed';
  /** Phase currently executing, or null. */
  currentPhase: string | null;
  /** Agent currently executing, or null. */
  currentAgent: string | null;
  /** Names of agents that have completed. */
  completedAgents: string[];
  /** Agent recorded as failed, or null. */
  failedAgent: string | null;
  /** Error message, or null. */
  error: string | null;
  /** Workflow start timestamp (epoch milliseconds). */
  startTime: number;
  /** Metrics keyed by agent name. */
  agentMetrics: Record<string, AgentMetrics>;
  /** Aggregated totals, or null while none have been computed. */
  summary: PipelineSummary | null;
}
|
||||||
|
|
||||||
|
/**
 * Extended state returned by the getProgress query: the pipeline state plus
 * fields computed at query time.
 */
export interface PipelineProgress extends PipelineState {
  /** Workflow identifier. */
  workflowId: string;
  /** Milliseconds elapsed since the workflow's start time. */
  elapsedMs: number;
}
|
||||||
|
|
||||||
|
/** Result from a single vuln→exploit pipeline. */
export interface VulnExploitPipelineResult {
  /** Vulnerability category this pipeline handled. */
  vulnType: string;
  /** Metrics from the vulnerability agent, or null. */
  vulnMetrics: AgentMetrics | null;
  /** Metrics from the exploit agent, or null. */
  exploitMetrics: AgentMetrics | null;
  /** Outcome of the exploitation-queue check, or null. */
  exploitDecision: {
    /** Whether the exploit agent should run. */
    shouldExploit: boolean;
    /** Number of vulnerabilities reported by the queue check. */
    vulnerabilityCount: number;
  } | null;
  /** Error message, or null. */
  error: string | null;
}
|
||||||
|
|
||||||
|
// === Queries ===
|
||||||
|
|
||||||
|
/** Definition of the `getProgress` workflow query; its result type is PipelineProgress. */
export const getProgress = defineQuery<PipelineProgress>('getProgress');
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporal worker for Shannon pentest pipeline.
|
||||||
|
*
|
||||||
|
* Polls the 'shannon-pipeline' task queue and executes activities.
|
||||||
|
* Handles up to 25 concurrent activities to support multiple parallel workflows.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npm run temporal:worker
|
||||||
|
* # or
|
||||||
|
* node dist/temporal/worker.js
|
||||||
|
*
|
||||||
|
* Environment:
|
||||||
|
* TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { NativeConnection, Worker, bundleWorkflowCode } from '@temporalio/worker';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import path from 'node:path';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import * as activities from './activities.js';
|
||||||
|
import { telemetry } from '../telemetry/index.js';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||||
|
|
||||||
|
/**
 * Start the Temporal worker and run it until it is shut down.
 *
 * Connects to `TEMPORAL_ADDRESS` (default `localhost:7233`), initializes
 * telemetry, bundles the workflow code, creates a worker on the
 * `shannon-pipeline` task queue and runs it. SIGINT/SIGTERM request a
 * worker shutdown.
 *
 * Cleanup is scoped so that telemetry is shut down and the connection is
 * closed even when bundling or worker creation fails, and so that a failing
 * telemetry shutdown cannot prevent the connection from being closed.
 */
async function runWorker(): Promise<void> {
  const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
  console.log(chalk.cyan(`Connecting to Temporal at ${address}...`));

  const connection = await NativeConnection.connect({ address });

  try {
    // Initialize telemetry for activity execution
    telemetry.initialize();

    try {
      // Bundle workflows for Temporal's V8 isolate
      console.log(chalk.gray('Bundling workflows...'));
      const workflowBundle = await bundleWorkflowCode({
        workflowsPath: path.join(__dirname, 'workflows.js'),
      });

      const worker = await Worker.create({
        connection,
        namespace: 'default',
        workflowBundle,
        activities,
        taskQueue: 'shannon-pipeline',
        maxConcurrentActivityTaskExecutions: 25, // Support multiple parallel workflows (5 agents × ~5 workflows)
      });

      // Graceful shutdown handling. The handler is synchronous and guarded:
      // an async handler would turn any throw from worker.shutdown() into an
      // unhandled promise rejection.
      // NOTE(review): worker.shutdown() appears to throw when the worker is
      // no longer running (e.g. a repeated Ctrl+C) — confirm against the SDK.
      const shutdown = (): void => {
        console.log(chalk.yellow('\nShutting down worker...'));
        try {
          worker.shutdown();
        } catch (err) {
          console.error(chalk.red('Worker shutdown request failed:'), err);
        }
      };

      process.on('SIGINT', shutdown);
      process.on('SIGTERM', shutdown);

      console.log(chalk.green('Shannon worker started'));
      console.log(chalk.gray('Task queue: shannon-pipeline'));
      console.log(chalk.gray('Press Ctrl+C to stop\n'));

      await worker.run();
    } finally {
      await telemetry.shutdown();
    }
  } finally {
    await connection.close();
    console.log(chalk.gray('Worker stopped'));
  }
}
|
||||||
|
|
||||||
|
// Script entry point. Any rejection that escapes runWorker() is printed and
// converted into a non-zero exit status.
runWorker().catch((failure) => {
  console.error(chalk.red('Worker failed:'), failure);
  process.exit(1);
});
|
||||||
@@ -0,0 +1,338 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporal workflow for Shannon pentest pipeline.
|
||||||
|
*
|
||||||
|
* Orchestrates the penetration testing workflow:
|
||||||
|
* 1. Pre-Reconnaissance (sequential)
|
||||||
|
* 2. Reconnaissance (sequential)
|
||||||
|
* 3-4. Vulnerability + Exploitation (5 pipelined pairs in parallel)
|
||||||
|
* Each pair: vuln agent → queue check → conditional exploit
|
||||||
|
* No synchronization barrier - exploits start when their vuln finishes
|
||||||
|
* 5. Reporting (sequential)
|
||||||
|
*
|
||||||
|
* Features:
|
||||||
|
* - Queryable state via getProgress
|
||||||
|
* - Automatic retry with backoff for transient/billing errors
|
||||||
|
* - Non-retryable classification for permanent errors
|
||||||
|
* - Audit correlation via workflowId
|
||||||
|
* - Graceful failure handling: pipelines continue if one fails
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
proxyActivities,
|
||||||
|
setHandler,
|
||||||
|
workflowInfo,
|
||||||
|
} from '@temporalio/workflow';
|
||||||
|
import type * as activities from './activities.js';
|
||||||
|
import type { ActivityInput } from './activities.js';
|
||||||
|
import {
|
||||||
|
getProgress,
|
||||||
|
type PipelineInput,
|
||||||
|
type PipelineState,
|
||||||
|
type PipelineProgress,
|
||||||
|
type PipelineSummary,
|
||||||
|
type VulnExploitPipelineResult,
|
||||||
|
type AgentMetrics,
|
||||||
|
} from './shared.js';
|
||||||
|
import type { VulnType } from '../queue-validation.js';
|
||||||
|
|
||||||
|
// Production retry policy: long intervals (for billing recovery) and a high
// attempt ceiling. Error types listed in nonRetryableErrorTypes are not retried.
const PRODUCTION_RETRY = {
  maximumAttempts: 50,
  initialInterval: '5 minutes',
  backoffCoefficient: 2,
  maximumInterval: '30 minutes',
  nonRetryableErrorTypes: [
    'AuthenticationError',
    'PermissionError',
    'InvalidRequestError',
    'RequestTooLargeError',
    'ConfigurationError',
    'InvalidTargetError',
    'ExecutionLimitError',
  ],
};
|
||||||
|
|
||||||
|
// Pipeline-testing retry policy: short intervals and few attempts for fast
// iteration. The non-retryable error list is shared with the production
// policy (same array instance).
const TESTING_RETRY = {
  maximumAttempts: 5,
  initialInterval: '10 seconds',
  backoffCoefficient: 2,
  maximumInterval: '30 seconds',
  nonRetryableErrorTypes: PRODUCTION_RETRY.nonRetryableErrorTypes,
};
|
||||||
|
|
||||||
|
// Default activity proxy: production retry policy with a 2-hour
// start-to-close limit per activity.
const acts = proxyActivities<typeof activities>({
  retry: PRODUCTION_RETRY,
  startToCloseTimeout: '2 hours',
  // Long timeout for resource-constrained workers with many concurrent activities
  heartbeatTimeout: '10 minutes',
});
|
||||||
|
|
||||||
|
// Activity proxy for pipeline testing mode: fast retry policy with a
// 10-minute start-to-close limit per activity.
const testActs = proxyActivities<typeof activities>({
  retry: TESTING_RETRY,
  startToCloseTimeout: '10 minutes',
  // Shorter for testing but still tolerant of resource contention
  heartbeatTimeout: '5 minutes',
});
|
||||||
|
|
||||||
|
/**
 * Aggregate the metrics currently held in the pipeline state.
 *
 * Used on both the success and the failure path, so the totals may cover
 * only part of a run.
 *
 * @param state - Current pipeline state.
 * @returns Summed cost and turns over all recorded agent metrics (null
 *   values count as 0), the wall-clock time since `state.startTime`, and
 *   the number of completed agents.
 */
function computeSummary(state: PipelineState): PipelineSummary {
  let totalCostUsd = 0;
  let totalTurns = 0;

  for (const agent of Object.values(state.agentMetrics)) {
    totalCostUsd += agent.costUsd ?? 0;
    totalTurns += agent.numTurns ?? 0;
  }

  return {
    totalCostUsd,
    totalDurationMs: Date.now() - state.startTime,
    totalTurns,
    agentCount: state.completedAgents.length,
  };
}
|
||||||
|
|
||||||
|
/**
 * Temporal workflow that runs the full pentest pipeline.
 *
 * Phases, in order:
 *   1. pre-recon (sequential)
 *   2. recon (sequential)
 *   3-4. five vuln→exploit pipelines in parallel (injection, xss, auth,
 *        ssrf, authz); each runs its vuln agent, checks the exploitation
 *        queue, and runs the exploit agent only when the check says so
 *   5. report assembly followed by the report agent
 *
 * Progress is exposed through the `getProgress` query. A rejected
 * vuln→exploit pipeline is recorded and logged but does not stop the
 * workflow; any other failure marks the state as failed, logs a failure
 * summary and rethrows.
 *
 * @param input - Target URL, repository path and optional settings. When
 *   `pipelineTestingMode` is truthy the testing activity proxy (fast
 *   retries) is used instead of the production one.
 * @returns The final pipeline state, including the computed summary.
 * @throws The original error that caused the workflow to fail. A failure
 *   while recording the failure summary is logged and does not replace it.
 */
export async function pentestPipelineWorkflow(
  input: PipelineInput
): Promise<PipelineState> {
  const { workflowId } = workflowInfo();

  // Select activity proxy based on testing mode
  // Pipeline testing uses fast retry intervals (10s) for quick iteration
  const a = input.pipelineTestingMode ? testActs : acts;

  // Workflow state (queryable)
  const state: PipelineState = {
    status: 'running',
    currentPhase: null,
    currentAgent: null,
    completedAgents: [],
    failedAgent: null,
    error: null,
    startTime: Date.now(),
    agentMetrics: {},
    summary: null,
  };

  // Register query handler for real-time progress inspection
  setHandler(getProgress, (): PipelineProgress => ({
    ...state,
    workflowId,
    elapsedMs: Date.now() - state.startTime,
  }));

  // Build ActivityInput with required workflowId for audit correlation
  // Activities require workflowId (non-optional), PipelineInput has it optional
  // Use spread to conditionally include optional properties (exactOptionalPropertyTypes)
  const activityInput: ActivityInput = {
    webUrl: input.webUrl,
    repoPath: input.repoPath,
    workflowId,
    ...(input.configPath !== undefined && { configPath: input.configPath }),
    ...(input.outputPath !== undefined && { outputPath: input.outputPath }),
    ...(input.pipelineTestingMode !== undefined && {
      pipelineTestingMode: input.pipelineTestingMode,
    }),
    ...(input.installationId !== undefined && {
      installationId: input.installationId,
    }),
  };

  // Per-agent duration/cost projection sent with the completion summary
  // (shared by the success and failure paths).
  const summarizeAgentMetrics = () =>
    Object.fromEntries(
      Object.entries(state.agentMetrics).map(([name, m]) => [
        name,
        { durationMs: m.durationMs, costUsd: m.costUsd },
      ])
    );

  try {
    // === Phase 1: Pre-Reconnaissance ===
    state.currentPhase = 'pre-recon';
    state.currentAgent = 'pre-recon';
    await a.logPhaseTransition(activityInput, 'pre-recon', 'start');
    state.agentMetrics['pre-recon'] =
      await a.runPreReconAgent(activityInput);
    state.completedAgents.push('pre-recon');
    await a.logPhaseTransition(activityInput, 'pre-recon', 'complete');

    // === Phase 2: Reconnaissance ===
    state.currentPhase = 'recon';
    state.currentAgent = 'recon';
    await a.logPhaseTransition(activityInput, 'recon', 'start');
    state.agentMetrics['recon'] = await a.runReconAgent(activityInput);
    state.completedAgents.push('recon');
    await a.logPhaseTransition(activityInput, 'recon', 'complete');

    // === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
    // Each vuln type runs as an independent pipeline:
    //   vuln agent → queue check → conditional exploit agent
    // This eliminates the synchronization barrier between phases - each exploit
    // starts immediately when its vuln agent finishes, not waiting for all.
    state.currentPhase = 'vulnerability-exploitation';
    state.currentAgent = 'pipelines';
    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');

    // Helper: Run a single vuln→exploit pipeline
    async function runVulnExploitPipeline(
      vulnType: VulnType,
      runVulnAgent: () => Promise<AgentMetrics>,
      runExploitAgent: () => Promise<AgentMetrics>
    ): Promise<VulnExploitPipelineResult> {
      // Step 1: Run vulnerability agent
      const vulnMetrics = await runVulnAgent();

      // Step 2: Check exploitation queue (starts immediately after vuln)
      const decision = await a.checkExploitationQueue(activityInput, vulnType);

      // Step 3: Conditionally run exploit agent
      let exploitMetrics: AgentMetrics | null = null;
      if (decision.shouldExploit) {
        exploitMetrics = await runExploitAgent();
      }

      return {
        vulnType,
        vulnMetrics,
        exploitMetrics,
        exploitDecision: {
          shouldExploit: decision.shouldExploit,
          vulnerabilityCount: decision.vulnerabilityCount,
        },
        error: null,
      };
    }

    // Run all 5 pipelines in parallel with graceful failure handling
    // Promise.allSettled ensures other pipelines continue if one fails
    const pipelineResults = await Promise.allSettled([
      runVulnExploitPipeline(
        'injection',
        () => a.runInjectionVulnAgent(activityInput),
        () => a.runInjectionExploitAgent(activityInput)
      ),
      runVulnExploitPipeline(
        'xss',
        () => a.runXssVulnAgent(activityInput),
        () => a.runXssExploitAgent(activityInput)
      ),
      runVulnExploitPipeline(
        'auth',
        () => a.runAuthVulnAgent(activityInput),
        () => a.runAuthExploitAgent(activityInput)
      ),
      runVulnExploitPipeline(
        'ssrf',
        () => a.runSsrfVulnAgent(activityInput),
        () => a.runSsrfExploitAgent(activityInput)
      ),
      runVulnExploitPipeline(
        'authz',
        () => a.runAuthzVulnAgent(activityInput),
        () => a.runAuthzExploitAgent(activityInput)
      ),
    ]);

    // Aggregate results from all pipelines
    const failedPipelines: string[] = [];
    for (const result of pipelineResults) {
      if (result.status === 'fulfilled') {
        const { vulnType, vulnMetrics, exploitMetrics } = result.value;

        // Record vuln agent metrics
        if (vulnMetrics) {
          state.agentMetrics[`${vulnType}-vuln`] = vulnMetrics;
          state.completedAgents.push(`${vulnType}-vuln`);
        }

        // Record exploit agent metrics (if it ran)
        if (exploitMetrics) {
          state.agentMetrics[`${vulnType}-exploit`] = exploitMetrics;
          state.completedAgents.push(`${vulnType}-exploit`);
        }
      } else {
        // Pipeline failed - log error but continue with others
        const errorMsg =
          result.reason instanceof Error
            ? result.reason.message
            : String(result.reason);
        failedPipelines.push(errorMsg);
      }
    }

    // Log any pipeline failures (workflow continues despite failures)
    if (failedPipelines.length > 0) {
      console.log(
        `⚠️ ${failedPipelines.length} pipeline(s) failed:`,
        failedPipelines
      );
    }

    // Update phase markers
    state.currentPhase = 'exploitation';
    state.currentAgent = null;
    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');

    // === Phase 5: Reporting ===
    state.currentPhase = 'reporting';
    state.currentAgent = 'report';
    await a.logPhaseTransition(activityInput, 'reporting', 'start');

    // First, assemble the concatenated report from exploitation evidence files
    await a.assembleReportActivity(activityInput);

    // Then run the report agent to add executive summary and clean up
    // Pass workflow start time and stats for accurate telemetry
    const reportInput = {
      ...activityInput,
      workflowStartTime: state.startTime,
      workflowStats: {
        totalAgents: 13, // pre-recon, recon, 5 vuln, 5 exploit, report
        agentsSucceeded: state.completedAgents.length,
        agentsFailed: failedPipelines.length,
      },
    };
    state.agentMetrics['report'] = await a.runReportAgent(reportInput);
    state.completedAgents.push('report');
    await a.logPhaseTransition(activityInput, 'reporting', 'complete');

    // === Complete ===
    state.status = 'completed';
    state.currentPhase = null;
    state.currentAgent = null;
    state.summary = computeSummary(state);

    // Log workflow completion summary
    await a.logWorkflowComplete(activityInput, {
      status: 'completed',
      totalDurationMs: state.summary.totalDurationMs,
      totalCostUsd: state.summary.totalCostUsd,
      completedAgents: state.completedAgents,
      agentMetrics: summarizeAgentMetrics(),
    });

    return state;
  } catch (error) {
    state.status = 'failed';
    state.failedAgent = state.currentAgent;
    state.error = error instanceof Error ? error.message : String(error);
    state.summary = computeSummary(state);

    // Log workflow failure summary. This is best-effort: if the logging
    // activity itself fails, the original error must still be the one that
    // fails the workflow, so the logging failure is reported and dropped.
    try {
      await a.logWorkflowComplete(activityInput, {
        status: 'failed',
        totalDurationMs: state.summary.totalDurationMs,
        totalCostUsd: state.summary.totalCostUsd,
        completedAgents: state.completedAgents,
        agentMetrics: summarizeAgentMetrics(),
        error: state.error ?? undefined,
      });
    } catch (logError) {
      console.log(
        '⚠️ Failed to record workflow failure summary:',
        logError instanceof Error ? logError.message : String(logError)
      );
    }

    throw error;
  }
}
|
||||||
@@ -1,13 +1,28 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { $ } from 'zx';
|
import { $ } from 'zx';
|
||||||
import chalk from 'chalk';
|
import chalk from 'chalk';
|
||||||
|
|
||||||
|
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
|
||||||
|
|
||||||
|
export type ToolAvailability = Record<ToolName, boolean>;
|
||||||
|
|
||||||
// Check availability of required tools
|
// Check availability of required tools
|
||||||
export const checkToolAvailability = async () => {
|
export const checkToolAvailability = async (): Promise<ToolAvailability> => {
|
||||||
const tools = ['nmap', 'subfinder', 'whatweb', 'schemathesis'];
|
const tools: ToolName[] = ['nmap', 'subfinder', 'whatweb', 'schemathesis'];
|
||||||
const availability = {};
|
const availability: ToolAvailability = {
|
||||||
|
nmap: false,
|
||||||
|
subfinder: false,
|
||||||
|
whatweb: false,
|
||||||
|
schemathesis: false
|
||||||
|
};
|
||||||
|
|
||||||
console.log(chalk.blue('🔧 Checking tool availability...'));
|
console.log(chalk.blue('🔧 Checking tool availability...'));
|
||||||
|
|
||||||
for (const tool of tools) {
|
for (const tool of tools) {
|
||||||
try {
|
try {
|
||||||
await $`command -v ${tool}`;
|
await $`command -v ${tool}`;
|
||||||
@@ -18,33 +33,31 @@ export const checkToolAvailability = async () => {
|
|||||||
console.log(chalk.yellow(` ⚠️ ${tool} - not found`));
|
console.log(chalk.yellow(` ⚠️ ${tool} - not found`));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return availability;
|
return availability;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Handle missing tools with user-friendly messages
|
// Handle missing tools with user-friendly messages
|
||||||
export const handleMissingTools = (toolAvailability) => {
|
export const handleMissingTools = (toolAvailability: ToolAvailability): ToolName[] => {
|
||||||
const missing = Object.entries(toolAvailability)
|
const missing = (Object.entries(toolAvailability) as Array<[ToolName, boolean]>)
|
||||||
.filter(([tool, available]) => !available)
|
.filter(([, available]) => !available)
|
||||||
.map(([tool]) => tool);
|
.map(([tool]) => tool);
|
||||||
|
|
||||||
if (missing.length > 0) {
|
if (missing.length > 0) {
|
||||||
console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`));
|
console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`));
|
||||||
console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.'));
|
console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.'));
|
||||||
|
|
||||||
// Provide installation hints
|
// Provide installation hints
|
||||||
const installHints = {
|
const installHints: Record<ToolName, string> = {
|
||||||
'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)',
|
'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)',
|
||||||
'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest',
|
'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest',
|
||||||
'whatweb': 'gem install whatweb',
|
'whatweb': 'gem install whatweb',
|
||||||
'schemathesis': 'pip install schemathesis'
|
'schemathesis': 'pip install schemathesis'
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log(chalk.gray('\nInstallation hints:'));
|
console.log(chalk.gray('\nInstallation hints:'));
|
||||||
missing.forEach(tool => {
|
missing.forEach(tool => {
|
||||||
if (installHints[tool]) {
|
console.log(chalk.gray(` ${tool}: ${installHints[tool]}`));
|
||||||
console.log(chalk.gray(` ${tool}: ${installHints[tool]}`));
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
console.log('');
|
console.log('');
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Agent type definitions
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Name of each pipeline agent (13 in total). */
export type AgentName =
  // Reconnaissance
  | 'pre-recon'
  | 'recon'
  // Vulnerability analysis
  | 'injection-vuln'
  | 'xss-vuln'
  | 'auth-vuln'
  | 'ssrf-vuln'
  | 'authz-vuln'
  // Exploitation
  | 'injection-exploit'
  | 'xss-exploit'
  | 'auth-exploit'
  | 'ssrf-exploit'
  | 'authz-exploit'
  // Reporting
  | 'report';
|
||||||
|
|
||||||
|
export type PromptName =
|
||||||
|
| 'pre-recon-code'
|
||||||
|
| 'recon'
|
||||||
|
| 'vuln-injection'
|
||||||
|
| 'vuln-xss'
|
||||||
|
| 'vuln-auth'
|
||||||
|
| 'vuln-ssrf'
|
||||||
|
| 'vuln-authz'
|
||||||
|
| 'exploit-injection'
|
||||||
|
| 'exploit-xss'
|
||||||
|
| 'exploit-auth'
|
||||||
|
| 'exploit-ssrf'
|
||||||
|
| 'exploit-authz'
|
||||||
|
| 'report-executive';
|
||||||
|
|
||||||
|
export type PlaywrightAgent =
|
||||||
|
| 'playwright-agent1'
|
||||||
|
| 'playwright-agent2'
|
||||||
|
| 'playwright-agent3'
|
||||||
|
| 'playwright-agent4'
|
||||||
|
| 'playwright-agent5';
|
||||||
|
|
||||||
|
export type AgentValidator = (sourceDir: string) => Promise<boolean>;
|
||||||
|
|
||||||
|
export type AgentStatus =
|
||||||
|
| 'pending'
|
||||||
|
| 'in_progress'
|
||||||
|
| 'completed'
|
||||||
|
| 'failed'
|
||||||
|
| 'rolled-back';
|
||||||
|
|
||||||
|
export interface AgentDefinition {
|
||||||
|
name: AgentName;
|
||||||
|
displayName: string;
|
||||||
|
prerequisites: AgentName[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Look up the prompt name that belongs to an agent.
 *
 * The table is typed as `Record<AgentName, PromptName>`, so the compiler
 * rejects it if any agent name is missing an entry.
 *
 * @param agentName - Agent whose prompt name is wanted.
 * @returns The prompt name registered for `agentName`.
 */
export function getPromptNameForAgent(agentName: AgentName): PromptName {
  const promptByAgent: Record<AgentName, PromptName> = {
    // Reconnaissance
    'pre-recon': 'pre-recon-code',
    recon: 'recon',

    // Vulnerability analysis
    'injection-vuln': 'vuln-injection',
    'xss-vuln': 'vuln-xss',
    'auth-vuln': 'vuln-auth',
    'ssrf-vuln': 'vuln-ssrf',
    'authz-vuln': 'vuln-authz',

    // Exploitation
    'injection-exploit': 'exploit-injection',
    'xss-exploit': 'exploit-xss',
    'auth-exploit': 'exploit-auth',
    'ssrf-exploit': 'exploit-ssrf',
    'authz-exploit': 'exploit-authz',

    // Reporting
    report: 'report-executive',
  };

  const promptName = promptByAgent[agentName];
  return promptName;
}
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration type definitions
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Kinds of rule that can appear in a configuration. */
export type RuleType = 'path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter';

/** A single rule entry. */
export interface Rule {
  /** Free-text description of the rule. */
  description: string;
  /** Which kind of rule this is. */
  type: RuleType;
  /** Value the rule applies to (NOTE(review): presumably interpreted according to `type` - confirm). */
  url_path: string;
}

/** Optional `avoid` and `focus` rule lists. */
export interface Rules {
  avoid?: Array<Rule>;
  focus?: Array<Rule>;
}

/** Supported login mechanisms. */
export type LoginType = 'form' | 'sso' | 'api' | 'basic';

/** Kinds of check available for a success condition. */
export type SuccessConditionType = 'url' | 'cookie' | 'element' | 'redirect';

/** A success condition: a check kind plus the value it is compared against. */
export interface SuccessCondition {
  type: SuccessConditionType;
  value: string;
}

/** Login credentials, with an optional TOTP secret. */
export interface Credentials {
  username: string;
  password: string;
  totp_secret?: string;
}

/** Authentication section of a configuration. */
export interface Authentication {
  login_type: LoginType;
  login_url: string;
  credentials: Credentials;
  /** Login steps as strings. */
  login_flow: Array<string>;
  success_condition: SuccessCondition;
}

/** Top-level configuration object; every section is optional. */
export interface Config {
  rules?: Rules;
  authentication?: Authentication;
  login?: unknown; // Deprecated
}

/**
 * Configuration shape in which both rule lists are always present and
 * authentication is explicitly `null` when absent.
 */
export interface DistributedConfig {
  avoid: Array<Rule>;
  focus: Array<Rule>;
  authentication: Authentication | null;
}
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Error type definitions
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Categories used to classify an error. */
export type PentestErrorType =
  | 'config' | 'network' | 'tool' | 'prompt'
  | 'filesystem' | 'validation' | 'billing' | 'unknown';

/** Open-ended bag of extra values attached to an error; values must be narrowed before use. */
export interface PentestErrorContext {
  [key: string]: unknown;
}

/** Shape of one logged error record. */
export interface LogEntry {
  /** Time of the entry, as a string. */
  timestamp: string;
  /** Context label for the entry. */
  context: string;
  /** Details of the error being logged. */
  error: {
    name: string;
    message: string;
    type: PentestErrorType;
    /** Whether the error is marked as retryable. */
    retryable: boolean;
    stack?: string;
  };
}

/** Failure result for a tool run; `status` and `success` are fixed literals. */
export interface ToolErrorResult {
  tool: string;
  output: string;
  status: 'error';
  duration: number;
  success: false;
  error: Error;
}

/** Failure result for a prompt operation. */
export interface PromptErrorResult {
  success: false;
  error: Error;
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Type definitions barrel export
|
||||||
|
*/
|
||||||
|
|
||||||
|
export * from './errors.js';
|
||||||
|
export * from './config.js';
|
||||||
|
export * from './agents.js';
|
||||||
@@ -1,3 +1,9 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Concurrency Control Utilities
|
* Concurrency Control Utilities
|
||||||
*
|
*
|
||||||
@@ -5,6 +11,8 @@
|
|||||||
* concurrent session operations.
|
* concurrent session operations.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
type UnlockFunction = () => void;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SessionMutex - Promise-based mutex for session file operations
|
* SessionMutex - Promise-based mutex for session file operations
|
||||||
*
|
*
|
||||||
@@ -13,7 +21,7 @@
|
|||||||
* during parallel execution of vulnerability analysis and exploitation phases.
|
* during parallel execution of vulnerability analysis and exploitation phases.
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
* ```js
|
* ```ts
|
||||||
* const mutex = new SessionMutex();
|
* const mutex = new SessionMutex();
|
||||||
* const unlock = await mutex.lock(sessionId);
|
* const unlock = await mutex.lock(sessionId);
|
||||||
* try {
|
* try {
|
||||||
@@ -23,32 +31,27 @@
|
|||||||
* }
|
* }
|
||||||
* ```
|
* ```
|
||||||
*/
|
*/
|
||||||
|
// Promise-based mutex with queue semantics - safe for parallel agents on same session
|
||||||
export class SessionMutex {
  // sessionId -> promise held by the most recent caller of lock() for that
  // session (the tail of the wait queue). It resolves when that caller unlocks.
  private locks: Map<string, Promise<void>> = new Map();

  /**
   * Acquire the lock for a session, waiting in FIFO order behind earlier callers.
   *
   * Each caller registers itself as the new queue tail *before* waiting and
   * then awaits only its direct predecessor. This guarantees mutual exclusion
   * for any number of concurrent callers; having every waiter await the same
   * promise would wake them all at once and let several hold the lock together.
   *
   * @param sessionId - Session to lock. Different sessions never block each other.
   * @returns Function that releases the lock. Calling it more than once is a no-op.
   */
  async lock(sessionId: string): Promise<UnlockFunction> {
    const predecessor = this.locks.get(sessionId);

    let release!: () => void;
    const ownTurn = new Promise<void>((r) => (release = r));
    // Register synchronously so the next caller queues behind us, not beside us.
    this.locks.set(sessionId, ownTurn);

    if (predecessor) {
      await predecessor;
    }

    let released = false;
    return () => {
      if (released) {
        return;
      }
      released = true;
      // Drop the map entry only if nobody queued behind us; otherwise the entry
      // belongs to a later caller and must stay.
      if (this.locks.get(sessionId) === ownTurn) {
        this.locks.delete(sessionId);
      }
      release();
    };
  }
}
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* File I/O Utilities
|
||||||
|
*
|
||||||
|
* Core utility functions for file operations including atomic writes,
|
||||||
|
* directory creation, and JSON file handling.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import fs from 'fs/promises';
|
||||||
|
|
||||||
|
/**
 * Create `dirPath` (including missing parents) if it is not already there.
 *
 * An `EEXIST` failure from `mkdir` is treated as success so repeated or
 * overlapping calls are harmless; any other error is re-thrown.
 */
export async function ensureDirectory(dirPath: string): Promise<void> {
  try {
    await fs.mkdir(dirPath, { recursive: true });
  } catch (err) {
    const { code } = err as NodeJS.ErrnoException;
    if (code === 'EEXIST') {
      return;
    }
    throw err;
  }
}
|
||||||
|
|
||||||
|
/**
 * Replace `filePath` with `data` using the write-temp-then-rename pattern.
 *
 * Readers see either the previous content or the complete new content, never
 * a partially written file, because the target is only touched by `rename`.
 * No `fsync` is issued, so durability across power loss is not guaranteed.
 *
 * Each call writes to its own uniquely named temp file next to the target.
 * With a shared fixed temp name, two overlapping writes to the same path
 * would overwrite each other's temp file and the slower one would fail on
 * `rename` with ENOENT. With unique names every call succeeds and the last
 * rename wins.
 *
 * @param filePath - Destination file.
 * @param data - Strings are written verbatim as UTF-8; anything else is
 *   serialized with `JSON.stringify(data, null, 2)`.
 * @throws Re-throws the write/rename error after a best-effort temp-file cleanup.
 */
export async function atomicWrite(filePath: string, data: object | string): Promise<void> {
  // pid + timestamp + random component: unique across processes and across
  // concurrent calls inside one process. Kept as a sibling of the target so
  // the rename stays on the same filesystem.
  const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
  const tempPath = `${filePath}.${uniqueSuffix}.tmp`;
  const content = typeof data === 'string' ? data : JSON.stringify(data, null, 2);

  try {
    await fs.writeFile(tempPath, content, 'utf8');

    // Atomic on POSIX when source and destination share a filesystem.
    await fs.rename(tempPath, filePath);
  } catch (error) {
    // Best-effort cleanup; the original failure is what the caller needs to see.
    try {
      await fs.unlink(tempPath);
    } catch {
      // Temp file may never have been created - nothing to clean up.
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Load a UTF-8 file and parse it as JSON.
 *
 * The result is cast to `T` without any runtime validation. Read and parse
 * errors propagate to the caller.
 */
export async function readJson<T = unknown>(filePath: string): Promise<T> {
  return JSON.parse(await fs.readFile(filePath, 'utf8')) as T;
}
|
||||||
|
|
||||||
|
/**
 * Report whether `filePath` can be accessed.
 *
 * Resolves to `true` when `fs.access` succeeds and to `false` on any failure.
 * Never rejects.
 */
export async function fileExists(filePath: string): Promise<boolean> {
  let accessible = true;
  try {
    await fs.access(filePath);
  } catch {
    accessible = false;
  }
  return accessible;
}
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Formatting Utilities
|
||||||
|
*
|
||||||
|
* Generic formatting functions for durations, timestamps, and percentages.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Render a millisecond duration for display.
 *
 * - below 1000 ms: `"<ms>ms"`
 * - below 60 s: seconds with one decimal, e.g. `"1.5s"` (rounding can yield `"60.0s"`)
 * - otherwise: whole minutes and whole seconds, e.g. `"2m 5s"`
 */
export function formatDuration(ms: number): string {
  if (ms < 1000) {
    return `${ms}ms`;
  }

  const totalSeconds = ms / 1000;
  if (totalSeconds < 60) {
    return `${totalSeconds.toFixed(1)}s`;
  }

  const wholeMinutes = Math.floor(totalSeconds / 60);
  const leftoverSeconds = Math.floor(totalSeconds % 60);
  return [`${wholeMinutes}m`, `${leftoverSeconds}s`].join(' ');
}
|
||||||
|
|
||||||
|
/**
 * Convert an epoch-millisecond timestamp to an ISO 8601 string.
 *
 * @param timestamp - Milliseconds since the Unix epoch; defaults to the current time.
 * @throws RangeError (from `Date#toISOString`) when the timestamp is not a valid date.
 */
export function formatTimestamp(timestamp: number = Date.now()): string {
  const instant = new Date(timestamp);
  return instant.toISOString();
}
|
||||||
|
|
||||||
|
/**
 * Express `part` as a percentage of `total`.
 *
 * Returns 0 when `total` is 0 instead of dividing by zero. The result is
 * neither rounded nor clamped.
 */
export function calculatePercentage(part: number, total: number): number {
  return total === 0 ? 0 : (part / total) * 100;
}
|
||||||
|
|
||||||
|
/**
 * Derive a display label from an agent description.
 *
 * The description is checked, case-sensitively and in this order, for
 * 'Pre-recon', 'Recon' and 'Report'. The first keyword found decides the
 * label, and anything else is labelled 'analysis'.
 */
export function extractAgentType(description: string): string {
  const labelByKeyword: ReadonlyArray<readonly [keyword: string, label: string]> = [
    ['Pre-recon', 'pre-reconnaissance'],
    ['Recon', 'reconnaissance'],
    ['Report', 'report generation'],
  ];

  for (const [keyword, label] of labelByKeyword) {
    if (description.includes(keyword)) {
      return label;
    }
  }
  return 'analysis';
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Functional Programming Utilities
|
||||||
|
*
|
||||||
|
* Generic functional composition patterns for async operations.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Stages are deliberately loosely typed: each one receives whatever the
// previous stage produced.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type PipelineFunction = (x: any) => any | Promise<any>;

/**
 * Thread a value through a sequence of sync or async functions, in order.
 *
 * Every stage is awaited before the next one runs, so a rejection or throw in
 * any stage stops the pipeline and propagates to the caller. With no stages
 * the initial value is returned as-is.
 *
 * @param initial - Value handed to the first stage.
 * @param fns - Stages to apply, left to right.
 * @returns The last stage's result, cast to `TResult` (not checked at runtime).
 */
export async function asyncPipe<TResult>(
  initial: unknown,
  ...fns: PipelineFunction[]
): Promise<TResult> {
  let carried: unknown = initial;
  for (const stage of fns) {
    const produced = await stage(carried);
    carried = produced;
  }
  return carried as TResult;
}
|
||||||
@@ -1,195 +0,0 @@
|
|||||||
import { $ } from 'zx';
|
|
||||||
import chalk from 'chalk';
|
|
||||||
|
|
||||||
// Global git operations semaphore to prevent index.lock conflicts during parallel execution
|
|
||||||
class GitSemaphore {
|
|
||||||
constructor() {
|
|
||||||
this.queue = [];
|
|
||||||
this.running = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
async acquire() {
|
|
||||||
return new Promise((resolve) => {
|
|
||||||
this.queue.push(resolve);
|
|
||||||
this.process();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
release() {
|
|
||||||
this.running = false;
|
|
||||||
this.process();
|
|
||||||
}
|
|
||||||
|
|
||||||
process() {
|
|
||||||
if (!this.running && this.queue.length > 0) {
|
|
||||||
this.running = true;
|
|
||||||
const resolve = this.queue.shift();
|
|
||||||
resolve();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const gitSemaphore = new GitSemaphore();
|
|
||||||
|
|
||||||
// Run a git command while holding the global git semaphore. Failures whose
// message looks like a lock conflict are retried with exponential backoff
// (1s, 2s, 4s, ...) up to maxRetries attempts; any other failure, or a lock
// failure on the last attempt, is re-thrown. The semaphore is always released.
export const executeGitCommandWithRetry = async (commandArgs, sourceDir, description, maxRetries = 5) => {
  const lockMarkers = [
    'index.lock',
    'unable to lock',
    'Another git process',
    'fatal: Unable to create',
    'fatal: index file',
  ];

  // Accepts either an argv-style array (['git', 'status', '--porcelain']) or a single string.
  const runOnce = () => {
    if (!Array.isArray(commandArgs)) {
      return $`cd ${sourceDir} && ${commandArgs}`;
    }
    const [cmd, ...args] = commandArgs;
    return $`cd ${sourceDir} && ${cmd} ${args}`;
  };

  await gitSemaphore.acquire();

  try {
    let attempt = 1;
    while (attempt <= maxRetries) {
      try {
        return await runOnce();
      } catch (error) {
        const isLockError = lockMarkers.some((marker) => error.message.includes(marker));
        if (!isLockError || attempt >= maxRetries) {
          throw error;
        }

        const delay = Math.pow(2, attempt - 1) * 1000;
        console.log(chalk.yellow(` ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`));
        await new Promise(resolve => setTimeout(resolve, delay));
      }
      attempt += 1;
    }
  } finally {
    gitSemaphore.release();
  }
};
|
|
||||||
|
|
||||||
// Git workspace management helpers.

// Discard all uncommitted work in sourceDir (hard reset + removal of untracked
// files) when `git status --porcelain` reports anything. Never throws: returns
// { success: true, hadChanges } or { success: false, error }.
const cleanWorkspace = async (sourceDir, reason = 'clean start') => {
  console.log(chalk.blue(` 🧹 Cleaning workspace for ${reason}`));
  try {
    const { stdout } = await $`cd ${sourceDir} && git status --porcelain`;
    const porcelain = stdout.trim();
    const hasChanges = porcelain.length > 0;

    if (!hasChanges) {
      console.log(chalk.blue(` ✅ Workspace already clean (no changes to remove)`));
      return { success: true, hadChanges: hasChanges };
    }

    // Captured before the reset so the summary can list what was removed.
    const changes = porcelain.split('\n').filter(line => line.length > 0);
    console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`));

    await $`cd ${sourceDir} && git reset --hard HEAD`;
    await $`cd ${sourceDir} && git clean -fd`;

    console.log(chalk.yellow(` ✅ Rollback completed - removed ${changes.length} contaminated changes:`));
    for (const change of changes.slice(0, 3)) {
      console.log(chalk.gray(` ${change}`));
    }
    const hiddenCount = changes.length - 3;
    if (changes.length > 3) {
      console.log(chalk.gray(` ... and ${hiddenCount} more files`));
    }
    return { success: true, hadChanges: hasChanges };
  } catch (error) {
    console.log(chalk.yellow(` ⚠️ Workspace cleanup failed: ${error.message}`));
    return { success: false, error };
  }
};
|
|
||||||
|
|
||||||
// Record a checkpoint commit before an agent attempt. The first attempt keeps
// whatever is in the workspace (so deliverables from earlier agents survive);
// later attempts clean the workspace first. Never throws: returns
// { success: true } or { success: false, error }.
export const createGitCheckpoint = async (sourceDir, description, attempt) => {
  console.log(chalk.blue(` 📍 Creating checkpoint for ${description} (attempt ${attempt})`));
  try {
    const isRetry = attempt > 1;
    if (isRetry) {
      const cleanResult = await cleanWorkspace(sourceDir, `${description} (retry cleanup)`);
      if (!cleanResult.success) {
        // A failed cleanup is reported but does not block the checkpoint.
        console.log(chalk.yellow(` ⚠️ Workspace cleanup failed, continuing anyway: ${cleanResult.error.message}`));
      }
    }

    // Inspect the workspace before staging so the final message reflects what was there.
    const { stdout } = await executeGitCommandWithRetry(['git', 'status', '--porcelain'], sourceDir, 'status check');
    const hasChanges = stdout.trim().length > 0;

    await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');

    const commitMessage = `📍 Checkpoint: ${description} (attempt ${attempt})`;
    await executeGitCommandWithRetry(['git', 'commit', '-m', commitMessage, '--allow-empty'], sourceDir, 'creating commit');

    console.log(
      hasChanges
        ? chalk.blue(` ✅ Checkpoint created with uncommitted changes staged`)
        : chalk.blue(` ✅ Empty checkpoint created (no workspace changes)`)
    );
    return { success: true };
  } catch (error) {
    console.log(chalk.yellow(` ⚠️ Checkpoint creation failed after retries: ${error.message}`));
    return { success: false, error };
  }
};
|
|
||||||
|
|
||||||
// Stage everything in sourceDir and commit it as the successful result of an
// agent run (an empty commit is allowed). Never throws: returns
// { success: true } or { success: false, error }.
export const commitGitSuccess = async (sourceDir, description) => {
  console.log(chalk.green(` 💾 Committing successful results for ${description}`));
  try {
    // Snapshot the pending changes first; they are only used for the summary below.
    const { stdout } = await executeGitCommandWithRetry(['git', 'status', '--porcelain'], sourceDir, 'status check for success commit');
    const changes = stdout.trim().split('\n').filter(line => line.length > 0);

    await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes for success commit');

    const commitMessage = `✅ ${description}: completed successfully`;
    await executeGitCommandWithRetry(['git', 'commit', '-m', commitMessage, '--allow-empty'], sourceDir, 'creating success commit');

    if (changes.length === 0) {
      console.log(chalk.green(` ✅ Empty success commit created (agent made no file changes)`));
      return { success: true };
    }

    console.log(chalk.green(` ✅ Success commit created with ${changes.length} file changes:`));
    for (const change of changes.slice(0, 5)) {
      console.log(chalk.gray(` ${change}`));
    }
    if (changes.length > 5) {
      console.log(chalk.gray(` ... and ${changes.length - 5} more files`));
    }
    return { success: true };
  } catch (error) {
    console.log(chalk.yellow(` ⚠️ Success commit failed after retries: ${error.message}`));
    return { success: false, error };
  }
};
|
|
||||||
|
|
||||||
// Throw away all uncommitted work in sourceDir: hard reset to HEAD, then remove
// untracked files and directories. Never throws: returns { success: true } or
// { success: false, error }.
export const rollbackGitWorkspace = async (sourceDir, reason = 'retry preparation') => {
  console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`));
  try {
    // Captured before the reset so the summary can list what was removed.
    const { stdout } = await executeGitCommandWithRetry(['git', 'status', '--porcelain'], sourceDir, 'status check for rollback');
    const changes = stdout.trim().split('\n').filter(line => line.length > 0);

    const resetSteps = [
      [['git', 'reset', '--hard', 'HEAD'], 'hard reset for rollback'],
      [['git', 'clean', '-fd'], 'cleaning untracked files for rollback'],
    ];
    for (const [argv, label] of resetSteps) {
      await executeGitCommandWithRetry(argv, sourceDir, label);
    }

    if (changes.length === 0) {
      console.log(chalk.yellow(` ✅ Rollback completed - no changes to remove`));
      return { success: true };
    }

    console.log(chalk.yellow(` ✅ Rollback completed - removed ${changes.length} contaminated changes:`));
    for (const change of changes.slice(0, 3)) {
      console.log(chalk.gray(` ${change}`));
    }
    if (changes.length > 3) {
      console.log(chalk.gray(` ... and ${changes.length - 3} more files`));
    }
    return { success: true };
  } catch (error) {
    console.log(chalk.red(` ❌ Rollback failed after retries: ${error.message}`));
    return { success: false, error };
  }
};
|
|
||||||
@@ -0,0 +1,299 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import { $ } from 'zx';
|
||||||
|
import chalk from 'chalk';
|
||||||
|
|
||||||
|
/**
 * Tell whether `dir` is inside a git repository.
 *
 * Runs `git rev-parse --git-dir` from `dir` with output suppressed. Any
 * failure of that command (including `dir` not existing) is reported as `false`.
 */
export async function isGitRepository(dir: string): Promise<boolean> {
  let insideRepository = true;
  try {
    await $`cd ${dir} && git rev-parse --git-dir`.quiet();
  } catch {
    insideRepository = false;
  }
  return insideRepository;
}
|
||||||
|
|
||||||
|
/** Outcome returned by the git workspace helpers instead of throwing. */
interface GitOperationResult {
  /** Whether the operation completed (or was skipped) without error. */
  success: boolean;
  /** Optional flag for whether the workspace had pending changes. */
  hadChanges?: boolean;
  /** The failure, present when `success` is false. */
  error?: Error;
}
|
||||||
|
|
||||||
|
/**
 * List the entries reported by `git status --porcelain` for `sourceDir`.
 *
 * Each returned string is one unmodified porcelain line: a two-character
 * status code, a space, then the path. The output is split into lines before
 * any trimming. Trimming the whole output first would strip the leading space
 * of the first entry and turn an unstaged ` M file` into what reads as a
 * staged `M file`.
 *
 * @param sourceDir - Directory the status command is run in.
 * @param operationDescription - Label passed to the retry helper for its log messages.
 * @returns One string per changed entry; empty when the workspace is clean.
 * @throws Whatever `executeGitCommandWithRetry` throws when the status command fails.
 */
async function getChangedFiles(
  sourceDir: string,
  operationDescription: string
): Promise<string[]> {
  const status = await executeGitCommandWithRetry(
    ['git', 'status', '--porcelain'],
    sourceDir,
    operationDescription
  );
  return status.stdout
    .split(/\r?\n/)
    // Drop only blank lines (e.g. the one after the trailing newline).
    .filter((line) => line.trim().length > 0);
}
|
||||||
|
|
||||||
|
/**
 * Print a summary of changed files to the console.
 *
 * @param changes - Entries to summarize.
 * @param messageWithChanges - Headline used when `changes` is non-empty; the
 *   first `{count}` in it is replaced with the number of entries.
 * @param messageWithoutChanges - Line printed instead when `changes` is empty.
 * @param color - Chalk style applied to the headline / empty message.
 * @param maxToShow - How many entries to list before collapsing the rest into
 *   a "... and N more files" line.
 */
function logChangeSummary(
  changes: string[],
  messageWithChanges: string,
  messageWithoutChanges: string,
  color: typeof chalk.green,
  maxToShow: number = 5
): void {
  if (changes.length === 0) {
    console.log(color(messageWithoutChanges));
    return;
  }

  const headline = messageWithChanges.replace('{count}', String(changes.length));
  console.log(color(headline));

  for (const change of changes.slice(0, maxToShow)) {
    console.log(chalk.gray(` ${change}`));
  }

  if (changes.length > maxToShow) {
    console.log(chalk.gray(` ... and ${changes.length - maxToShow} more files`));
  }
}
|
||||||
|
|
||||||
|
/**
 * Wrap a caught value in a failed `GitOperationResult`.
 *
 * `Error` instances are passed through untouched; anything else is stringified
 * and wrapped in a new `Error`.
 */
function toErrorResult(error: unknown): GitOperationResult {
  if (error instanceof Error) {
    return { success: false, error };
  }
  return { success: false, error: new Error(String(error)) };
}
|
||||||
|
|
||||||
|
/**
 * Single-slot semaphore that serializes git operations, preventing index.lock
 * conflicts when agents run in parallel. Waiters are served in arrival order.
 */
class GitSemaphore {
  /** Resolvers of callers waiting for the slot, oldest first. */
  private queue: Array<() => void> = [];
  /** True while some caller holds the slot. */
  private running: boolean = false;

  /** Resolves once the caller has been granted the slot. */
  async acquire(): Promise<void> {
    const granted = new Promise<void>((grant) => {
      this.queue.push(grant);
      this.process();
    });
    return granted;
  }

  /** Gives the slot back and wakes the next waiter, if any. */
  release(): void {
    this.running = false;
    this.process();
  }

  /** Hands the slot to the oldest waiter when the slot is free. */
  private process(): void {
    if (this.running || this.queue.length === 0) {
      return;
    }
    this.running = true;
    const grantNext = this.queue.shift();
    grantNext!();
  }
}
|
||||||
|
|
||||||
|
const gitSemaphore = new GitSemaphore();
|
||||||
|
|
||||||
|
// Case-sensitive substrings of git error output that are treated as lock conflicts.
const GIT_LOCK_ERROR_PATTERNS = [
  'index.lock',
  'unable to lock',
  'Another git process',
  'fatal: Unable to create',
  'fatal: index file',
];

/** True when `errorMessage` contains any of the known lock-conflict substrings. */
function isGitLockError(errorMessage: string): boolean {
  for (const pattern of GIT_LOCK_ERROR_PATTERNS) {
    if (errorMessage.includes(pattern)) {
      return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Run a git command in `sourceDir` while holding the global git semaphore.
 *
 * A failure whose message matches a known lock-conflict pattern is retried
 * after an exponentially growing pause (1s, 2s, 4s, ...) until `maxRetries`
 * attempts have been made. Any other failure, or a lock failure on the final
 * attempt, is re-thrown unchanged. The semaphore is released in all cases.
 *
 * @param commandArgs - Command as argv, e.g. `['git', 'status', '--porcelain']`.
 * @param sourceDir - Directory to run the command in.
 * @param description - Label used in the retry log message.
 * @param maxRetries - Maximum number of attempts.
 * @returns The command's result (stdout / stderr).
 * @throws The command's error, or a generic Error if no attempt was made
 *   (only possible when `maxRetries` is below 1).
 */
export async function executeGitCommandWithRetry(
  commandArgs: string[],
  sourceDir: string,
  description: string,
  maxRetries: number = 5
): Promise<{ stdout: string; stderr: string }> {
  await gitSemaphore.acquire();

  try {
    let attempt = 1;
    while (attempt <= maxRetries) {
      try {
        const [cmd, ...args] = commandArgs;
        return await $`cd ${sourceDir} && ${cmd} ${args}`;
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        const shouldRetry = attempt < maxRetries && isGitLockError(message);
        if (!shouldRetry) {
          throw error;
        }

        const delay = 2 ** (attempt - 1) * 1000;
        const notice = ` ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`;
        console.log(chalk.yellow(notice));
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
      attempt += 1;
    }
    throw new Error(`Git command failed after ${maxRetries} retries`);
  } finally {
    gitSemaphore.release();
  }
}
|
||||||
|
|
||||||
|
/**
 * Discard all uncommitted work in `sourceDir`.
 *
 * Two phases: `git reset --hard HEAD` restores tracked files, then
 * `git clean -fd` removes untracked files and directories. When `sourceDir`
 * is not a git repository, nothing is done and success is reported.
 *
 * @param sourceDir - Workspace to roll back.
 * @param reason - Text included in the progress message.
 * @returns `{ success: true }`, or a failed result carrying the error. Never throws.
 */
export async function rollbackGitWorkspace(
  sourceDir: string,
  reason: string = 'retry preparation'
): Promise<GitOperationResult> {
  const insideRepository = await isGitRepository(sourceDir);
  if (!insideRepository) {
    console.log(chalk.gray(` ⏭️ Skipping git rollback (not a git repository)`));
    return { success: true };
  }

  console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`));
  try {
    // Captured before the reset so the summary can list what was removed.
    const changes = await getChangedFiles(sourceDir, 'status check for rollback');

    const resetSteps: Array<[argv: string[], label: string]> = [
      [['git', 'reset', '--hard', 'HEAD'], 'hard reset for rollback'],
      [['git', 'clean', '-fd'], 'cleaning untracked files for rollback'],
    ];
    for (const [argv, label] of resetSteps) {
      await executeGitCommandWithRetry(argv, sourceDir, label);
    }

    logChangeSummary(
      changes,
      ' ✅ Rollback completed - removed {count} contaminated changes:',
      ' ✅ Rollback completed - no changes to remove',
      chalk.yellow,
      3
    );
    return { success: true };
  } catch (error) {
    const failure = toErrorResult(error);
    console.log(chalk.red(` ❌ Rollback failed after retries: ${failure.error?.message}`));
    return failure;
  }
}
|
||||||
|
|
||||||
|
/**
 * Record a checkpoint commit before an agent attempt.
 *
 * The first attempt keeps whatever is already in the workspace, so existing
 * deliverables survive. Later attempts roll the workspace back first, so a
 * failed attempt's leftovers do not pollute the retry. A failed rollback is
 * logged but does not stop the checkpoint. When `sourceDir` is not a git
 * repository, nothing is done and success is reported.
 *
 * @param sourceDir - Workspace to checkpoint.
 * @param description - Text used in log messages and the commit message.
 * @param attempt - 1-based attempt number; values above 1 trigger the cleanup.
 * @returns `{ success: true }`, or a failed result carrying the error. Never throws.
 */
export async function createGitCheckpoint(
  sourceDir: string,
  description: string,
  attempt: number
): Promise<GitOperationResult> {
  const insideRepository = await isGitRepository(sourceDir);
  if (!insideRepository) {
    console.log(chalk.gray(` ⏭️ Skipping git checkpoint (not a git repository)`));
    return { success: true };
  }

  console.log(chalk.blue(` 📍 Creating checkpoint for ${description} (attempt ${attempt})`));
  try {
    const isRetry = attempt > 1;
    if (isRetry) {
      const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`);
      if (!cleanResult.success) {
        const warning = ` ⚠️ Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`;
        console.log(chalk.yellow(warning));
      }
    }

    // Inspect the workspace before staging so the final message reflects what was there.
    const pendingChanges = await getChangedFiles(sourceDir, 'status check');
    const hasChanges = pendingChanges.length > 0;

    await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');

    const commitMessage = `📍 Checkpoint: ${description} (attempt ${attempt})`;
    await executeGitCommandWithRetry(
      ['git', 'commit', '-m', commitMessage, '--allow-empty'],
      sourceDir,
      'creating commit'
    );

    console.log(
      hasChanges
        ? chalk.blue(` ✅ Checkpoint created with uncommitted changes staged`)
        : chalk.blue(` ✅ Empty checkpoint created (no workspace changes)`)
    );
    return { success: true };
  } catch (error) {
    const failure = toErrorResult(error);
    console.log(chalk.yellow(` ⚠️ Checkpoint creation failed after retries: ${failure.error?.message}`));
    return failure;
  }
}
|
||||||
|
|
||||||
|
/**
 * Commits the workspace after an agent finished successfully.
 *
 * When `sourceDir` is not a git repository the function logs a skip notice and
 * reports success. Otherwise it captures the list of changed files, stages
 * everything, creates a commit (allowed to be empty) and logs a summary of the
 * captured changes.
 *
 * @param sourceDir - Directory the git commands are run against.
 * @param description - Label used in the log output and the commit message.
 * @returns `{ success: true }` on success, otherwise whatever `toErrorResult`
 *   builds from the thrown error.
 */
export async function commitGitSuccess(
  sourceDir: string,
  description: string
): Promise<GitOperationResult> {
  const insideRepo = await isGitRepository(sourceDir);
  if (!insideRepo) {
    console.log(chalk.gray(` ⏭️ Skipping git commit (not a git repository)`));
    return { success: true };
  }

  console.log(chalk.green(` 💾 Committing successful results for ${description}`));

  try {
    // Captured before staging/committing so the summary reflects what this commit picked up.
    const committedFiles = await getChangedFiles(sourceDir, 'status check for success commit');

    const stageArgs = ['git', 'add', '-A'];
    await executeGitCommandWithRetry(stageArgs, sourceDir, 'staging changes for success commit');

    const commitArgs = ['git', 'commit', '-m', `✅ ${description}: completed successfully`, '--allow-empty'];
    await executeGitCommandWithRetry(commitArgs, sourceDir, 'creating success commit');

    logChangeSummary(
      committedFiles,
      ' ✅ Success commit created with {count} file changes:',
      ' ✅ Empty success commit created (agent made no file changes)',
      chalk.green,
      5
    );
    return { success: true };
  } catch (err) {
    const failure = toErrorResult(err);
    console.log(chalk.yellow(` ⚠️ Success commit failed after retries: ${failure.error?.message}`));
    return failure;
  }
}
|
||||||
|
|
||||||
|
/**
 * Resolves the hash of the commit `HEAD` points at in `sourceDir`.
 *
 * @param sourceDir - Directory to query.
 * @returns The trimmed output of `git rev-parse HEAD`, or `null` when
 *   `sourceDir` is not a git repository or the command fails.
 */
export async function getGitCommitHash(sourceDir: string): Promise<string | null> {
  const insideRepo = await isGitRepository(sourceDir);
  if (!insideRepo) {
    return null;
  }

  try {
    const { stdout } = await $`cd ${sourceDir} && git rev-parse HEAD`;
    return stdout.trim();
  } catch {
    // Any failure of the command is reported the same way as "no repository".
    return null;
  }
}
|
||||||
@@ -1,98 +0,0 @@
|
|||||||
import chalk from 'chalk';
|
|
||||||
import { formatDuration } from '../audit/utils.js';
|
|
||||||
|
|
||||||
// Timing utilities
|
|
||||||
|
|
||||||
/**
 * Simple stopwatch built on `Date.now()`.
 *
 * The timer starts when it is constructed. `duration()` reports the elapsed
 * time so far, and freezes at the stop time once `stop()` has been called.
 */
export class Timer {
  /**
   * @param name - Label stored on the timer.
   */
  constructor(name) {
    this.name = name;
    this.startTime = Date.now();
    this.endTime = null;
  }

  /**
   * Records the stop time and returns the elapsed time.
   *
   * @returns Difference between the stop time and the start time.
   */
  stop() {
    this.endTime = Date.now();
    return this.duration();
  }

  /**
   * Elapsed time between start and stop, or between start and now while the
   * timer is still running.
   *
   * @returns Difference of the two `Date.now()` readings.
   */
  duration() {
    // `??` rather than `||`: a recorded stop time of 0 (e.g. a mocked clock)
    // is a real value and must not be mistaken for "still running".
    const end = this.endTime ?? Date.now();
    return end - this.startTime;
  }
}
|
|
||||||
|
|
||||||
// Module-level trackers shared by everything that imports this file.

/**
 * Timing state for the run: the overall timer (null until one is assigned)
 * and per-phase, per-command and per-agent duration maps.
 */
export const timingResults = {
  total: null,
  phases: {},
  commands: {},
  agents: {},
};

/**
 * Cost state for the run: a per-agent cost map and the running total.
 */
export const costResults = {
  agents: {},
  total: 0,
};
|
|
||||||
|
|
||||||
/**
 * Prints the timing and cost summary for the run to the console.
 *
 * Stops the overall timer in `timingResults.total`, then prints the total
 * execution time followed by a breakdown for every non-empty section
 * (phases, commands, agents, costs). Each timing row shows its share of the
 * total as a percentage.
 *
 * If no overall timer has been set, a notice is printed and nothing else
 * happens.
 */
export const displayTimingSummary = () => {
  // `timingResults.total` is initialised to null; calling stop() on it would throw.
  if (!timingResults.total) {
    console.log(chalk.yellow('No timing data available'));
    return;
  }

  const totalDuration = timingResults.total.stop();

  // A total of 0 would turn every percentage into "NaN" or "Infinity";
  // report 0.0 in that case instead of dividing.
  const percentOfTotal = (duration) =>
    totalDuration > 0 ? ((duration / totalDuration) * 100).toFixed(1) : '0.0';

  console.log(chalk.cyan.bold('\n⏱️ TIMING SUMMARY'));
  console.log(chalk.gray('─'.repeat(60)));

  // Total execution time
  console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`));
  console.log();

  // Phase breakdown
  if (Object.keys(timingResults.phases).length > 0) {
    console.log(chalk.yellow.bold('🔍 Phase Breakdown:'));
    let phaseTotal = 0;
    for (const [phase, duration] of Object.entries(timingResults.phases)) {
      const percentage = percentOfTotal(duration);
      console.log(chalk.yellow(` ${phase.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)`));
      phaseTotal += duration;
    }
    console.log(chalk.gray(` ${'Phases Total'.padEnd(20)} ${formatDuration(phaseTotal).padStart(8)} (${percentOfTotal(phaseTotal)}%)`));
    console.log();
  }

  // Command breakdown
  if (Object.keys(timingResults.commands).length > 0) {
    console.log(chalk.blue.bold('🖥️ Command Breakdown:'));
    let commandTotal = 0;
    for (const [command, duration] of Object.entries(timingResults.commands)) {
      const percentage = percentOfTotal(duration);
      console.log(chalk.blue(` ${command.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)`));
      commandTotal += duration;
    }
    console.log(chalk.gray(` ${'Commands Total'.padEnd(20)} ${formatDuration(commandTotal).padStart(8)} (${percentOfTotal(commandTotal)}%)`));
    console.log();
  }

  // Agent breakdown
  if (Object.keys(timingResults.agents).length > 0) {
    console.log(chalk.magenta.bold('🤖 Agent Breakdown:'));
    let agentTotal = 0;
    for (const [agent, duration] of Object.entries(timingResults.agents)) {
      const percentage = percentOfTotal(duration);
      // Agent keys use dashes; show them as spaces.
      const displayName = agent.replace(/-/g, ' ');
      console.log(chalk.magenta(` ${displayName.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)`));
      agentTotal += duration;
    }
    console.log(chalk.gray(` ${'Agents Total'.padEnd(20)} ${formatDuration(agentTotal).padStart(8)} (${percentOfTotal(agentTotal)}%)`));
  }

  // Cost breakdown
  if (Object.keys(costResults.agents).length > 0) {
    console.log(chalk.green.bold('\n💰 Cost Breakdown:'));
    for (const [agent, cost] of Object.entries(costResults.agents)) {
      const displayName = agent.replace(/-/g, ' ');
      console.log(chalk.green(` ${displayName.padEnd(20)} $${cost.toFixed(4).padStart(8)}`));
    }
    console.log(chalk.gray(` ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`));
  }

  console.log(chalk.gray('─'.repeat(60)));
};
|
|
||||||
@@ -0,0 +1,110 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import { formatDuration } from './formatting.js';
|
||||||
|
|
||||||
|
// Timing utilities
|
||||||
|
|
||||||
|
/**
 * Simple stopwatch built on `Date.now()`.
 *
 * The timer starts when it is constructed. `duration()` reports the elapsed
 * time so far, and freezes at the stop time once `stop()` has been called.
 */
export class Timer {
  /** Label given to the constructor. */
  name: string;
  /** `Date.now()` reading taken at construction. */
  startTime: number;
  /** `Date.now()` reading taken by `stop()`, or `null` while still running. */
  endTime: number | null = null;

  /**
   * @param name - Label stored on the timer.
   */
  constructor(name: string) {
    this.name = name;
    this.startTime = Date.now();
  }

  /**
   * Records the stop time and returns the elapsed time.
   *
   * @returns Difference between the stop time and the start time.
   */
  stop(): number {
    this.endTime = Date.now();
    return this.duration();
  }

  /**
   * Elapsed time between start and stop, or between start and now while the
   * timer is still running.
   *
   * @returns Difference of the two `Date.now()` readings.
   */
  duration(): number {
    // `??` rather than `||`: a recorded stop time of 0 (e.g. a mocked clock)
    // is a real value and must not be mistaken for "still running".
    const end = this.endTime ?? Date.now();
    return end - this.startTime;
  }
}
|
||||||
|
|
||||||
|
/** Durations keyed by agent name. */
type TimingResultsAgents = Record<string, number>;

/** Shape of the shared timing tracker. */
type TimingResults = {
  /** Overall timer; `null` until one has been assigned. */
  total: Timer | null;
  agents: TimingResultsAgents;
};

/** Costs keyed by agent name. */
type CostResultsAgents = Record<string, number>;

/** Shape of the shared cost tracker. */
type CostResults = {
  agents: CostResultsAgents;
  /** Running total printed as "Total Cost" in the summary. */
  total: number;
};

// Module-level trackers shared by everything that imports this file.
export const timingResults: TimingResults = { total: null, agents: {} };

export const costResults: CostResults = { agents: {}, total: 0 };
|
||||||
|
|
||||||
|
/**
 * Prints the timing and cost summary for the run to the console.
 *
 * Stops the overall timer in `timingResults.total`, then prints the total
 * execution time, the per-agent timing breakdown (each row with its share of
 * the total as a percentage) and the per-agent cost breakdown. Sections with
 * no entries are omitted.
 *
 * If no overall timer has been set, a notice is printed and nothing else
 * happens.
 */
export const displayTimingSummary = (): void => {
  if (!timingResults.total) {
    console.log(chalk.yellow('No timing data available'));
    return;
  }

  const totalDuration = timingResults.total.stop();

  // A total of 0 would turn every percentage into "NaN" or "Infinity";
  // report 0.0 in that case instead of dividing.
  const percentOfTotal = (duration: number): string =>
    totalDuration > 0 ? ((duration / totalDuration) * 100).toFixed(1) : '0.0';

  console.log(chalk.cyan.bold('\n⏱️ TIMING SUMMARY'));
  console.log(chalk.gray('─'.repeat(60)));

  // Total execution time
  console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`));
  console.log();

  // Agent breakdown
  if (Object.keys(timingResults.agents).length > 0) {
    console.log(chalk.magenta.bold('🤖 Agent Breakdown:'));
    let agentTotal = 0;
    for (const [agent, duration] of Object.entries(timingResults.agents)) {
      const percentage = percentOfTotal(duration);
      // Agent keys use dashes; show them as spaces.
      const displayName = agent.replace(/-/g, ' ');
      console.log(
        chalk.magenta(
          ` ${displayName.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)`
        )
      );
      agentTotal += duration;
    }
    console.log(
      chalk.gray(
        ` ${'Agents Total'.padEnd(20)} ${formatDuration(agentTotal).padStart(8)} (${percentOfTotal(agentTotal)}%)`
      )
    );
  }

  // Cost breakdown
  if (Object.keys(costResults.agents).length > 0) {
    console.log(chalk.green.bold('\n💰 Cost Breakdown:'));
    for (const [agent, cost] of Object.entries(costResults.agents)) {
      const displayName = agent.replace(/-/g, ' ');
      console.log(chalk.green(` ${displayName.padEnd(20)} $${cost.toFixed(4).padStart(8)}`));
    }
    console.log(chalk.gray(` ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`));
  }

  console.log(chalk.gray('─'.repeat(60)));
};
|
||||||
@@ -1,9 +1,35 @@
|
|||||||
|
// Copyright (C) 2025 Keygraph, Inc.
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License version 3
|
||||||
|
// as published by the Free Software Foundation.
|
||||||
|
|
||||||
import { AGENTS } from '../session-manager.js';
|
import { AGENTS } from '../session-manager.js';
|
||||||
|
|
||||||
|
/** One entry of the `todos` list carried by a tool call's input. */
interface TodoEntry {
  status: string;
  content: string;
}

/**
 * Input payload of a tool call. The named fields are the ones given an
 * explicit type here; any other key is allowed and typed as `unknown`.
 */
interface ToolCallInput {
  url?: string;
  element?: string;
  key?: string;
  fields?: unknown[];
  text?: string;
  action?: string;
  description?: string;
  todos?: TodoEntry[];
  [key: string]: unknown;
}

/** A tool call: the tool's name plus its optional input payload. */
interface ToolCall {
  name: string;
  input?: ToolCallInput;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract domain from URL for display
|
* Extract domain from URL for display
|
||||||
*/
|
*/
|
||||||
function extractDomain(url) {
|
function extractDomain(url: string): string {
|
||||||
try {
|
try {
|
||||||
const urlObj = new URL(url);
|
const urlObj = new URL(url);
|
||||||
return urlObj.hostname || url.slice(0, 30);
|
return urlObj.hostname || url.slice(0, 30);
|
||||||
@@ -15,24 +41,24 @@ function extractDomain(url) {
|
|||||||
/**
|
/**
|
||||||
* Summarize TodoWrite updates into clean progress indicators
|
* Summarize TodoWrite updates into clean progress indicators
|
||||||
*/
|
*/
|
||||||
function summarizeTodoUpdate(input) {
|
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
|
||||||
if (!input?.todos || !Array.isArray(input.todos)) {
|
if (!input?.todos || !Array.isArray(input.todos)) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const todos = input.todos;
|
const todos = input.todos;
|
||||||
const completed = todos.filter(t => t.status === 'completed');
|
const completed = todos.filter((t) => t.status === 'completed');
|
||||||
const inProgress = todos.filter(t => t.status === 'in_progress');
|
const inProgress = todos.filter((t) => t.status === 'in_progress');
|
||||||
|
|
||||||
// Show recently completed tasks
|
// Show recently completed tasks
|
||||||
if (completed.length > 0) {
|
if (completed.length > 0) {
|
||||||
const recent = completed[completed.length - 1];
|
const recent = completed[completed.length - 1]!;
|
||||||
return `✅ ${recent.content}`;
|
return `✅ ${recent.content}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Show current in-progress task
|
// Show current in-progress task
|
||||||
if (inProgress.length > 0) {
|
if (inProgress.length > 0) {
|
||||||
const current = inProgress[0];
|
const current = inProgress[0]!;
|
||||||
return `🔄 ${current.content}`;
|
return `🔄 ${current.content}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -42,9 +68,9 @@ function summarizeTodoUpdate(input) {
|
|||||||
/**
|
/**
|
||||||
* Get agent prefix for parallel execution
|
* Get agent prefix for parallel execution
|
||||||
*/
|
*/
|
||||||
export function getAgentPrefix(description) {
|
export function getAgentPrefix(description: string): string {
|
||||||
// Map agent names to their prefixes
|
// Map agent names to their prefixes
|
||||||
const agentPrefixes = {
|
const agentPrefixes: Record<string, string> = {
|
||||||
'injection-vuln': '[Injection]',
|
'injection-vuln': '[Injection]',
|
||||||
'xss-vuln': '[XSS]',
|
'xss-vuln': '[XSS]',
|
||||||
'auth-vuln': '[Auth]',
|
'auth-vuln': '[Auth]',
|
||||||
@@ -54,12 +80,13 @@ export function getAgentPrefix(description) {
|
|||||||
'xss-exploit': '[XSS]',
|
'xss-exploit': '[XSS]',
|
||||||
'auth-exploit': '[Auth]',
|
'auth-exploit': '[Auth]',
|
||||||
'authz-exploit': '[Authz]',
|
'authz-exploit': '[Authz]',
|
||||||
'ssrf-exploit': '[SSRF]'
|
'ssrf-exploit': '[SSRF]',
|
||||||
};
|
};
|
||||||
|
|
||||||
// First try to match by agent name directly
|
// First try to match by agent name directly
|
||||||
for (const [agentName, prefix] of Object.entries(agentPrefixes)) {
|
for (const [agentName, prefix] of Object.entries(agentPrefixes)) {
|
||||||
if (AGENTS[agentName] && description.includes(AGENTS[agentName].displayName)) {
|
const agent = AGENTS[agentName as keyof typeof AGENTS];
|
||||||
|
if (agent && description.includes(agent.displayName)) {
|
||||||
return prefix;
|
return prefix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -67,7 +94,7 @@ export function getAgentPrefix(description) {
|
|||||||
// Fallback to partial matches for backwards compatibility
|
// Fallback to partial matches for backwards compatibility
|
||||||
if (description.includes('injection')) return '[Injection]';
|
if (description.includes('injection')) return '[Injection]';
|
||||||
if (description.includes('xss')) return '[XSS]';
|
if (description.includes('xss')) return '[XSS]';
|
||||||
if (description.includes('authz')) return '[Authz]'; // Check authz before auth
|
if (description.includes('authz')) return '[Authz]'; // Check authz before auth
|
||||||
if (description.includes('auth')) return '[Auth]';
|
if (description.includes('auth')) return '[Auth]';
|
||||||
if (description.includes('ssrf')) return '[SSRF]';
|
if (description.includes('ssrf')) return '[SSRF]';
|
||||||
|
|
||||||
@@ -77,7 +104,7 @@ export function getAgentPrefix(description) {
|
|||||||
/**
|
/**
|
||||||
* Format browser tool calls into clean progress indicators
|
* Format browser tool calls into clean progress indicators
|
||||||
*/
|
*/
|
||||||
function formatBrowserAction(toolCall) {
|
function formatBrowserAction(toolCall: ToolCall): string {
|
||||||
const toolName = toolCall.name;
|
const toolName = toolCall.name;
|
||||||
const input = toolCall.input || {};
|
const input = toolCall.input || {};
|
||||||
|
|
||||||
@@ -175,13 +202,13 @@ function formatBrowserAction(toolCall) {
|
|||||||
/**
|
/**
|
||||||
* Filter out JSON tool calls from content, with special handling for Task calls
|
* Filter out JSON tool calls from content, with special handling for Task calls
|
||||||
*/
|
*/
|
||||||
export function filterJsonToolCalls(content) {
|
export function filterJsonToolCalls(content: string | null | undefined): string {
|
||||||
if (!content || typeof content !== 'string') {
|
if (!content || typeof content !== 'string') {
|
||||||
return content;
|
return content || '';
|
||||||
}
|
}
|
||||||
|
|
||||||
const lines = content.split('\n');
|
const lines = content.split('\n');
|
||||||
const processedLines = [];
|
const processedLines: string[] = [];
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
const trimmed = line.trim();
|
const trimmed = line.trim();
|
||||||
@@ -194,7 +221,7 @@ export function filterJsonToolCalls(content) {
|
|||||||
// Check if this is a JSON tool call
|
// Check if this is a JSON tool call
|
||||||
if (trimmed.startsWith('{"type":"tool_use"')) {
|
if (trimmed.startsWith('{"type":"tool_use"')) {
|
||||||
try {
|
try {
|
||||||
const toolCall = JSON.parse(trimmed);
|
const toolCall = JSON.parse(trimmed) as ToolCall;
|
||||||
|
|
||||||
// Special handling for Task tool calls
|
// Special handling for Task tool calls
|
||||||
if (toolCall.name === 'Task') {
|
if (toolCall.name === 'Task') {
|
||||||
@@ -223,8 +250,7 @@ export function filterJsonToolCalls(content) {
|
|||||||
|
|
||||||
// Hide all other tool calls (Read, Write, Grep, etc.)
|
// Hide all other tool calls (Read, Write, Grep, etc.)
|
||||||
continue;
|
continue;
|
||||||
|
} catch {
|
||||||
} catch (error) {
|
|
||||||
// If JSON parsing fails, treat as regular text
|
// If JSON parsing fails, treat as regular text
|
||||||
processedLines.push(line);
|
processedLines.push(line);
|
||||||
}
|
}
|
||||||
@@ -235,4 +261,4 @@ export function filterJsonToolCalls(content) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return processedLines.join('\n');
|
return processedLines.join('\n');
|
||||||
}
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user