From 378ed824ada93e684f5b10f318da54063cfad5a7 Mon Sep 17 00:00:00 2001 From: Arjun Malleswaran Date: Thu, 15 Jan 2026 11:30:46 -0800 Subject: [PATCH] Feat/temporal (#52) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: modularize claude-executor and extract shared utilities - Extract message handling into src/ai/message-handlers.ts with pure functions - Extract output formatting into src/ai/output-formatters.ts - Extract progress management into src/ai/progress-manager.ts - Add audit-logger.ts with Null Object pattern for optional logging - Add shared utilities: formatting.ts, file-io.ts, functional.ts - Consolidate getPromptNameForAgent into src/types/agents.ts * feat: add Claude Code custom commands for debug and review * feat: add Temporal integration foundation (phase 1-2) - Add Temporal SDK dependencies (@temporalio/client, worker, workflow, activity) - Add shared types for pipeline state, metrics, and progress queries - Add classifyErrorForTemporal() for retry behavior classification - Add docker-compose for Temporal server with SQLite persistence * feat: add Temporal activities for agent execution (phase 3) - Add activities.ts with heartbeat loop, git checkpoint/rollback, and error classification - Export runClaudePrompt, validateAgentOutput, ClaudePromptResult for Temporal use - Track attempt number via Temporal Context for accurate audit logging - Rollback git workspace before retry to ensure clean state * feat: add Temporal workflow for 5-phase pipeline orchestration (phase 4) * feat: add Temporal worker, client, and query tools (phase 5) - Add worker.ts with workflow bundling and graceful shutdown - Add client.ts CLI to start pipelines with progress polling - Add query.ts CLI to inspect running workflow state - Fix buffer overflow by truncating error messages and stack traces - Skip git operations gracefully on non-git repositories - Add kill.sh/start.sh dev scripts and Dockerfile.worker * feat: fix 
Docker worker container setup - Install uv instead of deprecated uvx package - Add mcp-server and configs directories to container - Mount target repo dynamically via TARGET_REPO env variable * fix: add report assembly step to Temporal workflow - Add assembleReportActivity to concatenate exploitation evidence files before report agent runs - Call assembleFinalReport in workflow Phase 5 before runReportAgent - Ensure deliverables directory exists before writing final report - Simplify pipeline-testing report prompt to just prepend header * refactor: consolidate Docker setup to root docker-compose.yml * feat: improve Temporal client UX and env handling - Change default to fire-and-forget (--wait flag to opt-in) - Add splash screen and improve console output formatting - Add .env to gitignore, remove from dockerignore for container access - Add Taskfile for common development commands * refactor: simplify session ID handling and improve Taskfile options - Include hostname in workflow ID for better audit log organization - Extract sanitizeHostname utility to audit/utils.ts for reuse - Remove unused generateSessionLogPath and buildLogFilePath functions - Simplify Taskfile with CONFIG/OUTPUT/CLEAN named parameters * chore: add .env.example and simplify .gitignore * docs: update README and CLAUDE.md for Temporal workflow usage - Replace Docker CLI instructions with Task-based commands - Add monitoring/stopping sections and workflow examples - Document Temporal orchestration layer and troubleshooting - Simplify file structure to key files overview * refactor: replace Taskfile with bash CLI script - Add shannon bash script with start/logs/query/stop/help commands - Remove Taskfile.yml dependency (no longer requires Task installation) - Update README.md and CLAUDE.md to use ./shannon commands - Update client.ts output to show ./shannon commands * docs: fix deliverable filename in README * refactor: remove direct CLI and .shannon-store.json in favor of Temporal - Delete 
src/shannon.ts direct CLI entry point (Temporal is now the only mode) - Remove .shannon-store.json session lock (Temporal handles workflow deduplication) - Remove broken scripts/export-metrics.js (imported non-existent function) - Update package.json to remove main, start script, and bin entry - Clean up CLAUDE.md and debug.md to remove obsolete references * chore: remove licensing comments from prompt files to prevent leaking into actual prompts * fix: resolve parallel workflow race conditions and retry logic bugs - Fix save_deliverable race condition using closure pattern instead of global variable - Fix error classification order so OutputValidationError matches before generic validation - Fix ApplicationFailure re-classification bug by checking instanceof before re-throwing - Add per-error-type retry limits (3 for output validation, 50 for billing) - Add fast retry intervals for pipeline testing mode (10s vs 5min) - Increase worker concurrent activities to 25 for parallel workflows * refactor: pipeline vuln→exploit workflow for parallel execution - Replace sync barrier between vuln/exploit phases with independent pipelines - Each vuln type runs: vuln agent → queue check → conditional exploit - Add checkExploitationQueue activity to skip exploits when no vulns found - Use Promise.allSettled for graceful failure handling across pipelines - Add PipelineSummary type for aggregated cost/duration/turns metrics * fix: re-throw retryable errors in checkExploitationQueue * fix: detect and retry on Claude Code spending cap errors - Add spending cap pattern detection in detectApiError() with retryable error - Add matching patterns to classifyErrorForTemporal() for proper Temporal retry - Add defense-in-depth safeguard in runClaudePrompt() for $0 cost / low turn detection - Add final sanity check in activities before declaring success * fix: increase heartbeat timeout to prevent false worker-dead detection Original 30s timeout was from POC spec assuming <5min activities. 
With hour-long activities and multiple concurrent workflows sharing one worker, resource contention causes event loop stalls exceeding 30s, triggering false heartbeat timeouts. Increased to 10min (prod) and 5min (testing). * fix: temporal db init * fix: persist home dir * feat: add per-workflow unified logging with ./shannon logs ID= - Add WorkflowLogger class for human-readable, per-workflow log files - Create workflow.log in audit-logs/{workflowId}/ with phase, agent, tool, and LLM events - Update ./shannon logs to require ID param and tail specific workflow log - Add phase transition logging at workflow boundaries - Include workflow completion summary with agent breakdown (duration, cost) - Mount audit-logs volume in docker-compose for host access * feat: configurable OUTPUT directory with auto-discovery - Add OUTPUT= option to write reports to custom directory - Mount custom output dir as volume for container-to-host persistence - Auto-discover workflow logs regardless of output path used - Display host output path in workflow start message - Add ASCII splash screen to ./shannon help --------- Co-authored-by: ezl-keygraph --- docker-compose.yml | 1 + shannon | 61 +++++++++++++++++++++++++++++++++++------- src/temporal/client.ts | 18 +++++++++++++ 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 852ac11..211e2bf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,6 +28,7 @@ services: volumes: - ./prompts:/app/prompts - ./audit-logs:/app/audit-logs + - ${OUTPUT_DIR:-./audit-logs}:/app/output - ${TARGET_REPO:-.}:/target-repo - ${BENCHMARKS_BASE:-.}:/benchmarks shm_size: 2gb diff --git a/shannon b/shannon index 9aaf223..61f8a4e 100755 --- a/shannon +++ b/shannon @@ -14,7 +14,15 @@ fi show_help() { cat << 'EOF' -Shannon - AI Penetration Testing Framework + + ███████╗██╗ ██╗ █████╗ ███╗ ██╗███╗ ██╗ ██████╗ ███╗ ██╗ + ██╔════╝██║ ██║██╔══██╗████╗ ██║████╗ ██║██╔═══██╗████╗ ██║ + 
███████╗███████║███████║██╔██╗ ██║██╔██╗ ██║██║ ██║██╔██╗ ██║ + ╚════██║██╔══██║██╔══██║██║╚██╗██║██║╚██╗██║██║ ██║██║╚██╗██║ + ███████║██║ ██║██║ ██║██║ ╚████║██║ ╚████║╚██████╔╝██║ ╚████║ + ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═══╝ + + AI Penetration Testing Framework Usage: ./shannon start URL= REPO= Start a pentest workflow @@ -25,15 +33,16 @@ Usage: Options for 'start': CONFIG= Configuration file (YAML) - OUTPUT= Output directory for reports + OUTPUT= Output directory for reports (default: ./audit-logs/) PIPELINE_TESTING=true Use minimal prompts for fast testing Options for 'stop': - CLEAN=true Remove all data including volumes + CLEAN=true Remove all data including volumes Examples: ./shannon start URL=https://example.com REPO=/path/to/repo ./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./config.yaml + ./shannon start URL=https://example.com REPO=/path/to/repo OUTPUT=./my-reports ./shannon logs ID=example.com_shannon-1234567890 ./shannon query ID=shannon-1234567890 ./shannon stop CLEAN=true @@ -64,8 +73,15 @@ is_temporal_ready() { temporal operator cluster health --address localhost:7233 2>/dev/null | grep -q "SERVING" } -# Ensure containers are running +# Ensure containers are running with correct mounts ensure_containers() { + # If custom OUTPUT_DIR is set, always refresh worker to ensure correct volume mount + # Docker compose will only recreate if the mount actually changed + if [ -n "$OUTPUT_DIR" ]; then + echo "Ensuring worker has correct output mount..." 
+ docker compose -f "$COMPOSE_FILE" up -d worker 2>/dev/null || true + fi + # Quick check: if Temporal is already healthy, we're good if is_temporal_ready; then return 0 @@ -125,13 +141,27 @@ cmd_start() { ;; esac + # Handle custom OUTPUT directory + # Export OUTPUT_DIR for docker-compose volume mount BEFORE starting containers + if [ -n "$OUTPUT" ]; then + # Create output directory if it doesn't exist + mkdir -p "$OUTPUT" + export OUTPUT_DIR="$OUTPUT" + fi + # Ensure containers are running (starts them if needed) ensure_containers # Build optional args ARGS="" [ -n "$CONFIG" ] && ARGS="$ARGS --config $CONFIG" - [ -n "$OUTPUT" ] && ARGS="$ARGS --output $OUTPUT" + + # Pass container path for output (where OUTPUT_DIR is mounted) + # Also pass display path so client can show the host path to user + if [ -n "$OUTPUT" ]; then + ARGS="$ARGS --output /app/output --display-output $OUTPUT" + fi + [ "$PIPELINE_TESTING" = "true" ] && ARGS="$ARGS --pipeline-testing" # Run the client to submit workflow @@ -148,18 +178,31 @@ cmd_logs() { exit 1 fi - WORKFLOW_LOG="./audit-logs/${ID}/workflow.log" + # Auto-discover the workflow log file + # 1. Check the default ./audit-logs/<ID>/ location first + # 2. Otherwise search beneath the current directory with find (-maxdepth 3), + # which covers a custom OUTPUT directory located directly under this tree + WORKFLOW_LOG="" - if [ -f "$WORKFLOW_LOG" ]; then + if [ -f "./audit-logs/${ID}/workflow.log" ]; then + WORKFLOW_LOG="./audit-logs/${ID}/workflow.log" + else + # Search for the workflow directory (handles custom OUTPUT paths) + FOUND=$(find .
-maxdepth 3 -path "*/${ID}/workflow.log" -type f 2>/dev/null | head -1) + if [ -n "$FOUND" ]; then + WORKFLOW_LOG="$FOUND" + fi + fi + + if [ -n "$WORKFLOW_LOG" ]; then echo "Tailing workflow log: $WORKFLOW_LOG" tail -f "$WORKFLOW_LOG" else - echo "ERROR: Workflow log not found: $WORKFLOW_LOG" + echo "ERROR: Workflow log not found for ID: $ID" echo "" echo "Possible causes:" echo " - Workflow hasn't started yet" echo " - Workflow ID is incorrect" - echo " - Workflow is using a custom OUTPUT path" echo "" echo "Check: ./shannon query ID=$ID for workflow details" exit 1 diff --git a/src/temporal/client.ts b/src/temporal/client.ts index f3e345c..945af42 100644 --- a/src/temporal/client.ts +++ b/src/temporal/client.ts @@ -74,6 +74,7 @@ async function startPipeline(): Promise { let repoPath: string | undefined; let configPath: string | undefined; let outputPath: string | undefined; + let displayOutputPath: string | undefined; // Host path for display purposes let pipelineTestingMode = false; let customWorkflowId: string | undefined; let waitForCompletion = false; @@ -92,6 +93,12 @@ async function startPipeline(): Promise { outputPath = nextArg; i++; } + } else if (arg === '--display-output') { + const nextArg = args[i + 1]; + if (nextArg && !nextArg.startsWith('-')) { + displayOutputPath = nextArg; + i++; + } } else if (arg === '--workflow-id') { const nextArg = args[i + 1]; if (nextArg && !nextArg.startsWith('-')) { @@ -138,6 +145,11 @@ async function startPipeline(): Promise { ...(pipelineTestingMode && { pipelineTestingMode }), }; + // Determine output directory for display + // Use displayOutputPath (host path) if provided, otherwise fall back to outputPath or default + const effectiveDisplayPath = displayOutputPath || outputPath || './audit-logs'; + const outputDir = `${effectiveDisplayPath}/${workflowId}`; + console.log(chalk.green.bold(`✓ Workflow started: ${workflowId}`)); console.log(); console.log(chalk.white(' Target: ') + chalk.cyan(webUrl)); @@ -145,6 
+157,9 @@ async function startPipeline(): Promise { if (configPath) { console.log(chalk.white(' Config: ') + chalk.cyan(configPath)); } + if (displayOutputPath) { + console.log(chalk.white(' Output: ') + chalk.cyan(displayOutputPath)); + } if (pipelineTestingMode) { console.log(chalk.white(' Mode: ') + chalk.yellow('Pipeline Testing')); } @@ -166,6 +181,9 @@ async function startPipeline(): Promise { console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`)); console.log(chalk.white(' Query: ') + chalk.gray(`./shannon query ID=${workflowId}`)); console.log(); + console.log(chalk.bold('Output:')); + console.log(chalk.white(' Reports: ') + chalk.cyan(outputDir)); + console.log(); return; }