Merge pull request #127 from KeygraphHQ/fix/large-deliverable-handling-v2

fix: improve large deliverable handling and audit trail
2026-02-12 17:22:50 +00:00 · 2026-02-12 08:54:19 -08:00
parent ae4c4ed402 6f79ce18d2
commit c6fa48d161
18 changed files with 124 additions and 16 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,6 +1,9 @@
 # Shannon Environment Configuration
 # Copy this file to .env and fill in your credentials

+# Recommended output token configuration for larger tool outputs
+CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
+
 # =============================================================================
 # OPTION 1: Direct Anthropic (default, no router)
 # =============================================================================
--- a/README.md
+++ b/README.md
@@ -119,10 +119,12 @@ cd shannon

 # Option A: Export environment variables
 export ANTHROPIC_API_KEY="your-api-key"              # or CLAUDE_CODE_OAUTH_TOKEN
+export CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000           # recommended

 # Option B: Create a .env file
 cat > .env << 'EOF'
 ANTHROPIC_API_KEY=your-api-key
+CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
 EOF

 # 3. Run a pentest
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -24,6 +24,7 @@ services:
      - ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN:-}  # Auth token for router
      - ROUTER_DEFAULT=${ROUTER_DEFAULT:-}  # Model name when using router (e.g., "gemini,gemini-2.5-pro")
      - CLAUDE_CODE_OAUTH_TOKEN=${CLAUDE_CODE_OAUTH_TOKEN:-}
+      - CLAUDE_CODE_MAX_OUTPUT_TOKENS=${CLAUDE_CODE_MAX_OUTPUT_TOKENS:-64000}
    depends_on:
      temporal:
        condition: service_healthy
--- a/prompts/exploit-auth.txt
+++ b/prompts/exploit-auth.txt
@@ -408,7 +408,12 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 	1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
 	2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable script:
-	  - Evidence report: Write to `deliverables/auth_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "AUTH_EVIDENCE"` and `file_path` (not inline `content`)
+	  - **CHUNKED WRITING (MANDATORY):**
+	    1. Use the **Write** tool to create `deliverables/auth_exploitation_evidence.md` with the title and first major section
+	    2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+	    3. Repeat step 2 for all remaining sections
+	    4. Call `save_deliverable` with `deliverable_type: "AUTH_EVIDENCE"` and `file_path: "deliverables/auth_exploitation_evidence.md"`
+	    **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.

 CRITICAL WARNING: Announcing completion before every item in deliverables/auth_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

--- a/prompts/exploit-authz.txt
+++ b/prompts/exploit-authz.txt
@@ -410,7 +410,12 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 	1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
 	2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable script:
-	  - Evidence report: Write to `deliverables/authz_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "AUTHZ_EVIDENCE"` and `file_path` (not inline `content`)
+	  - **CHUNKED WRITING (MANDATORY):**
+	    1. Use the **Write** tool to create `deliverables/authz_exploitation_evidence.md` with the title and first major section
+	    2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+	    3. Repeat step 2 for all remaining sections
+	    4. Call `save_deliverable` with `deliverable_type: "AUTHZ_EVIDENCE"` and `file_path: "deliverables/authz_exploitation_evidence.md"`
+	    **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.

 CRITICAL WARNING: Announcing completion before every item in deliverables/authz_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

--- a/prompts/exploit-injection.txt
+++ b/prompts/exploit-injection.txt
@@ -437,7 +437,12 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 1.  **Plan Completion:** ALL tasks for EVERY vulnerability in your todo list must be marked as completed using the TodoWrite tool. **No vulnerability or task can be left unaddressed.**
 2.  **Deliverable Generation:** The required deliverable must be successfully saved using save_deliverable MCP tool:
-    - Evidence report: Write to `deliverables/injection_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "INJECTION_EVIDENCE"` and `file_path` (not inline `content`)
+    - **CHUNKED WRITING (MANDATORY):**
+      1. Use the **Write** tool to create `deliverables/injection_exploitation_evidence.md` with the title and first major section
+      2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+      3. Repeat step 2 for all remaining sections
+      4. Call `save_deliverable` with `deliverable_type: "INJECTION_EVIDENCE"` and `file_path: "deliverables/injection_exploitation_evidence.md"`
+      **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.

 **CRITICAL WARNING:** Announcing completion before every item in `deliverables/injection_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.

--- a/prompts/exploit-ssrf.txt
+++ b/prompts/exploit-ssrf.txt
@@ -487,7 +487,12 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 	1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
 	2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable script:
-	  - Evidence report: Write to `deliverables/ssrf_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "SSRF_EVIDENCE"` and `file_path` (not inline `content`)
+	  - **CHUNKED WRITING (MANDATORY):**
+	    1. Use the **Write** tool to create `deliverables/ssrf_exploitation_evidence.md` with the title and first major section
+	    2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+	    3. Repeat step 2 for all remaining sections
+	    4. Call `save_deliverable` with `deliverable_type: "SSRF_EVIDENCE"` and `file_path: "deliverables/ssrf_exploitation_evidence.md"`
+	    **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.

 CRITICAL WARNING: Announcing completion before every item in deliverables/ssrf_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

--- a/prompts/exploit-xss.txt
+++ b/prompts/exploit-xss.txt
@@ -427,7 +427,12 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 - Todo List Completion: ALL vulnerabilities from the exploitation queue must have been processed and marked as completed in your todo list.
 - Deliverable Generation: The required deliverable must be successfully saved using save_deliverable MCP tool:
-  - Evidence report: Write to `deliverables/xss_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "XSS_EVIDENCE"` and `file_path` (not inline `content`)
+  - **CHUNKED WRITING (MANDATORY):**
+    1. Use the **Write** tool to create `deliverables/xss_exploitation_evidence.md` with the title and first major section
+    2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+    3. Repeat step 2 for all remaining sections
+    4. Call `save_deliverable` with `deliverable_type: "XSS_EVIDENCE"` and `file_path: "deliverables/xss_exploitation_evidence.md"`
+    **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.

 **CRITICAL WARNING:** Announcing completion before every item in `deliverables/xss_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.

--- a/prompts/pre-recon-code.txt
+++ b/prompts/pre-recon-code.txt
@@ -129,7 +129,12 @@ After Phase 1 completes, launch all three vulnerability-focused agents in parall
  - Create the `outputs/schemas/` directory using mkdir -p
  - Copy all discovered schema files to `outputs/schemas/` with descriptive names
  - Include schema locations in your attack surface analysis
- Write your report to `deliverables/code_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "CODE_ANALYSIS"` and `file_path: "deliverables/code_analysis_deliverable.md"` (do NOT use inline `content`)
+- **CHUNKED WRITING (MANDATORY):**
+  1. Use the **Write** tool to create `deliverables/code_analysis_deliverable.md` with the title and first major section
+  2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+  3. Repeat step 2 for all remaining sections
+  4. Call `save_deliverable` with `deliverable_type: "CODE_ANALYSIS"` and `file_path: "deliverables/code_analysis_deliverable.md"`
+- **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.

 **EXECUTION PATTERN:**
 1. **Use TodoWrite to create task list** tracking: Phase 1 agents, Phase 2 agents, and report synthesis
--- a/prompts/recon.txt
+++ b/prompts/recon.txt
@@ -368,10 +368,13 @@ CRITICAL: Only include sources tracing to dangerous sinks (shell, DB, file ops,

 <conclusion_trigger>
 **DELIVERABLE SAVING:**
-1. Write your report to `deliverables/recon_deliverable.md`
+1. **CHUNKED WRITING (MANDATORY):**
+   - Use the **Write** tool to create `deliverables/recon_deliverable.md` with the title and first major section
+   - Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+   - Repeat for all remaining sections
 2. Call `save_deliverable` with `deliverable_type: "RECON"` and `file_path: "deliverables/recon_deliverable.md"`

-**WARNING:** Do NOT pass your report as inline `content` — it will exceed output token limits. Always use `file_path`.
+**WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations. Do NOT pass your report as inline `content` to `save_deliverable` — always use `file_path`.

 Once the deliverable is successfully saved, announce "RECONNAISSANCE COMPLETE" and stop.

--- a/prompts/vuln-auth.txt
+++ b/prompts/vuln-auth.txt
@@ -254,7 +254,12 @@ This file serves as the handoff mechanism and must always be created to signal c

 1.  **Systematic Analysis:** ALL relevant API endpoints and user-facing features identified in the reconnaissance deliverable must be analyzed for AuthN/AuthZ flaws.
 2.  **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-    - Analysis report: Write to `deliverables/auth_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "AUTH_ANALYSIS"` and `file_path` (not inline `content`)
+    - **CHUNKED WRITING (MANDATORY):**
+      1. Use the **Write** tool to create `deliverables/auth_analysis_deliverable.md` with the title and first major section
+      2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+      3. Repeat step 2 for all remaining sections
+      4. Call `save_deliverable` with `deliverable_type: "AUTH_ANALYSIS"` and `file_path: "deliverables/auth_analysis_deliverable.md"`
+      **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.
    - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "AUTH_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**AUTH ANALYSIS COMPLETE**" and stop.
--- a/prompts/vuln-authz.txt
+++ b/prompts/vuln-authz.txt
@@ -357,7 +357,12 @@ This file serves as the handoff mechanism and must always be created to signal c

 1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
 2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-   - Analysis report: Write to `deliverables/authz_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "AUTHZ_ANALYSIS"` and `file_path` (not inline `content`)
+   - **CHUNKED WRITING (MANDATORY):**
+     1. Use the **Write** tool to create `deliverables/authz_analysis_deliverable.md` with the title and first major section
+     2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+     3. Repeat step 2 for all remaining sections
+     4. Call `save_deliverable` with `deliverable_type: "AUTHZ_ANALYSIS"` and `file_path: "deliverables/authz_analysis_deliverable.md"`
+     **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.
   - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "AUTHZ_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both todo completion AND successful deliverable generation, announce "**AUTHORIZATION ANALYSIS COMPLETE**" and stop.
--- a/prompts/vuln-injection.txt
+++ b/prompts/vuln-injection.txt
@@ -364,7 +364,12 @@ This file serves as the handoff mechanism to the Exploitation phase and must alw

 1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
 2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-   - Analysis report: Write to `deliverables/injection_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "INJECTION_ANALYSIS"` and `file_path` (not inline `content`)
+   - **CHUNKED WRITING (MANDATORY):**
+     1. Use the **Write** tool to create `deliverables/injection_analysis_deliverable.md` with the title and first major section
+     2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+     3. Repeat step 2 for all remaining sections
+     4. Call `save_deliverable` with `deliverable_type: "INJECTION_ANALYSIS"` and `file_path: "deliverables/injection_analysis_deliverable.md"`
+     **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.
   - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "INJECTION_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both todo completion AND successful deliverable generation, announce "**INJECTION ANALYSIS COMPLETE**" and stop.
--- a/prompts/vuln-ssrf.txt
+++ b/prompts/vuln-ssrf.txt
@@ -301,7 +301,12 @@ This file serves as the handoff mechanism and must always be created to signal c

 1.  **Systematic Analysis:** ALL relevant API endpoints and request-making features identified in the reconnaissance deliverable must be analyzed for SSRF vulnerabilities.
 2.  **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-    - Analysis report: Write to `deliverables/ssrf_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "SSRF_ANALYSIS"` and `file_path` (not inline `content`)
+    - **CHUNKED WRITING (MANDATORY):**
+      1. Use the **Write** tool to create `deliverables/ssrf_analysis_deliverable.md` with the title and first major section
+      2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+      3. Repeat step 2 for all remaining sections
+      4. Call `save_deliverable` with `deliverable_type: "SSRF_ANALYSIS"` and `file_path: "deliverables/ssrf_analysis_deliverable.md"`
+      **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.
    - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "SSRF_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**SSRF ANALYSIS COMPLETE**" and stop.
--- a/prompts/vuln-xss.txt
+++ b/prompts/vuln-xss.txt
@@ -290,7 +290,12 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):

 1. Systematic Analysis: ALL input vectors identified from the reconnaissance deliverable must be analyzed.
 2. Deliverable Generation: Both required deliverables must be successfully saved using save_deliverable MCP tool:
-   - Analysis report: Write to `deliverables/xss_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "XSS_ANALYSIS"` and `file_path` (not inline `content`)
+   - **CHUNKED WRITING (MANDATORY):**
+     1. Use the **Write** tool to create `deliverables/xss_analysis_deliverable.md` with the title and first major section
+     2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
+     3. Repeat step 2 for all remaining sections
+     4. Call `save_deliverable` with `deliverable_type: "XSS_ANALYSIS"` and `file_path: "deliverables/xss_analysis_deliverable.md"`
+     **WARNING:** Do NOT write the entire report in a single tool call — doing so can exceed the 32K output token limit. Split the report across multiple Write/Edit operations.
   - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "XSS_QUEUE"` and `content: {"vulnerabilities": [...]}`

 ONLY AFTER both systematic analysis AND successful deliverable generation, announce "XSS ANALYSIS COMPLETE" and stop.
--- a/src/ai/claude-executor.ts
+++ b/src/ai/claude-executor.ts
@@ -220,7 +220,9 @@ export async function runClaudePrompt(
  const mcpServers = buildMcpServers(sourceDir, agentName);

  // Build env vars to pass to SDK subprocesses
-  const sdkEnv: Record<string, string> = {};
+  const sdkEnv: Record<string, string> = {
+    CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
+  };
  if (process.env.ANTHROPIC_API_KEY) {
    sdkEnv.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
  }
--- a/src/audit/utils.ts
+++ b/src/audit/utils.ts
@@ -187,14 +187,49 @@ export async function fileExists(filePath: string): Promise<boolean> {

 /**
 * Initialize audit directory structure for a session
- * Creates: audit-logs/{sessionId}/, agents/, prompts/
+ * Creates: audit-logs/{sessionId}/, agents/, prompts/, deliverables/
 */
 export async function initializeAuditStructure(sessionMetadata: SessionMetadata): Promise<void> {
  const auditPath = generateAuditPath(sessionMetadata);
  const agentsPath = path.join(auditPath, 'agents');
  const promptsPath = path.join(auditPath, 'prompts');
+  const deliverablesPath = path.join(auditPath, 'deliverables');

  await ensureDirectory(auditPath);
  await ensureDirectory(agentsPath);
  await ensureDirectory(promptsPath);
+  await ensureDirectory(deliverablesPath);
+}
+
+/**
+ * Copy the top-level files of `<repoPath>/deliverables` into the session's audit `deliverables/` directory; subdirectories are skipped and existing copies are overwritten on re-run.
+ * Returns without copying if the source directory cannot be read (any `readdir` failure, e.g. it doesn't exist yet); `stat`/`copyFile` errors propagate to the caller.
+ */
+export async function copyDeliverablesToAudit(
+  sessionMetadata: SessionMetadata,
+  repoPath: string
+): Promise<void> {
+  const sourceDir = path.join(repoPath, 'deliverables');
+  const destDir = path.join(generateAuditPath(sessionMetadata), 'deliverables');
+
+  let entries: string[];
+  try {
+    entries = await fs.readdir(sourceDir);
+  } catch {
+    // Any readdir failure (typically: the directory doesn't exist yet) is treated as nothing to copy
+    return;
+  }
+
+  await ensureDirectory(destDir);
+
+  for (const entry of entries) {
+    const sourcePath = path.join(sourceDir, entry);
+    const destPath = path.join(destDir, entry);
+
+    // Only copy files, skip subdirectories (fs.stat follows symlinks, so a link to a file is copied too)
+    const stat = await fs.stat(sourcePath);
+    if (stat.isFile()) {
+      await fs.copyFile(sourcePath, destPath);
+    }
+  }
 }
--- a/src/temporal/activities.ts
+++ b/src/temporal/activities.ts
@@ -74,7 +74,7 @@ import type { WorkflowSummary } from '../audit/workflow-logger.js';
 import type { AgentName } from '../types/agents.js';
 import type { AgentMetrics } from './shared.js';
 import type { DistributedConfig } from '../types/config.js';
-import type { SessionMetadata } from '../audit/utils.js';
+import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';

 const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)

@@ -251,6 +251,13 @@ async function runAgentActivity(
    });
    await commitGitSuccess(repoPath, agentName);

+    // 9.5. Copy deliverables to audit-logs (non-fatal)
+    try {
+      await copyDeliverablesToAudit(sessionMetadata, repoPath);
+    } catch (copyErr) {
+      console.error(`Failed to copy deliverables to audit-logs for ${agentName}:`, copyErr);
+    }
+
    // 10. Return metrics
    return {
      durationMs: Date.now() - startTime,