Merge pull request #127 from KeygraphHQ/fix/large-deliverable-handling-v2

fix: improve large deliverable handling and audit trail
This commit is contained in:
Arjun Malleswaran
2026-02-12 08:54:19 -08:00
committed by GitHub
18 changed files with 124 additions and 16 deletions

View File

@@ -1,6 +1,9 @@
# Shannon Environment Configuration
# Copy this file to .env and fill in your credentials
# Recommended output token configuration for larger tool outputs
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
# =============================================================================
# OPTION 1: Direct Anthropic (default, no router)
# =============================================================================

View File

@@ -119,10 +119,12 @@ cd shannon
# Option A: Export environment variables
export ANTHROPIC_API_KEY="your-api-key" # or CLAUDE_CODE_OAUTH_TOKEN
export CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000 # recommended
# Option B: Create a .env file
cat > .env << 'EOF'
ANTHROPIC_API_KEY=your-api-key
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
EOF
# 3. Run a pentest

View File

@@ -24,6 +24,7 @@ services:
- ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN:-} # Auth token for router
- ROUTER_DEFAULT=${ROUTER_DEFAULT:-} # Model name when using router (e.g., "gemini,gemini-2.5-pro")
- CLAUDE_CODE_OAUTH_TOKEN=${CLAUDE_CODE_OAUTH_TOKEN:-}
- CLAUDE_CODE_MAX_OUTPUT_TOKENS=${CLAUDE_CODE_MAX_OUTPUT_TOKENS:-64000}
depends_on:
temporal:
condition: service_healthy

View File

@@ -408,7 +408,12 @@ If [blocker] were bypassed/removed:
COMPLETION REQUIREMENTS (ALL must be satisfied):
1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable MCP tool:
- Evidence report: Write to `deliverables/auth_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "AUTH_EVIDENCE"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/auth_exploitation_evidence.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "AUTH_EVIDENCE"` and `file_path: "deliverables/auth_exploitation_evidence.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
CRITICAL WARNING: Announcing completion before every item in deliverables/auth_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

View File

@@ -410,7 +410,12 @@ If [blocker] were bypassed/removed:
COMPLETION REQUIREMENTS (ALL must be satisfied):
1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable MCP tool:
- Evidence report: Write to `deliverables/authz_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "AUTHZ_EVIDENCE"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/authz_exploitation_evidence.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "AUTHZ_EVIDENCE"` and `file_path: "deliverables/authz_exploitation_evidence.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
CRITICAL WARNING: Announcing completion before every item in deliverables/authz_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

View File

@@ -437,7 +437,12 @@ If [blocker] were bypassed/removed:
COMPLETION REQUIREMENTS (ALL must be satisfied):
1. **Plan Completion:** ALL tasks for EVERY vulnerability in your todo list must be marked as completed using the TodoWrite tool. **No vulnerability or task can be left unaddressed.**
2. **Deliverable Generation:** The required deliverable must be successfully saved using save_deliverable MCP tool:
- Evidence report: Write to `deliverables/injection_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "INJECTION_EVIDENCE"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/injection_exploitation_evidence.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "INJECTION_EVIDENCE"` and `file_path: "deliverables/injection_exploitation_evidence.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
**CRITICAL WARNING:** Announcing completion before every item in `deliverables/injection_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.

View File

@@ -487,7 +487,12 @@ If [blocker] were bypassed/removed:
COMPLETION REQUIREMENTS (ALL must be satisfied):
1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable MCP tool:
- Evidence report: Write to `deliverables/ssrf_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "SSRF_EVIDENCE"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/ssrf_exploitation_evidence.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "SSRF_EVIDENCE"` and `file_path: "deliverables/ssrf_exploitation_evidence.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
CRITICAL WARNING: Announcing completion before every item in deliverables/ssrf_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

View File

@@ -427,7 +427,12 @@ If [blocker] were bypassed/removed:
COMPLETION REQUIREMENTS (ALL must be satisfied):
- Todo List Completion: ALL vulnerabilities from the exploitation queue must have been processed and marked as completed in your todo list.
- Deliverable Generation: The required deliverable must be successfully saved using save_deliverable MCP tool:
- Evidence report: Write to `deliverables/xss_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "XSS_EVIDENCE"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/xss_exploitation_evidence.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "XSS_EVIDENCE"` and `file_path: "deliverables/xss_exploitation_evidence.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
**CRITICAL WARNING:** Announcing completion before every item in `deliverables/xss_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.

View File

@@ -129,7 +129,12 @@ After Phase 1 completes, launch all three vulnerability-focused agents in parall
- Create the `outputs/schemas/` directory using mkdir -p
- Copy all discovered schema files to `outputs/schemas/` with descriptive names
- Include schema locations in your attack surface analysis
- Write your report to `deliverables/code_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "CODE_ANALYSIS"` and `file_path: "deliverables/code_analysis_deliverable.md"` (do NOT use inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/code_analysis_deliverable.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "CODE_ANALYSIS"` and `file_path: "deliverables/code_analysis_deliverable.md"`
- **WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
**EXECUTION PATTERN:**
1. **Use TodoWrite to create task list** tracking: Phase 1 agents, Phase 2 agents, and report synthesis

View File

@@ -368,10 +368,13 @@ CRITICAL: Only include sources tracing to dangerous sinks (shell, DB, file ops,
<conclusion_trigger>
**DELIVERABLE SAVING:**
1. Write your report to `deliverables/recon_deliverable.md`
1. **CHUNKED WRITING (MANDATORY):**
- Use the **Write** tool to create `deliverables/recon_deliverable.md` with the title and first major section
- Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
- Repeat for all remaining sections
2. Call `save_deliverable` with `deliverable_type: "RECON"` and `file_path: "deliverables/recon_deliverable.md"`
**WARNING:** Do NOT pass your report as inline `content` — it will exceed output token limits. Always use `file_path`.
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations. Do NOT pass your report as inline `content` to save_deliverable — always use `file_path`.
Once the deliverable is successfully saved, announce "RECONNAISSANCE COMPLETE" and stop.

View File

@@ -254,7 +254,12 @@ This file serves as the handoff mechanism and must always be created to signal c
1. **Systematic Analysis:** ALL relevant API endpoints and user-facing features identified in the reconnaissance deliverable must be analyzed for AuthN/AuthZ flaws.
2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
- Analysis report: Write to `deliverables/auth_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "AUTH_ANALYSIS"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/auth_analysis_deliverable.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "AUTH_ANALYSIS"` and `file_path: "deliverables/auth_analysis_deliverable.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
- Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "AUTH_QUEUE"` and `content: {"vulnerabilities": [...]}`
**ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**AUTH ANALYSIS COMPLETE**" and stop.

View File

@@ -357,7 +357,12 @@ This file serves as the handoff mechanism and must always be created to signal c
1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
- Analysis report: Write to `deliverables/authz_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "AUTHZ_ANALYSIS"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/authz_analysis_deliverable.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "AUTHZ_ANALYSIS"` and `file_path: "deliverables/authz_analysis_deliverable.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
- Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "AUTHZ_QUEUE"` and `content: {"vulnerabilities": [...]}`
**ONLY AFTER** both todo completion AND successful deliverable generation, announce "**AUTHORIZATION ANALYSIS COMPLETE**" and stop.

View File

@@ -364,7 +364,12 @@ This file serves as the handoff mechanism to the Exploitation phase and must alw
1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
- Analysis report: Write to `deliverables/injection_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "INJECTION_ANALYSIS"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/injection_analysis_deliverable.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "INJECTION_ANALYSIS"` and `file_path: "deliverables/injection_analysis_deliverable.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
- Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "INJECTION_QUEUE"` and `content: {"vulnerabilities": [...]}`
**ONLY AFTER** both todo completion AND successful deliverable generation, announce "**INJECTION ANALYSIS COMPLETE**" and stop.

View File

@@ -301,7 +301,12 @@ This file serves as the handoff mechanism and must always be created to signal c
1. **Systematic Analysis:** ALL relevant API endpoints and request-making features identified in the reconnaissance deliverable must be analyzed for SSRF vulnerabilities.
2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
- Analysis report: Write to `deliverables/ssrf_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "SSRF_ANALYSIS"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/ssrf_analysis_deliverable.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "SSRF_ANALYSIS"` and `file_path: "deliverables/ssrf_analysis_deliverable.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
- Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "SSRF_QUEUE"` and `content: {"vulnerabilities": [...]}`
**ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**SSRF ANALYSIS COMPLETE**" and stop.

View File

@@ -290,7 +290,12 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):
1. Systematic Analysis: ALL input vectors identified from the reconnaissance deliverable must be analyzed.
2. Deliverable Generation: Both required deliverables must be successfully saved using save_deliverable MCP tool:
- Analysis report: Write to `deliverables/xss_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "XSS_ANALYSIS"` and `file_path` (not inline `content`)
- **CHUNKED WRITING (MANDATORY):**
1. Use the **Write** tool to create `deliverables/xss_analysis_deliverable.md` with the title and first major section
2. Use the **Edit** tool to append each remaining section — match the last few lines of the file, then replace with those lines plus the new section content
3. Repeat step 2 for all remaining sections
4. Call `save_deliverable` with `deliverable_type: "XSS_ANALYSIS"` and `file_path: "deliverables/xss_analysis_deliverable.md"`
**WARNING:** Do NOT write the entire report in a single tool call — it will exceed the 32K output token limit. Split into multiple Write/Edit operations.
- Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "XSS_QUEUE"` and `content: {"vulnerabilities": [...]}`
ONLY AFTER both systematic analysis AND successful deliverable generation, announce "XSS ANALYSIS COMPLETE" and stop.

View File

@@ -220,7 +220,9 @@ export async function runClaudePrompt(
const mcpServers = buildMcpServers(sourceDir, agentName);
// Build env vars to pass to SDK subprocesses
const sdkEnv: Record<string, string> = {};
const sdkEnv: Record<string, string> = {
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
};
if (process.env.ANTHROPIC_API_KEY) {
sdkEnv.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
}

View File

@@ -187,14 +187,49 @@ export async function fileExists(filePath: string): Promise<boolean> {
/**
 * Prepare the on-disk audit layout for a session.
 *
 * Ensures that the session's audit directory (as produced by
 * `generateAuditPath`) exists, together with its `agents/`, `prompts/`
 * and `deliverables/` subdirectories. Directories are ensured one at a
 * time, parent first.
 *
 * @param sessionMetadata - Session whose audit directory tree is created.
 */
export async function initializeAuditStructure(sessionMetadata: SessionMetadata): Promise<void> {
  const sessionRoot = generateAuditPath(sessionMetadata);

  // Parent first, then each child folder that lives under it.
  const requiredDirectories = [
    sessionRoot,
    path.join(sessionRoot, 'agents'),
    path.join(sessionRoot, 'prompts'),
    path.join(sessionRoot, 'deliverables'),
  ];

  for (const directory of requiredDirectories) {
    await ensureDirectory(directory);
  }
}
/**
 * Copy deliverable files from repo to audit-logs for self-contained audit trail.
 *
 * Lists `<repoPath>/deliverables` and copies every regular file into the
 * `deliverables` folder under the session's audit path. Subdirectories are
 * skipped. No-ops if the source directory cannot be listed (e.g. it doesn't
 * exist yet). Existing destination files are overwritten, so repeated calls
 * are idempotent.
 *
 * Parallel-safe: an entry that disappears between listing and copying
 * (ENOENT) is skipped instead of aborting the copy of the remaining files.
 * Every entry is attempted; if any fails for another reason, the first such
 * error is rethrown once all copies have settled.
 *
 * @param sessionMetadata - Session whose audit directory receives the copies.
 * @param repoPath - Repository root that contains the `deliverables` directory.
 * @throws The first non-ENOENT error raised while inspecting or copying a file.
 */
export async function copyDeliverablesToAudit(
  sessionMetadata: SessionMetadata,
  repoPath: string
): Promise<void> {
  const sourceDir = path.join(repoPath, 'deliverables');
  const destDir = path.join(generateAuditPath(sessionMetadata), 'deliverables');

  let entries: string[];
  try {
    entries = await fs.readdir(sourceDir);
  } catch {
    // Source directory doesn't exist yet — nothing to copy
    return;
  }

  await ensureDirectory(destDir);

  // True when the error is a filesystem "no such file or directory" error.
  const isMissingFileError = (err: unknown): boolean =>
    typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'ENOENT';

  const copyEntry = async (entry: string): Promise<void> => {
    const sourcePath = path.join(sourceDir, entry);
    const destPath = path.join(destDir, entry);
    try {
      // Only copy files, skip subdirectories
      const stat = await fs.stat(sourcePath);
      if (stat.isFile()) {
        await fs.copyFile(sourcePath, destPath);
      }
    } catch (err: unknown) {
      // A concurrent writer may remove or rename the entry after readdir;
      // that is not a failure of this copy pass.
      if (!isMissingFileError(err)) {
        throw err;
      }
    }
  };

  // Attempt every entry so one bad file cannot prevent the others from being archived.
  const results = await Promise.allSettled(entries.map(copyEntry));
  const failure = results.find(
    (result): result is PromiseRejectedResult => result.status === 'rejected'
  );
  if (failure) {
    throw failure.reason;
  }
}

View File

@@ -74,7 +74,7 @@ import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import type { AgentMetrics } from './shared.js';
import type { DistributedConfig } from '../types/config.js';
import type { SessionMetadata } from '../audit/utils.js';
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
@@ -251,6 +251,13 @@ async function runAgentActivity(
});
await commitGitSuccess(repoPath, agentName);
// 9.5. Copy deliverables to audit-logs (non-fatal)
try {
await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) {
console.error(`Failed to copy deliverables to audit-logs for ${agentName}:`, copyErr);
}
// 10. Return metrics
return {
durationMs: Date.now() - startTime,