fix: add file_path parameter to save_deliverable for large reports

Large deliverable reports can exceed output token limits when passed as inline content. This change allows agents to write reports to disk first and pass a file_path instead.

Changes:
- Add file_path parameter to save_deliverable MCP tool with path traversal protection
- Pass CLAUDE_CODE_MAX_OUTPUT_TOKENS env var to SDK subprocesses
- Fix false positive error detection by extracting only text content (not tool_use JSON) when checking for API errors
- Update all prompts to instruct agents to use file_path for large reports and stop immediately after completion
2026-02-12 17:22:50 +00:00 · 2026-02-11 07:35:49 -08:00
parent 1710bd93f7
commit f7eb3a57ce
15 changed files with 190 additions and 53 deletions
--- a/mcp-server/src/tools/save-deliverable.ts
+++ b/mcp-server/src/tools/save-deliverable.ts
@@ -16,6 +16,8 @@

 import { tool } from '@anthropic-ai/claude-agent-sdk';
 import { z } from 'zod';
+import fs from 'node:fs';
+import path from 'node:path';
 import { DeliverableType, DELIVERABLE_FILENAMES, isQueueType } from '../types/deliverables.js';
 import { createToolResult, type ToolResult, type SaveDeliverableResponse } from '../types/tool-responses.js';
 import { validateQueueJson } from '../validation/queue-validator.js';
@@ -27,13 +29,68 @@ import { createValidationError, createGenericError } from '../utils/error-format
 */
 export const SaveDeliverableInputSchema = z.object({
  deliverable_type: z.nativeEnum(DeliverableType).describe('Type of deliverable to save'),
-  content: z.string().min(1).describe('File content (markdown for analysis/evidence, JSON for queues)'),
+  content: z.string().min(1).optional().describe('File content (markdown for analysis/evidence, JSON for queues). Optional if file_path is provided.'),
+  file_path: z.string().optional().describe('Path to a file whose contents should be used as the deliverable content. Relative paths are resolved against the deliverables directory. Use this instead of content for large reports to avoid output token limits.'),
 });

 export type SaveDeliverableInput = z.infer<typeof SaveDeliverableInputSchema>;

 /**
- * Create save_deliverable handler with targetDir captured in closure
+ * Check if a path is contained within a base directory.
+ * Prevents path traversal attacks (e.g., ../../../etc/passwd).
+ */
+function isPathContained(basePath: string, targetPath: string): boolean {
+  // Lexical check only: both paths are normalized with path.resolve; symlinks are not followed.
+  const resolvedBase = path.resolve(basePath);
+  const resolvedTarget = path.resolve(targetPath);
+
+  // Compare via path.relative instead of a string-prefix test. A prefix test of
+  // `resolvedBase + path.sep` never matches when the base already ends in a
+  // separator (e.g. the filesystem root), which would reject contained paths.
+  const relative = path.relative(resolvedBase, resolvedTarget);
+  if (relative === '') {
+    return true; // target is the base directory itself
+  }
+
+  // ".." or "../..." means the target sits above the base. A name that merely
+  // begins with two dots (e.g. "..notes.md") is still inside the base.
+  const escapesBase = relative === '..' || relative.startsWith('..' + path.sep);
+
+  // An absolute result means no relative route exists from base to target
+  // (expected for a different Windows drive), so treat it as not contained.
+  return !escapesBase && !path.isAbsolute(relative);
+}
+
+/**
+ * Resolve deliverable content from either inline content or a file path.
+ *
+ * Inline `content` takes precedence when both are supplied. Otherwise `file_path`
+ * is resolved (relative paths against `targetDir`), verified to stay inside
+ * `targetDir` both lexically and after symlink resolution, and read as UTF-8.
+ * An empty file is rejected, matching the non-empty requirement on inline content.
+ *
+ * @param args - Tool input; `content` and `file_path` are both optional.
+ * @param targetDir - Base directory that `file_path` must resolve inside.
+ * @returns The content string on success, or a ToolResult error on failure.
+ */
+function resolveContent(
+  args: SaveDeliverableInput,
+  targetDir: string,
+): string | ToolResult {
+  if (args.content) {
+    return args.content;
+  }
+
+  if (!args.file_path) {
+    return createToolResult(createValidationError(
+      'Either "content" or "file_path" must be provided',
+      true,
+      { deliverableType: args.deliverable_type },
+    ));
+  }
+
+  const resolvedPath = path.isAbsolute(args.file_path)
+    ? args.file_path
+    : path.resolve(targetDir, args.file_path);
+
+  // Shared by the lexical and the symlink-resolved containment checks below.
+  const outsideAllowedDir = (): ToolResult => createToolResult(createValidationError(
+    `Path "${args.file_path}" resolves outside allowed directory`,
+    false,
+    { deliverableType: args.deliverable_type, allowedBase: targetDir },
+  ));
+
+  // Security: Prevent path traversal outside targetDir
+  if (!isPathContained(targetDir, resolvedPath)) {
+    return outsideAllowedDir();
+  }
+
+  let fileContent: string;
+  try {
+    // Security: the check above is purely lexical, so a symlink located inside
+    // targetDir could still point elsewhere. Resolve both sides to their real
+    // paths and re-check before reading. Both are resolved so that a targetDir
+    // which is itself reached through a symlink still compares correctly.
+    const realBase = fs.realpathSync(targetDir);
+    const realTarget = fs.realpathSync(resolvedPath);
+    if (!isPathContained(realBase, realTarget)) {
+      return outsideAllowedDir();
+    }
+    fileContent = fs.readFileSync(realTarget, 'utf-8');
+  } catch (readError) {
+    return createToolResult(createValidationError(
+      `Failed to read file at ${resolvedPath}: ${readError instanceof Error ? readError.message : String(readError)}`,
+      true,
+      { deliverableType: args.deliverable_type, filePath: resolvedPath },
+    ));
+  }
+
+  // Inline content must be non-empty (schema min(1)); hold file content to the
+  // same rule so an empty file cannot be saved as an empty deliverable.
+  if (fileContent.length === 0) {
+    return createToolResult(createValidationError(
+      `File at ${resolvedPath} is empty`,
+      true,
+      { deliverableType: args.deliverable_type, filePath: resolvedPath },
+    ));
+  }
+
+  return fileContent;
+}
+
+/**
+ * Create save_deliverable handler with targetDir captured in closure.
 *
 * This factory pattern ensures each MCP server instance has its own targetDir,
 * preventing race conditions when multiple workflows run in parallel.
@@ -41,29 +98,28 @@ export type SaveDeliverableInput = z.infer<typeof SaveDeliverableInputSchema>;
 function createSaveDeliverableHandler(targetDir: string) {
  return async function saveDeliverable(args: SaveDeliverableInput): Promise<ToolResult> {
    try {
-      const { deliverable_type, content } = args;
+      const { deliverable_type } = args;
+
+      const contentOrError = resolveContent(args, targetDir);
+      if (typeof contentOrError !== 'string') {
+        return contentOrError;
+      }
+      const content = contentOrError;

-      // Validate queue JSON if applicable
      if (isQueueType(deliverable_type)) {
        const queueValidation = validateQueueJson(content);
        if (!queueValidation.valid) {
-          const errorResponse = createValidationError(
+          return createToolResult(createValidationError(
            queueValidation.message ?? 'Invalid queue JSON',
            true,
-            {
-              deliverableType: deliverable_type,
-              expectedFormat: '{"vulnerabilities": [...]}',
-            }
-          );
-          return createToolResult(errorResponse);
+            { deliverableType: deliverable_type, expectedFormat: '{"vulnerabilities": [...]}' },
+          ));
        }
      }

-      // Get filename and save file (targetDir captured from closure)
      const filename = DELIVERABLE_FILENAMES[deliverable_type];
      const filepath = saveDeliverableFile(targetDir, filename, content);

-      // Success response
      const successResponse: SaveDeliverableResponse = {
        status: 'success',
        message: `Deliverable saved successfully: ${filename}`,
@@ -74,13 +130,11 @@ function createSaveDeliverableHandler(targetDir: string) {

      return createToolResult(successResponse);
    } catch (error) {
-      const errorResponse = createGenericError(
+      return createToolResult(createGenericError(
        error,
        false,
-        { deliverableType: args.deliverable_type }
-      );
-
-      return createToolResult(errorResponse);
+        { deliverableType: args.deliverable_type },
+      ));
    }
  };
 }
@@ -94,7 +148,7 @@ function createSaveDeliverableHandler(targetDir: string) {
 export function createSaveDeliverableTool(targetDir: string) {
  return tool(
    'save_deliverable',
-    'Saves deliverable files with automatic validation. Queue files must have {"vulnerabilities": [...]} structure.',
+    'Saves deliverable files with automatic validation. Queue files must have {"vulnerabilities": [...]} structure. For large reports, write the file to disk first then pass file_path instead of inline content to avoid output token limits.',
    SaveDeliverableInputSchema.shape,
    createSaveDeliverableHandler(targetDir)
  );
--- a/prompts/exploit-auth.txt
+++ b/prompts/exploit-auth.txt
@@ -146,8 +146,10 @@ You are the **Identity Compromise Specialist** - proving tangible impact of brok
 - **save_deliverable (MCP Tool):** Saves exploitation evidence files.
  - **Parameters:**
    - `deliverable_type`: "AUTH_EVIDENCE" (required)
-    - `content`: Your complete evidence report (markdown) (required)
+    - `file_path`: Path to the evidence report file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content)
  - **Returns:** `{ status: "success", filepath: "..." }` on success or `{ status: "error", message: "..." }` on failure
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for evidence reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **Task Agent:** Mandatory coder-executor for brute force batches, credential stuffing, token replay automation, and any scripted workflow.
 - **TodoWrite tool:** Essential for managing your exploitation task list. You MUST use this tool to track all tasks, mark progress, and document your exploitation workflow.
@@ -406,9 +408,11 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 	1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
 	2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable script:
-	  - Evidence report: Use `save_deliverable` MCP tool with `deliverable_type: "AUTH_EVIDENCE"` and your evidence report as `content`
+	  - Evidence report: Write to `deliverables/auth_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "AUTH_EVIDENCE"` and `file_path` (not inline `content`)

 CRITICAL WARNING: Announcing completion before every item in deliverables/auth_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

 ONLY AFTER fulfilling these exhaustive requirements, announce "AUTH EXPLOITATION COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/exploit-authz.txt
+++ b/prompts/exploit-authz.txt
@@ -133,8 +133,10 @@ You are the **Privilege Escalation Specialist** - proving tangible impact of bro
 - **save_deliverable (MCP Tool):** Saves exploitation evidence files.
  - **Parameters:**
    - `deliverable_type`: "AUTHZ_EVIDENCE" (required)
-    - `content`: Your complete evidence report (markdown) (required)
+    - `file_path`: Path to the evidence report file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content)
  - **Returns:** `{ status: "success", filepath: "..." }` on success or `{ status: "error", message: "..." }` on failure
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for evidence reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **Task Agent:** Mandatory coder-executor for IDOR sweeps, role escalation loops, and workflow bypass automation.
 - **TodoWrite tool:** Essential for managing your exploitation task list. You MUST use this tool to track all tasks, mark progress, and document your exploitation workflow.
@@ -408,9 +410,11 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 	1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
 	2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable script:
-	  - Evidence report: Use `save_deliverable` MCP tool with `deliverable_type: "AUTHZ_EVIDENCE"` and your evidence report as `content`
+	  - Evidence report: Write to `deliverables/authz_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "AUTHZ_EVIDENCE"` and `file_path` (not inline `content`)

 CRITICAL WARNING: Announcing completion before every item in deliverables/authz_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

 ONLY AFTER fulfilling these exhaustive requirements, announce "AUTHZ EXPLOITATION COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/exploit-injection.txt
+++ b/prompts/exploit-injection.txt
@@ -115,8 +115,10 @@ You are the **Proof-of-Impact Generator** - converting theoretical injection fla
 - **save_deliverable (MCP Tool):** Saves exploitation evidence files.
  - **Parameters:**
    - `deliverable_type`: "INJECTION_EVIDENCE" (required)
-    - `content`: Your complete evidence report (markdown) (required)
+    - `file_path`: Path to the evidence report file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content)
  - **Returns:** `{ status: "success", filepath: "..." }` on success or `{ status: "error", message: "..." }` on failure
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for evidence reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **Task Agent:** Mandatory coder-executor for any custom scripting beyond single ad-hoc commands.
 - **TodoWrite tool:** Essential for managing your exploitation task list. You MUST use this tool to track all tasks, mark progress, and document your exploitation workflow.
@@ -435,9 +437,11 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 1.  **Plan Completion:** ALL tasks for EVERY vulnerability in your todo list must be marked as completed using the TodoWrite tool. **No vulnerability or task can be left unaddressed.**
 2.  **Deliverable Generation:** The required deliverable must be successfully saved using save_deliverable MCP tool:
-    - Evidence report: Use `save_deliverable` MCP tool with `deliverable_type: "INJECTION_EVIDENCE"` and your evidence report as `content`
+    - Evidence report: Write to `deliverables/injection_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "INJECTION_EVIDENCE"` and `file_path` (not inline `content`)

 **CRITICAL WARNING:** Announcing completion before every item in `deliverables/injection_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.

 ONLY AFTER fulfilling these exhaustive requirements, announce "INJECTION EXPLOITATION COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/exploit-ssrf.txt
+++ b/prompts/exploit-ssrf.txt
@@ -132,8 +132,10 @@ You are the **Network Boundary Breaker** - proving tangible impact of SSRF vulne
 - **save_deliverable (MCP Tool):** Saves exploitation evidence files.
  - **Parameters:**
    - `deliverable_type`: "SSRF_EVIDENCE" (required)
-    - `content`: Your complete evidence report (markdown) (required)
+    - `file_path`: Path to the evidence report file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content)
  - **Returns:** `{ status: "success", filepath: "..." }` on success or `{ status: "error", message: "..." }` on failure
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for evidence reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **{{MCP_SERVER}} (Playwright):** Useful for complex multi-step SSRF exploitation that requires browser context or JavaScript execution.
 - **Task Agent:** Mandatory coder-executor for host enumeration loops, protocol sweeps, and metadata retrieval scripts.
@@ -485,9 +487,11 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 	1. Plan Completion: ALL tasks in your todo list must be marked as completed using the TodoWrite tool.
 	2. Deliverable Generation: The required deliverable must be successfully saved using save_deliverable script:
-	  - Evidence report: Use `save_deliverable` MCP tool with `deliverable_type: "SSRF_EVIDENCE"` and your evidence report as `content`
+	  - Evidence report: Write to `deliverables/ssrf_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "SSRF_EVIDENCE"` and `file_path` (not inline `content`)

 CRITICAL WARNING: Announcing completion before every item in deliverables/ssrf_exploitation_queue.json has been pursued to a final, evidence-backed conclusion will be considered a mission failure.

 ONLY AFTER fulfilling these exhaustive requirements, announce "SSRF EXPLOITATION COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/exploit-xss.txt
+++ b/prompts/exploit-xss.txt
@@ -135,8 +135,10 @@ You are the **Client-Side Impact Demonstrator** - converting theoretical XSS fla
 - **save_deliverable (MCP Tool):** Saves exploitation evidence files.
  - **Parameters:**
    - `deliverable_type`: "XSS_EVIDENCE" (required)
-    - `content`: Your complete evidence report (markdown) (required)
+    - `file_path`: Path to the evidence report file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content)
  - **Returns:** `{ status: "success", filepath: "..." }` on success or `{ status: "error", message: "..." }` on failure
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for evidence reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **Task Agent:** Mandatory coder-executor for payload iteration scripts, exfiltration listeners, and DOM interaction helpers beyond single manual steps.
 - **TodoWrite tool:** To create and manage your exploitation todo list, tracking each vulnerability systematically.
@@ -425,10 +427,11 @@ If [blocker] were bypassed/removed:
 COMPLETION REQUIREMENTS (ALL must be satisfied):
 - Todo List Completion: ALL vulnerabilities from the exploitation queue must have been processed and marked as completed in your todo list.
 - Deliverable Generation: The required deliverable must be successfully saved using save_deliverable MCP tool:
-  - Evidence report: Use `save_deliverable` MCP tool with `deliverable_type: "XSS_EVIDENCE"` and your evidence report as `content`
+  - Evidence report: Write to `deliverables/xss_exploitation_evidence.md`, then call `save_deliverable` with `deliverable_type: "XSS_EVIDENCE"` and `file_path` (not inline `content`)

 **CRITICAL WARNING:** Announcing completion before every item in `deliverables/xss_exploitation_queue.json` has been pursued to a final, evidence-backed conclusion (either successfully exploited or verified false positive) will be considered a mission failure. Superficial testing is not acceptable.

-ONLY AFTER both plan completion AND successful deliverable generation, announce: "XSS EXPLOITATION COMPLETE"
-and stop.
+ONLY AFTER both plan completion AND successful deliverable generation, announce "XSS EXPLOITATION COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/pre-recon-code.txt
+++ b/prompts/pre-recon-code.txt
@@ -81,9 +81,11 @@ You are the **Code Intelligence Gatherer** and **Architectural Foundation Builde
 - **save_deliverable (MCP Tool):** Saves your final deliverable file with automatic validation.
  - **Parameters:**
    - `deliverable_type`: "CODE_ANALYSIS" (required)
-    - `content`: Your complete markdown report (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content like JSON queues)
  - **Returns:** `{ status: "success", filepath: "...", validated: true/false }` on success or `{ status: "error", message: "...", errorType: "...", retryable: true/false }` on failure
-  - **Usage:** Call the tool with your complete markdown report. The tool handles correct naming and file validation automatically.
+  - **Usage:** Write your report to disk first, then call with `file_path`. The tool handles correct naming and file validation automatically.
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 </available_tools>

@@ -127,7 +129,7 @@ After Phase 1 completes, launch all three vulnerability-focused agents in parall
  - Create the `outputs/schemas/` directory using mkdir -p
  - Copy all discovered schema files to `outputs/schemas/` with descriptive names
  - Include schema locations in your attack surface analysis
- Save complete analysis using the `save_deliverable` MCP tool with `deliverable_type: "CODE_ANALYSIS"` and your complete markdown report as the `content`
+- Write your report to `deliverables/code_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "CODE_ANALYSIS"` and `file_path: "deliverables/code_analysis_deliverable.md"` (do NOT use inline `content`)

 **EXECUTION PATTERN:**
 1. **Use TodoWrite to create task list** tracking: Phase 1 agents, Phase 2 agents, and report synthesis
@@ -385,10 +387,12 @@ A component is **out-of-scope** if it **cannot** be invoked through the running
   - Phase 3: Synthesis and report generation completed

 2. **Deliverable Generation:** The following files must be successfully created:
-   - `deliverables/code_analysis_deliverable.md` (Created using save_deliverable MCP tool with CODE_ANALYSIS type)
+   - `deliverables/code_analysis_deliverable.md` (via `save_deliverable` with `file_path`, not inline `content`)
   - `outputs/schemas/` directory with all discovered schema files copied (if any schemas found)

 3. **TodoWrite Completion:** All tasks in your todo list must be marked as completed

 **ONLY AFTER** all three requirements are satisfied, announce "**PRE-RECON CODE ANALYSIS COMPLETE**" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/recon.txt
+++ b/prompts/recon.txt
@@ -63,8 +63,10 @@ Please use these tools for the following use cases:
 - **save_deliverable (MCP Tool):** Saves your reconnaissance deliverable file.
  - **Parameters:**
    - `deliverable_type`: "RECON" (required)
-    - `content`: Your complete markdown report (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (optional, use only for small content like JSON queues)
  - **Returns:** `{ status: "success", filepath: "..." }` on success or `{ status: "error", message: "..." }` on failure
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.

 **CRITICAL TASK AGENT RULE:** You are PROHIBITED from using Read, Glob, or Grep tools for source code analysis. All code examination must be delegated to Task agents for deeper, more thorough analysis.
@@ -365,5 +367,13 @@ CRITICAL: Only include sources tracing to dangerous sinks (shell, DB, file ops,
 </deliverable_instructions>

 <conclusion_trigger>
-Once you have saved the complete deliverable using the save_deliverable MCP tool with `deliverable_type: "RECON"` and your complete report as the `content`, your phase is complete. Announce "RECONNAISSANCE COMPLETE" and await further instructions.
+**DELIVERABLE SAVING:**
+1. Write your report to `deliverables/recon_deliverable.md`
+2. Call `save_deliverable` with `deliverable_type: "RECON"` and `file_path: "deliverables/recon_deliverable.md"`
+
+**WARNING:** Do NOT pass your report as inline `content` — it will exceed output token limits. Always use `file_path`.
+
+Once the deliverable is successfully saved, announce "RECONNAISSANCE COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/vuln-auth.txt
+++ b/prompts/vuln-auth.txt
@@ -80,9 +80,11 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
 - **save_deliverable (MCP Tool):** Saves deliverable files with automatic validation.
  - **Parameters:**
    - `deliverable_type`: "AUTH_ANALYSIS" or "AUTH_QUEUE" (required)
-    - `content`: Your markdown report or JSON queue (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (use only for small content like JSON queues)
  - **Returns:** `{ status: "success", filepath: "...", validated: true/false }` on success or `{ status: "error", message: "...", errorType: "...", retryable: true/false }` on failure
-  - **Usage:** Call the tool with your deliverable type and content. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **Usage:** For analysis reports, write to disk first then call with `file_path`. For JSON queues, you may pass inline `content`. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **{{MCP_SERVER}} (Playwright):** To interact with the live web application to understand multi-step flows like password reset or registration.
 - **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each endpoint/flow that needs analysis. Mark items as "in_progress" when working on them and "completed" when done.
@@ -252,8 +254,10 @@ This file serves as the handoff mechanism and must always be created to signal c

 1.  **Systematic Analysis:** ALL relevant API endpoints and user-facing features identified in the reconnaissance deliverable must be analyzed for AuthN/AuthZ flaws.
 2.  **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-    - Analysis report: Use `save_deliverable` MCP tool with `deliverable_type: "AUTH_ANALYSIS"` and your report as `content`
+    - Analysis report: Write to `deliverables/auth_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "AUTH_ANALYSIS"` and `file_path` (not inline `content`)
    - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "AUTH_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**AUTH ANALYSIS COMPLETE**" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/vuln-authz.txt
+++ b/prompts/vuln-authz.txt
@@ -83,9 +83,11 @@ An **exploitable vulnerability** is a logical flaw in the code that represents a
 - **save_deliverable (MCP Tool):** Saves deliverable files with automatic validation.
  - **Parameters:**
    - `deliverable_type`: "AUTHZ_ANALYSIS" or "AUTHZ_QUEUE" (required)
-    - `content`: Your markdown report or JSON queue (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (use only for small content like JSON queues)
  - **Returns:** `{ status: "success", filepath: "...", validated: true/false }` on success or `{ status: "error", message: "...", errorType: "...", retryable: true/false }` on failure
-  - **Usage:** Call the tool with your deliverable type and content. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **Usage:** For analysis reports, write to disk first then call with `file_path`. For JSON queues, you may pass inline `content`. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **{{MCP_SERVER}} (Playwright):** To interact with the live web application to understand multi-step flows and role-based access controls.
 - **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each endpoint that needs authorization analysis. Mark items as "in_progress" when working on them and "completed" when done.
@@ -355,10 +357,12 @@ This file serves as the handoff mechanism and must always be created to signal c

 1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
 2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-   - Analysis report: Use `save_deliverable` MCP tool with `deliverable_type: "AUTHZ_ANALYSIS"` and your report as `content`
+   - Analysis report: Write to `deliverables/authz_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "AUTHZ_ANALYSIS"` and `file_path` (not inline `content`)
   - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "AUTHZ_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both todo completion AND successful deliverable generation, announce "**AUTHORIZATION ANALYSIS COMPLETE**" and stop.

+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
+
 **FAILURE TO COMPLETE TODOS = INCOMPLETE ANALYSIS** - You will be considered to have failed the mission if you generate deliverables before completing comprehensive testing of all authorization vectors.
 </conclusion_trigger>
--- a/prompts/vuln-injection.txt
+++ b/prompts/vuln-injection.txt
@@ -83,9 +83,11 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
 - **save_deliverable (MCP Tool):** Saves deliverable files with automatic validation.
  - **Parameters:**
    - `deliverable_type`: "INJECTION_ANALYSIS" or "INJECTION_QUEUE" (required)
-    - `content`: Your markdown report or JSON queue (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports)
+    - `content`: Inline content string (use only for small content like JSON queues)
  - **Returns:** `{ status: "success", filepath: "...", validated: true/false }` on success or `{ status: "error", message: "...", errorType: "...", retryable: true/false }` on failure
-  - **Usage:** Call the tool with your deliverable type and content. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **Usage:** For analysis reports, write to disk first then call with `file_path`. For JSON queues, you may pass inline `content`. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **{{MCP_SERVER}} (Playwright):** To interact with the live web application to understand multi-step flows like password reset or registration.
 - **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each injection source that needs analysis. Mark items as "in_progress" when working on them and "completed" when done.
@@ -362,10 +364,12 @@ This file serves as the handoff mechanism to the Exploitation phase and must alw

 1. **Todo Completion:** ALL tasks in your TodoWrite list must be marked as "completed"
 2. **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-   - Analysis report: Use `save_deliverable` MCP tool with `deliverable_type: "INJECTION_ANALYSIS"` and your report as `content`
+   - Analysis report: Write to `deliverables/injection_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "INJECTION_ANALYSIS"` and `file_path` (not inline `content`)
   - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "INJECTION_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both todo completion AND successful deliverable generation, announce "**INJECTION ANALYSIS COMPLETE**" and stop.

+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
+
 **FAILURE TO COMPLETE TODOS = INCOMPLETE ANALYSIS** - You will be considered to have failed the mission if you generate deliverables before completing comprehensive testing of all input vectors.
 </conclusion_trigger>
--- a/prompts/vuln-ssrf.txt
+++ b/prompts/vuln-ssrf.txt
@@ -79,9 +79,11 @@ An **exploitable vulnerability** is a data flow where user-controlled input infl
 - **save_deliverable (MCP Tool):** Saves deliverable files with automatic validation.
  - **Parameters:**
    - `deliverable_type`: "SSRF_ANALYSIS" or "SSRF_QUEUE" (required)
-    - `content`: Your markdown report or JSON queue (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports; required if `content` is omitted)
+    - `content`: Inline content string (use only for small content like JSON queues; required if `file_path` is omitted)
  - **Returns:** `{ status: "success", filepath: "...", validated: true/false }` on success or `{ status: "error", message: "...", errorType: "...", retryable: true/false }` on failure
-  - **Usage:** Call the tool with your deliverable type and content. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **Usage:** For analysis reports, write to disk first then call with `file_path`. For JSON queues, you may pass inline `content`. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 - **{{MCP_SERVER}} (Playwright):** To interact with the live web application to understand multi-step flows that might involve URL redirection or proxy functionality.
 - **TodoWrite Tool:** Use this to create and manage your analysis task list. Create a todo item for each SSRF sink that needs analysis. Mark items as "in_progress" when working on them and "completed" when done.
@@ -299,8 +301,10 @@ This file serves as the handoff mechanism and must always be created to signal c

 1.  **Systematic Analysis:** ALL relevant API endpoints and request-making features identified in the reconnaissance deliverable must be analyzed for SSRF vulnerabilities.
 2.  **Deliverable Generation:** Both required deliverables must be successfully saved using save_deliverable MCP tool:
-    - Analysis report: Use `save_deliverable` MCP tool with `deliverable_type: "SSRF_ANALYSIS"` and your report as `content`
+    - Analysis report: Write to `deliverables/ssrf_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "SSRF_ANALYSIS"` and `file_path` (not inline `content`)
    - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "SSRF_QUEUE"` and `content: {"vulnerabilities": [...]}`

 **ONLY AFTER** both systematic analysis AND successful deliverable generation, announce "**SSRF ANALYSIS COMPLETE**" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/prompts/vuln-xss.txt
+++ b/prompts/vuln-xss.txt
@@ -84,9 +84,11 @@ An **exploitable vulnerability** is a confirmed source-to-sink path where the en
 - **save_deliverable (MCP Tool):** Saves deliverable files with automatic validation.
  - **Parameters:**
    - `deliverable_type`: "XSS_ANALYSIS" or "XSS_QUEUE" (required)
-    - `content`: Your markdown report or JSON queue (required)
+    - `file_path`: Path to the file you wrote to disk (preferred for large reports; required if `content` is omitted)
+    - `content`: Inline content string (use only for small content like JSON queues; required if `file_path` is omitted)
  - **Returns:** `{ status: "success", filepath: "...", validated: true/false }` on success or `{ status: "error", message: "...", errorType: "...", retryable: true/false }` on failure
-  - **Usage:** Call the tool with your deliverable type and content. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **Usage:** For analysis reports, write to disk first then call with `file_path`. For JSON queues, you may pass inline `content`. Queue files must have `{"vulnerabilities": [...]}` structure and will be validated automatically.
+  - **WARNING:** Do NOT pass large reports as inline `content` — this will exceed output token limits and cause agent failure. Always use `file_path` for analysis reports.
 - **Bash tool:** Use for creating directories, copying files, and other shell commands as needed.
 </available_tools>

@@ -288,8 +290,10 @@ COMPLETION REQUIREMENTS (ALL must be satisfied):

 1. Systematic Analysis: ALL input vectors identified from the reconnaissance deliverable must be analyzed.
 2. Deliverable Generation: Both required deliverables must be successfully saved using save_deliverable MCP tool:
-   - Analysis report: Use `save_deliverable` MCP tool with `deliverable_type: "XSS_ANALYSIS"` and your report as `content`
+   - Analysis report: Write to `deliverables/xss_analysis_deliverable.md`, then call `save_deliverable` with `deliverable_type: "XSS_ANALYSIS"` and `file_path` (not inline `content`)
   - Exploitation queue: Use `save_deliverable` MCP tool with `deliverable_type: "XSS_QUEUE"` and `content: {"vulnerabilities": [...]}`

 ONLY AFTER both systematic analysis AND successful deliverable generation, announce "XSS ANALYSIS COMPLETE" and stop.
+
+**CRITICAL:** After announcing completion, STOP IMMEDIATELY. Do NOT output summaries, recaps, or explanations of your work — the deliverable contains everything needed.
 </conclusion_trigger>
--- a/src/ai/claude-executor.ts
+++ b/src/ai/claude-executor.ts
@@ -218,6 +218,18 @@ export async function runClaudePrompt(
  console.log(chalk.blue(`  Running Claude Code: ${description}...`));

  const mcpServers = buildMcpServers(sourceDir, agentName);
+
+  // Build env vars to pass to SDK subprocesses
+  const sdkEnv: Record<string, string> = {
+    CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
+  };
+  if (process.env.ANTHROPIC_API_KEY) {
+    sdkEnv.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
+  }
+  if (process.env.CLAUDE_CODE_OAUTH_TOKEN) {
+    sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
+  }
+
  const options = {
    model: 'claude-sonnet-4-5-20250929',
    maxTurns: 10_000,
@@ -225,6 +237,7 @@ export async function runClaudePrompt(
    permissionMode: 'bypassPermissions' as const,
    allowDangerouslySkipPermissions: true,
    mcpServers,
+    env: sdkEnv,
  };

  if (!execContext.useCleanOutput) {
--- a/src/ai/message-handlers.ts
+++ b/src/ai/message-handlers.ts
@@ -50,6 +50,20 @@ export function extractMessageContent(message: AssistantMessage): string {
  return String(messageContent.content);
 }

+// Returns the message's text blocks joined by newlines, skipping tool_use JSON so it cannot cause false positives in error detection.
+export function extractTextOnlyContent(message: AssistantMessage): string {
+  const messageContent = message.message;
+
+  if (Array.isArray(messageContent.content)) {
+    return messageContent.content
+      .filter((c: ContentBlock) => c.type === 'text' || c.text) // keep blocks typed 'text' or carrying a truthy text field
+      .map((c: ContentBlock) => c.text || '') // a kept block without text contributes an empty string
+      .join('\n');
+  }
+
+  return String(messageContent.content); // non-array content is coerced to a string unchanged
+}
+
 export function detectApiError(content: string): ApiErrorDetection {
  if (!content || typeof content !== 'string') {
    return { detected: false };
@@ -175,11 +189,14 @@ export function handleAssistantMessage(
  const cleanedContent = filterJsonToolCalls(content);

  // Prefer structured error field from SDK, fall back to text-sniffing
+  // Use text-only content for error detection to avoid false positives
+  // from tool_use JSON (e.g. security reports containing "usage limit")
  let errorDetection: ApiErrorDetection;
  if (message.error) {
    errorDetection = handleStructuredError(message.error, content);
  } else {
-    errorDetection = detectApiError(content);
+    const textOnlyContent = extractTextOnlyContent(message);
+    errorDetection = detectApiError(textOnlyContent);
  }

  const result: AssistantResult = {