From cfe8dc8bc89f0feacd110b3576bc9a71fd9a0787 Mon Sep 17 00:00:00 2001
From: ajmallesh <ajmallesh@gmail.com>
Date: Wed, 22 Oct 2025 17:41:41 -0700
Subject: [PATCH] fix: critical bug - exploitation phase was always skipped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ROOT CAUSE:
- Exploitation phase checked session.validationResults to determine eligibility
- validationResults field was removed during audit system refactor
- Field is therefore always undefined on loaded sessions, so the eligibility
  check failed for every agent and all exploits were skipped

THE FIX:
- Exploitation phase now validates queue files directly when checking eligibility
- Reads exploitation_queue.json and checks if vulnerabilities array is non-empty
- No need to store validation results - just re-validate on demand

CHANGES:
1. runParallelExploit() now calls safeValidateQueueAndDeliverable() directly
2. runSingleAgent() no longer passes validationData (nor timingData/costData)
   to markAgentCompleted(); only session id, agent name and commit hash remain
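3. Simplified calculateVulnerabilityAnalysisSummary() - no longer needs validation
   data; now returns only totalAnalyses and completedAgents (totalVulnerabilities,
   agentsWithVulnerabilities, successRate and exploitationCandidates are dropped)
4. Simplified calculateExploitationSummary() - no longer needs validation data;
   now returns only totalAttempts and completedAgents (eligibleExploits,
   skippedExploits and successRate are dropped)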

IMPACT:
- Exploitation agents will now run when vulnerabilities are found
- Queue files are the single source of truth for eligibility
- Simpler architecture - no duplicate state storage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/checkpoint-manager.js | 51 ++++++++++++++++++++++++---------------
 src/session-manager.js    | 32 +++++-------------------
 2 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/src/checkpoint-manager.js b/src/checkpoint-manager.js
index fc58f86..f2783af 100644
--- a/src/checkpoint-manager.js
+++ b/src/checkpoint-manager.js
@@ -218,12 +218,12 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
         const validation = await safeValidateQueueAndDeliverable(vulnType, targetRepo);
 
         if (validation.success) {
+          // Log validation result (don't store - will be re-validated during exploitation phase)
+          console.log(chalk.blue(`📋 Validation: ${validation.data.shouldExploit ? `Ready for exploitation (${validation.data.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
           validationData = {
             shouldExploit: validation.data.shouldExploit,
-            vulnerabilityCount: validation.data.vulnerabilityCount,
-            validatedAt: new Date().toISOString()
+            vulnerabilityCount: validation.data.vulnerabilityCount
           };
-          console.log(chalk.blue(`📋 Validation: ${validationData.shouldExploit ? `Ready for exploitation (${validationData.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
         } else {
           console.log(chalk.yellow(`⚠️ Validation failed: ${validation.error.message}`));
         }
@@ -232,8 +232,8 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
       }
     }
 
-    // Mark agent as completed
-    await markAgentCompleted(session.id, agentName, commitHash, timingData, costData, validationData);
+    // Mark agent as completed (validation not stored - will be re-checked during exploitation)
+    await markAgentCompleted(session.id, agentName, commitHash);
 
     // Only show completion message for sequential execution
     if (!skipWorkspaceClean) {
@@ -429,25 +429,36 @@ export const runParallelExploit = async (session, pipelineTestingMode, runClaude
   const { getSession } = await import('./session-manager.js');
   const freshSession = await getSession(session.id);
 
+  // Load validation module
+  const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');
+
   // Only run exploit agents whose vuln counterparts completed successfully AND found vulnerabilities
-  const eligibleAgents = exploitAgents.filter(agentName => {
-    const vulnAgentName = agentName.replace('-exploit', '-vuln');
+  const eligibilityChecks = await Promise.all(
+    exploitAgents.map(async (agentName) => {
+      const vulnAgentName = agentName.replace('-exploit', '-vuln');
 
-    // Must have completed the vulnerability analysis
-    if (!freshSession.completedAgents.includes(vulnAgentName)) {
-      return false;
-    }
+      // Must have completed the vulnerability analysis
+      if (!freshSession.completedAgents.includes(vulnAgentName)) {
+        return { agentName, eligible: false };
+      }
 
-    // Must have found vulnerabilities to exploit
-    const validationResult = freshSession.validationResults?.[vulnAgentName];
-    if (!validationResult || !validationResult.shouldExploit) {
-      console.log(chalk.gray(`⏭️  Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
-      return false;
-    }
+      // Check if vulnerabilities were found by validating the queue file
+      const vulnType = vulnAgentName.replace('-vuln', ''); // "injection-vuln" -> "injection"
+      const validation = await safeValidateQueueAndDeliverable(vulnType, freshSession.targetRepo);
 
-    console.log(chalk.blue(`✓ ${agentName} eligible (${validationResult.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
-    return true;
-  });
+      if (!validation.success || !validation.data.shouldExploit) {
+        console.log(chalk.gray(`⏭️  Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
+        return { agentName, eligible: false };
+      }
+
+      console.log(chalk.blue(`✓ ${agentName} eligible (${validation.data.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
+      return { agentName, eligible: true };
+    })
+  );
+
+  const eligibleAgents = eligibilityChecks
+    .filter(check => check.eligible)
+    .map(check => check.agentName);
 
   const activeAgents = eligibleAgents.filter(agent => !freshSession.completedAgents.includes(agent));
 
diff --git a/src/session-manager.js b/src/session-manager.js
index 3a0f6c5..dfbdfd1 100644
--- a/src/session-manager.js
+++ b/src/session-manager.js
@@ -552,25 +552,12 @@ export const getSessionStatus = (session) => {
 export const calculateVulnerabilityAnalysisSummary = (session) => {
   const vulnAgents = PHASES['vulnerability-analysis'];
   const completedVulnAgents = session.completedAgents.filter(agent => vulnAgents.includes(agent));
-  const validationResults = session.validationResults || {};
-
-  let totalVulnerabilities = 0;
-  let agentsWithVulns = 0;
-
-  for (const agent of completedVulnAgents) {
-    const validation = validationResults[agent];
-    if (validation?.vulnerabilityCount > 0) {
-      totalVulnerabilities += validation.vulnerabilityCount;
-      agentsWithVulns++;
-    }
-  }
 
+  // NOTE: Actual vulnerability counts require reading queue files
+  // This summary only shows completion counts
   return Object.freeze({
     totalAnalyses: completedVulnAgents.length,
-    totalVulnerabilities,
-    agentsWithVulnerabilities: agentsWithVulns,
-    successRate: completedVulnAgents.length > 0 ? (agentsWithVulns / completedVulnAgents.length) * 100 : 0,
-    exploitationCandidates: Object.values(validationResults).filter(v => v?.shouldExploit).length
+    completedAgents: completedVulnAgents
   });
 };
 
@@ -578,19 +565,12 @@ export const calculateVulnerabilityAnalysisSummary = (session) => {
 export const calculateExploitationSummary = (session) => {
   const exploitAgents = PHASES['exploitation'];
   const completedExploitAgents = session.completedAgents.filter(agent => exploitAgents.includes(agent));
-  const validationResults = session.validationResults || {};
-
-  // Count how many exploitation agents were eligible to run
-  const eligibleExploits = exploitAgents.filter(agentName => {
-    const vulnAgentName = agentName.replace('-exploit', '-vuln');
-    return validationResults[vulnAgentName]?.shouldExploit;
-  });
 
+  // NOTE: Eligibility requires reading queue files
+  // This summary only shows completion counts
   return Object.freeze({
     totalAttempts: completedExploitAgents.length,
-    eligibleExploits: eligibleExploits.length,
-    skippedExploits: eligibleExploits.length - completedExploitAgents.length,
-    successRate: eligibleExploits.length > 0 ? (completedExploitAgents.length / eligibleExploits.length) * 100 : 0
+    completedAgents: completedExploitAgents
   });
 };