fix: critical bug - exploitation phase was always skipped

ROOT CAUSE:
- Exploitation phase checked session.validationResults to determine eligibility
- validationResults field was removed during audit system refactor
- Field never existed in session schema, so all exploits were skipped

THE FIX:
- Exploitation phase now validates queue files directly when checking eligibility
- Reads exploitation_queue.json and checks if vulnerabilities array is non-empty
- No need to store validation results - just re-validate on demand

CHANGES:
1. runParallelExploit() now calls safeValidateQueueAndDeliverable() directly
2. Removed validationResults parameter from markAgentCompleted()
3. Simplified calculateVulnerabilityAnalysisSummary() - no longer needs validation data
4. Simplified calculateExploitationSummary() - no longer needs validation data

IMPACT:
- Exploitation agents will now run when vulnerabilities are found
- Queue files are the single source of truth for eligibility
- Simpler architecture - no duplicate state storage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-07-10 06:58:45 +02:00 · 2025-10-22 17:41:41 -07:00
parent 255956d113
commit cfe8dc8bc8
2 changed files with 37 additions and 46 deletions
@@ -218,12 +218,12 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
        const validation = await safeValidateQueueAndDeliverable(vulnType, targetRepo);

        if (validation.success) {
+          // Log validation result (don't store - will be re-validated during exploitation phase)
+          console.log(chalk.blue(`📋 Validation: ${validation.data.shouldExploit ? `Ready for exploitation (${validation.data.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
          validationData = {
            shouldExploit: validation.data.shouldExploit,
-            vulnerabilityCount: validation.data.vulnerabilityCount,
-            validatedAt: new Date().toISOString()
+            vulnerabilityCount: validation.data.vulnerabilityCount
          };
-          console.log(chalk.blue(`📋 Validation: ${validationData.shouldExploit ? `Ready for exploitation (${validationData.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
        } else {
          console.log(chalk.yellow(`⚠️ Validation failed: ${validation.error.message}`));
        }
@@ -232,8 +232,8 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
      }
    }

-    // Mark agent as completed
-    await markAgentCompleted(session.id, agentName, commitHash, timingData, costData, validationData);
+    // Mark agent as completed (validation not stored - will be re-checked during exploitation)
+    await markAgentCompleted(session.id, agentName, commitHash);

    // Only show completion message for sequential execution
    if (!skipWorkspaceClean) {
@@ -429,25 +429,36 @@ export const runParallelExploit = async (session, pipelineTestingMode, runClaude
  const { getSession } = await import('./session-manager.js');
  const freshSession = await getSession(session.id);

+  // Load validation module
+  const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');
+
  // Only run exploit agents whose vuln counterparts completed successfully AND found vulnerabilities
-  const eligibleAgents = exploitAgents.filter(agentName => {
-    const vulnAgentName = agentName.replace('-exploit', '-vuln');
+  const eligibilityChecks = await Promise.all(
+    exploitAgents.map(async (agentName) => {
+      const vulnAgentName = agentName.replace('-exploit', '-vuln');

-    // Must have completed the vulnerability analysis
-    if (!freshSession.completedAgents.includes(vulnAgentName)) {
-      return false;
-    }
+      // Must have completed the vulnerability analysis
+      if (!freshSession.completedAgents.includes(vulnAgentName)) {
+        return { agentName, eligible: false };
+      }

-    // Must have found vulnerabilities to exploit
-    const validationResult = freshSession.validationResults?.[vulnAgentName];
-    if (!validationResult || !validationResult.shouldExploit) {
-      console.log(chalk.gray(`⏭️  Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
-      return false;
-    }
+      // Check if vulnerabilities were found by validating the queue file
+      const vulnType = vulnAgentName.replace('-vuln', ''); // "injection-vuln" -> "injection"
+      const validation = await safeValidateQueueAndDeliverable(vulnType, freshSession.targetRepo);

-    console.log(chalk.blue(`✓ ${agentName} eligible (${validationResult.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
-    return true;
-  });
+      if (!validation.success || !validation.data.shouldExploit) {
+        console.log(chalk.gray(`⏭️  Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
+        return { agentName, eligible: false };
+      }
+
+      console.log(chalk.blue(`✓ ${agentName} eligible (${validation.data.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
+      return { agentName, eligible: true };
+    })
+  );
+
+  const eligibleAgents = eligibilityChecks
+    .filter(check => check.eligible)
+    .map(check => check.agentName);

  const activeAgents = eligibleAgents.filter(agent => !freshSession.completedAgents.includes(agent));

@@ -552,25 +552,12 @@ export const getSessionStatus = (session) => {
 export const calculateVulnerabilityAnalysisSummary = (session) => {
  const vulnAgents = PHASES['vulnerability-analysis'];
  const completedVulnAgents = session.completedAgents.filter(agent => vulnAgents.includes(agent));
-  const validationResults = session.validationResults || {};
-
-  let totalVulnerabilities = 0;
-  let agentsWithVulns = 0;
-
-  for (const agent of completedVulnAgents) {
-    const validation = validationResults[agent];
-    if (validation?.vulnerabilityCount > 0) {
-      totalVulnerabilities += validation.vulnerabilityCount;
-      agentsWithVulns++;
-    }
-  }

+  // NOTE: Actual vulnerability counts require reading queue files
+  // This summary only shows completion counts
  return Object.freeze({
    totalAnalyses: completedVulnAgents.length,
-    totalVulnerabilities,
-    agentsWithVulnerabilities: agentsWithVulns,
-    successRate: completedVulnAgents.length > 0 ? (agentsWithVulns / completedVulnAgents.length) * 100 : 0,
-    exploitationCandidates: Object.values(validationResults).filter(v => v?.shouldExploit).length
+    completedAgents: completedVulnAgents
  });
 };

@@ -578,19 +565,12 @@ export const calculateVulnerabilityAnalysisSummary = (session) => {
 export const calculateExploitationSummary = (session) => {
  const exploitAgents = PHASES['exploitation'];
  const completedExploitAgents = session.completedAgents.filter(agent => exploitAgents.includes(agent));
-  const validationResults = session.validationResults || {};
-
-  // Count how many exploitation agents were eligible to run
-  const eligibleExploits = exploitAgents.filter(agentName => {
-    const vulnAgentName = agentName.replace('-exploit', '-vuln');
-    return validationResults[vulnAgentName]?.shouldExploit;
-  });

+  // NOTE: Eligibility requires reading queue files
+  // This summary only shows completion counts
  return Object.freeze({
    totalAttempts: completedExploitAgents.length,
-    eligibleExploits: eligibleExploits.length,
-    skippedExploits: eligibleExploits.length - completedExploitAgents.length,
-    successRate: eligibleExploits.length > 0 ? (completedExploitAgents.length / eligibleExploits.length) * 100 : 0
+    completedAgents: completedExploitAgents
  });
 };