fix: critical bug - exploitation phase was always skipped

ROOT CAUSE:
- Exploitation phase checked session.validationResults to determine eligibility
- validationResults field was removed during audit system refactor
- After that refactor the field no longer existed in the session schema, so the eligibility check always failed and all exploits were skipped

THE FIX:
- Exploitation phase now validates queue files directly when checking eligibility
- Reads exploitation_queue.json and checks if vulnerabilities array is non-empty
- No need to store validation results - just re-validate on demand

CHANGES:
1. runParallelExploit() now calls safeValidateQueueAndDeliverable() directly
2. Removed validationResults parameter from markAgentCompleted()
3. Simplified calculateVulnerabilityAnalysisSummary() - no longer needs validation data
4. Simplified calculateExploitationSummary() - no longer needs validation data

IMPACT:
- Exploitation agents will now run when vulnerabilities are found
- Queue files are the single source of truth for eligibility
- Simpler architecture - no duplicate state storage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
ajmallesh
2025-10-22 17:41:41 -07:00
parent 255956d113
commit cfe8dc8bc8
2 changed files with 37 additions and 46 deletions
+31 -20
View File
@@ -218,12 +218,12 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
const validation = await safeValidateQueueAndDeliverable(vulnType, targetRepo);
if (validation.success) {
// Log validation result (don't store - will be re-validated during exploitation phase)
console.log(chalk.blue(`📋 Validation: ${validation.data.shouldExploit ? `Ready for exploitation (${validation.data.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
validationData = {
shouldExploit: validation.data.shouldExploit,
vulnerabilityCount: validation.data.vulnerabilityCount,
validatedAt: new Date().toISOString()
vulnerabilityCount: validation.data.vulnerabilityCount
};
console.log(chalk.blue(`📋 Validation: ${validationData.shouldExploit ? `Ready for exploitation (${validationData.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
} else {
console.log(chalk.yellow(`⚠️ Validation failed: ${validation.error.message}`));
}
@@ -232,8 +232,8 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
}
}
// Mark agent as completed
await markAgentCompleted(session.id, agentName, commitHash, timingData, costData, validationData);
// Mark agent as completed (validation not stored - will be re-checked during exploitation)
await markAgentCompleted(session.id, agentName, commitHash);
// Only show completion message for sequential execution
if (!skipWorkspaceClean) {
@@ -429,25 +429,36 @@ export const runParallelExploit = async (session, pipelineTestingMode, runClaude
const { getSession } = await import('./session-manager.js');
const freshSession = await getSession(session.id);
// Load validation module
const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');
// Only run exploit agents whose vuln counterparts completed successfully AND found vulnerabilities
const eligibleAgents = exploitAgents.filter(agentName => {
const vulnAgentName = agentName.replace('-exploit', '-vuln');
const eligibilityChecks = await Promise.all(
exploitAgents.map(async (agentName) => {
const vulnAgentName = agentName.replace('-exploit', '-vuln');
// Must have completed the vulnerability analysis
if (!freshSession.completedAgents.includes(vulnAgentName)) {
return false;
}
// Must have completed the vulnerability analysis
if (!freshSession.completedAgents.includes(vulnAgentName)) {
return { agentName, eligible: false };
}
// Must have found vulnerabilities to exploit
const validationResult = freshSession.validationResults?.[vulnAgentName];
if (!validationResult || !validationResult.shouldExploit) {
console.log(chalk.gray(`⏭️ Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
return false;
}
// Check if vulnerabilities were found by validating the queue file
const vulnType = vulnAgentName.replace('-vuln', ''); // "injection-vuln" -> "injection"
const validation = await safeValidateQueueAndDeliverable(vulnType, freshSession.targetRepo);
console.log(chalk.blue(`${agentName} eligible (${validationResult.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
return true;
});
if (!validation.success || !validation.data.shouldExploit) {
console.log(chalk.gray(`⏭️ Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
return { agentName, eligible: false };
}
console.log(chalk.blue(`${agentName} eligible (${validation.data.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
return { agentName, eligible: true };
})
);
const eligibleAgents = eligibilityChecks
.filter(check => check.eligible)
.map(check => check.agentName);
const activeAgents = eligibleAgents.filter(agent => !freshSession.completedAgents.includes(agent));
+6 -26
View File
@@ -552,25 +552,12 @@ export const getSessionStatus = (session) => {
export const calculateVulnerabilityAnalysisSummary = (session) => {
const vulnAgents = PHASES['vulnerability-analysis'];
const completedVulnAgents = session.completedAgents.filter(agent => vulnAgents.includes(agent));
const validationResults = session.validationResults || {};
let totalVulnerabilities = 0;
let agentsWithVulns = 0;
for (const agent of completedVulnAgents) {
const validation = validationResults[agent];
if (validation?.vulnerabilityCount > 0) {
totalVulnerabilities += validation.vulnerabilityCount;
agentsWithVulns++;
}
}
// NOTE: Actual vulnerability counts require reading queue files
// This summary only shows completion counts
return Object.freeze({
totalAnalyses: completedVulnAgents.length,
totalVulnerabilities,
agentsWithVulnerabilities: agentsWithVulns,
successRate: completedVulnAgents.length > 0 ? (agentsWithVulns / completedVulnAgents.length) * 100 : 0,
exploitationCandidates: Object.values(validationResults).filter(v => v?.shouldExploit).length
completedAgents: completedVulnAgents
});
};
@@ -578,19 +565,12 @@ export const calculateVulnerabilityAnalysisSummary = (session) => {
export const calculateExploitationSummary = (session) => {
const exploitAgents = PHASES['exploitation'];
const completedExploitAgents = session.completedAgents.filter(agent => exploitAgents.includes(agent));
const validationResults = session.validationResults || {};
// Count how many exploitation agents were eligible to run
const eligibleExploits = exploitAgents.filter(agentName => {
const vulnAgentName = agentName.replace('-exploit', '-vuln');
return validationResults[vulnAgentName]?.shouldExploit;
});
// NOTE: Eligibility requires reading queue files
// This summary only shows completion counts
return Object.freeze({
totalAttempts: completedExploitAgents.length,
eligibleExploits: eligibleExploits.length,
skippedExploits: eligibleExploits.length - completedExploitAgents.length,
successRate: eligibleExploits.length > 0 ? (completedExploitAgents.length / eligibleExploits.length) * 100 : 0
completedAgents: completedExploitAgents
});
};