feat: implement unified audit system v3.0 with crash-safety and self-healing

## Unified Audit System (v3.0)
- Implemented crash-safe, append-only logging to audit-logs/{hostname}_{sessionId}/
- Added session.json with comprehensive metrics (timing, cost, attempts)
- Added agent execution logs with turn-by-turn detail
- Prompt snapshots saved to audit-logs/.../prompts/{agent}.md
- SessionMutex prevents race conditions during parallel execution
- Self-healing reconciliation before every CLI command

## Session Metadata Standardization
- Fixed critical bug: standardized on 'id' field (not 'sessionId') throughout codebase
- Updated: shannon.mjs (recon, report), src/phases/pre-recon.js
- Added validation in AuditSession to fail fast on incorrect field usage
- Root cause: JavaScript shorthand property syntax was producing the wrong field names

## Schema Improvements
- session.json: Added cost_usd per phase, removed redundant final_cost_usd
- Renamed 'percentage' -> 'duration_percentage' for clarity
- Simplified agent metrics to single total_cost_usd field
- Removed unused validation object from schema

## Legacy System Removal
- Removed savePromptSnapshot() - prompts now only saved by audit system
- Removed target repo pollution (prompt-snapshots/ no longer created)
- Single source of truth: audit-logs/{hostname}_{sessionId}/prompts/

## Export Script Simplification
- Removed JSON export mode (session.json already exists)
- CSV-only export with clean columns: agent, phase, status, attempts, duration_ms, cost_usd
- Tested on real session data

## Documentation
- Updated CLAUDE.md with audit system architecture
- Added .gitignore entry for audit-logs/

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
ajmallesh
2025-10-22 16:09:08 -07:00
parent a9e00ca19f
commit 27334a4dd6
18 changed files with 1871 additions and 206 deletions
+150
View File
@@ -0,0 +1,150 @@
#!/usr/bin/env node
/**
* Export Metrics Script
*
* Export session metrics from audit logs to CSV format for analysis.
*
* Use Cases:
* - Performance analysis across sessions
* - Cost tracking and budgeting
* - Agent success rate analysis
* - Benchmarking improvements
*/
import chalk from 'chalk';
import { fs, path } from 'zx';
import { getSession } from '../src/session-manager.js';
import { AuditSession } from '../src/audit/index.js';
/**
 * Read this script's options from process.argv.
 *
 * Recognised flags: `--session-id <id>`, `--output <file>`, `--help` / `-h`.
 * A value-taking flag that is not followed by a non-empty token is treated
 * like any other unrecognised argument: an error and the usage text are
 * printed and the process exits with code 1. `--help` prints usage and exits
 * with code 0.
 *
 * @returns {{sessionId: (string|null), output: (string|null)}} Parsed options;
 *   a field stays null when its flag was not supplied.
 */
function parseArgs() {
  // Flags that consume the following token, mapped to the option they fill.
  const valueFlags = new Map([
    ['--session-id', 'sessionId'],
    ['--output', 'output'],
  ]);
  const options = { sessionId: null, output: null };
  const argv = process.argv;

  // Scanning starts at index 2, past the first two process.argv entries.
  let cursor = 2;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    const value = argv[cursor + 1];
    const optionKey = valueFlags.get(flag);

    if (optionKey !== undefined && value) {
      options[optionKey] = value;
      cursor += 2; // skip the flag and the value it consumed
      continue;
    }

    if (flag === '--help' || flag === '-h') {
      printUsage();
      process.exit(0);
    } else {
      console.log(chalk.red(`❌ Unknown argument: ${flag}`));
      printUsage();
      process.exit(1);
    }
    cursor += 1;
  }

  return options;
}
/**
 * Print the help text for the export script to stdout.
 *
 * Each entry pairs a chalk colour name with the line to print in that colour.
 */
function printUsage() {
  const helpLines = [
    ['cyan', '\n📊 Export Metrics to CSV'],
    ['gray', '\nUsage: ./scripts/export-metrics.js [options]\n'],
    ['white', 'Options:'],
    ['gray', ' --session-id <id> Session ID to export (required)'],
    ['gray', ' --output <file> Output CSV file path (default: stdout)'],
    ['gray', ' --help, -h Show this help\n'],
    ['white', 'Examples:'],
    ['gray', ' # Export to stdout'],
    ['gray', ' ./scripts/export-metrics.js --session-id abc123\n'],
    ['gray', ' # Export to file'],
    ['gray', ' ./scripts/export-metrics.js --session-id abc123 --output metrics.csv\n'],
  ];

  for (const [colour, text] of helpLines) {
    console.log(chalk[colour](text));
  }
}
/**
 * Look up a session, read its metrics through an AuditSession and render
 * them as CSV text.
 *
 * @param {string} sessionId - Identifier passed to getSession().
 * @returns {Promise<string>} The CSV text produced by exportAsCSV().
 * @throws {Error} If getSession() resolves to a falsy value.
 */
async function exportMetrics(sessionId) {
  const storedSession = await getSession(sessionId);
  if (storedSession) {
    const audit = new AuditSession(storedSession);
    await audit.initialize();
    const sessionMetrics = await audit.getMetrics();
    return exportAsCSV(storedSession, sessionMetrics);
  }
  throw new Error(`Session ${sessionId} not found`);
}
/**
 * Render per-agent metrics as CSV text.
 *
 * Columns: agent, phase, status, attempts, duration_ms, cost_usd.
 * One row is emitted per entry of `metrics.metrics.agents`, in the object's
 * own enumeration order. Agents without a known phase get phase `unknown`.
 *
 * Incomplete agent records do not abort the export:
 *  - missing `attempts` is reported as 0 attempts,
 *  - missing `final_duration_ms` or `total_cost_usd` becomes an empty cell.
 * Fields containing a comma, double quote or line break are quoted, with
 * embedded quotes doubled, so such values cannot shift columns.
 *
 * @param {object} session - Session record (currently unused; kept for callers).
 * @param {object} metrics - Object whose `metrics.agents` maps agent name to
 *   `{ status, attempts, final_duration_ms, total_cost_usd }`.
 * @returns {string} CSV text: header line plus one line per agent, joined
 *   with `\n` and no trailing newline.
 */
function exportAsCSV(session, metrics) {
  const header = 'agent,phase,status,attempts,duration_ms,cost_usd';

  // A Map (rather than a plain object) so that agent names such as
  // "constructor" cannot resolve to inherited Object.prototype members.
  const phaseByAgent = new Map([
    ['pre-recon', 'pre-recon'],
    ['recon', 'recon'],
    ['injection-vuln', 'vulnerability-analysis'],
    ['xss-vuln', 'vulnerability-analysis'],
    ['auth-vuln', 'vulnerability-analysis'],
    ['authz-vuln', 'vulnerability-analysis'],
    ['ssrf-vuln', 'vulnerability-analysis'],
    ['injection-exploit', 'exploitation'],
    ['xss-exploit', 'exploitation'],
    ['auth-exploit', 'exploitation'],
    ['authz-exploit', 'exploitation'],
    ['ssrf-exploit', 'exploitation'],
    ['report', 'reporting'],
  ]);

  // Quote a single cell when it contains a CSV metacharacter.
  const toCell = (value) => {
    if (value == null) {
      return '';
    }
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const agents = metrics?.metrics?.agents ?? {};
  const rows = Object.entries(agents).map(([agentName, agentData]) => {
    const data = agentData ?? {};
    const rawCost = data.total_cost_usd;
    return [
      agentName,
      phaseByAgent.get(agentName) || 'unknown',
      data.status,
      data.attempts?.length ?? 0,
      data.final_duration_ms,
      // Leave the cell empty when no cost was recorded instead of throwing.
      rawCost == null ? '' : Number(rawCost).toFixed(4),
    ].map(toCell).join(',');
  });

  return [header, ...rows].join('\n');
}
/**
 * Script entry point: parse arguments, export the requested session's
 * metrics as CSV, and either write them to `--output` or print them.
 *
 * When no `--output` file is given the CSV is the only thing written to
 * stdout; the banner, the "CSV Output:" label and the trailing blank line go
 * to stderr so that `export-metrics.js --session-id X > file.csv` yields a
 * clean CSV file. With `--output`, status messages stay on stdout.
 *
 * Exits with code 1 (after printing usage) when `--session-id` is missing.
 * Errors from exportMetrics() or the file write propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs();
  if (!args.sessionId) {
    console.log(chalk.red('❌ Must specify --session-id'));
    printUsage();
    process.exit(1);
  }

  // Keep stdout machine-readable whenever it carries the CSV payload.
  const status = (...message) => {
    if (args.output) {
      console.log(...message);
    } else {
      console.error(...message);
    }
  };

  status(chalk.cyan.bold('\n📊 Exporting Metrics to CSV\n'));
  status(chalk.gray(`Session ID: ${args.sessionId}\n`));

  const output = await exportMetrics(args.sessionId);

  if (args.output) {
    await fs.writeFile(args.output, output);
    status(chalk.green(`✅ Exported to: ${args.output}`));
  } else {
    status(chalk.cyan('CSV Output:\n'));
    console.log(output);
  }
  status();
}
// Run the script; on any rejection print the message (plus the stack when
// the DEBUG environment variable is set) and exit with code 1.
main().catch(function handleFatal(failure) {
  const headline = chalk.red.bold(`\n🚨 Fatal error: ${failure.message}`);
  console.log(headline);

  const wantsStack = Boolean(process.env.DEBUG);
  if (wantsStack) {
    console.log(chalk.gray(failure.stack));
  }

  process.exit(1);
});
+225
View File
@@ -0,0 +1,225 @@
#!/usr/bin/env node
/**
* Manual Session Reconciliation Script
*
* Purpose: Diagnostics and exceptional recovery (NOT normal operations).
*
* Use Cases:
* 1. Diagnostics (Primary): Non-destructively report inconsistencies
* 2. Debugging: Test reconciliation logic in isolation
* 3. Exceptional Recovery: Malformed JSON recovery, reconciliation bugs
* 4. Bulk Operations: System-wide consistency audit
*
* Design Principle:
* "Self-healing is the norm. Manual intervention is the exception."
*
* Red Flags (indicate bugs):
* - Manual script needed frequently
* - Automatic reconciliation failing consistently
* - Manual intervention after every crash
*/
import chalk from 'chalk';
import { fs, path } from 'zx';
import { reconcileSession, getSession } from '../src/session-manager.js';
// Path of the `.shannon-store.json` file, resolved against the directory the script is run from.
const STORE_FILE = path.join(process.cwd(), '.shannon-store.json');
/**
 * Read this script's options from process.argv.
 *
 * Recognised flags: `--session-id <id>`, `--all-sessions`, `--dry-run`,
 * `--verbose`, `--help` / `-h`. `--session-id` without a non-empty following
 * token is handled like any other unrecognised argument: an error and the
 * usage text are printed and the process exits with code 1. `--help` prints
 * usage and exits with code 0.
 *
 * @returns {{sessionId: (string|null), allSessions: boolean, dryRun: boolean, verbose: boolean}}
 *   Parsed options with defaults for every flag that was not supplied.
 */
function parseArgs() {
  // Value-less switches, mapped to the option each one turns on.
  const switches = new Map([
    ['--all-sessions', 'allSessions'],
    ['--dry-run', 'dryRun'],
    ['--verbose', 'verbose'],
  ]);
  const options = {
    sessionId: null,
    allSessions: false,
    dryRun: false,
    verbose: false,
  };
  const argv = process.argv;

  // Scanning starts at index 2, past the first two process.argv entries.
  let cursor = 2;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    const next = argv[cursor + 1];

    if (flag === '--session-id' && next) {
      options.sessionId = next;
      cursor += 2; // skip the flag and the id it consumed
      continue;
    }

    const switchKey = switches.get(flag);
    if (switchKey !== undefined) {
      options[switchKey] = true;
    } else if (flag === '--help' || flag === '-h') {
      printUsage();
      process.exit(0);
    } else {
      console.log(chalk.red(`❌ Unknown argument: ${flag}`));
      printUsage();
      process.exit(1);
    }
    cursor += 1;
  }

  return options;
}
/**
 * Print the help text for the reconciliation script to stdout.
 *
 * Each entry pairs a chalk colour name with the line to print in that colour.
 */
function printUsage() {
  const helpLines = [
    ['cyan', '\n📋 Manual Session Reconciliation Script'],
    ['gray', '\nUsage: ./scripts/reconcile-session.js [options]\n'],
    ['white', 'Options:'],
    ['gray', ' --session-id <id> Reconcile specific session'],
    ['gray', ' --all-sessions Reconcile all sessions'],
    ['gray', ' --dry-run Report inconsistencies without fixing'],
    ['gray', ' --verbose Detailed logging'],
    ['gray', ' --help, -h Show this help\n'],
    ['white', 'Examples:'],
    ['gray', ' # Diagnostics (primary use case)'],
    ['gray', ' ./scripts/reconcile-session.js --session-id abc123 --dry-run\n'],
    ['gray', ' # System-wide consistency audit'],
    ['gray', ' ./scripts/reconcile-session.js --all-sessions --dry-run --verbose\n'],
    ['gray', ' # Exceptional recovery'],
    ['gray', ' ./scripts/reconcile-session.js --session-id abc123\n'],
  ];

  for (const [colour, text] of helpLines) {
    console.log(chalk[colour](text));
  }
}
/**
 * Read every session record from the store file.
 *
 * @returns {Promise<object[]>} The values of the store's `sessions` object;
 *   an empty array when the store file does not exist or has no `sessions`.
 * @throws {Error} "Failed to load sessions: …" when the file cannot be read
 *   or parsed. The underlying error is attached as `cause` so its type and
 *   stack remain available to callers and debuggers.
 */
async function loadAllSessions() {
  try {
    if (!(await fs.pathExists(STORE_FILE))) {
      return [];
    }

    const content = await fs.readFile(STORE_FILE, 'utf8');
    const store = JSON.parse(content);
    return Object.values(store.sessions || {});
  } catch (error) {
    throw new Error(`Failed to load sessions: ${error.message}`, { cause: error });
  }
}
/**
 * Reconcile one session and print what happened.
 *
 * In dry-run mode the session is only looked up (and described when
 * `verbose` is set); reconcileSession() is not called. Otherwise
 * reconcileSession() is invoked and its promotions / demotions / failures
 * lists are reported. Any error thrown along the way is caught, printed and
 * converted into a failed result.
 *
 * @param {string} sessionId - Session to reconcile.
 * @param {boolean} dryRun - Skip the actual reconciliation when true.
 * @param {boolean} verbose - Print session details and "no change" notices.
 * @returns {Promise<object>} `{ success, sessionId }` plus, depending on the
 *   path taken, `dryRun: true`, the spread reconciliation report, or `error`.
 */
async function reconcileSingleSession(sessionId, dryRun, verbose) {
  // Evaluated lazily so it only runs on the branches that print it.
  const shortId = () => sessionId.substring(0, 8);

  try {
    const session = await getSession(sessionId);
    if (!session) {
      console.log(chalk.red(`❌ Session ${sessionId} not found`));
      return { success: false, sessionId };
    }

    if (verbose) {
      console.log(chalk.blue(`\n🔍 Analyzing session: ${sessionId}`));
      console.log(chalk.gray(` Web URL: ${session.webUrl}`));
      console.log(chalk.gray(` Status: ${session.status}`));
      console.log(chalk.gray(` Completed Agents: ${session.completedAgents.length}`));
    }

    if (dryRun) {
      console.log(chalk.yellow(` [DRY RUN] Would reconcile session ${shortId()}...`));
      return { success: true, sessionId, dryRun: true };
    }

    // Perform the actual reconciliation.
    const report = await reconcileSession(sessionId);
    const changed = [report.promotions, report.demotions, report.failures]
      .some((entries) => entries.length > 0);

    if (!changed) {
      if (verbose) {
        console.log(chalk.gray(` ✓ No inconsistencies found`));
      }
      return { success: true, sessionId, ...report };
    }

    console.log(chalk.green(`✅ Reconciled session ${shortId()}...`));
    if (report.promotions.length > 0) {
      console.log(chalk.blue(` Added ${report.promotions.length} completed agents: ${report.promotions.join(', ')}`));
    }
    if (report.demotions.length > 0) {
      console.log(chalk.yellow(` Removed ${report.demotions.length} rolled-back agents: ${report.demotions.join(', ')}`));
    }
    if (report.failures.length > 0) {
      console.log(chalk.red(` ❌ Marked ${report.failures.length} failed agents: ${report.failures.join(', ')}`));
    }
    return { success: true, sessionId, ...report };
  } catch (problem) {
    console.log(chalk.red(`❌ Failed to reconcile session ${sessionId}: ${problem.message}`));
    return { success: false, sessionId, error: problem.message };
  }
}
/**
 * Script entry point: reconcile one session (`--session-id`) or every
 * session in the store (`--all-sessions`), then print a summary.
 *
 * `--session-id` takes precedence when both selectors are given. When
 * neither is given, usage is printed and the process exits with code 1.
 * Sessions are processed one at a time, in order.
 *
 * With `--all-sessions` a "consistency rate" (share of sessions whose
 * reconciliation call succeeded) is printed, with a warning below 98%.
 * An empty store is reported as 100% rather than dividing by zero.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs();

  console.log(chalk.cyan.bold('\n🔄 Manual Session Reconciliation\n'));
  if (args.dryRun) {
    console.log(chalk.yellow('⚠️ DRY RUN MODE - No changes will be made\n'));
  }

  let sessions = [];
  if (args.sessionId) {
    sessions = [{ id: args.sessionId }];
  } else if (args.allSessions) {
    sessions = await loadAllSessions();
    console.log(chalk.blue(`Found ${sessions.length} sessions\n`));
  } else {
    console.log(chalk.red('❌ Must specify either --session-id or --all-sessions'));
    printUsage();
    process.exit(1);
  }

  const results = {
    total: sessions.length,
    success: 0,
    failed: 0,
    totalPromotions: 0,
    totalDemotions: 0,
    totalFailures: 0,
  };

  for (const session of sessions) {
    const result = await reconcileSingleSession(session.id, args.dryRun, args.verbose);
    if (result.success) {
      results.success++;
      // Dry-run results carry no report lists, hence the optional access.
      results.totalPromotions += result.promotions?.length || 0;
      results.totalDemotions += result.demotions?.length || 0;
      results.totalFailures += result.failures?.length || 0;
    } else {
      results.failed++;
    }
  }

  // Summary
  console.log(chalk.cyan.bold('\n📊 Summary:'));
  console.log(chalk.gray(`Total sessions: ${results.total}`));
  console.log(chalk.green(`Successful: ${results.success}`));
  if (results.failed > 0) {
    console.log(chalk.red(`Failed: ${results.failed}`));
  }
  console.log(chalk.blue(`Promotions: ${results.totalPromotions}`));
  console.log(chalk.yellow(`Demotions: ${results.totalDemotions}`));
  console.log(chalk.red(`Failures: ${results.totalFailures}`));

  // Health check
  if (args.allSessions) {
    // With no sessions there is nothing inconsistent; avoid 0 / 0 = NaN,
    // which would print "NaN%" and silently skip the threshold check.
    const consistencyRate = results.total > 0
      ? (results.success / results.total) * 100
      : 100;
    console.log(chalk.cyan(`\n📈 Consistency Rate: ${consistencyRate.toFixed(1)}%`));
    if (consistencyRate < 98) {
      console.log(chalk.red('\n⚠️ WARNING: Low consistency rate detected!'));
      console.log(chalk.red('This may indicate bugs in automatic reconciliation.'));
    }
  }
  console.log();
}
// Run the script; on any rejection print the message (plus the stack when
// the DEBUG environment variable is set) and exit with code 1.
main().catch(function handleFatal(failure) {
  const headline = chalk.red.bold(`\n🚨 Fatal error: ${failure.message}`);
  console.log(headline);

  const wantsStack = Boolean(process.env.DEBUG);
  if (wantsStack) {
    console.log(chalk.gray(failure.stack));
  }

  process.exit(1);
});