feat: implement unified audit system v3.0 with crash-safety and self-healing

## Unified Audit System (v3.0)
- Implemented crash-safe, append-only logging to audit-logs/{hostname}_{sessionId}/
- Added session.json with comprehensive metrics (timing, cost, attempts)
- Agent execution logs with turn-by-turn detail
- Prompt snapshots saved to audit-logs/.../prompts/{agent}.md
- SessionMutex prevents race conditions during parallel execution
- Self-healing reconciliation before every CLI command

## Session Metadata Standardization
- Fixed critical bug: standardized on 'id' field (not 'sessionId') throughout codebase
- Updated: shannon.mjs (recon, report), src/phases/pre-recon.js
- Added validation in AuditSession to fail fast on incorrect field usage
- JavaScript shorthand syntax was causing wrong field names

## Schema Improvements
- session.json: Added cost_usd per phase, removed redundant final_cost_usd
- Renamed 'percentage' -> 'duration_percentage' for clarity
- Simplified agent metrics to single total_cost_usd field
- Removed unused validation object from schema

## Legacy System Removal
- Removed savePromptSnapshot() - prompts now only saved by audit system
- Removed target repo pollution (prompt-snapshots/ no longer created)
- Single source of truth: audit-logs/{hostname}_{sessionId}/prompts/

## Export Script Simplification
- Removed JSON export mode (session.json already exists)
- CSV-only export with clean columns: agent, phase, status, attempts, duration_ms, cost_usd
- Tested on real session data

## Documentation
- Updated CLAUDE.md with audit system architecture
- Added .gitignore entry for audit-logs/

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-05-20 16:05:04 +02:00 · 2025-10-22 16:09:08 -07:00
parent a9e00ca19f
commit 27334a4dd6
18 changed files with 1871 additions and 206 deletions
@@ -0,0 +1,150 @@
+#!/usr/bin/env node
+
+/**
+ * Export Metrics Script
+ *
+ * Export session metrics from audit logs to CSV format for analysis.
+ *
+ * Use Cases:
+ * - Performance analysis across sessions
+ * - Cost tracking and budgeting
+ * - Agent success rate analysis
+ * - Benchmarking improvements
+ */
+
+import chalk from 'chalk';
+import { fs, path } from 'zx';
+import { getSession } from '../src/session-manager.js';
+import { AuditSession } from '../src/audit/index.js';
+
+// Parse command-line arguments
+function parseArgs() {
+  const args = {
+    sessionId: null,
+    output: null
+  };
+
+  for (let i = 2; i < process.argv.length; i++) {
+    const arg = process.argv[i];
+
+    if (arg === '--session-id' && process.argv[i + 1]) {
+      args.sessionId = process.argv[i + 1];
+      i++;
+    } else if (arg === '--output' && process.argv[i + 1]) {
+      args.output = process.argv[i + 1];
+      i++;
+    } else if (arg === '--help' || arg === '-h') {
+      printUsage();
+      process.exit(0);
+    } else {
+      console.log(chalk.red(`❌ Unknown argument: ${arg}`));
+      printUsage();
+      process.exit(1);
+    }
+  }
+
+  return args;
+}
+
+function printUsage() {
+  console.log(chalk.cyan('\n📊 Export Metrics to CSV'));
+  console.log(chalk.gray('\nUsage: ./scripts/export-metrics.js [options]\n'));
+  console.log(chalk.white('Options:'));
+  console.log(chalk.gray('  --session-id <id>      Session ID to export (required)'));
+  console.log(chalk.gray('  --output <file>        Output CSV file path (default: stdout)'));
+  console.log(chalk.gray('  --help, -h             Show this help\n'));
+  console.log(chalk.white('Examples:'));
+  console.log(chalk.gray('  # Export to stdout'));
+  console.log(chalk.gray('  ./scripts/export-metrics.js --session-id abc123\n'));
+  console.log(chalk.gray('  # Export to file'));
+  console.log(chalk.gray('  ./scripts/export-metrics.js --session-id abc123 --output metrics.csv\n'));
+}
+
+// Export metrics for a session
+async function exportMetrics(sessionId) {
+  const session = await getSession(sessionId);
+  if (!session) {
+    throw new Error(`Session ${sessionId} not found`);
+  }
+
+  const auditSession = new AuditSession(session);
+  await auditSession.initialize();
+  const metrics = await auditSession.getMetrics();
+
+  return exportAsCSV(session, metrics);
+}
+
+// Export as CSV
+function exportAsCSV(session, metrics) {
+  const lines = [];
+
+  // Header
+  lines.push('agent,phase,status,attempts,duration_ms,cost_usd');
+
+  // Phase mapping
+  const phaseMap = {
+    'pre-recon': 'pre-recon',
+    'recon': 'recon',
+    'injection-vuln': 'vulnerability-analysis',
+    'xss-vuln': 'vulnerability-analysis',
+    'auth-vuln': 'vulnerability-analysis',
+    'authz-vuln': 'vulnerability-analysis',
+    'ssrf-vuln': 'vulnerability-analysis',
+    'injection-exploit': 'exploitation',
+    'xss-exploit': 'exploitation',
+    'auth-exploit': 'exploitation',
+    'authz-exploit': 'exploitation',
+    'ssrf-exploit': 'exploitation',
+    'report': 'reporting'
+  };
+
+  // Agent rows
+  for (const [agentName, agentData] of Object.entries(metrics.metrics.agents)) {
+    const phase = phaseMap[agentName] || 'unknown';
+
+    lines.push([
+      agentName,
+      phase,
+      agentData.status,
+      agentData.attempts.length,
+      agentData.final_duration_ms,
+      agentData.total_cost_usd.toFixed(4)
+    ].join(','));
+  }
+
+  return lines.join('\n');
+}
+
+// Main execution
+async function main() {
+  const args = parseArgs();
+
+  if (!args.sessionId) {
+    console.log(chalk.red('❌ Must specify --session-id'));
+    printUsage();
+    process.exit(1);
+  }
+
+  console.log(chalk.cyan.bold('\n📊 Exporting Metrics to CSV\n'));
+  console.log(chalk.gray(`Session ID: ${args.sessionId}\n`));
+
+  const output = await exportMetrics(args.sessionId);
+
+  if (args.output) {
+    await fs.writeFile(args.output, output);
+    console.log(chalk.green(`✅ Exported to: ${args.output}`));
+  } else {
+    console.log(chalk.cyan('CSV Output:\n'));
+    console.log(output);
+  }
+
+  console.log();
+}
+
+main().catch(error => {
+  console.log(chalk.red.bold(`\n🚨 Fatal error: ${error.message}`));
+  if (process.env.DEBUG) {
+    console.log(chalk.gray(error.stack));
+  }
+  process.exit(1);
+});
@@ -0,0 +1,225 @@
+#!/usr/bin/env node
+
+/**
+ * Manual Session Reconciliation Script
+ *
+ * Purpose: Diagnostics and exceptional recovery (NOT normal operations).
+ *
+ * Use Cases:
+ * 1. Diagnostics (Primary): Non-destructively report inconsistencies
+ * 2. Debugging: Test reconciliation logic in isolation
+ * 3. Exceptional Recovery: Malformed JSON recovery, reconciliation bugs
+ * 4. Bulk Operations: System-wide consistency audit
+ *
+ * Design Principle:
+ * "Self-healing is the norm. Manual intervention is the exception."
+ *
+ * Red Flags (indicate bugs):
+ * - Manual script needed frequently
+ * - Automatic reconciliation failing consistently
+ * - Manual intervention after every crash
+ */
+
+import chalk from 'chalk';
+import { fs, path } from 'zx';
+import { reconcileSession, getSession } from '../src/session-manager.js';
+
+const STORE_FILE = path.join(process.cwd(), '.shannon-store.json');
+
+// Parse command-line arguments
+function parseArgs() {
+  const args = {
+    sessionId: null,
+    allSessions: false,
+    dryRun: false,
+    verbose: false
+  };
+
+  for (let i = 2; i < process.argv.length; i++) {
+    const arg = process.argv[i];
+
+    if (arg === '--session-id' && process.argv[i + 1]) {
+      args.sessionId = process.argv[i + 1];
+      i++;
+    } else if (arg === '--all-sessions') {
+      args.allSessions = true;
+    } else if (arg === '--dry-run') {
+      args.dryRun = true;
+    } else if (arg === '--verbose') {
+      args.verbose = true;
+    } else if (arg === '--help' || arg === '-h') {
+      printUsage();
+      process.exit(0);
+    } else {
+      console.log(chalk.red(`❌ Unknown argument: ${arg}`));
+      printUsage();
+      process.exit(1);
+    }
+  }
+
+  return args;
+}
+
+function printUsage() {
+  console.log(chalk.cyan('\n📋 Manual Session Reconciliation Script'));
+  console.log(chalk.gray('\nUsage: ./scripts/reconcile-session.js [options]\n'));
+  console.log(chalk.white('Options:'));
+  console.log(chalk.gray('  --session-id <id>      Reconcile specific session'));
+  console.log(chalk.gray('  --all-sessions         Reconcile all sessions'));
+  console.log(chalk.gray('  --dry-run              Report inconsistencies without fixing'));
+  console.log(chalk.gray('  --verbose              Detailed logging'));
+  console.log(chalk.gray('  --help, -h             Show this help\n'));
+  console.log(chalk.white('Examples:'));
+  console.log(chalk.gray('  # Diagnostics (primary use case)'));
+  console.log(chalk.gray('  ./scripts/reconcile-session.js --session-id abc123 --dry-run\n'));
+  console.log(chalk.gray('  # System-wide consistency audit'));
+  console.log(chalk.gray('  ./scripts/reconcile-session.js --all-sessions --dry-run --verbose\n'));
+  console.log(chalk.gray('  # Exceptional recovery'));
+  console.log(chalk.gray('  ./scripts/reconcile-session.js --session-id abc123\n'));
+}
+
+// Load all sessions
+async function loadAllSessions() {
+  try {
+    if (!await fs.pathExists(STORE_FILE)) {
+      return [];
+    }
+
+    const content = await fs.readFile(STORE_FILE, 'utf8');
+    const store = JSON.parse(content);
+    return Object.values(store.sessions || {});
+  } catch (error) {
+    throw new Error(`Failed to load sessions: ${error.message}`);
+  }
+}
+
+// Reconcile a single session
+async function reconcileSingleSession(sessionId, dryRun, verbose) {
+  try {
+    const session = await getSession(sessionId);
+    if (!session) {
+      console.log(chalk.red(`❌ Session ${sessionId} not found`));
+      return { success: false, sessionId };
+    }
+
+    if (verbose) {
+      console.log(chalk.blue(`\n🔍 Analyzing session: ${sessionId}`));
+      console.log(chalk.gray(`   Web URL: ${session.webUrl}`));
+      console.log(chalk.gray(`   Status: ${session.status}`));
+      console.log(chalk.gray(`   Completed Agents: ${session.completedAgents.length}`));
+    }
+
+    if (dryRun) {
+      console.log(chalk.yellow(`   [DRY RUN] Would reconcile session ${sessionId.substring(0, 8)}...`));
+      return { success: true, sessionId, dryRun: true };
+    }
+
+    // Perform actual reconciliation
+    const report = await reconcileSession(sessionId);
+
+    const hasChanges = report.promotions.length > 0 ||
+                       report.demotions.length > 0 ||
+                       report.failures.length > 0;
+
+    if (hasChanges) {
+      console.log(chalk.green(`✅ Reconciled session ${sessionId.substring(0, 8)}...`));
+
+      if (report.promotions.length > 0) {
+        console.log(chalk.blue(`   ➕ Added ${report.promotions.length} completed agents: ${report.promotions.join(', ')}`));
+      }
+      if (report.demotions.length > 0) {
+        console.log(chalk.yellow(`   ➖ Removed ${report.demotions.length} rolled-back agents: ${report.demotions.join(', ')}`));
+      }
+      if (report.failures.length > 0) {
+        console.log(chalk.red(`   ❌ Marked ${report.failures.length} failed agents: ${report.failures.join(', ')}`));
+      }
+    } else {
+      if (verbose) {
+        console.log(chalk.gray(`   ✓ No inconsistencies found`));
+      }
+    }
+
+    return { success: true, sessionId, ...report };
+  } catch (error) {
+    console.log(chalk.red(`❌ Failed to reconcile session ${sessionId}: ${error.message}`));
+    return { success: false, sessionId, error: error.message };
+  }
+}
+
+// Main execution
+async function main() {
+  const args = parseArgs();
+
+  console.log(chalk.cyan.bold('\n🔄 Manual Session Reconciliation\n'));
+
+  if (args.dryRun) {
+    console.log(chalk.yellow('⚠️  DRY RUN MODE - No changes will be made\n'));
+  }
+
+  let sessions = [];
+
+  if (args.sessionId) {
+    sessions = [{ id: args.sessionId }];
+  } else if (args.allSessions) {
+    sessions = await loadAllSessions();
+    console.log(chalk.blue(`Found ${sessions.length} sessions\n`));
+  } else {
+    console.log(chalk.red('❌ Must specify either --session-id or --all-sessions'));
+    printUsage();
+    process.exit(1);
+  }
+
+  const results = {
+    total: sessions.length,
+    success: 0,
+    failed: 0,
+    totalPromotions: 0,
+    totalDemotions: 0,
+    totalFailures: 0
+  };
+
+  for (const session of sessions) {
+    const result = await reconcileSingleSession(session.id, args.dryRun, args.verbose);
+
+    if (result.success) {
+      results.success++;
+      results.totalPromotions += result.promotions?.length || 0;
+      results.totalDemotions += result.demotions?.length || 0;
+      results.totalFailures += result.failures?.length || 0;
+    } else {
+      results.failed++;
+    }
+  }
+
+  // Summary
+  console.log(chalk.cyan.bold('\n📊 Summary:'));
+  console.log(chalk.gray(`Total sessions: ${results.total}`));
+  console.log(chalk.green(`Successful: ${results.success}`));
+  if (results.failed > 0) {
+    console.log(chalk.red(`Failed: ${results.failed}`));
+  }
+  console.log(chalk.blue(`Promotions: ${results.totalPromotions}`));
+  console.log(chalk.yellow(`Demotions: ${results.totalDemotions}`));
+  console.log(chalk.red(`Failures: ${results.totalFailures}`));
+
+  // Health check
+  if (args.allSessions) {
+    const consistencyRate = (results.success / results.total) * 100;
+    console.log(chalk.cyan(`\n📈 Consistency Rate: ${consistencyRate.toFixed(1)}%`));
+
+    if (consistencyRate < 98) {
+      console.log(chalk.red('\n⚠️  WARNING: Low consistency rate detected!'));
+      console.log(chalk.red('This may indicate bugs in automatic reconciliation.'));
+    }
+  }
+
+  console.log();
+}
+
+main().catch(error => {
+  console.log(chalk.red.bold(`\n🚨 Fatal error: ${error.message}`));
+  if (process.env.DEBUG) {
+    console.log(chalk.gray(error.stack));
+  }
+  process.exit(1);
+});