From 1b696cac1bb3a3b85338ec9a225eec10bdc1275c Mon Sep 17 00:00:00 2001 From: ezl-keygraph Date: Sat, 14 Feb 2026 02:52:11 +0530 Subject: [PATCH] fix: store checkpoint as success commit hash and show cumulative metrics - Swap commitGitSuccess/getGitCommitHash order so checkpoint in session.json points to the success commit (which contains deliverables) instead of the pre-agent marker commit - Simplify restoreGitCheckpoint: git reset --hard now naturally preserves completed agent deliverables, removing the in-memory backup/restore - Show cumulative cost/duration in workflow.log from session.json - Fill in per-agent metrics for skipped agents in workflow.log breakdown - Display cumulative cost in client output for resume runs --- CLAUDE.md | 9 +++++++- README.md | 34 ++++++++++++++++++++++++++++++ src/temporal/activities.ts | 43 +++++++++++++++++++++++++++++++------- src/temporal/client.ts | 17 ++++++++++++++- 4 files changed, 94 insertions(+), 9 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index cf0f788..41572ca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,6 +18,12 @@ git clone https://github.com/org/repo.git ./repos/my-repo ./shannon start URL= REPO=my-repo ./shannon start URL= REPO=my-repo CONFIG=./configs/my-config.yaml +# Workspaces & Resume +./shannon start URL= REPO=my-repo WORKSPACE=my-audit # New named workspace +./shannon start URL= REPO=my-repo WORKSPACE=my-audit # Resume (same command) +./shannon start URL= REPO=my-repo WORKSPACE= # Resume auto-named run +./shannon workspaces # List all workspaces + # Monitor ./shannon logs # Real-time worker logs ./shannon query ID= # Query workflow progress @@ -31,7 +37,7 @@ git clone https://github.com/org/repo.git ./repos/my-repo npm run build ``` -**Options:** `CONFIG=` (YAML config), `OUTPUT=` (default: `./audit-logs/`), `PIPELINE_TESTING=true` (minimal prompts, 10s retries), `REBUILD=true` (force Docker rebuild), `ROUTER=true` (multi-model routing via 
[claude-code-router](https://github.com/musistudio/claude-code-router)) +**Options:** `CONFIG=` (YAML config), `OUTPUT=` (default: `./audit-logs/`), `WORKSPACE=` (named workspace; auto-resumes if exists), `PIPELINE_TESTING=true` (minimal prompts, 10s retries), `REBUILD=true` (force Docker rebuild), `ROUTER=true` (multi-model routing via [claude-code-router](https://github.com/musistudio/claude-code-router)) ## Architecture @@ -67,6 +73,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig - **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth - **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables - **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool +- **Workspaces & Resume** — Named workspaces via `WORKSPACE=` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. 
Workspace listing via `src/temporal/workspaces.ts` ## Development Notes diff --git a/README.md b/README.md index 89e2bfa..db39f0a 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ Shannon is available in two editions: - [Monitoring Progress](#monitoring-progress) - [Stopping Shannon](#stopping-shannon) - [Usage Examples](#usage-examples) + - [Workspaces and Resuming](#workspaces-and-resuming) - [Configuration (Optional)](#configuration-optional) - [[EXPERIMENTAL - UNSUPPORTED] Router Mode (Alternative Providers)](#experimental---unsupported-router-mode-alternative-providers) - [Output and Results](#output-and-results) @@ -167,8 +168,41 @@ open http://localhost:8233 # Custom output directory ./shannon start URL=https://example.com REPO=repo-name OUTPUT=./my-reports + +# Named workspace +./shannon start URL=https://example.com REPO=repo-name WORKSPACE=q1-audit + +# List all workspaces +./shannon workspaces ``` +### Workspaces and Resuming + +Shannon supports **workspaces** that allow you to resume interrupted or failed runs without re-running completed agents. + +**How it works:** +- Every run creates a workspace in `audit-logs/` (auto-named by default, e.g. 
`example-com_shannon-1771007534808`) +- Use `WORKSPACE=<name>` to give your run a custom name for easier reference +- To resume any run, pass its workspace name via `WORKSPACE=<name>` — Shannon detects which agents completed successfully and picks up where it left off +- Each agent's progress is checkpointed via git commits, so resumed runs start from a clean, validated state + +```bash +# Start with a named workspace +./shannon start URL=https://example.com REPO=repo-name WORKSPACE=my-audit + +# Resume the same workspace (skips completed agents) +./shannon start URL=https://example.com REPO=repo-name WORKSPACE=my-audit + +# Resume an auto-named workspace from a previous run +./shannon start URL=https://example.com REPO=repo-name WORKSPACE=example-com_shannon-1771007534808 + +# List all workspaces and their status +./shannon workspaces +``` + +> [!NOTE] +> The `URL` must match the original workspace URL when resuming. Shannon will reject mismatched URLs to prevent cross-target contamination. + ### Prepare Your Repository Shannon expects target repositories to be placed under the `./repos/` directory at the project root. The `REPO` flag refers to a folder name inside `./repos/`. Copy the repository you want to scan into `./repos/`, or clone it directly there: diff --git a/src/temporal/activities.ts b/src/temporal/activities.ts index 5cf902f..454701b 100644 --- a/src/temporal/activities.ts +++ b/src/temporal/activities.ts @@ -246,7 +246,8 @@ async function runAgentActivity( throw new Error(`Agent ${agentName} failed output validation`); } - // 9. Success - commit and log + // 9. Success - commit deliverables, then capture checkpoint hash + await commitGitSuccess(repoPath, agentName); const commitHash = await getGitCommitHash(repoPath); await auditSession.endAgent(agentName, { attemptNumber, @@ -256,7 +257,6 @@ async function runAgentActivity( model: result.model, ...(commitHash && { checkpoint: commitHash }), }); - await commitGitSuccess(repoPath, agentName); // 10. 
Return metrics return { @@ -606,7 +606,8 @@ export async function restoreGitCheckpoint( ): Promise { console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`)); - // Git reset to checkpoint + // Checkpoint hash points to the success commit (after commitGitSuccess), + // so git reset --hard naturally preserves all completed agent deliverables. await executeGitCommandWithRetry( ['git', 'reset', '--hard', checkpointHash], repoPath, @@ -618,8 +619,7 @@ export async function restoreGitCheckpoint( 'clean untracked files for resume' ); - - // Explicitly delete deliverables for incomplete agents + // Clean up any partial deliverables from incomplete agents for (const agentName of incompleteAgents) { const deliverablePath = getDeliverablePath(agentName, repoPath); try { @@ -629,7 +629,6 @@ export async function restoreGitCheckpoint( await fs.unlink(deliverablePath); } } catch (error) { - // Non-fatal, just log console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`)); } } @@ -709,7 +708,37 @@ export async function logWorkflowComplete( const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(workflowId); await auditSession.updateSessionStatus(summary.status); - await auditSession.logWorkflowComplete(summary); + + // Use cumulative metrics from session.json (includes all resume attempts) + const sessionData = await auditSession.getMetrics() as { + metrics: { + total_duration_ms: number; + total_cost_usd: number; + agents: Record; + }; + }; + + // Fill in metrics for skipped agents (completed in previous runs) + const agentMetrics = { ...summary.agentMetrics }; + for (const agentName of summary.completedAgents) { + if (!agentMetrics[agentName]) { + const agentData = sessionData.metrics.agents[agentName]; + if (agentData) { + agentMetrics[agentName] = { + durationMs: agentData.final_duration_ms, + costUsd: agentData.total_cost_usd, + }; + } + } + } + + const cumulativeSummary: WorkflowSummary = { + ...summary, + 
totalDurationMs: sessionData.metrics.total_duration_ms, + totalCostUsd: sessionData.metrics.total_cost_usd, + agentMetrics, + }; + await auditSession.logWorkflowComplete(cumulativeSummary); // Copy all deliverables to audit-logs once at workflow end (non-fatal) try { diff --git a/src/temporal/client.ts b/src/temporal/client.ts index b81ecc4..3ea690a 100644 --- a/src/temporal/client.ts +++ b/src/temporal/client.ts @@ -46,6 +46,9 @@ interface SessionJson { originalWorkflowId?: string; resumeAttempts?: Array<{ workflowId: string }>; }; + metrics: { + total_cost_usd: number; + }; } dotenv.config(); @@ -356,7 +359,19 @@ async function startPipeline(): Promise { console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`)); console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`)); console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`)); - console.log(chalk.gray(`Total cost: $${result.summary.totalCostUsd.toFixed(4)}`)); + console.log(chalk.gray(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`)); + + // Show cumulative cost from session.json (includes all resume attempts) + if (isResume) { + try { + const session = await readJson( + path.join('./audit-logs', sessionId, 'session.json') + ); + console.log(chalk.gray(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`)); + } catch { + // Non-fatal, skip cumulative cost display + } + } } } catch (error) { clearInterval(progressInterval);