refactor(worker): rename claude-executor to pi-executor

This commit is contained in:
ezl-keygraph
2026-06-15 16:05:31 +05:30
parent 56241625a4
commit 263b18e98a
13 changed files with 32 additions and 32 deletions
+4 -4
View File
@@ -122,7 +122,7 @@ Infra (Temporal) runs via `docker-compose.yml`. Workers are ephemeral `docker ru
- `apps/worker/src/paths.ts` — Centralized path constants (`PROMPTS_DIR`, `CONFIGS_DIR`, `WORKSPACES_DIR`)
- `apps/worker/src/session-manager.ts` — Agent definitions (`AGENTS` record). Agent types in `apps/worker/src/types/agents.ts`
- `apps/worker/src/config-parser.ts` — YAML config parsing with JSON Schema validation
- `apps/worker/src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic
- `apps/worker/src/ai/pi-executor.ts` — pi harness integration (retry disabled; Temporal owns retry)
- `apps/worker/src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts`
- `apps/worker/src/types/` — Consolidated types: `Result<T,E>`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc.
- `apps/worker/src/utils/` — Shared utilities (file I/O, formatting, concurrency)
@@ -145,9 +145,9 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
5. **Reporting** (`report`) — Executive-level security report
### Supporting Systems
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are enforced via the `@gotgenes/pi-permission-system` extension: `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` writes a global `path` deny config once per workflow (`apps/worker/src/ai/settings-writer.ts:writeCodePathPermissionConfig`), and the executor loads the extension when that config is present (`apps/worker/src/ai/claude-executor.ts`), so denies fire across every tool and child `task` session. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Configuration** — YAML configs in `apps/worker/configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings (MFA/TOTP), URL/code rule scoping (`rules.avoid`/`rules.focus`), run-scope steering (`vuln_classes`, `exploit`), free-form `rules_of_engagement`, and post-hoc `report` filters (`min_severity`, `min_confidence`, `guidance`). `code_path` avoid rules are enforced via the `@gotgenes/pi-permission-system` extension: `apps/worker/src/temporal/activities.ts:syncCodePathDenyRules` writes a global `path` deny config once per workflow (`apps/worker/src/ai/settings-writer.ts:writeCodePathPermissionConfig`), and the executor loads the extension when that config is present (`apps/worker/src/ai/pi-executor.ts`), so denies fire across every tool and child `task` session. `vuln_classes`/`exploit` scope is locked into `session.json` on first run; resumes with a different scope fail fast (`persistOrValidateRunScope`). Credential resolution — local mode: env vars → `./.env`; npx mode: env vars → `~/.shannon/config.toml` (via `shn setup`)
- **Prompts** — Per-phase templates in `apps/worker/prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `apps/worker/prompts/shared/` via `apps/worker/src/services/prompt-manager.ts`, including `_code-path-rules.txt` (focus/avoid `[FILE]`/`[GLOB]` routing) and `_rules-of-engagement.txt` (free-text engagement rules). When `exploit: false`, `apps/worker/src/services/findings-renderer.ts` deterministically converts each `*_exploitation_queue.json` into a `*_findings.md` for report assembly — no LLM in the loop
- **Agent Harness (pi)** — Uses the **pi harness** (`@earendil-works/pi-coding-agent`, requires Node ≥ 22.19) via `apps/worker/src/ai/claude-executor.ts` (`runClaudePrompt` → `createAgentSession`, retry disabled so Temporal owns retry). Models resolve through pi-ai in `apps/worker/src/ai/models.ts` (Anthropic / Bedrock / custom base URL via `ModelRegistry`+`AuthStorage`). pi ships no JSON-schema output or `Task`/`TodoWrite` built-ins, so structured queues are captured via a `submit_exploitation_queue` custom tool (`apps/worker/src/ai/queue-schemas.ts`), and `task` (read-only child sessions) + `todo_write` are provided as custom tools (`apps/worker/src/ai/tools.ts`); the per-phase MCP collectors are pi custom tools (TypeBox `defineTool` in `apps/worker/src/mcp-server/`). Thinking level defaults to `medium`; disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (→ `off`) or set `CLAUDE_THINKING_LEVEL` (env) / `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans.
Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login
- **Agent Harness (pi)** — Uses the **pi harness** (`@earendil-works/pi-coding-agent`, requires Node ≥ 22.19) via `apps/worker/src/ai/pi-executor.ts` (`runPiPrompt` → `createAgentSession`, retry disabled so Temporal owns retry). Models resolve through pi-ai in `apps/worker/src/ai/models.ts` (Anthropic / Bedrock / custom base URL via `ModelRegistry`+`AuthStorage`). pi ships no JSON-schema output or `Task`/`TodoWrite` built-ins, so structured queues are captured via a `submit_exploitation_queue` custom tool (`apps/worker/src/ai/queue-schemas.ts`), and `task` (read-only child sessions) + `todo_write` are provided as custom tools (`apps/worker/src/ai/tools.ts`); the per-phase MCP collectors are pi custom tools (TypeBox `defineTool` in `apps/worker/src/mcp-server/`). Thinking level defaults to `medium`; disable per-scan via `CLAUDE_ADAPTIVE_THINKING=false` (→ `off`) or set `CLAUDE_THINKING_LEVEL` (env) / `core.adaptive_thinking = false` (npx TOML). Browser automation via `playwright-cli` with session isolation (`-s=<session>`). TOTP generation via `generate-totp` CLI tool. Login flow template at `apps/worker/prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth. On authenticated whitebox scans, the `validate-authentication` preflight performs the single real login and saves the browser session to `auth-state.json` in the per-session audit directory (path from `authStateFile()` in `apps/worker/src/audit/utils.ts`, derived from `generateAuditPath()`). The validation activity (`apps/worker/src/services/validate-authentication.ts`) removes any stale file from a prior run before the agent runs and verifies the file parses and contains cookies or storage before the preflight is marked complete; `logWorkflowComplete` deletes it when the workflow ends so authenticated cookies don't sit on disk between scans.
Agent prompts opt in to session reuse by `@include(shared/_shared-session.txt)` before their `<login_instructions>` block — the partial restores the session and falls through to the full login flow if verification fails. `vuln-auth`/`exploit-auth` omit the include and own their own login
- **Audit System** — Crash-safe append-only logging in `workspaces/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`apps/worker/src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`apps/worker/src/audit/log-stream.ts`) shared stream primitive
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save-deliverable` CLI script (`apps/worker/src/scripts/save-deliverable.ts`)
- **Workspaces & Resume** — Named workspaces via `-w <name>` or auto-named from URL+timestamp. Resume detects completed agents via `session.json`. `loadResumeState()` in `apps/worker/src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `apps/worker/src/temporal/workspaces.ts`
@@ -228,7 +228,7 @@ Comments must be **timeless** — no references to this conversation, refactorin
**Entry Points:** `apps/worker/src/temporal/workflows.ts`, `apps/worker/src/temporal/activities.ts`, `apps/worker/src/temporal/worker.ts`
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/claude-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to the `@gotgenes/pi-permission-system` global config), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/`
**Core Logic:** `apps/worker/src/session-manager.ts`, `apps/worker/src/ai/pi-executor.ts`, `apps/worker/src/ai/settings-writer.ts` (writes `code_path` deny rules to the `@gotgenes/pi-permission-system` global config), `apps/worker/src/config-parser.ts`, `apps/worker/src/services/` (incl. `preflight.ts`, `findings-renderer.ts`, `reporting.ts`), `apps/worker/src/audit/`
**Config:** `docker-compose.yml`, `apps/cli/infra/compose.yml`, `apps/worker/configs/`, `apps/worker/prompts/`, `tsconfig.base.json` (shared compiler options), `turbo.json`, `biome.json`
+1 -1
View File
@@ -106,7 +106,7 @@ export function resolveModelId(tier: ModelTier = 'medium', providerConfig?: Prov
/**
* Resolve the thinking level for a run.
*
* The Claude Agent SDK enabled "adaptive" thinking only on capable models; pi uses
* The previous harness enabled "adaptive" thinking only on capable models; pi uses
* explicit levels and clamps to model capability internally. We default to 'medium'
* and honour the existing CLAUDE_ADAPTIVE_THINKING=false kill switch (→ 'off'). An
* explicit CLAUDE_THINKING_LEVEL wins when set.
+2 -2
View File
@@ -326,7 +326,7 @@ export function formatErrorOutput(
} else if (context.useCleanOutput) {
lines.push(`${context.agentType} failed (${formatDuration(duration)})`);
} else {
lines.push(` Claude Code failed: ${description} (${formatDuration(duration)})`);
lines.push(` pi agent failed: ${description} (${formatDuration(duration)})`);
}
lines.push(` Error Type: ${error.constructor.name}`);
@@ -360,7 +360,7 @@ export function formatCompletionMessage(
return `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`;
}
return ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
return ` pi agent completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
}
export function formatToolUseOutput(toolName: string, input: Record<string, unknown> | undefined): string[] {
@@ -78,7 +78,7 @@ async function buildPermissionResourceLoader(cwd: string, logger: ActivityLogger
return loader;
}
export interface ClaudePromptResult {
export interface PiPromptResult {
result?: string | null | undefined;
success: boolean;
duration: number;
@@ -109,7 +109,7 @@ async function writeErrorLog(
try {
const errorLog = {
timestamp: formatTimestamp(),
agent: 'claude-executor',
agent: 'pi-executor',
error: { name: err.constructor.name, message: err.message, code: err.code, status: err.status, stack: err.stack },
context: { sourceDir, prompt: `${fullPrompt.slice(0, 200)}...`, retryable: isRetryableError(err) },
duration,
@@ -122,7 +122,7 @@ async function writeErrorLog(
}
export async function validateAgentOutput(
result: ClaudePromptResult,
result: PiPromptResult,
agentName: string | null,
sourceDir: string,
logger: ActivityLogger,
@@ -187,11 +187,11 @@ function classifyErrorText(content: string): PentestError | null {
// Low-level pi execution. Drives one agent session to completion with progress and
// audit logging. Exported for Temporal activities to call single-attempt execution.
export async function runClaudePrompt(
export async function runPiPrompt(
prompt: string,
sourceDir: string,
context: string = '',
description: string = 'Claude analysis',
description: string = 'Agent analysis',
_agentName: string | null = null,
auditSession: AuditSession | null = null,
logger: ActivityLogger,
@@ -200,7 +200,7 @@ export async function runClaudePrompt(
apiKey?: string,
deliverablesSubdir?: string,
providerConfig?: import('../types/config.js').ProviderConfig,
): Promise<ClaudePromptResult> {
): Promise<PiPromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
+1 -1
View File
@@ -6,7 +6,7 @@
/**
* Writes the @gotgenes/pi-permission-system global config from `code_path` avoid
* patterns. The executor loads the extension (see claude-executor) and pi enforces
* patterns. The executor loads the extension (see pi-executor) and pi enforces
* these path denies at the tool layer for every agent. Written to the global config
* dir under `agentDir` — the project-scoped path is gated behind project trust,
* which our headless runs do not grant; the global path is not.
+2 -2
View File
@@ -7,7 +7,7 @@
/**
* Universal custom tools registered for every agent: `task` and `todo_write`.
*
* These replace the Claude Agent SDK built-ins that pi does not ship. `task`
* These replace the previous harness built-ins that pi does not ship. `task`
* delegates a focused analysis to an in-process read-only child session (the
* Task sub-agent replacement); `todo_write` is a full-state-replace planning
* scratchpad mirrored to the workflow log.
@@ -116,7 +116,7 @@ function renderTodos(todos: readonly TodoItem[]): string {
/**
* The `todo_write` tool — a full-state-replace planning scratchpad.
*
* Mirrors Claude Code's TodoWrite: each call carries the entire list and replaces
* Mirrors the TodoWrite tool: each call carries the entire list and replaces
* stored state (no append/merge). No deliverable impact; every call is echoed to
* the workflow log so `shannon logs` shows the agent's live plan. State is per
* tool instance (one per agent execution).
+5 -5
View File
@@ -12,7 +12,7 @@
* - Load prompt template using AGENTS[agentName].promptTemplate
* - Create git checkpoint
* - Start audit logging
* - Invoke Claude SDK via runClaudePrompt
* - Invoke the pi agent via runPiPrompt
* - Spending cap check using isSpendingCapBehavior
* - Handle failure (rollback, audit)
* - Validate output using AGENTS[agentName].deliverableFilename
@@ -22,7 +22,7 @@
*/
import { fs, path } from 'zx';
import { type ClaudePromptResult, runClaudePrompt, validateAgentOutput } from '../ai/claude-executor.js';
import { type PiPromptResult, runPiPrompt, validateAgentOutput } from '../ai/pi-executor.js';
import { createQueueSubmitTool, getQueueFilename } from '../ai/queue-schemas.js';
import type { AuditSession } from '../audit/index.js';
import { authStateFile } from '../audit/utils.js';
@@ -59,7 +59,7 @@ export interface AgentExecutionInput {
interface FailAgentOpts {
attemptNumber: number;
result: ClaudePromptResult;
result: PiPromptResult;
rollbackReason: string;
errorMessage: string;
errorCode: ErrorCode;
@@ -167,7 +167,7 @@ export class AgentExecutionService {
// exploitation queue (pi has no JSON-schema output format).
const submitTool = createQueueSubmitTool(agentName, distributedConfig?.exploit ?? true);
const callerTools = [...(customTools ?? []), ...(submitTool ? [submitTool.tool] : [])];
const result: ClaudePromptResult = await runClaudePrompt(
const result: PiPromptResult = await runPiPrompt(
prompt,
repoPath,
'', // context
@@ -309,7 +309,7 @@ export class AgentExecutionService {
/**
* Convert AgentEndResult to AgentMetrics for workflow state.
*/
static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
static toMetrics(endResult: AgentEndResult, result: PiPromptResult): AgentMetrics {
return {
durationMs: endResult.duration_ms,
inputTokens: null, // Not currently exposed by SDK wrapper
+1 -1
View File
@@ -62,7 +62,7 @@ const RETRYABLE_PATTERNS = [
'internal server error',
'service unavailable',
'bad gateway',
// Claude API errors
// Provider API errors
'model unavailable',
'service temporarily unavailable',
'api error',
+2 -2
View File
@@ -11,8 +11,8 @@
* Services are pure domain logic with no Temporal dependencies.
*/
export type { ClaudePromptResult } from '../ai/claude-executor.js';
export { runClaudePrompt } from '../ai/claude-executor.js';
export type { PiPromptResult } from '../ai/pi-executor.js';
export { runPiPrompt } from '../ai/pi-executor.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { AgentExecutionService } from './agent-execution.js';
export { ConfigLoaderService } from './config-loader.js';
+1 -1
View File
@@ -350,7 +350,7 @@ async function probeCredentialsWithPi(authType: string, baseUrl?: string): Promi
return ok(undefined);
}
/** Validate credentials via a minimal Claude Agent SDK query. */
/** Validate credentials via a minimal pi session. */
async function validateCredentials(
logger: ActivityLogger,
apiKey?: string,
@@ -15,7 +15,7 @@
import { readFile, rm } from 'node:fs/promises';
import { defineTool, type ToolDefinition } from '@earendil-works/pi-coding-agent';
import { Type } from 'typebox';
import { runClaudePrompt } from '../ai/claude-executor.js';
import { runPiPrompt } from '../ai/pi-executor.js';
import type { AuditSession } from '../audit/index.js';
import { authStateFile } from '../audit/utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
@@ -123,7 +123,7 @@ export async function validateAuthentication(input: ValidateAuthInput): Promise<
const startTime = Date.now();
const submit = createAuthSubmitTool();
const result = await runClaudePrompt(
const result = await runPiPrompt(
prompt,
repoPath,
'',
@@ -219,7 +219,7 @@ function countStorageEntries(parsed: unknown, key: 'cookies' | 'origins'): numbe
}
function classifyResult(
result: import('../ai/claude-executor.js').ClaudePromptResult,
result: import('../ai/pi-executor.js').PiPromptResult,
authentication: NonNullable<DistributedConfig['authentication']>,
): Result<void, PentestError> {
if (!result.success) {
+1 -1
View File
@@ -663,7 +663,7 @@ export async function syncPlaywrightStealthConfig(input: ActivityInput): Promise
/**
* Sync code_path avoid rules into the @gotgenes/pi-permission-system global config
* so pi enforces them at the tool layer for every agent in this run. The executor
* loads the extension when this config is present (see claude-executor).
* loads the extension when this config is present (see pi-executor).
*
* Runs once per workflow before any analysis agent fires. Config is fixed for the
* lifetime of the workflow, so writing once avoids a parallel-agent race on the
+3 -3
View File
@@ -9,7 +9,7 @@
*
* Anthropic's spending cap behavior is inconsistent:
* - Sometimes a proper SDK error (billing_error)
* - Sometimes Claude responds with text about the cap
* - Sometimes the agent responds with text about the cap
* - Sometimes partial billing before cutoff
*
* This module provides defense-in-depth detection with shared pattern lists
@@ -17,7 +17,7 @@
*/
/**
* Text patterns for SDK output sniffing (what Claude says).
* Text patterns for SDK output sniffing (what the agent says).
* Used by message-handlers.ts and the behavioral heuristic.
*/
export const BILLING_TEXT_PATTERNS = [
@@ -67,7 +67,7 @@ export function matchesBillingApiPattern(message: string): boolean {
/**
* Behavioral heuristic for detecting spending cap.
*
* When Claude hits a spending cap, it often returns a short message
* When the agent hits a spending cap, it often returns a short message
* with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
*
* This combines three signals: