mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-05-26 10:27:53 +02:00
feat: add Temporal integration foundation (phase 1-2)
- Add Temporal SDK dependencies (@temporalio/client, worker, workflow, activity) - Add shared types for pipeline state, metrics, and progress queries - Add classifyErrorForTemporal() for retry behavior classification - Add docker-compose for Temporal server with SQLite persistence
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
services:
|
||||
temporal:
|
||||
image: temporalio/auto-setup:latest
|
||||
environment:
|
||||
- DB=sqlite
|
||||
- SQLITE_DB_PATH=/var/lib/temporal/temporal.db
|
||||
ports:
|
||||
- "7233:7233" # gRPC
|
||||
- "8233:8233" # Web UI
|
||||
volumes:
|
||||
- temporal-data:/var/lib/temporal
|
||||
healthcheck:
|
||||
test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "localhost:7233"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
start_period: 30s
|
||||
|
||||
volumes:
|
||||
temporal-data:
|
||||
Generated
+1856
-2
File diff suppressed because it is too large
Load Diff
@@ -9,6 +9,10 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
||||
"@temporalio/activity": "^1.11.0",
|
||||
"@temporalio/client": "^1.11.0",
|
||||
"@temporalio/worker": "^1.11.0",
|
||||
"@temporalio/workflow": "^1.11.0",
|
||||
"ajv": "^8.12.0",
|
||||
"ajv-formats": "^2.1.1",
|
||||
"boxen": "^8.0.1",
|
||||
|
||||
@@ -14,6 +14,12 @@ import type {
|
||||
PromptErrorResult,
|
||||
} from './types/errors.js';
|
||||
|
||||
// Temporal error classification for ApplicationFailure wrapping
|
||||
export interface TemporalErrorClassification {
|
||||
type: string;
|
||||
retryable: boolean;
|
||||
}
|
||||
|
||||
// Custom error class for pentest operations
|
||||
export class PentestError extends Error {
|
||||
name = 'PentestError' as const;
|
||||
@@ -190,3 +196,108 @@ export function getRetryDelay(error: Error, attempt: number): number {
|
||||
const jitter = Math.random() * 1000; // 0-1s random
|
||||
return Math.min(baseDelay + jitter, 30000); // Max 30s
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies errors for Temporal workflow retry behavior.
|
||||
* Returns error type and whether Temporal should retry.
|
||||
*
|
||||
* Used by activities to wrap errors in ApplicationFailure:
|
||||
* - Retryable errors: Temporal retries with configured backoff
|
||||
* - Non-retryable errors: Temporal fails immediately
|
||||
*/
|
||||
export function classifyErrorForTemporal(error: unknown): TemporalErrorClassification {
|
||||
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
|
||||
|
||||
// === BILLING ERRORS (Retryable with long backoff) ===
|
||||
// Anthropic returns billing as 400 invalid_request_error
|
||||
// Human can add credits, so retry with 5-30 min backoff
|
||||
if (
|
||||
message.includes('billing_error') ||
|
||||
message.includes('credit balance is too low') ||
|
||||
message.includes('insufficient credits') ||
|
||||
message.includes('usage is blocked due to insufficient credits') ||
|
||||
message.includes('please visit plans & billing') ||
|
||||
message.includes('please visit plans and billing') ||
|
||||
message.includes('usage limit reached') ||
|
||||
message.includes('quota exceeded') ||
|
||||
message.includes('daily rate limit') ||
|
||||
message.includes('limit will reset')
|
||||
) {
|
||||
return { type: 'BillingError', retryable: true };
|
||||
}
|
||||
|
||||
// === PERMANENT ERRORS (Non-retryable) ===
|
||||
|
||||
// Authentication (401) - bad API key won't fix itself
|
||||
if (
|
||||
message.includes('authentication') ||
|
||||
message.includes('api key') ||
|
||||
message.includes('401') ||
|
||||
message.includes('authentication_error')
|
||||
) {
|
||||
return { type: 'AuthenticationError', retryable: false };
|
||||
}
|
||||
|
||||
// Permission (403) - access won't be granted
|
||||
if (
|
||||
message.includes('permission') ||
|
||||
message.includes('forbidden') ||
|
||||
message.includes('403')
|
||||
) {
|
||||
return { type: 'PermissionError', retryable: false };
|
||||
}
|
||||
|
||||
// Invalid Request (400) - malformed request is permanent
|
||||
// Note: Checked AFTER billing since Anthropic billing is 400
|
||||
if (
|
||||
message.includes('invalid_request_error') ||
|
||||
message.includes('malformed') ||
|
||||
message.includes('validation')
|
||||
) {
|
||||
return { type: 'InvalidRequestError', retryable: false };
|
||||
}
|
||||
|
||||
// Request Too Large (413) - won't fit no matter how many retries
|
||||
if (
|
||||
message.includes('request_too_large') ||
|
||||
message.includes('too large') ||
|
||||
message.includes('413')
|
||||
) {
|
||||
return { type: 'RequestTooLargeError', retryable: false };
|
||||
}
|
||||
|
||||
// Configuration errors - missing files need manual fix
|
||||
if (
|
||||
message.includes('enoent') ||
|
||||
message.includes('no such file') ||
|
||||
message.includes('cli not installed')
|
||||
) {
|
||||
return { type: 'ConfigurationError', retryable: false };
|
||||
}
|
||||
|
||||
// Execution limits - max turns/budget reached
|
||||
if (
|
||||
message.includes('max turns') ||
|
||||
message.includes('budget') ||
|
||||
message.includes('execution limit') ||
|
||||
message.includes('error_max_turns') ||
|
||||
message.includes('error_max_budget')
|
||||
) {
|
||||
return { type: 'ExecutionLimitError', retryable: false };
|
||||
}
|
||||
|
||||
// Invalid target URL - bad URL format won't fix itself
|
||||
if (
|
||||
message.includes('invalid url') ||
|
||||
message.includes('invalid target') ||
|
||||
message.includes('malformed url') ||
|
||||
message.includes('invalid uri')
|
||||
) {
|
||||
return { type: 'InvalidTargetError', retryable: false };
|
||||
}
|
||||
|
||||
// === TRANSIENT ERRORS (Retryable) ===
|
||||
// Rate limits (429), server errors (5xx), network issues
|
||||
// Let Temporal retry with configured backoff
|
||||
return { type: 'TransientError', retryable: true };
|
||||
}
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import { defineQuery } from '@temporalio/workflow';
|
||||
|
||||
// === Types ===
|
||||
|
||||
export interface PipelineInput {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
configPath?: string;
|
||||
outputPath?: string;
|
||||
pipelineTestingMode?: boolean;
|
||||
workflowId?: string; // Added by client, used for audit correlation
|
||||
}
|
||||
|
||||
export interface AgentMetrics {
|
||||
durationMs: number;
|
||||
inputTokens: number | null;
|
||||
outputTokens: number | null;
|
||||
costUsd: number | null;
|
||||
numTurns: number | null;
|
||||
}
|
||||
|
||||
export interface PipelineState {
|
||||
status: 'running' | 'completed' | 'failed';
|
||||
currentPhase: string | null;
|
||||
currentAgent: string | null;
|
||||
completedAgents: string[];
|
||||
failedAgent: string | null;
|
||||
error: string | null;
|
||||
startTime: number;
|
||||
agentMetrics: Record<string, AgentMetrics>;
|
||||
}
|
||||
|
||||
// Extended state returned by getProgress query (includes computed fields)
|
||||
export interface PipelineProgress extends PipelineState {
|
||||
workflowId: string;
|
||||
elapsedMs: number;
|
||||
}
|
||||
|
||||
// === Queries ===
|
||||
|
||||
export const getProgress = defineQuery<PipelineProgress>('getProgress');
|
||||
Reference in New Issue
Block a user