mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-05-25 18:07:54 +02:00
feat(auth): auth-validation preflight + email_login credentials (#335)
* feat(preflight): add credential validation activity * refactor(preflight): tighten error retryability and dedup failure-point enum * refactor(preflight): extract resolvePromptDir helper and cap failure_detail at 250 chars * refactor(preflight): inline validator rules into intro paragraph * refactor(preflight): restyle validator prompt with XML tags and tool list * chore(preflight): bump auth validation timeout to 10 minutes * feat: provision playwright stealth config for browser auto-discovery * feat(stealth): strengthen browser fingerprint with chrome.runtime and realistic plugins * feat(prompts): add pipeline-testing stub for validate-authentication * refactor(stealth): swap zx for node:fs in playwright-config-writer * feat(auth): add email_login credentials with login-flow substitution * fix(auth): propagate email_login through credentials sanitizer * fix(config): drop dangerous-pattern check on credentials.password * feat(auth-validation): instruct agent to mask sensitive values in failure_detail * docs(auth): document email_login credentials for magic-link and email-OTP flows * docs(auth): add login_flow authoring guide with placeholder reference * feat(auth): make credentials.password optional for passwordless flows * docs(auth): drop redundant placeholder hint from login_flow examples
This commit is contained in:
@@ -396,6 +396,13 @@ authentication:
|
||||
password: "yourpassword"
|
||||
totp_secret: "LB2E2RX7XFHSTGCK" # Optional for 2FA
|
||||
|
||||
# Optional mailbox credentials for magic-link / email-OTP flows.
|
||||
# email_login:
|
||||
# address: "inbox@example.com"
|
||||
# password: "mailbox-password"
|
||||
# totp_secret: "JBSWY3DPEHPK3PXP"
|
||||
|
||||
# Natural language instructions for login flow
|
||||
login_flow:
|
||||
- "Type $username into the email field"
|
||||
- "Type $password into the password field"
|
||||
@@ -445,9 +452,32 @@ npx @keygraph/shannon start -u https://example.com -r /path/to/repo -c ./my-app-
|
||||
|
||||
</details>
|
||||
|
||||
#### TOTP Setup for 2FA
|
||||
#### Writing Login Flow
|
||||
|
||||
If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
|
||||
Log in once in a fresh incognito/private window. Write the steps in the same order you perform them:
|
||||
- When you type into a field, reference the field by its exact label or placeholder.
|
||||
- When you click a button, reference the exact button text.
|
||||
|
||||
Supported placeholders:
|
||||
|
||||
- `$username`
|
||||
- `$password`
|
||||
- `$totp`
|
||||
- `$email_address`
|
||||
- `$email_password`
|
||||
- `$email_totp`
|
||||
|
||||
At runtime, Shannon replaces these placeholders with the credentials passed in the config.
|
||||
|
||||
```yaml
|
||||
login_flow:
|
||||
- "Type $username in <exact email field label or placeholder>"
|
||||
- "Click <exact button text>"
|
||||
- "Type $password in <exact password field label or placeholder>"
|
||||
- "Click <exact button text>"
|
||||
- "If prompted for 2FA, type $totp in <exact code field label or placeholder>"
|
||||
- "Click <exact button text>"
|
||||
```
|
||||
|
||||
#### Adaptive Thinking (Opus 4.6/4.7)
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
const workspacePath = path.join(workspacesDir, workspace);
|
||||
fs.mkdirSync(workspacePath, { recursive: true });
|
||||
fs.chmodSync(workspacePath, 0o777);
|
||||
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
|
||||
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli', '.playwright']) {
|
||||
const dirPath = path.join(workspacePath, dir);
|
||||
fs.mkdirSync(dirPath, { recursive: true });
|
||||
fs.chmodSync(dirPath, 0o777);
|
||||
@@ -76,6 +76,7 @@ export async function start(args: StartArgs): Promise<void> {
|
||||
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
|
||||
fs.mkdirSync(path.join(shannonDir, dir), { recursive: true });
|
||||
}
|
||||
fs.mkdirSync(path.join(repo.hostPath, '.playwright'), { recursive: true });
|
||||
|
||||
const credentialsPath = getCredentialsPath();
|
||||
const hasCredentials = fs.existsSync(credentialsPath);
|
||||
|
||||
@@ -185,11 +185,12 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
|
||||
args.push('-v', `${opts.workspacesDir}:/app/workspaces`);
|
||||
args.push('-v', `${opts.repo.hostPath}:${opts.repo.containerPath}:ro`);
|
||||
|
||||
// Writable overlays: shadow .shannon/ inside the :ro repo with workspace-backed dirs
|
||||
// Writable overlays: shadow .shannon/ and .playwright/ inside the :ro repo with workspace-backed dirs
|
||||
const workspacePath = path.join(opts.workspacesDir, opts.workspace);
|
||||
args.push('-v', `${path.join(workspacePath, 'deliverables')}:${opts.repo.containerPath}/.shannon/deliverables`);
|
||||
args.push('-v', `${path.join(workspacePath, 'scratchpad')}:${opts.repo.containerPath}/.shannon/scratchpad`);
|
||||
args.push('-v', `${path.join(workspacePath, '.playwright-cli')}:${opts.repo.containerPath}/.shannon/.playwright-cli`);
|
||||
args.push('-v', `${path.join(workspacePath, '.playwright')}:${opts.repo.containerPath}/.playwright`);
|
||||
|
||||
// Local mode: mount prompts for live editing
|
||||
if (opts.promptsDir) {
|
||||
|
||||
@@ -39,9 +39,33 @@
|
||||
"type": "string",
|
||||
"pattern": "^[A-Za-z2-7]+=*$",
|
||||
"description": "TOTP secret for two-factor authentication (Base32 encoded, case insensitive)"
|
||||
},
|
||||
"email_login": {
|
||||
"type": "object",
|
||||
"description": "Email account credentials for magic-link or OTP follow-through flows",
|
||||
"properties": {
|
||||
"address": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "Email address used to receive magic links or OTPs"
|
||||
},
|
||||
"password": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 255,
|
||||
"description": "Password for the email account"
|
||||
},
|
||||
"totp_secret": {
|
||||
"type": "string",
|
||||
"pattern": "^[A-Za-z2-7]+=*$",
|
||||
"description": "TOTP secret for the email account's two-factor authentication (Base32 encoded)"
|
||||
}
|
||||
},
|
||||
"required": ["address", "password"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": ["username", "password"],
|
||||
"required": ["username"],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"login_flow": {
|
||||
|
||||
@@ -33,6 +33,12 @@ authentication:
|
||||
password: "testpassword"
|
||||
totp_secret: "JBSWY3DPEHPK3PXP" # Optional TOTP secret for 2FA
|
||||
|
||||
# Optional mailbox credentials for magic-link / email-OTP flows.
|
||||
# email_login:
|
||||
# address: "inbox@example.com"
|
||||
# password: "mailbox-password"
|
||||
# totp_secret: "JBSWY3DPEHPK3PXP"
|
||||
|
||||
# Natural language instructions for login flow
|
||||
login_flow:
|
||||
- "Type $username into the email field"
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
Return the structured verdict `{ "login_success": true }` and stop.
|
||||
@@ -0,0 +1,25 @@
|
||||
<role>
|
||||
You are a credential validator agent. Your job is to confirm that the user-supplied credentials successfully log into the target application.
|
||||
</role>
|
||||
|
||||
<objective>
|
||||
This runs as a preflight check for our AI pentester. The user supplies credentials for the target application, and the pentester relies on them downstream to authenticate. Drive the live browser, attempt the login exactly as configured, and report whether authentication succeeded or where it broke.
|
||||
</objective>
|
||||
|
||||
<target_authentication>
|
||||
{{AUTH_CONTEXT}}
|
||||
</target_authentication>
|
||||
|
||||
<cli_tools>
|
||||
- **Browser Automation (playwright-cli skill):** Invoke the `playwright-cli` skill to learn available commands. Always pass `-s={{PLAYWRIGHT_SESSION}}` to every command for session isolation.
|
||||
- **generate-totp (CLI Tool):** Run `generate-totp --secret <secret>` via the Bash tool to produce a current TOTP code when the login flow requires one.
|
||||
</cli_tools>
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
<critical>
|
||||
- Submit each field (username, password, captcha, TOTP) exactly once.
|
||||
- Any rejection = auth error: return `login_success: false` and stop. Do not retry.
|
||||
</critical>
|
||||
@@ -0,0 +1,90 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Writes <sourceDir>/.playwright/cli.config.json with stealth defaults so
|
||||
* `playwright-cli open` auto-loads them from the agent's cwd. Skipped when a
|
||||
* config already exists so user-provided files are never clobbered.
|
||||
*
|
||||
* NOTE: Playwright's MCP browser config treats `initScript` entries as file
|
||||
* paths, not inline source. The stealth script is written alongside the config
|
||||
* and referenced by absolute path. Inline strings silently fail the daemon.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
/**
 * Reports whether `p` can be accessed on disk.
 *
 * Resolves to `true` when `fs.access` succeeds and to `false` when it rejects
 * for any reason; this function itself never rejects.
 */
async function pathExists(p: string): Promise<boolean> {
  return fs.access(p).then(
    () => true,
    () => false,
  );
}
|
||||
|
||||
const STEALTH_INIT_SCRIPT = `delete Object.getPrototypeOf(navigator).webdriver;
|
||||
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => {
|
||||
const arr = [
|
||||
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
|
||||
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
|
||||
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
|
||||
];
|
||||
arr.__proto__ = PluginArray.prototype;
|
||||
return arr;
|
||||
},
|
||||
});
|
||||
|
||||
window.chrome = window.chrome || {};
|
||||
window.chrome.runtime = window.chrome.runtime || {
|
||||
PlatformOs: { MAC: 'mac', WIN: 'win', ANDROID: 'android', CROS: 'cros', LINUX: 'linux', OPENBSD: 'openbsd' },
|
||||
PlatformArch: { ARM: 'arm', X86_32: 'x86-32', X86_64: 'x86-64' },
|
||||
PlatformNaclArch: { ARM: 'arm', X86_32: 'x86-32', X86_64: 'x86-64' },
|
||||
RequestUpdateCheckStatus: { THROTTLED: 'throttled', NO_UPDATE: 'no_update', UPDATE_AVAILABLE: 'update_available' },
|
||||
OnInstalledReason: { INSTALL: 'install', UPDATE: 'update', CHROME_UPDATE: 'chrome_update', SHARED_MODULE_UPDATE: 'shared_module_update' },
|
||||
OnRestartRequiredReason: { APP_UPDATE: 'app_update', OS_UPDATE: 'os_update', PERIODIC: 'periodic' },
|
||||
};
|
||||
`;
|
||||
|
||||
/**
 * Builds the playwright-cli config object that is serialized to
 * `cli.config.json`.
 *
 * @param initScriptPath Path of the stealth init script; it is placed as the
 *   single entry of `browser.initScript`.
 * @returns A plain object with the Chromium launch options, context options
 *   and init-script list. Key order matches the serialized file layout.
 */
function buildStealthConfig(initScriptPath: string) {
  // Launch flags that suppress the automation markers Chromium sets by default.
  const launchOptions = {
    headless: true,
    args: ['--disable-blink-features=AutomationControlled'],
    ignoreDefaultArgs: ['--enable-automation'],
  };

  // Desktop-like browsing context with a non-headless user agent string.
  const contextOptions = {
    viewport: { width: 1920, height: 1080 },
    locale: 'en-US',
    extraHTTPHeaders: { 'Accept-Language': 'en-US,en;q=0.9' },
    userAgent:
      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  };

  const browser = {
    browserName: 'chromium',
    launchOptions,
    contextOptions,
    initScript: [initScriptPath],
  };

  return { browser };
}
|
||||
|
||||
/** Outcome of {@link writePlaywrightStealthConfig}. */
export type StealthConfigWriteResult = 'wrote' | 'skipped-existing';

/**
 * Writes `<sourceDir>/.playwright/cli.config.json` plus the stealth init
 * script it references, unless a config file is already present.
 *
 * @param sourceDir Directory that contains (or will contain) `.playwright/`.
 *   May be relative; the init-script path stored in the config is always
 *   resolved to an absolute path because the config treats `initScript`
 *   entries as file paths.
 * @returns `result: 'wrote'` when a new config was created, or
 *   `result: 'skipped-existing'` when a config already existed (including one
 *   created concurrently between the existence check and the write).
 *   `configPath` is `<sourceDir>/.playwright/cli.config.json` in both cases.
 * @throws Any filesystem error other than the config file already existing.
 */
export async function writePlaywrightStealthConfig(
  sourceDir: string,
): Promise<{ result: StealthConfigWriteResult; configPath: string }> {
  const playwrightDir = path.join(sourceDir, '.playwright');
  const configPath = path.join(playwrightDir, 'cli.config.json');

  // Fast path: never touch a directory that already carries a config.
  if (await pathExists(configPath)) {
    return { result: 'skipped-existing', configPath };
  }

  // path.resolve (not path.join) so a relative sourceDir still yields the
  // absolute script path the config requires.
  const initScriptPath = path.resolve(playwrightDir, 'scripts', 'stealth.js');
  await fs.mkdir(path.dirname(initScriptPath), { recursive: true });
  await fs.writeFile(initScriptPath, STEALTH_INIT_SCRIPT);

  const serialized = JSON.stringify(buildStealthConfig(initScriptPath), null, 2);
  try {
    // 'wx' fails if the file appeared after the existence check above, so a
    // user-provided (or concurrently written) config is never overwritten.
    await fs.writeFile(configPath, serialized, { flag: 'wx' });
  } catch (error) {
    if (error instanceof Error && 'code' in error && error.code === 'EEXIST') {
      return { result: 'skipped-existing', configPath };
    }
    throw error;
  }

  return { result: 'wrote', configPath };
}
|
||||
@@ -428,15 +428,6 @@ const performSecurityValidation = (config: Config): void => {
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
if (pattern.test(auth.credentials.password)) {
|
||||
throw new PentestError(
|
||||
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'credentials.password', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -709,8 +700,17 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
|
||||
login_url: auth.login_url.trim(),
|
||||
credentials: {
|
||||
username: auth.credentials.username.trim(),
|
||||
password: auth.credentials.password,
|
||||
...(auth.credentials.password && { password: auth.credentials.password }),
|
||||
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
|
||||
...(auth.credentials.email_login && {
|
||||
email_login: {
|
||||
address: auth.credentials.email_login.address.trim(),
|
||||
password: auth.credentials.email_login.password,
|
||||
...(auth.credentials.email_login.totp_secret && {
|
||||
totp_secret: auth.credentials.email_login.totp_secret.trim(),
|
||||
}),
|
||||
},
|
||||
}),
|
||||
},
|
||||
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
|
||||
success_condition: {
|
||||
|
||||
@@ -138,6 +138,9 @@ function classifyByErrorCode(code: ErrorCode, retryableFromError: boolean): { ty
|
||||
case ErrorCode.AUTH_FAILED:
|
||||
return { type: 'AuthenticationError', retryable: false };
|
||||
|
||||
case ErrorCode.AUTH_LOGIN_FAILED:
|
||||
return { type: 'AuthLoginFailedError', retryable: false };
|
||||
|
||||
case ErrorCode.BILLING_ERROR:
|
||||
return { type: 'BillingError', retryable: true };
|
||||
|
||||
|
||||
@@ -180,6 +180,21 @@ async function buildLoginInstructions(
|
||||
`generated TOTP code using secret "${authentication.credentials.totp_secret}"`,
|
||||
);
|
||||
}
|
||||
// Substitute the mailbox placeholders ($email_address, $email_password,
// $email_totp) in the user-authored login steps.
//
// Replacement values are supplied through function replacers on purpose: when
// a plain string is passed as the replacement, String.prototype.replace
// interprets "$$", "$&", "$`" and "$'" inside it as substitution patterns,
// which would silently corrupt credentials that contain a "$".
if (authentication.credentials.email_login) {
  const {
    address: emailAddress,
    password: emailPassword,
    totp_secret: emailTotpSecret,
  } = authentication.credentials.email_login;

  if (emailAddress) {
    userInstructions = userInstructions.replace(/\$email_address/g, () => emailAddress);
  }
  if (emailPassword) {
    userInstructions = userInstructions.replace(/\$email_password/g, () => emailPassword);
  }
  if (emailTotpSecret) {
    // The agent receives the secret and is told to generate the code itself.
    userInstructions = userInstructions.replace(
      /\$email_totp/g,
      () => `generated TOTP code using secret "${emailTotpSecret}"`,
    );
  }
}
|
||||
}
|
||||
|
||||
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
|
||||
@@ -352,6 +367,14 @@ async function interpolateVariables(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Picks the directory prompts are loaded from.
 *
 * - No override (undefined or empty string): the default `PROMPTS_DIR`.
 * - Absolute override: returned unchanged.
 * - Relative override: resolved against `SHANNON_WORKER_ROOT` when that
 *   environment variable is set, otherwise against the current working
 *   directory, so callers do not depend on the process cwd.
 */
function resolvePromptDir(promptDir: string | undefined): string {
  if (promptDir) {
    return path.isAbsolute(promptDir)
      ? promptDir
      : path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), promptDir);
  }
  return PROMPTS_DIR;
}
|
||||
|
||||
// Pure function: Load and interpolate prompt template
|
||||
export async function loadPrompt(
|
||||
promptName: string,
|
||||
@@ -362,8 +385,7 @@ export async function loadPrompt(
|
||||
promptDir?: string,
|
||||
): Promise<string> {
|
||||
try {
|
||||
// 1. Resolve prompt file path (promptDir override → default PROMPTS_DIR)
|
||||
const basePromptsDir = promptDir ?? PROMPTS_DIR;
|
||||
const basePromptsDir = resolvePromptDir(promptDir);
|
||||
const promptsDir = pipelineTestingMode ? path.join(basePromptsDir, 'pipeline-testing') : basePromptsDir;
|
||||
const promptPath = path.join(promptsDir, `${promptName}.txt`);
|
||||
|
||||
|
||||
@@ -0,0 +1,194 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Authentication validation service.
|
||||
*
|
||||
* Drives a real browser via the playwright-cli skill to confirm
|
||||
* user-supplied credentials log in successfully, before the pentest
|
||||
* pipeline burns hours on broken auth.
|
||||
*/
|
||||
|
||||
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { runClaudePrompt } from '../ai/claude-executor.js';
|
||||
import type { AuditSession } from '../audit/index.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { AgentEndResult } from '../types/audit.js';
|
||||
import type { DistributedConfig, ProviderConfig } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { err, ok, type Result } from '../types/result.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { loadPrompt } from './prompt-manager.js';
|
||||
|
||||
// Closed set of stages at which the validator agent may report a login failure.
const FAILURE_POINTS = ['username_or_password', 'totp_secret', 'out_of_band'] as const;

// Union of the literal values listed in FAILURE_POINTS.
type AuthFailurePoint = (typeof FAILURE_POINTS)[number];

/**
 * Type guard: true only when `v` is a string equal to one of the
 * FAILURE_POINTS entries.
 */
function isAuthFailurePoint(v: unknown): v is AuthFailurePoint {
  if (typeof v !== 'string') {
    return false;
  }
  return FAILURE_POINTS.some((point) => point === v);
}
|
||||
|
||||
// NOTE: SDK's AJV validator expects draft-07; Zod defaults to draft-2020-12,
// which causes the SDK to silently skip structured output.
//
// Shape of the verdict requested from the validator agent. Only
// `login_success` is mandatory at the schema level; the two failure fields are
// optional here even though the `failure_detail` description asks the agent to
// supply it whenever login fails.
const AuthValidationSchema = z.object({
  login_success: z.boolean(),
  // Restricted to the values listed in FAILURE_POINTS.
  failure_point: z.enum(FAILURE_POINTS).optional(),
  // Capped at 250 characters by the schema.
  failure_detail: z
    .string()
    .max(250)
    .optional()
    .describe(
      'Free-form 1-2 sentence diagnostic of what the page showed (error messages, page state) when login failed. Required when login_success is false. Mask any sensitive values.',
    ),
});

// Static type inferred from the schema above.
type AuthValidationVerdict = z.infer<typeof AuthValidationSchema>;

// Structured-output format handed to the agent run; the JSON Schema is emitted
// with the draft-07 target for the reason given in the NOTE above.
const VALIDATION_SCHEMA: JsonSchemaOutputFormat = {
  type: 'json_schema',
  schema: z.toJSONSchema(AuthValidationSchema, { target: 'draft-07' }) as Record<string, unknown>,
};
|
||||
|
||||
const AGENT_NAME = 'validate-authentication';
|
||||
|
||||
/**
 * Arguments accepted by `validateAuthentication`.
 */
export interface ValidateAuthInput {
  /** Loaded config; validation is skipped when it has no `authentication` block. Also passed to `loadPrompt`. */
  readonly distributedConfig: DistributedConfig;
  /** Passed to `loadPrompt` as a template variable and to `runClaudePrompt`. */
  readonly repoPath: string;
  /** Passed to `loadPrompt` as a template variable. */
  readonly webUrl: string;
  /** Logger used for progress messages and forwarded to `loadPrompt` / `runClaudePrompt`. */
  readonly logger: ActivityLogger;
  /** Audit session on which the agent start and end are recorded. */
  readonly auditSession: AuditSession;
  /** Attempt counter recorded with the audit start/end entries. */
  readonly attemptNumber: number;
  /** Forwarded unchanged to `runClaudePrompt`. */
  readonly apiKey?: string;
  /** Forwarded unchanged to `runClaudePrompt`. */
  readonly providerConfig?: ProviderConfig;
  /** Forwarded unchanged to `runClaudePrompt`. */
  readonly deliverablesSubdir?: string;
  /** Optional prompt directory override forwarded to `loadPrompt`. */
  readonly promptDir?: string;
  /** Forwarded to `loadPrompt`; treated as `false` when omitted. */
  readonly pipelineTestingMode?: boolean;
}
|
||||
|
||||
/**
 * Runs the credential-validator agent and classifies its verdict.
 *
 * Returns `ok` immediately when the config has no `authentication` block.
 * Otherwise it loads the validator prompt, records the agent start on the
 * audit session, runs the agent with the structured-output schema, classifies
 * the result, records the agent end (duration, cost, success, model, error
 * message) and returns the classification.
 *
 * @param input See {@link ValidateAuthInput}.
 * @returns `ok(undefined)` on successful login (or when there is nothing to
 *   validate); otherwise the `PentestError` produced by `classifyResult`.
 */
export async function validateAuthentication(input: ValidateAuthInput): Promise<Result<void, PentestError>> {
  const { distributedConfig, repoPath, logger, auditSession, attemptNumber } = input;

  const authentication = distributedConfig.authentication;
  if (!authentication) {
    return ok(undefined);
  }

  logger.info('Validating authentication credentials with live browser...', {
    loginUrl: authentication.login_url,
    loginType: authentication.login_type,
  });

  const templateVariables = { webUrl: input.webUrl, repoPath };
  const testingMode = input.pipelineTestingMode ?? false;
  const prompt = await loadPrompt(
    AGENT_NAME,
    templateVariables,
    distributedConfig,
    testingMode,
    logger,
    input.promptDir,
  );

  await auditSession.startAgent(AGENT_NAME, prompt, attemptNumber);
  const startedAt = Date.now();

  const agentResult = await runClaudePrompt(
    prompt,
    repoPath,
    '',
    'Authentication validation',
    AGENT_NAME,
    auditSession,
    logger,
    'medium',
    VALIDATION_SCHEMA,
    input.apiKey,
    input.deliverablesSubdir,
    input.providerConfig,
  );

  const outcome = classifyResult(agentResult, authentication);

  // Optional fields are only attached when they carry a value.
  const endResult: AgentEndResult = {
    attemptNumber,
    duration_ms: Date.now() - startedAt,
    cost_usd: agentResult.cost || 0,
    success: outcome.ok,
    ...(agentResult.model !== undefined && { model: agentResult.model }),
    ...(!outcome.ok && { error: outcome.error.message }),
  };
  await auditSession.endAgent(AGENT_NAME, endResult);

  return outcome;
}
|
||||
|
||||
/**
 * Turns the validator agent's run result into a `Result`.
 *
 * - Agent run failed: `AGENT_EXECUTION_FAILED`, retryable per
 *   `result.retryable` (defaults to retryable).
 * - Structured output missing, not an object, or lacking a boolean
 *   `login_success`: retryable `AGENT_EXECUTION_FAILED`. A malformed verdict
 *   says nothing about the credentials, so it must not be reported as a
 *   credential rejection.
 * - `login_success === true`: `ok`.
 * - `login_success === false`: non-retryable `AUTH_LOGIN_FAILED` carrying
 *   `failurePoint` (falls back to `'out_of_band'` when absent or unknown) and
 *   `failureDetail` (falls back to a generic message when absent, blank or
 *   not a string).
 *
 * @param result Outcome of the validator agent run.
 * @param authentication The authentication block being validated; its
 *   `login_url` and `login_type` are attached to the failure context.
 */
function classifyResult(
  result: import('../ai/claude-executor.js').ClaudePromptResult,
  authentication: NonNullable<DistributedConfig['authentication']>,
): Result<void, PentestError> {
  if (!result.success) {
    const detail = result.error ?? 'Validator agent terminated unexpectedly.';
    return err(
      new PentestError(
        `Authentication validator failed to run: ${detail}`,
        'validation',
        result.retryable ?? true,
        { originalError: detail, errorType: result.errorType, cost: result.cost },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  // The structured output is external data: inspect every field at runtime
  // instead of trusting the declared verdict type.
  const rawOutput: unknown = result.structuredOutput;
  const verdict =
    rawOutput !== null && typeof rawOutput === 'object'
      ? (rawOutput as Partial<Record<keyof AuthValidationVerdict, unknown>>)
      : undefined;
  const loginSuccess = verdict?.login_success;

  if (typeof loginSuccess !== 'boolean') {
    return err(
      new PentestError(
        'Authentication validator did not return a structured verdict.',
        'validation',
        true,
        { cost: result.cost },
        ErrorCode.AGENT_EXECUTION_FAILED,
      ),
    );
  }

  if (loginSuccess) {
    return ok(undefined);
  }

  const reportedPoint = verdict?.failure_point;
  const failurePoint: AuthFailurePoint = isAuthFailurePoint(reportedPoint) ? reportedPoint : 'out_of_band';

  const reportedDetail = verdict?.failure_detail;
  const trimmedDetail = typeof reportedDetail === 'string' ? reportedDetail.trim() : '';
  const failureDetail = trimmedDetail || 'Login failed without a specific diagnostic from the validator agent.';

  return err(
    new PentestError(
      `Authentication failed at "${failurePoint}": ${failureDetail}`,
      'config',
      false,
      {
        failurePoint,
        failureDetail,
        loginUrl: authentication.login_url,
        loginType: authentication.login_type,
        cost: result.cost,
      },
      ErrorCode.AUTH_LOGIN_FAILED,
    ),
  );
}
|
||||
@@ -151,6 +151,9 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
|
||||
// Playwright session mapping - assigns each agent to a specific session for browser isolation
|
||||
// Keys are promptTemplate values from AGENTS registry
|
||||
export const PLAYWRIGHT_SESSION_MAPPING: Record<string, PlaywrightSession> = Object.freeze({
|
||||
// Runs before any agent — non-concurrent, so agent1 is safe to share
|
||||
'validate-authentication': 'agent1',
|
||||
|
||||
// Phase 1: Pre-reconnaissance
|
||||
'pre-recon-code': 'agent1',
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
|
||||
import { writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js';
|
||||
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
@@ -28,11 +29,12 @@ import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
|
||||
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
|
||||
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
|
||||
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
|
||||
import { renderFindingsFromQueues } from '../services/findings-renderer.js';
|
||||
import { executeGitCommandWithRetry } from '../services/git-manager.js';
|
||||
import { runPreflightChecks } from '../services/preflight.js';
|
||||
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
|
||||
import { renderFindingsFromQueues } from '../services/findings-renderer.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
|
||||
import { validateAuthentication } from '../services/validate-authentication.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
@@ -184,11 +186,7 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
|
||||
attemptNumber,
|
||||
...(input.apiKey !== undefined && { apiKey: input.apiKey }),
|
||||
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
|
||||
...(input.promptDir !== undefined && {
|
||||
promptDir: path.isAbsolute(input.promptDir)
|
||||
? input.promptDir
|
||||
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
|
||||
}),
|
||||
...(input.promptDir !== undefined && { promptDir: input.promptDir }),
|
||||
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
|
||||
},
|
||||
auditSession,
|
||||
@@ -375,6 +373,95 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Temporal activity that validates the configured login credentials.
 *
 * Returns without error when the config cannot be loaded (logged as a
 * warning) or when it has no authentication block. When validation fails,
 * throws an `ApplicationFailure` whose details carry the phase, attempt
 * number, elapsed milliseconds and, when present on the error context,
 * `failurePoint` and `failureDetail`. Heartbeats are emitted on an interval
 * for the whole duration of the activity.
 */
export async function runAuthenticationValidation(input: ActivityInput): Promise<void> {
  const startTime = Date.now();
  const attemptNumber = Context.current().info.attempt;

  const heartbeatInterval = setInterval(() => {
    heartbeat({
      phase: 'auth-validation',
      elapsedSeconds: Math.floor((Date.now() - startTime) / 1000),
      attempt: attemptNumber,
    });
  }, HEARTBEAT_INTERVAL_MS);

  // Shared by both failure paths: classify, truncate, wrap and throw.
  const throwAsFailure = (cause: unknown, rawMessage: string, extraDetails: Record<string, unknown>): void => {
    const classified = classifyErrorForTemporal(cause);
    const message = truncateErrorMessage(rawMessage);
    const details = [
      {
        phase: 'auth-validation',
        attemptNumber,
        elapsed: Date.now() - startTime,
        ...extraDetails,
      },
    ];

    const failure = classified.retryable
      ? ApplicationFailure.create({ message, type: classified.type, details })
      : ApplicationFailure.nonRetryable(message, classified.type, details);
    truncateStackTrace(failure);
    throw failure;
  };

  try {
    const logger = createActivityLogger();

    const sessionMetadata = buildSessionMetadata(input);
    const container = getOrCreateContainer(input.workflowId, sessionMetadata, buildContainerConfig(input));
    const configResult = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML);
    if (isErr(configResult)) {
      // runPreflightValidation already validated parsing, so this is unexpected.
      logger.warn(`runAuthenticationValidation: config load failed unexpectedly: ${configResult.error.message}`);
      return;
    }

    const distributedConfig = configResult.value;
    if (!distributedConfig?.authentication) {
      logger.info('No authentication configured — skipping credential validation');
      return;
    }

    const auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize(input.workflowId);

    const result = await validateAuthentication({
      distributedConfig,
      repoPath: input.repoPath,
      webUrl: input.webUrl,
      logger,
      auditSession,
      attemptNumber,
      ...(input.apiKey !== undefined && { apiKey: input.apiKey }),
      ...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
      ...(input.deliverablesSubdir !== undefined && { deliverablesSubdir: input.deliverablesSubdir }),
      ...(input.promptDir !== undefined && { promptDir: input.promptDir }),
      ...(input.pipelineTestingMode !== undefined && { pipelineTestingMode: input.pipelineTestingMode }),
    });

    if (isErr(result)) {
      const ctx = result.error.context;
      throwAsFailure(result.error, result.error.message, {
        ...(ctx.failurePoint !== undefined && { failurePoint: ctx.failurePoint }),
        ...(ctx.failureDetail !== undefined && { failureDetail: ctx.failureDetail }),
      });
    }
  } catch (error) {
    // Failures built above are already in their final form.
    if (error instanceof ApplicationFailure) {
      throw error;
    }

    throwAsFailure(error, error instanceof Error ? error.message : String(error), {});
  } finally {
    clearInterval(heartbeatInterval);
  }
}
|
||||
|
||||
/**
|
||||
* Initialize a private git repository inside the workspace deliverables directory.
|
||||
* Idempotent — skips if .git already exists (resume case).
|
||||
@@ -400,6 +487,24 @@ export async function initDeliverableGit(input: ActivityInput): Promise<void> {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Activity wrapper around `writePlaywrightStealthConfig` for the repo at
 * `input.repoPath`.
 *
 * The written config lets `playwright-cli open` pick up anti-detection
 * defaults from the agent's cwd (Blink AutomationControlled flag disabled,
 * `--enable-automation` default dropped, HeadlessChrome user agent
 * overridden). An existing `.playwright/cli.config.json` is left in place;
 * either outcome is logged.
 */
export async function syncPlaywrightStealthConfig(input: ActivityInput): Promise<void> {
  const logger = createActivityLogger();
  const outcome = await writePlaywrightStealthConfig(input.repoPath);

  const message =
    outcome.result === 'skipped-existing'
      ? `Playwright stealth config: leaving existing ${outcome.configPath} in place`
      : `Playwright stealth config: wrote ${outcome.configPath}`;
  logger.info(message);
}
|
||||
|
||||
/**
|
||||
* Sync code_path avoid rules into Claude's user-scope settings.json so the
|
||||
* SDK enforces them at the tool layer for every agent in this run.
|
||||
@@ -879,17 +984,7 @@ export async function generateReportOutputActivity(input: ActivityInput): Promis
|
||||
|
||||
const logger = createActivityLogger();
|
||||
|
||||
// Resolve promptDir against the worker root so providers are cwd-independent.
|
||||
const resolvedInput: ActivityInput = {
|
||||
...input,
|
||||
...(input.promptDir !== undefined && {
|
||||
promptDir: path.isAbsolute(input.promptDir)
|
||||
? input.promptDir
|
||||
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
|
||||
}),
|
||||
};
|
||||
|
||||
const result = await container.reportOutputProvider.generate(resolvedInput, logger);
|
||||
const result = await container.reportOutputProvider.generate(input, logger);
|
||||
if (result.outputPath) {
|
||||
logger.info(`Report output written to ${result.outputPath}`);
|
||||
}
|
||||
|
||||
@@ -76,6 +76,7 @@ const PRODUCTION_RETRY = {
|
||||
'ConfigurationError',
|
||||
'InvalidTargetError',
|
||||
'ExecutionLimitError',
|
||||
'AuthLoginFailedError',
|
||||
],
|
||||
};
|
||||
|
||||
@@ -134,6 +135,22 @@ const preflightActs = proxyActivities<typeof activities>({
|
||||
retry: PREFLIGHT_RETRY,
|
||||
});
|
||||
|
||||
// Credential rejection is not retryable; transient SDK errors get 3 attempts.
|
||||
const AUTH_VALIDATION_RETRY = {
|
||||
initialInterval: '10 seconds',
|
||||
maximumInterval: '1 minute',
|
||||
backoffCoefficient: 2,
|
||||
maximumAttempts: 3,
|
||||
nonRetryableErrorTypes: PRODUCTION_RETRY.nonRetryableErrorTypes,
|
||||
};
|
||||
|
||||
// Browser-driving validation measured at 60–180s; 10 min start-to-close leaves headroom for slow SSO/MFA flows.
|
||||
const authValidationActs = proxyActivities<typeof activities>({
|
||||
startToCloseTimeout: '10 minutes',
|
||||
heartbeatTimeout: '10 minutes',
|
||||
retry: AUTH_VALIDATION_RETRY,
|
||||
});
|
||||
|
||||
/**
|
||||
* Compute aggregated metrics from the current pipeline state.
|
||||
* Called on both success and failure to provide partial metrics.
|
||||
@@ -420,6 +437,18 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
await preflightActs.runPreflightValidation(activityInput);
|
||||
log.info('Preflight validation passed');
|
||||
|
||||
// === Playwright stealth config ===
|
||||
// Write the playwright-cli config before any browser session opens so the
|
||||
// validator and downstream agents inherit anti-detection defaults.
|
||||
await preflightActs.syncPlaywrightStealthConfig(activityInput);
|
||||
|
||||
// === Authentication Validation ===
|
||||
state.currentPhase = 'auth-validation';
|
||||
state.currentAgent = 'validate-authentication';
|
||||
await authValidationActs.runAuthenticationValidation(activityInput);
|
||||
state.currentAgent = null;
|
||||
log.info('Authentication validation passed');
|
||||
|
||||
// === Initialize Deliverables Git ===
|
||||
await a.initDeliverableGit(activityInput);
|
||||
|
||||
|
||||
@@ -41,12 +41,19 @@ export interface SuccessCondition {
|
||||
value: string;
|
||||
}
|
||||
|
||||
export interface Credentials {
|
||||
username: string;
|
||||
export interface EmailLogin {
|
||||
address: string;
|
||||
password: string;
|
||||
totp_secret?: string;
|
||||
}
|
||||
|
||||
export interface Credentials {
|
||||
username: string;
|
||||
password?: string;
|
||||
totp_secret?: string;
|
||||
email_login?: EmailLogin;
|
||||
}
|
||||
|
||||
export interface Authentication {
|
||||
login_type: LoginType;
|
||||
login_url: string;
|
||||
|
||||
@@ -44,6 +44,7 @@ export enum ErrorCode {
|
||||
REPO_NOT_FOUND = 'REPO_NOT_FOUND',
|
||||
TARGET_UNREACHABLE = 'TARGET_UNREACHABLE',
|
||||
AUTH_FAILED = 'AUTH_FAILED',
|
||||
AUTH_LOGIN_FAILED = 'AUTH_LOGIN_FAILED',
|
||||
BILLING_ERROR = 'BILLING_ERROR',
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user