From db38fd2c07c7d81b4963d776f36293901531d6e6 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 25 Apr 2026 12:59:55 -0700 Subject: [PATCH] feat(browse): domain-skills storage + state machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New module browse/src/domain-skills.ts implements the per-site notes the agent writes for itself, persisted as type:"domain" rows alongside /learn's per-project learnings. Three scopes layered: per-project default, global by explicit promotion. Project-active shadows global for the same host. State machine (T6 — codex outside-voice): quarantined --3 uses w/o flag--> active(project) --promote--> global ^ | +----- classifier flag during use - Append-only JSONL with O_APPEND for atomic small writes - Tolerant parser drops partial trailing line on read - Tombstone for deletes (compactor cleans up later) - Version log per (host, scope) enables rollback - Hostname derived from active tab top-level origin (T3 confused-deputy fix) - writeSkill rejects classifier_score >= 0.85 with structured error Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/domain-skills.ts | 416 ++++++++++++++++++++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 browse/src/domain-skills.ts diff --git a/browse/src/domain-skills.ts b/browse/src/domain-skills.ts new file mode 100644 index 00000000..002968c8 --- /dev/null +++ b/browse/src/domain-skills.ts @@ -0,0 +1,416 @@ +/** + * Domain skills — per-site notes the agent writes for itself, persisted + * alongside /learn's per-project learnings as type:"domain" rows. + * + * Scope: + * - per-project: ~/.gstack/projects//learnings.jsonl + * - global: ~/.gstack/global-domain-skills.jsonl + * + * State machine (T6 — defense against persistent prompt poisoning): + * + * ┌──────────────┐ N=3 successful uses ┌────────┐ promote-to-global ┌────────┐ + * │ quarantined │ ─────────────────────▶ │ active │ ──────────────────▶ │ global │ + * │ (per-project)│ (no classifier flags) │(project)│ (manual command) │ │ + * └──────────────┘ └────────┘ └────────┘ + * ▲ │ + * │ classifier flag during use │ rollback (version log) + * └───────────────────────────────────────┘ + * + * - new save → quarantined (does NOT auto-fire in prompts) + * - active skills fire in prompts for their project (wrapped in UNTRUSTED) + * - global skills fire across all projects (cross-context, requires explicit promote) + * - rollback restores prior version by sha256 + * + * Storage discipline (T5): + * - Append-only with O_APPEND (POSIX guarantees atomic appends < PIPE_BUF) + * - Tombstone for deletes; idle compactor rewrites file + * - Tolerant parser drops partial trailing line on read + * + * Hostname rules (T3, CEO-temporal): + * - Derived from active tab's top-level origin — NEVER agent-supplied + * - Lowercase, strip www., keep full subdomain (subdomain-exact match) + * - Punycode hostnames stored as-encoded + */ + +import { promises as fs } from 'fs'; +import { open as fsOpen, constants as fsConstants } from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { createHash } from 'crypto'; +import type { Page } from 'playwright'; + +export type SkillState = 'quarantined' | 'active' | 'global'; +export type SkillScope = 'project' | 'global'; +export type SkillSource = 'agent' | 'human'; + +export interface DomainSkillRow { + type: 'domain'; + host: string; + scope: SkillScope; + state: SkillState; + body: string; + version: number; + classifier_score: number; + source: SkillSource; + sha256: string; + use_count: number; + flag_count: number; + created_ts: string; + updated_ts: string; + tombstone?: boolean; +} + +const PROMOTE_THRESHOLD = 3; + +const GSTACK_HOME = process.env.GSTACK_HOME || path.join(os.homedir(), '.gstack'); +const GLOBAL_FILE = path.join(GSTACK_HOME, 'global-domain-skills.jsonl'); + +function projectFile(slug: string): string { + return path.join(GSTACK_HOME, 'projects', slug, 'learnings.jsonl'); +} + +// ─── Hostname normalization (T3) ────────────────────────────── + +export function normalizeHost(input: string): string { + let h = input.trim().toLowerCase(); + // strip protocol if present + h = h.replace(/^https?:\/\//, ''); + // strip path/query + h = h.split('/')[0]!.split('?')[0]!.split('#')[0]!; + // strip port + h = h.split(':')[0]!; + // strip www. prefix + h = h.replace(/^www\./, ''); + return h; +} + +/** + * Derive hostname from the active tab's top-level origin. + * Closes the confused-deputy bug (Codex T3): agent cannot supply a wrong + * hostname even if it tried — host is read from the page state we control. + */ +export async function deriveHostFromActiveTab(page: Page): Promise { + const url = page.url(); + if (!url || url === 'about:blank' || url.startsWith('chrome://')) { + throw new Error( + 'Cannot save domain-skill: no top-level URL on active tab.\n' + + 'Cause: tab is empty or on chrome:// page.\n' + + 'Action: navigate to the target site first with $B goto .' + ); + } + return normalizeHost(url); +} + +// ─── File I/O (T5: append-only + flock-free atomic appends) ──── + +async function ensureDir(filePath: string): Promise { + await fs.mkdir(path.dirname(filePath), { recursive: true }); +} + +/** + * Append a JSONL row atomically. POSIX guarantees atomicity for writes < + * PIPE_BUF (typically 4KB) when O_APPEND is set. Each row is single-line JSON + * well under that bound. fsync ensures durability before return. + */ +async function appendRow(filePath: string, row: DomainSkillRow): Promise { + await ensureDir(filePath); + const line = JSON.stringify(row) + '\n'; + return new Promise((resolve, reject) => { + fsOpen(filePath, fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_APPEND, 0o644, (err, fd) => { + if (err) return reject(err); + const buf = Buffer.from(line, 'utf8'); + const writeAndSync = () => { + // Use fs.writeSync via fd to ensure single write call (atomic with O_APPEND). + const fsSync = require('fs'); + try { + fsSync.writeSync(fd, buf, 0, buf.length); + fsSync.fsyncSync(fd); + fsSync.closeSync(fd); + resolve(); + } catch (e) { + try { + fsSync.closeSync(fd); + } catch { + // Ignore close errors after a write failure — original error wins. + } + reject(e); + } + }; + writeAndSync(); + }); + }); +} + +/** + * Read all rows from a JSONL file. Tolerant of partial trailing line (drops it). + * Returns rows in append order. Caller resolves latest-wins per (host, scope). + */ +async function readRows(filePath: string): Promise { + let raw: string; + try { + raw = await fs.readFile(filePath, 'utf8'); + } catch (e) { + const err = e as NodeJS.ErrnoException; + if (err.code === 'ENOENT') return []; + throw err; + } + const rows: DomainSkillRow[] = []; + const lines = raw.split('\n'); + // Last line is empty (trailing newline) OR partial. Drop unconditionally if no parse. + for (const line of lines) { + if (!line) continue; + try { + const parsed = JSON.parse(line); + if (parsed && parsed.type === 'domain') rows.push(parsed as DomainSkillRow); + } catch { + // Partial-line corruption tolerated. Compactor will clean up. + } + } + return rows; +} + +// ─── Latest-wins resolution ──────────────────────────────────── + +interface SkillKey { + host: string; + scope: SkillScope; +} + +function keyOf(row: DomainSkillRow): string { + return `${row.scope}::${row.host}`; +} + +/** + * Reduce a row stream to latest-version-wins per (host, scope). + * Tombstones win (deleted skill stays deleted). + */ +function resolveLatest(rows: DomainSkillRow[]): Map { + const m = new Map(); + for (const row of rows) { + const k = keyOf(row); + const prior = m.get(k); + if (!prior || row.version >= prior.version) { + m.set(k, row); + } + } + // Drop tombstoned entries from the result map for readers; rollback uses raw history. + for (const [k, row] of m) { + if (row.tombstone) m.delete(k); + } + return m; +} + +// ─── Public API ──────────────────────────────────────────────── + +export interface ReadSkillResult { + row: DomainSkillRow; + source: 'project' | 'global'; +} + +/** + * Read the active or global skill for a host visible to a given project. + * Project-scoped active skills shadow global skills for the same host. + * Quarantined skills are NEVER returned (they don't fire). + */ +export async function readSkill(host: string, projectSlug: string): Promise { + const normalized = normalizeHost(host); + // Project layer first + const projectRows = await readRows(projectFile(projectSlug)); + const projectLatest = resolveLatest(projectRows); + const projectHit = projectLatest.get(`project::${normalized}`); + if (projectHit && projectHit.state === 'active') { + return { row: projectHit, source: 'project' }; + } + // Global layer fallback + const globalRows = await readRows(GLOBAL_FILE); + const globalLatest = resolveLatest(globalRows); + const globalHit = globalLatest.get(`global::${normalized}`); + if (globalHit && globalHit.state === 'global') { + return { row: globalHit, source: 'global' }; + } + return null; +} + +export interface WriteSkillInput { + host: string; + body: string; // markdown frontmatter + content + projectSlug: string; + source: SkillSource; + classifierScore: number; // 0..1; caller invokes classifier before calling this +} + +/** + * Save a new skill (always quarantined initially per T6). + * Caller MUST run the classifier first and pass classifierScore. + * Score >= 0.85 should fail-fast at caller, never reach here. + */ +export async function writeSkill(input: WriteSkillInput): Promise { + if (input.classifierScore >= 0.85) { + throw new Error( + `Save blocked: classifier flagged content as potential injection (score: ${input.classifierScore.toFixed(2)}).\n` + + 'Cause: skill body contains patterns the L4 classifier marks as risky.\n' + + 'Action: rewrite the skill content removing instruction-like prose, retry.' + ); + } + const normalized = normalizeHost(input.host); + const body = input.body; + const now = new Date().toISOString(); + const sha = createHash('sha256').update(body, 'utf8').digest('hex'); + // Determine prior version for this (host, scope=project) so version counter increments. + const projectRows = await readRows(projectFile(input.projectSlug)); + const projectLatest = resolveLatest(projectRows); + const prior = projectLatest.get(`project::${normalized}`); + const version = prior ? prior.version + 1 : 1; + const row: DomainSkillRow = { + type: 'domain', + host: normalized, + scope: 'project', + state: 'quarantined', + body, + version, + classifier_score: input.classifierScore, + source: input.source, + sha256: sha, + use_count: 0, + flag_count: 0, + created_ts: prior?.created_ts ?? now, + updated_ts: now, + }; + await appendRow(projectFile(input.projectSlug), row); + return row; +} + +/** + * Promote a quarantined skill to active in its project after N=3 uses without + * classifier flagging. Called by sidebar-agent on successful skill use. + * + * Auto-promote logic: + * - increment use_count + * - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 → state:active + * - else stay quarantined with updated counter + */ +export async function recordSkillUse(host: string, projectSlug: string, classifierFlagged: boolean): Promise { + const normalized = normalizeHost(host); + const rows = await readRows(projectFile(projectSlug)); + const latest = resolveLatest(rows); + const current = latest.get(`project::${normalized}`); + if (!current) return null; + const useCount = current.use_count + 1; + const flagCount = current.flag_count + (classifierFlagged ? 1 : 0); + let state: SkillState = current.state; + if (state === 'quarantined' && useCount >= PROMOTE_THRESHOLD && flagCount === 0) { + state = 'active'; + } + const updated: DomainSkillRow = { + ...current, + state, + use_count: useCount, + flag_count: flagCount, + version: current.version + 1, + updated_ts: new Date().toISOString(), + }; + await appendRow(projectFile(projectSlug), updated); + return updated; +} + +/** + * Promote an active per-project skill to global. Explicit operator call only — + * never auto-promoted across project boundaries (T4). + */ +export async function promoteToGlobal(host: string, projectSlug: string): Promise { + const normalized = normalizeHost(host); + const rows = await readRows(projectFile(projectSlug)); + const latest = resolveLatest(rows); + const current = latest.get(`project::${normalized}`); + if (!current) { + throw new Error( + `Cannot promote: no skill for ${normalized} in project ${projectSlug}.\n` + + 'Cause: skill does not exist or is tombstoned.\n' + + 'Action: $B domain-skill list to see what exists in this project.' + ); + } + if (current.state !== 'active') { + throw new Error( + `Cannot promote: skill for ${normalized} is in state "${current.state}", expected "active".\n` + + `Cause: skill must be active in this project (used ${PROMOTE_THRESHOLD}+ times without flag) before global promotion.\n` + + 'Action: use the skill in this project until it auto-promotes to active.' + ); + } + const now = new Date().toISOString(); + const globalRow: DomainSkillRow = { + ...current, + scope: 'global', + state: 'global', + version: 1, // global file has its own version line + use_count: 0, + flag_count: 0, + updated_ts: now, + }; + await appendRow(GLOBAL_FILE, globalRow); + return globalRow; +} + +/** + * Rollback to a prior version (by sha256 OR previous version number). + * Re-emits the prior row as the latest, preserving the version counter monotonicity. + */ +export async function rollbackSkill(host: string, projectSlug: string, scope: SkillScope = 'project'): Promise { + const normalized = normalizeHost(host); + const file = scope === 'project' ? projectFile(projectSlug) : GLOBAL_FILE; + const rows = await readRows(file); + const matching = rows.filter((r) => r.host === normalized && r.scope === scope && !r.tombstone); + if (matching.length < 2) { + throw new Error( + `Cannot rollback: ${normalized} has fewer than 2 versions in ${scope} scope.\n` + + 'Cause: no prior version to roll back to.\n' + + 'Action: $B domain-skill rm to delete instead, or wait for a future revision to roll back from.' + ); + } + // Sort by version desc; take second-latest as the rollback target. + matching.sort((a, b) => b.version - a.version); + const target = matching[1]!; + const newVersion = matching[0]!.version + 1; + const restored: DomainSkillRow = { + ...target, + version: newVersion, + updated_ts: new Date().toISOString(), + }; + await appendRow(file, restored); + return restored; +} + +/** + * List all non-tombstoned skills visible to a project (active project + active global). + */ +export async function listSkills(projectSlug: string): Promise<{ project: DomainSkillRow[]; global: DomainSkillRow[] }> { + const projectRows = await readRows(projectFile(projectSlug)); + const globalRows = await readRows(GLOBAL_FILE); + const projectLatest = Array.from(resolveLatest(projectRows).values()); + const globalLatest = Array.from(resolveLatest(globalRows).values()).filter((r) => r.state === 'global'); + return { project: projectLatest, global: globalLatest }; +} + +/** + * Tombstone a skill. Append a tombstone row; compactor cleans up later. + */ +export async function deleteSkill(host: string, projectSlug: string, scope: SkillScope = 'project'): Promise { + const normalized = normalizeHost(host); + const file = scope === 'project' ? projectFile(projectSlug) : GLOBAL_FILE; + const rows = await readRows(file); + const latest = resolveLatest(rows); + const current = latest.get(`${scope}::${normalized}`); + if (!current) { + throw new Error( + `Cannot delete: no skill for ${normalized} in ${scope} scope.\n` + + 'Cause: skill does not exist or is already tombstoned.\n' + + 'Action: $B domain-skill list to see what exists.' + ); + } + const tombstone: DomainSkillRow = { + ...current, + version: current.version + 1, + updated_ts: new Date().toISOString(), + tombstone: true, + }; + await appendRow(file, tombstone); +}