feat(browse): domain-skills storage + state machine

New module browse/src/domain-skills.ts implements the per-site notes
the agent writes for itself, persisted as type:"domain" rows alongside
/learn's per-project learnings.

Three scopes layered: per-project default, global by explicit promotion.
Project-active shadows global for the same host.

State machine (T6 — codex outside-voice):
  quarantined --3 uses w/o flag--> active(project) --promote--> global
        ^                                |
        +----- classifier flag during use

- Append-only JSONL with O_APPEND for atomic small writes
- Tolerant parser drops partial trailing line on read
- Tombstone for deletes (compactor cleans up later)
- Version log per (host, scope) enables rollback
- Hostname derived from active tab top-level origin (T3 confused-deputy fix)
- writeSkill rejects classifier_score >= 0.85 with structured error

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-25 12:59:55 -07:00
parent 54d3ad923d
commit db38fd2c07
+416
View File
@@ -0,0 +1,416 @@
/**
* Domain skills — per-site notes the agent writes for itself, persisted
* alongside /learn's per-project learnings as type:"domain" rows.
*
* Scope:
* - per-project: ~/.gstack/projects/<slug>/learnings.jsonl
* - global: ~/.gstack/global-domain-skills.jsonl
*
* State machine (T6 — defense against persistent prompt poisoning):
*
* ┌──────────────┐ N=3 successful uses ┌────────┐ promote-to-global ┌────────┐
* │ quarantined │ ─────────────────────▶ │ active │ ──────────────────▶ │ global │
* │ (per-project)│ (no classifier flags) │(project)│ (manual command) │ │
* └──────────────┘ └────────┘ └────────┘
* ▲ │
* │ classifier flag during use │ rollback (version log)
* └───────────────────────────────────────┘
*
* - new save → quarantined (does NOT auto-fire in prompts)
* - active skills fire in prompts for their project (wrapped in UNTRUSTED)
* - global skills fire across all projects (cross-context, requires explicit promote)
* - rollback restores prior version by sha256
*
* Storage discipline (T5):
* - Append-only with O_APPEND (POSIX guarantees atomic appends < PIPE_BUF)
* - Tombstone for deletes; idle compactor rewrites file
* - Tolerant parser drops partial trailing line on read
*
* Hostname rules (T3, CEO-temporal):
* - Derived from active tab's top-level origin — NEVER agent-supplied
* - Lowercase, strip www., keep full subdomain (subdomain-exact match)
* - Punycode hostnames stored as-encoded
*/
import { promises as fs } from 'fs';
import { open as fsOpen, constants as fsConstants } from 'fs';
import * as path from 'path';
import * as os from 'os';
import { createHash } from 'crypto';
import type { Page } from 'playwright';
export type SkillState = 'quarantined' | 'active' | 'global';
export type SkillScope = 'project' | 'global';
export type SkillSource = 'agent' | 'human';
export interface DomainSkillRow {
type: 'domain';
host: string;
scope: SkillScope;
state: SkillState;
body: string;
version: number;
classifier_score: number;
source: SkillSource;
sha256: string;
use_count: number;
flag_count: number;
created_ts: string;
updated_ts: string;
tombstone?: boolean;
}
const PROMOTE_THRESHOLD = 3;
const GSTACK_HOME = process.env.GSTACK_HOME || path.join(os.homedir(), '.gstack');
const GLOBAL_FILE = path.join(GSTACK_HOME, 'global-domain-skills.jsonl');
function projectFile(slug: string): string {
return path.join(GSTACK_HOME, 'projects', slug, 'learnings.jsonl');
}
// ─── Hostname normalization (T3) ──────────────────────────────
export function normalizeHost(input: string): string {
let h = input.trim().toLowerCase();
// strip protocol if present
h = h.replace(/^https?:\/\//, '');
// strip path/query
h = h.split('/')[0]!.split('?')[0]!.split('#')[0]!;
// strip port
h = h.split(':')[0]!;
// strip www. prefix
h = h.replace(/^www\./, '');
return h;
}
/**
* Derive hostname from the active tab's top-level origin.
* Closes the confused-deputy bug (Codex T3): agent cannot supply a wrong
* hostname even if it tried — host is read from the page state we control.
*/
export async function deriveHostFromActiveTab(page: Page): Promise<string> {
const url = page.url();
if (!url || url === 'about:blank' || url.startsWith('chrome://')) {
throw new Error(
'Cannot save domain-skill: no top-level URL on active tab.\n' +
'Cause: tab is empty or on chrome:// page.\n' +
'Action: navigate to the target site first with $B goto <url>.'
);
}
return normalizeHost(url);
}
// ─── File I/O (T5: append-only + flock-free atomic appends) ────
async function ensureDir(filePath: string): Promise<void> {
await fs.mkdir(path.dirname(filePath), { recursive: true });
}
/**
* Append a JSONL row atomically. POSIX guarantees atomicity for writes <
* PIPE_BUF (typically 4KB) when O_APPEND is set. Each row is single-line JSON
* well under that bound. fsync ensures durability before return.
*/
async function appendRow(filePath: string, row: DomainSkillRow): Promise<void> {
await ensureDir(filePath);
const line = JSON.stringify(row) + '\n';
return new Promise((resolve, reject) => {
fsOpen(filePath, fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_APPEND, 0o644, (err, fd) => {
if (err) return reject(err);
const buf = Buffer.from(line, 'utf8');
const writeAndSync = () => {
// Use fs.writeSync via fd to ensure single write call (atomic with O_APPEND).
const fsSync = require('fs');
try {
fsSync.writeSync(fd, buf, 0, buf.length);
fsSync.fsyncSync(fd);
fsSync.closeSync(fd);
resolve();
} catch (e) {
try {
fsSync.closeSync(fd);
} catch {
// Ignore close errors after a write failure — original error wins.
}
reject(e);
}
};
writeAndSync();
});
});
}
/**
* Read all rows from a JSONL file. Tolerant of partial trailing line (drops it).
* Returns rows in append order. Caller resolves latest-wins per (host, scope).
*/
async function readRows(filePath: string): Promise<DomainSkillRow[]> {
let raw: string;
try {
raw = await fs.readFile(filePath, 'utf8');
} catch (e) {
const err = e as NodeJS.ErrnoException;
if (err.code === 'ENOENT') return [];
throw err;
}
const rows: DomainSkillRow[] = [];
const lines = raw.split('\n');
// Last line is empty (trailing newline) OR partial. Drop unconditionally if no parse.
for (const line of lines) {
if (!line) continue;
try {
const parsed = JSON.parse(line);
if (parsed && parsed.type === 'domain') rows.push(parsed as DomainSkillRow);
} catch {
// Partial-line corruption tolerated. Compactor will clean up.
}
}
return rows;
}
// ─── Latest-wins resolution ────────────────────────────────────
interface SkillKey {
host: string;
scope: SkillScope;
}
function keyOf(row: DomainSkillRow): string {
return `${row.scope}::${row.host}`;
}
/**
* Reduce a row stream to latest-version-wins per (host, scope).
* Tombstones win (deleted skill stays deleted).
*/
function resolveLatest(rows: DomainSkillRow[]): Map<string, DomainSkillRow> {
const m = new Map<string, DomainSkillRow>();
for (const row of rows) {
const k = keyOf(row);
const prior = m.get(k);
if (!prior || row.version >= prior.version) {
m.set(k, row);
}
}
// Drop tombstoned entries from the result map for readers; rollback uses raw history.
for (const [k, row] of m) {
if (row.tombstone) m.delete(k);
}
return m;
}
// ─── Public API ────────────────────────────────────────────────
export interface ReadSkillResult {
row: DomainSkillRow;
source: 'project' | 'global';
}
/**
* Read the active or global skill for a host visible to a given project.
* Project-scoped active skills shadow global skills for the same host.
* Quarantined skills are NEVER returned (they don't fire).
*/
export async function readSkill(host: string, projectSlug: string): Promise<ReadSkillResult | null> {
const normalized = normalizeHost(host);
// Project layer first
const projectRows = await readRows(projectFile(projectSlug));
const projectLatest = resolveLatest(projectRows);
const projectHit = projectLatest.get(`project::${normalized}`);
if (projectHit && projectHit.state === 'active') {
return { row: projectHit, source: 'project' };
}
// Global layer fallback
const globalRows = await readRows(GLOBAL_FILE);
const globalLatest = resolveLatest(globalRows);
const globalHit = globalLatest.get(`global::${normalized}`);
if (globalHit && globalHit.state === 'global') {
return { row: globalHit, source: 'global' };
}
return null;
}
export interface WriteSkillInput {
host: string;
body: string; // markdown frontmatter + content
projectSlug: string;
source: SkillSource;
classifierScore: number; // 0..1; caller invokes classifier before calling this
}
/**
* Save a new skill (always quarantined initially per T6).
* Caller MUST run the classifier first and pass classifierScore.
* Score >= 0.85 should fail-fast at caller, never reach here.
*/
export async function writeSkill(input: WriteSkillInput): Promise<DomainSkillRow> {
if (input.classifierScore >= 0.85) {
throw new Error(
`Save blocked: classifier flagged content as potential injection (score: ${input.classifierScore.toFixed(2)}).\n` +
'Cause: skill body contains patterns the L4 classifier marks as risky.\n' +
'Action: rewrite the skill content removing instruction-like prose, retry.'
);
}
const normalized = normalizeHost(input.host);
const body = input.body;
const now = new Date().toISOString();
const sha = createHash('sha256').update(body, 'utf8').digest('hex');
// Determine prior version for this (host, scope=project) so version counter increments.
const projectRows = await readRows(projectFile(input.projectSlug));
const projectLatest = resolveLatest(projectRows);
const prior = projectLatest.get(`project::${normalized}`);
const version = prior ? prior.version + 1 : 1;
const row: DomainSkillRow = {
type: 'domain',
host: normalized,
scope: 'project',
state: 'quarantined',
body,
version,
classifier_score: input.classifierScore,
source: input.source,
sha256: sha,
use_count: 0,
flag_count: 0,
created_ts: prior?.created_ts ?? now,
updated_ts: now,
};
await appendRow(projectFile(input.projectSlug), row);
return row;
}
/**
* Promote a quarantined skill to active in its project after N=3 uses without
* classifier flagging. Called by sidebar-agent on successful skill use.
*
* Auto-promote logic:
* - increment use_count
* - if use_count >= PROMOTE_THRESHOLD AND flag_count == 0 → state:active
* - else stay quarantined with updated counter
*/
export async function recordSkillUse(host: string, projectSlug: string, classifierFlagged: boolean): Promise<DomainSkillRow | null> {
const normalized = normalizeHost(host);
const rows = await readRows(projectFile(projectSlug));
const latest = resolveLatest(rows);
const current = latest.get(`project::${normalized}`);
if (!current) return null;
const useCount = current.use_count + 1;
const flagCount = current.flag_count + (classifierFlagged ? 1 : 0);
let state: SkillState = current.state;
if (state === 'quarantined' && useCount >= PROMOTE_THRESHOLD && flagCount === 0) {
state = 'active';
}
const updated: DomainSkillRow = {
...current,
state,
use_count: useCount,
flag_count: flagCount,
version: current.version + 1,
updated_ts: new Date().toISOString(),
};
await appendRow(projectFile(projectSlug), updated);
return updated;
}
/**
* Promote an active per-project skill to global. Explicit operator call only —
* never auto-promoted across project boundaries (T4).
*/
export async function promoteToGlobal(host: string, projectSlug: string): Promise<DomainSkillRow> {
const normalized = normalizeHost(host);
const rows = await readRows(projectFile(projectSlug));
const latest = resolveLatest(rows);
const current = latest.get(`project::${normalized}`);
if (!current) {
throw new Error(
`Cannot promote: no skill for ${normalized} in project ${projectSlug}.\n` +
'Cause: skill does not exist or is tombstoned.\n' +
'Action: $B domain-skill list to see what exists in this project.'
);
}
if (current.state !== 'active') {
throw new Error(
`Cannot promote: skill for ${normalized} is in state "${current.state}", expected "active".\n` +
`Cause: skill must be active in this project (used ${PROMOTE_THRESHOLD}+ times without flag) before global promotion.\n` +
'Action: use the skill in this project until it auto-promotes to active.'
);
}
const now = new Date().toISOString();
const globalRow: DomainSkillRow = {
...current,
scope: 'global',
state: 'global',
version: 1, // global file has its own version line
use_count: 0,
flag_count: 0,
updated_ts: now,
};
await appendRow(GLOBAL_FILE, globalRow);
return globalRow;
}
/**
* Rollback to a prior version (by sha256 OR previous version number).
* Re-emits the prior row as the latest, preserving the version counter monotonicity.
*/
export async function rollbackSkill(host: string, projectSlug: string, scope: SkillScope = 'project'): Promise<DomainSkillRow> {
const normalized = normalizeHost(host);
const file = scope === 'project' ? projectFile(projectSlug) : GLOBAL_FILE;
const rows = await readRows(file);
const matching = rows.filter((r) => r.host === normalized && r.scope === scope && !r.tombstone);
if (matching.length < 2) {
throw new Error(
`Cannot rollback: ${normalized} has fewer than 2 versions in ${scope} scope.\n` +
'Cause: no prior version to roll back to.\n' +
'Action: $B domain-skill rm to delete instead, or wait for a future revision to roll back from.'
);
}
// Sort by version desc; take second-latest as the rollback target.
matching.sort((a, b) => b.version - a.version);
const target = matching[1]!;
const newVersion = matching[0]!.version + 1;
const restored: DomainSkillRow = {
...target,
version: newVersion,
updated_ts: new Date().toISOString(),
};
await appendRow(file, restored);
return restored;
}
/**
* List all non-tombstoned skills visible to a project (active project + active global).
*/
export async function listSkills(projectSlug: string): Promise<{ project: DomainSkillRow[]; global: DomainSkillRow[] }> {
const projectRows = await readRows(projectFile(projectSlug));
const globalRows = await readRows(GLOBAL_FILE);
const projectLatest = Array.from(resolveLatest(projectRows).values());
const globalLatest = Array.from(resolveLatest(globalRows).values()).filter((r) => r.state === 'global');
return { project: projectLatest, global: globalLatest };
}
/**
* Tombstone a skill. Append a tombstone row; compactor cleans up later.
*/
export async function deleteSkill(host: string, projectSlug: string, scope: SkillScope = 'project'): Promise<void> {
const normalized = normalizeHost(host);
const file = scope === 'project' ? projectFile(projectSlug) : GLOBAL_FILE;
const rows = await readRows(file);
const latest = resolveLatest(rows);
const current = latest.get(`${scope}::${normalized}`);
if (!current) {
throw new Error(
`Cannot delete: no skill for ${normalized} in ${scope} scope.\n` +
'Cause: skill does not exist or is already tombstoned.\n' +
'Action: $B domain-skill list to see what exists.'
);
}
const tombstone: DomainSkillRow = {
...current,
version: current.version + 1,
updated_ts: new Date().toISOString(),
tombstone: true,
};
await appendRow(file, tombstone);
}