Files
gstack/scripts/resolvers/codex-helpers.ts
T
Garry Tan dc5e0538e5 feat: worktree isolation for E2E tests + infrastructure elegance (v0.11.12.0) (#425)
* refactor: extract gen-skill-docs into modular resolver architecture

Break the 3000-line monolith into 10 domain modules under scripts/resolvers/:
types, constants, preamble, utility, browse, design, testing, review,
codex-helpers, and index. Each module owns one domain of template generation.

The preamble module introduces a 4-tier composition system (T1-T4) so skills
only pay for the preamble sections they actually need, reducing token usage
for lightweight skills by ~40%.

Adds a token budget dashboard that prints after every generation run showing
per-skill and total token counts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: tiered preamble — skills only pay for what they use

Tag all 23 templates with preamble-tier (T1-T4). Lightweight skills
like /browse and /benchmark get a minimal preamble (~40% fewer tokens),
while review skills get the full stack. Regenerate all SKILL.md files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: migrate eval storage to project-scoped paths

Move eval results and E2E run artifacts from ~/.gstack-dev/evals/ to
~/.gstack/projects/$SLUG/evals/ so each project's eval history lives
alongside its other gstack data. Falls back to legacy path if slug
detection fails.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: sync package.json version with VERSION after merge

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: add WorktreeManager for isolated test environments

Reusable platform module (lib/worktree.ts) that creates git worktrees
for test isolation and harvests useful changes as patches. Includes
SHA-256 dedup, original SHA tracking for committed change detection,
and automatic gitignored artifact copying (.agents/, browse/dist/).

12 unit tests covering lifecycle, harvest, dedup, and error handling.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: integrate worktree isolation into E2E test infrastructure

Add createTestWorktree(), harvestAndCleanup(), and describeWithWorktree()
helpers to e2e-helpers.ts. Add harvest field to EvalTestEntry for
eval-store integration. Register lib/worktree.ts as a global touchfile.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: run Gemini and Codex E2E tests in worktrees

Switch both test suites from cwd: ROOT to worktree isolation.
Gemini (--yolo) no longer pollutes the working tree. Codex
(read-only) gets worktree for consistency. Useful changes are
harvested as patches for cherry-picking.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: skip symlinks in copyDirSync to prevent infinite recursion

Adversarial review caught that .claude/skills/gstack may be a symlink
back to the repo root, causing copyDirSync to recurse infinitely
when copying gitignored artifacts into worktrees.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: bump version and changelog (v0.11.12.0)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: relax session-awareness assertion to accept structured options

The LLM consistently presents well-formatted A/B choices with pros/cons
but doesn't always use the exact string "RECOMMENDATION". Accept
case-insensitive "recommend", "option a", "which do you want", or
"which approach" as equivalent signals of a structured recommendation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-23 23:05:22 -07:00

133 lines
5.0 KiB
TypeScript

import type { Host } from './types';
const OPENAI_SHORT_DESCRIPTION_LIMIT = 120;
export function extractNameAndDescription(content: string): { name: string; description: string } {
const fmStart = content.indexOf('---\n');
if (fmStart !== 0) return { name: '', description: '' };
const fmEnd = content.indexOf('\n---', fmStart + 4);
if (fmEnd === -1) return { name: '', description: '' };
const frontmatter = content.slice(fmStart + 4, fmEnd);
const nameMatch = frontmatter.match(/^name:\s*(.+)$/m);
const name = nameMatch ? nameMatch[1].trim() : '';
let description = '';
const lines = frontmatter.split('\n');
let inDescription = false;
const descLines: string[] = [];
for (const line of lines) {
if (line.match(/^description:\s*\|?\s*$/)) {
inDescription = true;
continue;
}
if (line.match(/^description:\s*\S/)) {
description = line.replace(/^description:\s*/, '').trim();
break;
}
if (inDescription) {
if (line === '' || line.match(/^\s/)) {
descLines.push(line.replace(/^ /, ''));
} else {
break;
}
}
}
if (descLines.length > 0) {
description = descLines.join('\n').trim();
}
return { name, description };
}
export function condenseOpenAIShortDescription(description: string): string {
const firstParagraph = description.split(/\n\s*\n/)[0] || description;
const collapsed = firstParagraph.replace(/\s+/g, ' ').trim();
if (collapsed.length <= OPENAI_SHORT_DESCRIPTION_LIMIT) return collapsed;
const truncated = collapsed.slice(0, OPENAI_SHORT_DESCRIPTION_LIMIT - 3);
const lastSpace = truncated.lastIndexOf(' ');
const safe = lastSpace > 40 ? truncated.slice(0, lastSpace) : truncated;
return `${safe}...`;
}
export function generateOpenAIYaml(displayName: string, shortDescription: string): string {
return `interface:
display_name: ${JSON.stringify(displayName)}
short_description: ${JSON.stringify(shortDescription)}
default_prompt: ${JSON.stringify(`Use ${displayName} for this task.`)}
policy:
allow_implicit_invocation: true
`;
}
export function codexSkillName(skillDir: string): string {
if (skillDir === '.' || skillDir === '') return 'gstack';
// Don't double-prefix: gstack-upgrade → gstack-upgrade (not gstack-gstack-upgrade)
if (skillDir.startsWith('gstack-')) return skillDir;
return `gstack-${skillDir}`;
}
/**
* Transform frontmatter for Codex: keep only name + description.
* Strips allowed-tools, hooks, version, and all other fields.
* Handles multiline block scalar descriptions (YAML | syntax).
*/
export function transformFrontmatter(content: string, host: Host): string {
if (host === 'claude') return content;
// Find frontmatter boundaries
const fmStart = content.indexOf('---\n');
if (fmStart !== 0) return content; // frontmatter must be at the start
const fmEnd = content.indexOf('\n---', fmStart + 4);
if (fmEnd === -1) return content;
const body = content.slice(fmEnd + 4); // includes the leading \n after ---
const { name, description } = extractNameAndDescription(content);
// Codex 1024-char description limit — fail build, don't ship broken skills
const MAX_DESC = 1024;
if (description.length > MAX_DESC) {
throw new Error(
`Codex description for "${name}" is ${description.length} chars (max ${MAX_DESC}). ` +
`Compress the description in the .tmpl file.`
);
}
// Re-emit Codex frontmatter (name + description only)
const indentedDesc = description.split('\n').map(l => ` ${l}`).join('\n');
const codexFm = `---\nname: ${name}\ndescription: |\n${indentedDesc}\n---`;
return codexFm + body;
}
/**
* Extract hook descriptions from frontmatter for inline safety prose.
* Returns a description of what the hooks do, or null if no hooks.
*/
export function extractHookSafetyProse(tmplContent: string): string | null {
if (!tmplContent.match(/^hooks:/m)) return null;
// Parse the hook matchers to build a human-readable safety description
const matchers: string[] = [];
const matcherRegex = /matcher:\s*"(\w+)"/g;
let m;
while ((m = matcherRegex.exec(tmplContent)) !== null) {
if (!matchers.includes(m[1])) matchers.push(m[1]);
}
if (matchers.length === 0) return null;
// Build safety prose based on what tools are hooked
const toolDescriptions: Record<string, string> = {
Bash: 'check bash commands for destructive operations (rm -rf, DROP TABLE, force-push, git reset --hard, etc.) before execution',
Edit: 'verify file edits are within the allowed scope boundary before applying',
Write: 'verify file writes are within the allowed scope boundary before applying',
};
const safetyChecks = matchers
.map(t => toolDescriptions[t] || `check ${t} operations for safety`)
.join(', and ');
return `> **Safety Advisory:** This skill includes safety checks that ${safetyChecks}. When using this skill, always pause and verify before executing potentially destructive operations. If uncertain about a command's safety, ask the user for confirmation before proceeding.`;
}