mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
dc5e0538e5
* refactor: extract gen-skill-docs into modular resolver architecture Break the 3000-line monolith into 10 domain modules under scripts/resolvers/: types, constants, preamble, utility, browse, design, testing, review, codex-helpers, and index. Each module owns one domain of template generation. The preamble module introduces a 4-tier composition system (T1-T4) so skills only pay for the preamble sections they actually need, reducing token usage for lightweight skills by ~40%. Adds a token budget dashboard that prints after every generation run showing per-skill and total token counts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: tiered preamble — skills only pay for what they use Tag all 23 templates with preamble-tier (T1-T4). Lightweight skills like /browse and /benchmark get a minimal preamble (~40% fewer tokens), while review skills get the full stack. Regenerate all SKILL.md files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: migrate eval storage to project-scoped paths Move eval results and E2E run artifacts from ~/.gstack-dev/evals/ to ~/.gstack/projects/$SLUG/evals/ so each project's eval history lives alongside its other gstack data. Falls back to legacy path if slug detection fails. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: sync package.json version with VERSION after merge Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add WorktreeManager for isolated test environments Reusable platform module (lib/worktree.ts) that creates git worktrees for test isolation and harvests useful changes as patches. Includes SHA-256 dedup, original SHA tracking for committed change detection, and automatic gitignored artifact copying (.agents/, browse/dist/). 12 unit tests covering lifecycle, harvest, dedup, and error handling. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: integrate worktree isolation into E2E test infrastructure Add createTestWorktree(), harvestAndCleanup(), and describeWithWorktree() helpers to e2e-helpers.ts. Add harvest field to EvalTestEntry for eval-store integration. Register lib/worktree.ts as a global touchfile. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: run Gemini and Codex E2E tests in worktrees Switch both test suites from cwd: ROOT to worktree isolation. Gemini (--yolo) no longer pollutes the working tree. Codex (read-only) gets worktree for consistency. Useful changes are harvested as patches for cherry-picking. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: skip symlinks in copyDirSync to prevent infinite recursion Adversarial review caught that .claude/skills/gstack may be a symlink back to the repo root, causing copyDirSync to recurse infinitely when copying gitignored artifacts into worktrees. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: bump version and changelog (v0.11.12.0) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: relax session-awareness assertion to accept structured options The LLM consistently presents well-formatted A/B choices with pros/cons but doesn't always use the exact string "RECOMMENDATION". Accept case-insensitive "recommend", "option a", "which do you want", or "which approach" as equivalent signals of a structured recommendation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
133 lines
5.0 KiB
TypeScript
133 lines
5.0 KiB
TypeScript
import type { Host } from './types';
|
|
|
|
const OPENAI_SHORT_DESCRIPTION_LIMIT = 120;
|
|
|
|
export function extractNameAndDescription(content: string): { name: string; description: string } {
|
|
const fmStart = content.indexOf('---\n');
|
|
if (fmStart !== 0) return { name: '', description: '' };
|
|
const fmEnd = content.indexOf('\n---', fmStart + 4);
|
|
if (fmEnd === -1) return { name: '', description: '' };
|
|
|
|
const frontmatter = content.slice(fmStart + 4, fmEnd);
|
|
const nameMatch = frontmatter.match(/^name:\s*(.+)$/m);
|
|
const name = nameMatch ? nameMatch[1].trim() : '';
|
|
|
|
let description = '';
|
|
const lines = frontmatter.split('\n');
|
|
let inDescription = false;
|
|
const descLines: string[] = [];
|
|
for (const line of lines) {
|
|
if (line.match(/^description:\s*\|?\s*$/)) {
|
|
inDescription = true;
|
|
continue;
|
|
}
|
|
if (line.match(/^description:\s*\S/)) {
|
|
description = line.replace(/^description:\s*/, '').trim();
|
|
break;
|
|
}
|
|
if (inDescription) {
|
|
if (line === '' || line.match(/^\s/)) {
|
|
descLines.push(line.replace(/^ /, ''));
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (descLines.length > 0) {
|
|
description = descLines.join('\n').trim();
|
|
}
|
|
|
|
return { name, description };
|
|
}
|
|
|
|
export function condenseOpenAIShortDescription(description: string): string {
|
|
const firstParagraph = description.split(/\n\s*\n/)[0] || description;
|
|
const collapsed = firstParagraph.replace(/\s+/g, ' ').trim();
|
|
if (collapsed.length <= OPENAI_SHORT_DESCRIPTION_LIMIT) return collapsed;
|
|
|
|
const truncated = collapsed.slice(0, OPENAI_SHORT_DESCRIPTION_LIMIT - 3);
|
|
const lastSpace = truncated.lastIndexOf(' ');
|
|
const safe = lastSpace > 40 ? truncated.slice(0, lastSpace) : truncated;
|
|
return `${safe}...`;
|
|
}
|
|
|
|
export function generateOpenAIYaml(displayName: string, shortDescription: string): string {
|
|
return `interface:
|
|
display_name: ${JSON.stringify(displayName)}
|
|
short_description: ${JSON.stringify(shortDescription)}
|
|
default_prompt: ${JSON.stringify(`Use ${displayName} for this task.`)}
|
|
policy:
|
|
allow_implicit_invocation: true
|
|
`;
|
|
}
|
|
|
|
export function codexSkillName(skillDir: string): string {
|
|
if (skillDir === '.' || skillDir === '') return 'gstack';
|
|
// Don't double-prefix: gstack-upgrade → gstack-upgrade (not gstack-gstack-upgrade)
|
|
if (skillDir.startsWith('gstack-')) return skillDir;
|
|
return `gstack-${skillDir}`;
|
|
}
|
|
|
|
/**
|
|
* Transform frontmatter for Codex: keep only name + description.
|
|
* Strips allowed-tools, hooks, version, and all other fields.
|
|
* Handles multiline block scalar descriptions (YAML | syntax).
|
|
*/
|
|
export function transformFrontmatter(content: string, host: Host): string {
|
|
if (host === 'claude') return content;
|
|
|
|
// Find frontmatter boundaries
|
|
const fmStart = content.indexOf('---\n');
|
|
if (fmStart !== 0) return content; // frontmatter must be at the start
|
|
const fmEnd = content.indexOf('\n---', fmStart + 4);
|
|
if (fmEnd === -1) return content;
|
|
|
|
const body = content.slice(fmEnd + 4); // includes the leading \n after ---
|
|
const { name, description } = extractNameAndDescription(content);
|
|
|
|
// Codex 1024-char description limit — fail build, don't ship broken skills
|
|
const MAX_DESC = 1024;
|
|
if (description.length > MAX_DESC) {
|
|
throw new Error(
|
|
`Codex description for "${name}" is ${description.length} chars (max ${MAX_DESC}). ` +
|
|
`Compress the description in the .tmpl file.`
|
|
);
|
|
}
|
|
|
|
// Re-emit Codex frontmatter (name + description only)
|
|
const indentedDesc = description.split('\n').map(l => ` ${l}`).join('\n');
|
|
const codexFm = `---\nname: ${name}\ndescription: |\n${indentedDesc}\n---`;
|
|
return codexFm + body;
|
|
}
|
|
|
|
/**
|
|
* Extract hook descriptions from frontmatter for inline safety prose.
|
|
* Returns a description of what the hooks do, or null if no hooks.
|
|
*/
|
|
export function extractHookSafetyProse(tmplContent: string): string | null {
|
|
if (!tmplContent.match(/^hooks:/m)) return null;
|
|
|
|
// Parse the hook matchers to build a human-readable safety description
|
|
const matchers: string[] = [];
|
|
const matcherRegex = /matcher:\s*"(\w+)"/g;
|
|
let m;
|
|
while ((m = matcherRegex.exec(tmplContent)) !== null) {
|
|
if (!matchers.includes(m[1])) matchers.push(m[1]);
|
|
}
|
|
|
|
if (matchers.length === 0) return null;
|
|
|
|
// Build safety prose based on what tools are hooked
|
|
const toolDescriptions: Record<string, string> = {
|
|
Bash: 'check bash commands for destructive operations (rm -rf, DROP TABLE, force-push, git reset --hard, etc.) before execution',
|
|
Edit: 'verify file edits are within the allowed scope boundary before applying',
|
|
Write: 'verify file writes are within the allowed scope boundary before applying',
|
|
};
|
|
|
|
const safetyChecks = matchers
|
|
.map(t => toolDescriptions[t] || `check ${t} operations for safety`)
|
|
.join(', and ');
|
|
|
|
return `> **Safety Advisory:** This skill includes safety checks that ${safetyChecks}. When using this skill, always pause and verify before executing potentially destructive operations. If uncertain about a command's safety, ask the user for confirmation before proceeding.`;
|
|
}
|