mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 05:56:41 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/auq-auto-mode
# Conflicts: # CHANGELOG.md # VERSION # package.json
This commit is contained in:
+12
-1
@@ -2770,7 +2770,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
|
||||
|
||||
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
|
||||
|
||||
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
|
||||
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
|
||||
|
||||
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
|
||||
2. Compute the corrected title: `NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
|
||||
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
|
||||
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
|
||||
|
||||
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
|
||||
|
||||
Print the existing URL and continue to Step 20.
|
||||
|
||||
@@ -2840,6 +2847,8 @@ you missed it.>
|
||||
**If GitHub:**
|
||||
|
||||
```bash
|
||||
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
|
||||
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
|
||||
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
|
||||
<PR body from above>
|
||||
EOF
|
||||
@@ -2849,6 +2858,8 @@ EOF
|
||||
**If GitLab:**
|
||||
|
||||
```bash
|
||||
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
|
||||
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
|
||||
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
|
||||
<MR body from above>
|
||||
EOF
|
||||
|
||||
+12
-1
@@ -2385,7 +2385,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
|
||||
|
||||
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
|
||||
|
||||
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
|
||||
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
|
||||
|
||||
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
|
||||
2. Compute the corrected title: `NEW_TITLE=$($GSTACK_ROOT/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
|
||||
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
|
||||
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
|
||||
|
||||
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
|
||||
|
||||
Print the existing URL and continue to Step 20.
|
||||
|
||||
@@ -2455,6 +2462,8 @@ you missed it.>
|
||||
**If GitHub:**
|
||||
|
||||
```bash
|
||||
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
|
||||
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
|
||||
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
|
||||
<PR body from above>
|
||||
EOF
|
||||
@@ -2464,6 +2473,8 @@ EOF
|
||||
**If GitLab:**
|
||||
|
||||
```bash
|
||||
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
|
||||
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
|
||||
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
|
||||
<MR body from above>
|
||||
EOF
|
||||
|
||||
+12
-1
@@ -2761,7 +2761,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
|
||||
|
||||
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
|
||||
|
||||
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
|
||||
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
|
||||
|
||||
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
|
||||
2. Compute the corrected title: `NEW_TITLE=$($GSTACK_ROOT/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
|
||||
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
|
||||
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
|
||||
|
||||
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
|
||||
|
||||
Print the existing URL and continue to Step 20.
|
||||
|
||||
@@ -2831,6 +2838,8 @@ you missed it.>
|
||||
**If GitHub:**
|
||||
|
||||
```bash
|
||||
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
|
||||
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
|
||||
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
|
||||
<PR body from above>
|
||||
EOF
|
||||
@@ -2840,6 +2849,8 @@ EOF
|
||||
**If GitLab:**
|
||||
|
||||
```bash
|
||||
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
|
||||
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
|
||||
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
|
||||
<MR body from above>
|
||||
EOF
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as path from 'path';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin', 'gstack-paths');
|
||||
|
||||
// Invoke via `bash` rather than executing the shebang-script directly.
|
||||
// On Windows, spawnSync(scriptPath, ...) goes through CreateProcess, which
|
||||
// doesn't parse `#!/usr/bin/env bash`. Production usage always sources the
|
||||
// helper from inside a bash block (`eval "$(~/.claude/skills/gstack/bin/gstack-paths)"`)
|
||||
// so bash is always the executor — this matches that contract.
|
||||
//
|
||||
// USERPROFILE: '' is a Windows-specific override. Git Bash auto-populates
|
||||
// HOME from USERPROFILE at shell startup if HOME is unset/empty, which
|
||||
// silently breaks the "HOME unset" test scenarios. Clearing USERPROFILE
|
||||
// alongside HOME prevents that auto-population on Windows runners.
|
||||
/**
 * Execute bin/gstack-paths under bash with a minimal, controlled environment
 * and parse its stdout into a KEY -> VALUE map.
 *
 * The child process only sees PATH, an emptied USERPROFILE, and whatever
 * `env` supplies (keys in `env` win), so each test fully controls HOME,
 * GSTACK_HOME, and friends.
 *
 * @param env Extra environment variables for the child process.
 * @returns One entry per `KEY=VALUE` line printed by the script.
 * @throws Error if bash cannot be launched or the script exits non-zero.
 */
function run(env: Record<string, string | undefined>): Record<string, string> {
  const result = spawnSync('bash', [BIN], {
    env: { PATH: process.env.PATH, USERPROFILE: '', ...env } as Record<string, string>,
    encoding: 'utf-8',
  });
  // A launch failure (e.g. bash missing from PATH) is reported through
  // result.error with a null status and no stderr. Report the real cause
  // instead of the uninformative "status null".
  if (result.error) {
    throw new Error(`gstack-paths could not be launched: ${result.error.message}`);
  }
  if (result.status !== 0) {
    throw new Error(`gstack-paths failed (status ${result.status}): ${result.stderr}`);
  }
  const out: Record<string, string> = {};
  for (const line of result.stdout.split('\n')) {
    // Split on the first '=' only, so values may themselves contain '='.
    // Lines with no '=' or with an empty key are ignored.
    const eq = line.indexOf('=');
    if (eq > 0) out[line.slice(0, eq)] = line.slice(eq + 1);
  }
  return out;
}
|
||||
|
||||
// Precedence and output-format tests for bin/gstack-paths. Each case runs the
// script with a hand-built environment and checks the resolved roots.
describe('gstack-paths', () => {
  test('GSTACK_HOME wins over CLAUDE_PLUGIN_DATA and HOME', () => {
    const got = run({
      GSTACK_HOME: '/tmp/explicit-state',
      CLAUDE_PLUGIN_DATA: '/tmp/plugin-data',
      HOME: '/tmp/home',
    });
    expect(got.GSTACK_STATE_ROOT).toBe('/tmp/explicit-state');
  });

  test('CLAUDE_PLUGIN_DATA wins over HOME when GSTACK_HOME unset', () => {
    const got = run({
      CLAUDE_PLUGIN_DATA: '/tmp/plugin-data',
      HOME: '/tmp/home',
    });
    expect(got.GSTACK_STATE_ROOT).toBe('/tmp/plugin-data');
  });

  test('HOME-derived state root when GSTACK_HOME and CLAUDE_PLUGIN_DATA unset', () => {
    const got = run({ HOME: '/tmp/myhome' });
    expect(got.GSTACK_STATE_ROOT).toBe('/tmp/myhome/.gstack');
  });

  test('CWD fallback when HOME also unset (container env)', () => {
    // Skip on Windows: Git Bash auto-derives HOME from USERPROFILE,
    // HOMEDRIVE, and HOMEPATH at shell startup. Even with all three
    // cleared, bash falls back to /c/Users/<user>. The container env
    // (HOME genuinely unset) is unreachable on Windows runners. The bash
    // script's CWD fallback IS correct — exercised on Linux/Mac CI.
    if (process.platform === 'win32') return;
    const got = run({ HOME: '' });
    expect(got.GSTACK_STATE_ROOT).toBe('.gstack');
  });

  test('PLAN_ROOT chain: GSTACK_PLAN_DIR > CLAUDE_PLANS_DIR > HOME > CWD', () => {
    expect(run({ GSTACK_PLAN_DIR: '/tmp/explicit', HOME: '/h' }).PLAN_ROOT).toBe('/tmp/explicit');
    expect(run({ CLAUDE_PLANS_DIR: '/tmp/claude', HOME: '/h' }).PLAN_ROOT).toBe('/tmp/claude');
    expect(run({ HOME: '/tmp/myhome' }).PLAN_ROOT).toBe('/tmp/myhome/.claude/plans');
    // CWD fallback only verifiable on POSIX — Git Bash auto-populates HOME.
    if (process.platform !== 'win32') {
      expect(run({ HOME: '' }).PLAN_ROOT).toBe('.claude/plans');
    }
  });

  test('TMP_ROOT chain: TMPDIR > TMP > .gstack/tmp', () => {
    expect(run({ TMPDIR: '/tmp/x', HOME: '/h' }).TMP_ROOT).toBe('/tmp/x');
    expect(run({ TMP: '/tmp/y', HOME: '/h' }).TMP_ROOT).toBe('/tmp/y');
    expect(run({ HOME: '' }).TMP_ROOT).toBe('.gstack/tmp');
  });

  test('emits all three exports on every invocation', () => {
    const got = run({ HOME: '/tmp/h' });
    expect(got).toHaveProperty('GSTACK_STATE_ROOT');
    expect(got).toHaveProperty('PLAN_ROOT');
    expect(got).toHaveProperty('TMP_ROOT');
  });

  test('output is shell-evalable: only KEY=VALUE lines, no extra prose', () => {
    // Spawned directly (not via run()) because run() silently drops lines
    // that are not KEY=VALUE, which is exactly what this test must catch.
    const result = spawnSync('bash', [BIN], {
      env: { PATH: process.env.PATH, USERPROFILE: '', HOME: '/tmp/h' } as Record<string, string>,
      encoding: 'utf-8',
    });
    // Without these two checks a failing or silent script yields zero lines
    // and the loop below passes without asserting anything.
    expect(result.status).toBe(0);
    const lines = result.stdout.split('\n').filter(Boolean);
    expect(lines.length).toBeGreaterThan(0);
    for (const line of lines) {
      expect(line).toMatch(/^[A-Z_]+=.*/);
    }
  });
});
|
||||
@@ -35,7 +35,7 @@ import {
|
||||
} from '@anthropic-ai/claude-agent-sdk';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { execSync } from 'child_process';
|
||||
import { resolveClaudeBinary as resolveClaudeBinaryShared } from '../../browse/src/claude-bin';
|
||||
import type { SkillTestResult } from './session-runner';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -278,11 +278,7 @@ function resolveSdkVersion(): string {
|
||||
}
|
||||
|
||||
export function resolveClaudeBinary(): string | null {
|
||||
try {
|
||||
return execSync('which claude', { encoding: 'utf-8' }).trim() || null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
return resolveClaudeBinaryShared();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import type { ProviderAdapter, RunOpts, RunResult, AvailabilityCheck } from './types';
|
||||
import { estimateCostUsd } from '../pricing';
|
||||
import { execFileSync, spawnSync } from 'child_process';
|
||||
import { execFileSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { resolveClaudeCommand } from '../../../browse/src/claude-bin';
|
||||
|
||||
/**
|
||||
* Claude adapter — wraps the `claude` CLI via claude -p.
|
||||
@@ -18,10 +19,11 @@ export class ClaudeAdapter implements ProviderAdapter {
|
||||
readonly family = 'claude' as const;
|
||||
|
||||
async available(): Promise<AvailabilityCheck> {
|
||||
// Binary on PATH?
|
||||
const res = spawnSync('sh', ['-c', 'command -v claude'], { timeout: 2000 });
|
||||
if (res.status !== 0) {
|
||||
return { ok: false, reason: 'claude CLI not found on PATH. Install from https://claude.ai/download or npm i -g @anthropic-ai/claude-code' };
|
||||
// Binary on PATH (or GSTACK_CLAUDE_BIN override). Routes through the shared
|
||||
// resolver so Windows + override paths behave the same as production sites.
|
||||
const resolved = resolveClaudeCommand();
|
||||
if (!resolved) {
|
||||
return { ok: false, reason: 'claude CLI not found on PATH. Install from https://claude.ai/download or npm i -g @anthropic-ai/claude-code (or set GSTACK_CLAUDE_BIN)' };
|
||||
}
|
||||
// Auth sniff: ~/.claude/.credentials.json OR ANTHROPIC_API_KEY
|
||||
const credsPath = path.join(os.homedir(), '.claude', '.credentials.json');
|
||||
@@ -35,12 +37,16 @@ export class ClaudeAdapter implements ProviderAdapter {
|
||||
|
||||
async run(opts: RunOpts): Promise<RunResult> {
|
||||
const start = Date.now();
|
||||
const args = ['-p', '--output-format', 'json'];
|
||||
const resolved = resolveClaudeCommand();
|
||||
if (!resolved) {
|
||||
throw new Error('claude CLI not resolvable (set GSTACK_CLAUDE_BIN or install)');
|
||||
}
|
||||
const args = [...resolved.argsPrefix, '-p', '--output-format', 'json'];
|
||||
if (opts.model) args.push('--model', opts.model);
|
||||
if (opts.extraArgs) args.push(...opts.extraArgs);
|
||||
|
||||
try {
|
||||
const out = execFileSync('claude', args, {
|
||||
const out = execFileSync(resolved.command, args, {
|
||||
input: opts.prompt,
|
||||
cwd: opts.workdir,
|
||||
timeout: opts.timeoutMs,
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as path from 'path';
|
||||
|
||||
const HELPER = path.join(import.meta.dir, '..', 'bin', 'gstack-pr-title-rewrite.sh');
|
||||
|
||||
/**
 * Run bin/gstack-pr-title-rewrite.sh with a version and a current PR title.
 *
 * The script is invoked through `bash` rather than executed directly:
 * spawning a shebang script by path does not work on Windows, and the skill
 * docs call the helper from inside a bash block, so bash is the executor in
 * production too.
 *
 * @param version Version string passed as the helper's first argument.
 * @param title   Current PR title passed as the second argument.
 * @returns Trimmed stdout, stderr, and the exit status (-1 when the process
 *          produced no exit code, e.g. it could not be spawned).
 */
function rewrite(version: string, title: string): { stdout: string; status: number; stderr: string } {
  const r = spawnSync('bash', [HELPER, version, title], { encoding: 'utf-8' });
  return { stdout: (r.stdout ?? '').trimEnd(), status: r.status ?? -1, stderr: r.stderr ?? '' };
}
|
||||
|
||||
// Behavioral contract of bin/gstack-pr-title-rewrite.sh: given a version and
// the current title, print a title that starts with `v<version> `.
describe('gstack-pr-title-rewrite', () => {
  test('already correct: no change', () => {
    const r = rewrite('1.2.3.4', 'v1.2.3.4 feat: foo');
    expect(r.status).toBe(0);
    expect(r.stdout).toBe('v1.2.3.4 feat: foo');
  });

  test('different version prefix: replaces it', () => {
    expect(rewrite('1.2.3.5', 'v1.2.3.4 feat: foo').stdout).toBe('v1.2.3.5 feat: foo');
  });

  test('different prefix length (3-part vs 4-part): replaces it', () => {
    expect(rewrite('1.2.3.4', 'v1.2.3 feat: foo').stdout).toBe('v1.2.3.4 feat: foo');
  });

  test('no version prefix: prepends', () => {
    expect(rewrite('1.2.3.4', 'feat: foo').stdout).toBe('v1.2.3.4 feat: foo');
  });

  test('does not mistake plain words for a prefix', () => {
    expect(rewrite('1.2.3.4', 'version 5 feature').stdout).toBe('v1.2.3.4 version 5 feature');
  });

  test('does not strip a single-segment prefix like v1', () => {
    expect(rewrite('1.2.3.4', 'v1 feat: foo').stdout).toBe('v1.2.3.4 v1 feat: foo');
  });

  test('errors on missing args', () => {
    // Invoked through bash so the shebang script also runs on Windows.
    const r = spawnSync('bash', [HELPER, '1.2.3.4'], { encoding: 'utf-8' });
    // status is null when the process never produced an exit code (spawn
    // failure, killed by signal). `not.toBe(0)` alone would accept that, so
    // require a real exit code first.
    expect(r.status).not.toBeNull();
    expect(r.status).not.toBe(0);
  });

  test('rejects malformed VERSION with shell metacharacters', () => {
    expect(rewrite('1.*.*.*', 'feat: foo').status).toBe(2);
    expect(rewrite('1.2.3.4; rm -rf /', 'feat: foo').status).toBe(2);
  });

  test('idempotent: applying twice yields the same result', () => {
    const once = rewrite('1.2.3.4', 'feat: foo').stdout;
    const twice = rewrite('1.2.3.4', once).stdout;
    expect(twice).toBe(once);
  });
});
|
||||
@@ -1458,6 +1458,107 @@ describe('Skill trigger phrases', () => {
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Private-path leak detector ──────────────────────────────
|
||||
//
|
||||
// Catches accidental references to maintainer-private files in skill output.
|
||||
// Adapted from the McGluut fork's skill-contract-audit.ts (we don't take the
|
||||
// whole script — these are the unique checks not already covered by
|
||||
// test/gen-skill-docs.test.ts:1668-2074 .claude/skills leakage tests).
|
||||
|
||||
describe('Private-path leak detection', () => {
  // Each pattern is tested against the full text of every discovered skill
  // file; `label` is what gets reported when the pattern matches.
  const PRIVATE_PATTERNS: Array<{ pattern: RegExp; label: string }> = [
    { pattern: /coordination-board\.md/i, label: 'coordination-board.md' },
    { pattern: /SEEKING_LOG\.md/, label: 'SEEKING_LOG.md' },
    { pattern: /RATIONAL_SUBJECT\.md/, label: 'RATIONAL_SUBJECT.md' },
    { pattern: /VALUE_SIGNAL_LOOP\.md/, label: 'VALUE_SIGNAL_LOOP.md' },
    // `\\+` accepts one or more backslashes per separator, so both the plain
    // path (C:\LLM Playground\go) and its escaped form with doubled
    // backslashes are caught. Requiring exactly two backslashes would miss
    // the plain path that the label describes.
    { pattern: /C:\\+LLM Playground\\+go/i, label: 'C:\\LLM Playground\\go' },
  ];

  // Recursively collect every SKILL.md and SKILL.md.tmpl under ROOT.
  // Dot-directories are skipped except `.agents`; `node_modules` and `dist`
  // are skipped as well.
  function discoverSkillSurface(): string[] {
    const results: string[] = [];
    function walk(dir: string) {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        if (entry.name.startsWith('.') && entry.name !== '.agents') continue;
        if (entry.name === 'node_modules' || entry.name === 'dist') continue;
        const full = path.join(dir, entry.name);
        if (entry.isDirectory()) {
          walk(full);
        } else if (entry.name === 'SKILL.md' || entry.name === 'SKILL.md.tmpl') {
          results.push(full);
        }
      }
    }
    walk(ROOT);
    return results;
  }

  test('no SKILL.md or SKILL.md.tmpl references private maintainer files', () => {
    const files = discoverSkillSurface();
    // Guard against a vacuous pass if discovery finds nothing.
    expect(files.length).toBeGreaterThan(0);
    const leaks: string[] = [];
    for (const file of files) {
      const content = fs.readFileSync(file, 'utf-8');
      for (const { pattern, label } of PRIVATE_PATTERNS) {
        if (pattern.test(content)) {
          leaks.push(`${path.relative(ROOT, file)} mentions ${label}`);
        }
      }
    }
    // Compare against [] so a failure prints every offending file at once.
    expect(leaks).toEqual([]);
  });
});
|
||||
|
||||
// ─── Doc-inventory cross-check ───────────────────────────────
|
||||
//
|
||||
// Every skill directory (with a SKILL.md.tmpl) must appear in both AGENTS.md
|
||||
// and docs/skills.md. Catches the inventory drift codex flagged (/debug
|
||||
// → /investigate; missing /autoplan, /context-save, /plan-devex-review, etc.).
|
||||
|
||||
describe('Doc inventory cross-check', () => {
  // Top-level directories that are never checked against the docs, even if
  // they contain a SKILL.md.tmpl (infrastructure, hosts, tooling).
  // NOTE(review): an earlier version of this comment said 'qa-only' is
  // exempt as a sub-mode of /qa, but 'qa-only' is not in this set — confirm
  // whether it should be added here or must be documented like other skills.
  const DOC_INVENTORY_EXCLUDE = new Set([
    // Infra / non-skills
    'agents', 'claude', 'connect-chrome', 'contrib', 'hosts',
    'lib', 'model-overlays', 'openclaw', 'supabase', 'scripts', 'test',
  ]);

  // Sorted names of top-level, non-hidden, non-excluded directories that
  // contain a SKILL.md.tmpl.
  function discoverSkillDirs(): string[] {
    const dirs: string[] = [];
    for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      if (entry.name.startsWith('.')) continue;
      if (DOC_INVENTORY_EXCLUDE.has(entry.name)) continue;
      const tmplPath = path.join(ROOT, entry.name, 'SKILL.md.tmpl');
      if (fs.existsSync(tmplPath)) dirs.push(entry.name);
    }
    return dirs.sort();
  }

  // Skills whose `/skill-name` token does not appear anywhere in `doc`.
  function findUndocumented(doc: string): string[] {
    return discoverSkillDirs().filter((skill) => {
      // Directory names are interpolated into a regex, so escape metacharacters.
      const escaped = skill.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // A plain `\b` treats '-' as a boundary, so `/qa-only` would count as
      // documenting `/qa`. Reject a following word character or hyphen so
      // only the exact token matches.
      return !new RegExp(`/${escaped}(?![\\w-])`).test(doc);
    });
  }

  test('every skill is documented in AGENTS.md', () => {
    const agents = fs.readFileSync(path.join(ROOT, 'AGENTS.md'), 'utf-8');
    expect(findUndocumented(agents)).toEqual([]);
  });

  test('every skill is documented in docs/skills.md', () => {
    const docs = fs.readFileSync(path.join(ROOT, 'docs', 'skills.md'), 'utf-8');
    expect(findUndocumented(docs)).toEqual([]);
  });
});
|
||||
|
||||
// ─── Codex Skill Validation ──────────────────────────────────
|
||||
|
||||
describe('Codex skill validation', () => {
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import {
|
||||
isFreeTestFile,
|
||||
collectFreeTestFiles,
|
||||
detectWindowsFragility,
|
||||
curateWindowsSafe,
|
||||
stableHash,
|
||||
assignFilesToShards,
|
||||
normalizeRelativePath,
|
||||
} from '../scripts/test-free-shards';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
// Enumeration helpers: which files count as free tests, and how paths are
// normalized before comparison.
describe('test-free-shards: enumeration', () => {
  test('isFreeTestFile rejects non-test files', () => {
    const expectations: Array<[string, boolean]> = [
      ['test/foo.ts', false],
      ['test/foo.test.ts', true],
      ['test/foo.test.tsx', true],
      ['test/foo.test.mjs', true],
    ];
    for (const [candidate, isFree] of expectations) {
      expect(isFreeTestFile(candidate)).toBe(isFree);
    }
  });

  test('isFreeTestFile rejects paid eval tests', () => {
    const paidTests = [
      'test/skill-e2e-foo.test.ts',
      'test/skill-llm-eval.test.ts',
      'test/codex-e2e.test.ts',
      'test/gemini-e2e.test.ts',
    ];
    for (const candidate of paidTests) {
      expect(isFreeTestFile(candidate)).toBe(false);
    }
  });

  test('collectFreeTestFiles returns sorted, deduped, only-free list', () => {
    const collected = collectFreeTestFiles(ROOT);
    expect(collected.length).toBeGreaterThan(10);

    // Sorted: equal to a sorted copy of itself.
    const sortedCopy = [...collected].sort();
    expect(collected).toEqual(sortedCopy);

    // Deduped: collapsing into a Set loses nothing.
    const uniqueCount = new Set(collected).size;
    expect(uniqueCount).toBe(collected.length);

    // Only-free: every entry passes the same predicate the tests above pin.
    for (const entry of collected) {
      expect(isFreeTestFile(entry)).toBe(true);
    }
  });

  test('normalizeRelativePath converts Windows backslashes to forward slashes', () => {
    const posixForm = 'test/foo/bar.test.ts';
    expect(normalizeRelativePath('test\\foo\\bar.test.ts')).toBe(posixForm);
    expect(normalizeRelativePath('test/foo/bar.test.ts')).toBe(posixForm);
  });
});
|
||||
|
||||
describe('test-free-shards: Windows curation', () => {
  /**
   * Write `content` to sample.test.ts inside a fresh temp directory, hand
   * the file path to `fn`, and always remove the directory afterwards.
   */
  function withTempFile(content: string, fn: (filePath: string) => void): void {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'curation-test-'));
    try {
      // The write lives inside the try so that a failed write still cleans
      // up the directory created above.
      const file = path.join(dir, 'sample.test.ts');
      fs.writeFileSync(file, content);
      fn(file);
    } finally {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  }

  test('detects /bin/bash hardcode', () => {
    withTempFile(`spawn('/bin/bash', ['-c', 'echo hi']);`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('hardcoded /bin/sh or /bin/bash');
    });
  });

  test('detects spawn("sh", ...)', () => {
    withTempFile(`spawnSync('sh', ['-c', 'command -v claude']);`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('spawn("sh", ...)');
    });
  });

  test('detects raw /tmp/ paths', () => {
    withTempFile(`const TMPERR = '/tmp/codex-err.txt';`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('raw /tmp/ path (use os.tmpdir())');
    });
  });

  test('detects which claude shell command', () => {
    withTempFile(`execSync('which claude').trim();`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('which claude (use Bun.which)');
    });
  });

  test('Windows-safe code passes the filter', () => {
    withTempFile(`import { spawn } from 'child_process'; spawn(claude.command, args);`, (f) => {
      expect(detectWindowsFragility(f)).toBeNull();
    });
  });

  test('curateWindowsSafe partitions files into safe + excluded', () => {
    const files = collectFreeTestFiles(ROOT);
    const result = curateWindowsSafe(files, ROOT);
    // Partition: every input file lands in exactly one of the two buckets.
    expect(result.safe.length + result.excluded.length).toBe(files.length);
    // Sanity: at least one excluded entry, since we know test/ship-version-sync.test.ts uses /bin/bash
    expect(result.excluded.length).toBeGreaterThan(0);
    // Every excluded entry has a non-empty reason
    for (const { reason } of result.excluded) {
      expect(reason.length).toBeGreaterThan(0);
    }
  });
});
|
||||
|
||||
// Shard assignment: hashing must be deterministic and every input file must
// end up in exactly one shard.
describe('test-free-shards: sharding', () => {
  test('stableHash is deterministic', () => {
    const first = stableHash('foo.test.ts');
    const second = stableHash('foo.test.ts');
    expect(first).toBe(second);

    const left = stableHash('foo.test.ts');
    const right = stableHash('bar.test.ts');
    expect(left).not.toBe(right);
  });

  test('assignFilesToShards distributes files into N non-empty shards', () => {
    const inputs = ['a.test.ts', 'b.test.ts', 'c.test.ts', 'd.test.ts', 'e.test.ts'];
    const shards = assignFilesToShards(inputs, 3);

    // Nothing lost or duplicated: the shards, flattened and sorted, equal
    // the sorted input.
    const assigned = shards.flat().sort();
    const expected = [...inputs].sort();
    expect(assigned).toEqual(expected);

    const noEmptyShard = shards.every((shard) => shard.length > 0);
    expect(noEmptyShard).toBe(true);
  });

  test('assignFilesToShards rejects invalid shard counts', () => {
    for (const badCount of [0, -1]) {
      expect(() => assignFilesToShards(['a.test.ts'], badCount)).toThrow();
    }
  });

  test('shards are stable across runs (same files always land in same shard)', () => {
    const inputs = ['x.test.ts', 'y.test.ts', 'z.test.ts'];
    const firstRun = assignFilesToShards(inputs, 5);
    const secondRun = assignFilesToShards(inputs, 5);
    expect(firstRun).toEqual(secondRun);
  });
});
|
||||
Reference in New Issue
Block a user