merge: resolve package.json version conflict (accept main's 0.12.5.0)

This commit is contained in:
Garry Tan
2026-03-26 21:00:12 -06:00
37 changed files with 1306 additions and 71 deletions
+3
View File
@@ -329,6 +329,9 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
'retro/SKILL.md instructions': ['retro/SKILL.md', 'retro/SKILL.md.tmpl'],
'qa-only/SKILL.md workflow': ['qa-only/SKILL.md', 'qa-only/SKILL.md.tmpl'],
'gstack-upgrade/SKILL.md upgrade flow': ['gstack-upgrade/SKILL.md', 'gstack-upgrade/SKILL.md.tmpl'],
// Voice directive
'voice directive tone': ['scripts/resolvers/preamble.ts', 'review/SKILL.md', 'review/SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
};
/**
+63
View File
@@ -778,6 +778,69 @@ describeIfSelected('Other skill evals', [
}, 30_000);
});
// Voice directive eval — an LLM judge grades the "## Voice" section found in
// review/SKILL.md on five tone dimensions; every dimension must score >= 4.
describeIfSelected('Voice directive eval', ['voice directive tone'], () => {
  testIfSelected('voice directive tone', async () => {
    /** Shape of the JSON object the judge is asked to return. */
    type VoiceVerdict = {
      directness: number;
      concreteness: number;
      avoids_corporate: number;
      avoids_ai_vocabulary: number;
      connects_user_outcomes: number;
      reasoning: string;
    };

    // Numeric dimensions, in the order they are asserted below.
    const SCORE_KEYS = [
      'directness',
      'concreteness',
      'avoids_corporate',
      'avoids_ai_vocabulary',
      'connects_user_outcomes',
    ] as const;
    // Minimum acceptable score (the judge scores each dimension 1-5).
    const MIN_SCORE = 4;

    const startedAt = Date.now();

    // A tier 2+ skill is read so the full voice directive is in context.
    const skillPath = path.join(ROOT, 'review', 'SKILL.md');
    const skillDoc = fs.readFileSync(skillPath, 'utf-8');

    const headingAt = skillDoc.indexOf('## Voice');
    if (headingAt === -1) {
      throw new Error('Voice section not found in review/SKILL.md. Was preamble.ts regenerated?');
    }

    // The section runs up to the next "## " heading; when there is none,
    // fall back to a fixed 3000-character window starting at the heading.
    const nextHeadingAt = skillDoc.indexOf('\n## ', headingAt + 1);
    const sectionEnd = nextHeadingAt === -1 ? headingAt + 3000 : nextHeadingAt;
    const voiceSection = skillDoc.slice(headingAt, sectionEnd);

    const judgePrompt = `You are evaluating a voice directive for an AI coding assistant framework called GStack.
Score each dimension 1-5 where 5 is excellent:
1. directness: Does it instruct the agent to be direct, lead with the point, take positions?
2. concreteness: Does it instruct the agent to name specific files, commands, line numbers, real numbers?
3. avoids_corporate: Does it explicitly ban corporate/formal/academic tone and provide alternatives?
4. avoids_ai_vocabulary: Does it ban AI-tell words and phrases with specific lists?
5. connects_user_outcomes: Does it instruct the agent to connect technical work to real user experience?
Return JSON only:
{"directness": N, "concreteness": N, "avoids_corporate": N, "avoids_ai_vocabulary": N, "connects_user_outcomes": N, "reasoning": "..."}
THE VOICE DIRECTIVE:
${voiceSection}`;

    const verdict = await callJudge<VoiceVerdict>(judgePrompt);
    console.log('Voice directive scores:', JSON.stringify(verdict, null, 2));

    const {
      directness,
      concreteness,
      avoids_corporate,
      avoids_ai_vocabulary,
      connects_user_outcomes,
    } = verdict;

    // Record the outcome before asserting, so a failing run is still collected.
    evalCollector?.addTest({
      name: 'voice directive tone',
      suite: 'Voice directive eval',
      tier: 'llm-judge',
      passed: SCORE_KEYS.every((key) => verdict[key] >= MIN_SCORE),
      duration_ms: Date.now() - startedAt,
      cost_usd: 0.02, // hard-coded value, not derived from the judge call
      judge_scores: {
        directness,
        concreteness,
        avoids_corporate,
        avoids_ai_vocabulary,
        connects_user_outcomes,
      },
      judge_reasoning: verdict.reasoning,
    });

    // One assertion per dimension; the first one below the threshold fails the test.
    for (const key of SCORE_KEYS) {
      expect(verdict[key]).toBeGreaterThanOrEqual(MIN_SCORE);
    }
  }, 30_000);
});
// Module-level afterAll — finalize eval collector after all tests complete
afterAll(async () => {
if (evalCollector) {
+2 -2
View File
@@ -1325,7 +1325,7 @@ describe('Codex skill', () => {
expect(content).toContain('fall back to the Claude adversarial subagent');
// Review log uses new skill name
expect(content).toContain('adversarial-review');
expect(content).toContain('xhigh');
expect(content).toContain('reasoning_effort="high"');
expect(content).toContain('ADVERSARIAL REVIEW SYNTHESIS');
});
@@ -1335,7 +1335,7 @@ describe('Codex skill', () => {
expect(content).toContain('< 50');
expect(content).toContain('200+');
expect(content).toContain('adversarial-review');
expect(content).toContain('xhigh');
expect(content).toContain('reasoning_effort="high"');
expect(content).toContain('Investigate and fix');
});