mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
feat: voice directive for all skills (v0.12.3.0) (#520)
* feat: add voice directive to skill preamble with tiered context/concreteness/humor Adds a Voice section to all skill preambles via the template resolver. Three new subsections: context-dependent tone (YC partner / senior eng / blog post), concreteness standard (exact commands, line numbers, real numbers), and connect-to-user-outcomes guidance. Humor calibrated to dry observations about software absurdity. Includes eval test for voice directive presence and banned-word filtering. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: regenerate SKILL.md files with voice directive Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: sync package.json version with VERSION file (0.12.2.0) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: regenerate connect-chrome SKILL.md with voice directive Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: bump version and changelog (v0.12.3.0) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,9 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
|
||||
'retro/SKILL.md instructions': ['retro/SKILL.md', 'retro/SKILL.md.tmpl'],
|
||||
'qa-only/SKILL.md workflow': ['qa-only/SKILL.md', 'qa-only/SKILL.md.tmpl'],
|
||||
'gstack-upgrade/SKILL.md upgrade flow': ['gstack-upgrade/SKILL.md', 'gstack-upgrade/SKILL.md.tmpl'],
|
||||
|
||||
// Voice directive
|
||||
'voice directive tone': ['scripts/resolvers/preamble.ts', 'review/SKILL.md', 'review/SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -778,6 +778,69 @@ describeIfSelected('Other skill evals', [
|
||||
}, 30_000);
|
||||
});
|
||||
|
||||
/**
 * Voice directive eval — checks that the generated Voice section asks for the right tone.
 *
 * Reads review/SKILL.md, extracts its `## Voice` section, and asks the LLM judge
 * to score that section on five dimensions (1-5). The test passes only when every
 * dimension reaches VOICE_MIN_SCORE. The verdict recorded in the eval collector and
 * the final assertion are derived from the same list, so they cannot drift apart.
 */
describeIfSelected('Voice directive eval', ['voice directive tone'], () => {
  // Dimensions the judge prompt below asks for, in the order they are reported.
  const VOICE_DIMENSIONS = [
    'directness',
    'concreteness',
    'avoids_corporate',
    'avoids_ai_vocabulary',
    'connects_user_outcomes',
  ] as const;
  type VoiceDimension = (typeof VOICE_DIMENSIONS)[number];
  type VoiceJudgeResult = Record<VoiceDimension, number> & { reasoning: string };

  // Minimum judge score (out of 5) each dimension must reach.
  const VOICE_MIN_SCORE = 4;
  // How much text to send when no later `## ` heading closes the Voice section.
  const VOICE_FALLBACK_CHARS = 3000;

  testIfSelected('voice directive tone', async () => {
    const t0 = Date.now();

    // Read a tier 2+ skill to get the full voice directive in context
    const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');

    // Anchor to the start of a line so `### Voice…` subsections or inline
    // mentions of "## Voice" are not mistaken for the section heading.
    const voiceStart = content.search(/^## Voice\b/m);
    if (voiceStart === -1) {
      throw new Error('Voice section not found in review/SKILL.md. Was preamble.ts regenerated?');
    }

    // The section runs up to the next level-2 heading, if there is one.
    const voiceEnd = content.indexOf('\n## ', voiceStart + 1);
    const voiceSection = content.slice(
      voiceStart,
      voiceEnd !== -1 ? voiceEnd : voiceStart + VOICE_FALLBACK_CHARS,
    );

    const result = await callJudge<VoiceJudgeResult>(`You are evaluating a voice directive for an AI coding assistant framework called GStack.
Score each dimension 1-5 where 5 is excellent:

1. directness: Does it instruct the agent to be direct, lead with the point, take positions?
2. concreteness: Does it instruct the agent to name specific files, commands, line numbers, real numbers?
3. avoids_corporate: Does it explicitly ban corporate/formal/academic tone and provide alternatives?
4. avoids_ai_vocabulary: Does it ban AI-tell words and phrases with specific lists?
5. connects_user_outcomes: Does it instruct the agent to connect technical work to real user experience?

Return JSON only:
{"directness": N, "concreteness": N, "avoids_corporate": N, "avoids_ai_vocabulary": N, "connects_user_outcomes": N, "reasoning": "..."}

THE VOICE DIRECTIVE:
${voiceSection}`);

    console.log('Voice directive scores:', JSON.stringify(result, null, 2));

    // `!(score >= min)` also treats a missing or non-numeric score as a failure.
    const failingDimensions = VOICE_DIMENSIONS.filter((dim) => !(result[dim] >= VOICE_MIN_SCORE));

    evalCollector?.addTest({
      name: 'voice directive tone',
      suite: 'Voice directive eval',
      tier: 'llm-judge',
      passed: failingDimensions.length === 0,
      duration_ms: Date.now() - t0,
      cost_usd: 0.02,
      judge_scores: Object.fromEntries(VOICE_DIMENSIONS.map((dim) => [dim, result[dim]])),
      judge_reasoning: result.reasoning,
    });

    // One assertion over the whole list: a failure names every dimension below the bar.
    expect(failingDimensions).toEqual([]);
  }, 30_000);
});
|
||||
|
||||
// Module-level afterAll — finalize eval collector after all tests complete
|
||||
afterAll(async () => {
|
||||
if (evalCollector) {
|
||||
|
||||
Reference in New Issue
Block a user