mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-27 12:10:00 +02:00
Merge origin/main into /spec branch — retag v1.45.0.0 → v1.47.0.0
main moved to v1.46.0.0 (gstack v2 foundation, eval-first floor across 51 skills) while this branch was at v1.45.0.0. v1.46 also reserved v1.45.0.0 for the design daemon feature. Retag this branch's release v1.45.0.0 → v1.47.0.0 so it lands cleanly on top of main. Conflict resolutions: - VERSION: 1.47.0.0 (MINOR continues on top of main's 1.46.0.0; this branch is also a MINOR per scale-aware rules — new skill capability). - CHANGELOG: rewrite this branch's release header v1.45.0.0 → v1.47.0.0. Keep both main entries above main's older history. Adapts to main's eval-first floor (v1.46.0.0 test/skill-coverage-matrix.ts + test/skill-coverage-floor.test.ts): - Register /spec in SKILL_COVERAGE with 3 gate entries + 2 periodic. - Skill catalog grows 51 → 52. Floor 6/6 structural checks pass. - Catalog tokens: 4045 → 4116 (+71 for /spec, within v1.46's ≤7000 budget). - Trim spec frontmatter description to single-paragraph block form to respect v1.46's catalog-trim intent (was 14 lines / ~900 chars, now 5 lines / ~350 chars; routing prose stays in body sections). - 363/363 gate-tier tests pass across skill-coverage-floor (309) + skill-coverage-matrix (10) + skill-size-budget (3) + parity-suite (4) + spec-template-invariants (35) + spec-template-sync (2). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -891,8 +891,11 @@ If the JSON contains \`"regenerated": true\`:
|
||||
1. Read \`regenerateAction\` (or \`remixSpec\` for remix requests)
|
||||
2. Generate new variants with \`$D iterate\` or \`$D variants\` using updated brief
|
||||
3. Create new board with \`$D compare\`
|
||||
4. POST the new HTML to the running server via \`curl -X POST http://localhost:PORT/api/reload -H 'Content-Type: application/json' -d '{"html":"$_DESIGN_DIR/design-board.html"}'\`
|
||||
(parse the port from stderr: look for \`SERVE_STARTED: port=XXXXX\`)
|
||||
4. POST the new HTML to the running board. Parse the board URL from stderr
|
||||
(\`BOARD_URL: http://127.0.0.1:N/boards/<id>/\` — the daemon path) or fall
|
||||
back to the legacy port (\`SERVE_STARTED: port=N\` — only emitted under
|
||||
\`--no-daemon\`, hits \`/api/reload\` root). Daemon path:
|
||||
\`curl -X POST "\${BOARD_URL}api/reload" -H 'Content-Type: application/json' -d '{"html":"$_DESIGN_DIR/design-board.html"}'\`
|
||||
5. Board auto-refreshes in the same tab
|
||||
|
||||
If \`"regenerated": false\`: proceed with the approved variant.
|
||||
@@ -919,8 +922,12 @@ This command generates the board HTML, starts an HTTP server on a random port,
|
||||
and opens it in the user's default browser. **Run it in the background** with \`&\`
|
||||
because the server needs to stay running while the user interacts with the board.
|
||||
|
||||
Parse the port from stderr output: \`SERVE_STARTED: port=XXXXX\`. You need this
|
||||
for the board URL and for reloading during regeneration cycles.
|
||||
Parse the board URL from stderr output. Default daemon path:
|
||||
\`BOARD_URL: http://127.0.0.1:N/boards/<id>/\` (already includes the per-board
|
||||
path; use this for the AskUserQuestion URL AND as the base for the reload
|
||||
endpoint). Legacy \`--no-daemon\` path emits \`SERVE_STARTED: port=XXXXX\` and
|
||||
serves a single board at \`/\`, with reload at \`/api/reload\` — only relevant
|
||||
when an external caller explicitly passes \`--no-daemon\`.
|
||||
|
||||
**PRIMARY WAIT: AskUserQuestion with board URL**
|
||||
|
||||
@@ -928,11 +935,14 @@ After the board is serving, use AskUserQuestion to wait for the user. Include th
|
||||
board URL so they can click it if they lost the browser tab:
|
||||
|
||||
"I've opened a comparison board with the design variants:
|
||||
http://127.0.0.1:<PORT>/ — Rate them, leave comments, remix
|
||||
<BOARD_URL> — Rate them, leave comments, remix
|
||||
elements you like, and click Submit when you're done. Let me know when you've
|
||||
submitted your feedback (or paste your preferences here). If you clicked
|
||||
Regenerate or Remix on the board, tell me and I'll generate new variants."
|
||||
|
||||
Substitute \`<BOARD_URL>\` with the URL parsed from stderr (the daemon path
|
||||
emits \`BOARD_URL: http://127.0.0.1:N/boards/<id>/\`).
|
||||
|
||||
**Do NOT use AskUserQuestion to ask which variant the user prefers.** The comparison
|
||||
board IS the chooser. AskUserQuestion is just the blocking wait mechanism.
|
||||
|
||||
@@ -976,8 +986,13 @@ the approved variant.
|
||||
2. If \`regenerateAction\` is \`"remix"\`, read \`remixSpec\` (e.g. \`{"layout":"A","colors":"B"}\`)
|
||||
3. Generate new variants with \`$D iterate\` or \`$D variants\` using updated brief
|
||||
4. Create new board: \`$D compare --images "..." --output "$_DESIGN_DIR/design-board.html"\`
|
||||
5. Reload the board in the user's browser (same tab):
|
||||
\`curl -s -X POST http://127.0.0.1:PORT/api/reload -H 'Content-Type: application/json' -d '{"html":"$_DESIGN_DIR/design-board.html"}'\`
|
||||
5. Reload the board in the user's browser (same tab) — the URL is per-board
|
||||
under daemon mode, so use \`<BOARD_URL>\` (from the \`BOARD_URL:\` stderr
|
||||
line) as the base:
|
||||
\`curl -s -X POST "\${BOARD_URL}api/reload" -H 'Content-Type: application/json' -d '{"html":"$_DESIGN_DIR/design-board.html"}'\`
|
||||
Under \`--no-daemon\` the reload endpoint is \`/api/reload\` at the legacy
|
||||
port; this path only matters if the caller explicitly opted out of the
|
||||
daemon.
|
||||
6. The board auto-refreshes. **AskUserQuestion again** with the same board URL to
|
||||
wait for the next round of feedback. Repeat until \`feedback.json\` appears.
|
||||
|
||||
|
||||
@@ -1,9 +1,20 @@
|
||||
/**
|
||||
* RESOLVERS record — maps {{PLACEHOLDER}} names to generator functions.
|
||||
* RESOLVERS record — maps {{PLACEHOLDER}} names to generator functions
|
||||
* or gated entries.
|
||||
*
|
||||
* Each resolver takes a TemplateContext and returns the replacement string.
|
||||
* Resolvers may be either a bare function (always fires) or a gated entry
|
||||
* ({ resolve, appliesTo }) where appliesTo can return false to skip the
|
||||
* resolver for a given skill. See ./types.ts: ResolverEntry.
|
||||
*
|
||||
* Most resolvers don't need a gate — the {{NAME}} placeholder system is
|
||||
* already conditional at the template level (the resolver only fires for
|
||||
* skills that reference it). Use a gate when you want a structural
|
||||
* guardrail that says "this placeholder is meaningful only in skills X, Y, Z"
|
||||
* even if someone later adds {{NAME}} to skill W.
|
||||
*/
|
||||
|
||||
import type { TemplateContext, ResolverFn } from './types';
|
||||
import type { TemplateContext, ResolverFn, ResolverValue } from './types';
|
||||
|
||||
// Domain modules
|
||||
import { generatePreamble } from './preamble';
|
||||
@@ -24,7 +35,7 @@ import { generateQuestionPreferenceCheck, generateQuestionLog, generateInlineTun
|
||||
import { generateMakePdfSetup } from './make-pdf';
|
||||
import { generateTasksSectionEmit, generateTasksSectionAggregate } from './tasks-section';
|
||||
|
||||
export const RESOLVERS: Record<string, ResolverFn> = {
|
||||
export const RESOLVERS: Record<string, ResolverValue> = {
|
||||
SLUG_EVAL: generateSlugEval,
|
||||
SLUG_SETUP: generateSlugSetup,
|
||||
COMMAND_REFERENCE: generateCommandReference,
|
||||
|
||||
@@ -109,10 +109,10 @@ export function generatePreamble(ctx: TemplateContext): string {
|
||||
...(tier >= 2 ? [
|
||||
generateContextRecovery(ctx),
|
||||
generateWritingStyle(ctx),
|
||||
generateCompletenessSection(),
|
||||
generateConfusionProtocol(),
|
||||
generateCompletenessSection(ctx),
|
||||
generateConfusionProtocol(ctx),
|
||||
generateContinuousCheckpoint(),
|
||||
generateContextHealth(),
|
||||
generateContextHealth(ctx),
|
||||
generateQuestionTuning(ctx),
|
||||
] : []),
|
||||
...(tier >= 3 ? [generateRepoModeSection(), generateSearchBeforeBuildingSection(ctx)] : []),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { TemplateContext } from '../types';
|
||||
|
||||
|
||||
export function generateCompletenessSection(): string {
|
||||
export function generateCompletenessSection(ctx?: TemplateContext): string {
|
||||
if (ctx?.explainLevel === 'terse') return '';
|
||||
return `## Completeness Principle — Boil the Lake
|
||||
|
||||
AI makes completeness cheap. Recommend complete lakes (tests, edge cases, error paths); flag oceans (rewrites, multi-quarter migrations).
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
export function generateConfusionProtocol(): string {
|
||||
import type { TemplateContext } from '../types';
|
||||
|
||||
export function generateConfusionProtocol(ctx?: TemplateContext): string {
|
||||
if (ctx?.explainLevel === 'terse') return '';
|
||||
return `## Confusion Protocol
|
||||
|
||||
For high-stakes ambiguity (architecture, data model, destructive scope, missing context), STOP. Name it in one sentence, present 2-3 options with tradeoffs, and ask. Do not use for routine coding or obvious changes.`;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { TemplateContext } from '../types';
|
||||
|
||||
|
||||
export function generateContextHealth(): string {
|
||||
export function generateContextHealth(ctx?: TemplateContext): string {
|
||||
if (ctx?.explainLevel === 'terse') return '';
|
||||
return `## Context Health (soft directive)
|
||||
|
||||
During long-running skill sessions, periodically write a brief \`[PROGRESS]\` summary: done, next, surprises.
|
||||
|
||||
@@ -1,25 +1,24 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { TemplateContext } from '../types';
|
||||
|
||||
function loadJargonList(): string[] {
|
||||
const jargonPath = path.join(__dirname, '..', '..', 'jargon-list.json');
|
||||
try {
|
||||
const raw = fs.readFileSync(jargonPath, 'utf-8');
|
||||
const data = JSON.parse(raw);
|
||||
if (Array.isArray(data?.terms)) return data.terms.filter((t: unknown): t is string => typeof t === 'string');
|
||||
} catch {
|
||||
// Missing or malformed: fall back to empty list. Writing Style block still fires,
|
||||
// but with no terms to gloss — graceful degradation.
|
||||
/**
|
||||
* Writing Style preamble section.
|
||||
*
|
||||
* v1.47.0.0 changes (T3):
|
||||
* - Jargon list is referenced by path, not inlined. The 80-term list was
|
||||
* duplicated into every tier-2+ skill (~1.5-2 KB × 48 skills = ~80 KB
|
||||
* across the corpus). The pointer asks the agent to Read the JSON on
|
||||
* first jargon term encountered — one extra Read per session, but the
|
||||
* per-corpus payload is ~30 bytes.
|
||||
* - When `ctx.explainLevel === 'terse'`, the entire section is replaced
|
||||
* with a one-line pointer. Saves ~1.5 KB per tier-2+ skill in the
|
||||
* opt-in terse build.
|
||||
*/
|
||||
export function generateWritingStyle(ctx: TemplateContext): string {
|
||||
if (ctx.explainLevel === 'terse') {
|
||||
return `## Writing Style\n\nTerse mode (build-time): skip jargon glossing, outcome-framing layer, and decision-impact closers. Lead with the answer.\n`;
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
export function generateWritingStyle(_ctx: TemplateContext): string {
|
||||
const terms = loadJargonList();
|
||||
const jargonBlock = terms.length > 0
|
||||
? `Jargon list, gloss on first use if the term appears:\n${terms.map(t => `- ${t}`).join('\n')}`
|
||||
: `Jargon list unavailable. Skip jargon glossing until \`scripts/jargon-list.json\` is restored.`;
|
||||
const jargonPath = `${ctx.paths.skillRoot}/scripts/jargon-list.json`;
|
||||
|
||||
return `## Writing Style (skip entirely if \`EXPLAIN_LEVEL: terse\` appears in the preamble echo OR the user's current message explicitly requests terse / no-explanations output)
|
||||
|
||||
@@ -32,6 +31,6 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
|
||||
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
|
||||
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
|
||||
|
||||
${jargonBlock}
|
||||
Curated jargon list lives at \`${jargonPath}\` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the \`terms\` array as the canonical list. The list is repo-owned and may grow between releases.
|
||||
`;
|
||||
}
|
||||
|
||||
@@ -62,7 +62,56 @@ export interface TemplateContext {
|
||||
preambleTier?: number; // 1-4, controls which preamble sections are included
|
||||
model?: Model; // model family for behavioral overlay. Omitted/undefined → no overlay.
|
||||
interactive?: boolean; // true → emit plan-mode handshake in preamble. Generator-only, not written to SKILL.md.
|
||||
/**
|
||||
* Build-time compression mode. Defaults to 'default'.
|
||||
*
|
||||
* - 'default': full preamble prose ships as today (writing style, completeness,
|
||||
* confusion protocol, context health are all present).
|
||||
* - 'terse': writing-style + completeness + confusion-protocol + context-health
|
||||
* sections are compressed at gen time: writing style becomes a one-line pointer and the other three are omitted entirely. Saves ~3-5 KB
|
||||
* per tier-2+ skill. Opt-in via `--explain-level=terse` build flag for
|
||||
* users who want shipped skills to match their runtime preference and
|
||||
* avoid the per-session terse-mode prose.
|
||||
*
|
||||
* Default builds keep the runtime-conditional behavior intact (Writing Style
|
||||
* section says "skip entirely if EXPLAIN_LEVEL: terse appears in preamble echo").
|
||||
* Terse builds make the compression structural — bytes never ship in the first place.
|
||||
*/
|
||||
explainLevel?: 'default' | 'terse';
|
||||
}
|
||||
|
||||
/** Resolver function signature. args is populated for parameterized placeholders like {{INVOKE_SKILL:name}}. */
|
||||
export type ResolverFn = (ctx: TemplateContext, args?: string[]) => string;
|
||||
|
||||
/**
|
||||
* Optional gated resolver. When the gate returns false, the resolver is
|
||||
* skipped (substituted with empty string) — same effect as the placeholder
|
||||
* not being referenced. Use when a resolver's output is only meaningful for
|
||||
* a known subset of skills, so future template authors get a structural
|
||||
* guardrail instead of relying on social knowledge.
|
||||
*
|
||||
* Most resolvers don't need this — the {{NAME}} placeholder system is
|
||||
* already conditional at the template level. Use only when a resolver
|
||||
* lives inside another resolver (e.g. via preamble composition) AND must
|
||||
* be conditionalized, or when a top-level resolver has a small, well-defined
|
||||
* audience.
|
||||
*/
|
||||
export interface ResolverEntry {
|
||||
resolve: ResolverFn;
|
||||
appliesTo?: (ctx: TemplateContext) => boolean;
|
||||
}
|
||||
|
||||
/** Anything the RESOLVERS map accepts — either a bare function or a gated entry. */
|
||||
export type ResolverValue = ResolverFn | ResolverEntry;
|
||||
|
||||
/**
|
||||
* Type-narrowing helper for the gen-skill-docs lookup.
|
||||
* Returns { resolve, appliesTo } so callers can check appliesTo?.(ctx) before invoking resolve.
|
||||
*/
|
||||
export function unwrapResolver(entry: ResolverValue): {
|
||||
resolve: ResolverFn;
|
||||
appliesTo?: (ctx: TemplateContext) => boolean;
|
||||
} {
|
||||
if (typeof entry === 'function') return { resolve: entry };
|
||||
return { resolve: entry.resolve, appliesTo: entry.appliesTo };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user