mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/prompt-injection-guard
This commit is contained in:
+69
-7
@@ -2,9 +2,10 @@
|
||||
# gstack-config — read/write ~/.gstack/config.yaml
|
||||
#
|
||||
# Usage:
|
||||
# gstack-config get <key> — read a config value
|
||||
# gstack-config get <key> — read a config value (falls back to DEFAULTS)
|
||||
# gstack-config set <key> <value> — write a config value
|
||||
# gstack-config list — show all config
|
||||
# gstack-config list — show all config (values + defaults)
|
||||
# gstack-config defaults — show just the defaults table
|
||||
#
|
||||
# Env overrides (for testing):
|
||||
# GSTACK_STATE_DIR — override ~/.gstack state directory
|
||||
@@ -14,6 +15,8 @@ STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}"
|
||||
CONFIG_FILE="$STATE_DIR/config.yaml"
|
||||
|
||||
# Annotated header for new config files. Written once on first `set`.
|
||||
# Default semantics: DEFAULTS table below is the canonical source. Header text
|
||||
# is documentation that must stay in sync with DEFAULTS.
|
||||
CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on next skill run.
|
||||
# Docs: https://github.com/garrytan/gstack
|
||||
#
|
||||
@@ -25,8 +28,8 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
|
||||
# # prompt. Set back to false to be asked again.
|
||||
#
|
||||
# ─── Telemetry ───────────────────────────────────────────────────────
|
||||
# telemetry: anonymous # off | anonymous | community
|
||||
# # off — no data sent, no local analytics
|
||||
# telemetry: off # off | anonymous | community
|
||||
# # off — no data sent, no local analytics (default)
|
||||
# # anonymous — counter only, no device ID
|
||||
# # community — usage data + stable device ID
|
||||
#
|
||||
@@ -38,6 +41,16 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
|
||||
# skill_prefix: false # true = namespace skills as /gstack-qa, /gstack-ship
|
||||
# # false = short names /qa, /ship
|
||||
#
|
||||
# ─── Checkpoint ──────────────────────────────────────────────────────
|
||||
# checkpoint_mode: explicit # explicit | continuous
|
||||
# # explicit — commit only when you run /ship or /checkpoint
|
||||
# # continuous — auto-commit after each significant change
|
||||
# # with WIP: prefix + [gstack-context] body
|
||||
#
|
||||
# checkpoint_push: false # true = push WIP commits to remote as you go
|
||||
# # false = keep WIP commits local only (default)
|
||||
# # Pushing can trigger CI/deploy hooks — opt in carefully.
|
||||
#
|
||||
# ─── Writing style (V1) ──────────────────────────────────────────────
|
||||
# explain_level: default # default = jargon-glossed, outcome-framed prose
|
||||
# # (V1 default — more accessible for everyone)
|
||||
@@ -53,6 +66,27 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
|
||||
#
|
||||
'
|
||||
|
||||
# DEFAULTS table — the canonical default value for every known key.
# `get <key>` prints the value from here when the key is absent from the
# config file. Keep this table in sync with the CONFIG_HEADER comments.
#
# Usage: lookup_default <key>
# Prints the default followed by a newline; unknown keys print an empty line.
lookup_default() {
  case "$1" in
    proactive|update_check)
      printf '%s\n' "true" ;;
    routing_declined|auto_upgrade|skill_prefix|checkpoint_push|gstack_contributor|skip_eng_review)
      printf '%s\n' "false" ;;
    telemetry)
      printf '%s\n' "off" ;;
    checkpoint_mode)
      printf '%s\n' "explicit" ;;
    codex_reviews)
      printf '%s\n' "enabled" ;;
    cross_project_learnings)
      # intentionally empty → unset triggers first-time prompt
      printf '\n' ;;
    *)
      printf '\n' ;;
  esac
}
|
||||
|
||||
case "${1:-}" in
|
||||
get)
|
||||
KEY="${2:?Usage: gstack-config get <key>}"
|
||||
@@ -61,7 +95,11 @@ case "${1:-}" in
|
||||
echo "Error: key must contain only alphanumeric characters and underscores" >&2
|
||||
exit 1
|
||||
fi
|
||||
grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true
|
||||
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
if [ -z "$VALUE" ]; then
|
||||
VALUE=$(lookup_default "$KEY")
|
||||
fi
|
||||
printf '%s' "$VALUE"
|
||||
;;
|
||||
set)
|
||||
KEY="${2:?Usage: gstack-config set <key> <value>}"
|
||||
@@ -97,10 +135,34 @@ case "${1:-}" in
|
||||
fi
|
||||
;;
|
||||
list)
|
||||
cat "$CONFIG_FILE" 2>/dev/null || true
|
||||
if [ -f "$CONFIG_FILE" ]; then
|
||||
cat "$CONFIG_FILE"
|
||||
fi
|
||||
echo ""
|
||||
echo "# ─── Active values (including defaults for unset keys) ───"
|
||||
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
|
||||
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
|
||||
gstack_contributor skip_eng_review; do
|
||||
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
SOURCE="default"
|
||||
if [ -n "$VALUE" ]; then
|
||||
SOURCE="set"
|
||||
else
|
||||
VALUE=$(lookup_default "$KEY")
|
||||
fi
|
||||
printf ' %-24s %s (%s)\n' "$KEY:" "$VALUE" "$SOURCE"
|
||||
done
|
||||
;;
|
||||
defaults)
|
||||
echo "# gstack-config defaults"
|
||||
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
|
||||
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
|
||||
gstack_contributor skip_eng_review; do
|
||||
printf ' %-24s %s\n' "$KEY:" "$(lookup_default "$KEY")"
|
||||
done
|
||||
;;
|
||||
*)
|
||||
echo "Usage: gstack-config {get|set|list} [key] [value]"
|
||||
echo "Usage: gstack-config {get|set|list|defaults} [key] [value]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
Executable
+168
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-model-benchmark — run the same prompt across multiple providers
|
||||
* and compare latency, tokens, cost, quality, and tool-call count.
|
||||
*
|
||||
* Usage:
|
||||
* gstack-model-benchmark <skill-or-prompt-file> [options]
|
||||
*
|
||||
* Options:
|
||||
* --models claude,gpt,gemini Comma-separated provider list (default: claude)
|
||||
* --prompt "<text>" Inline prompt instead of a file
|
||||
* --workdir <path> Working dir passed to each CLI (default: cwd)
|
||||
* --timeout-ms <n> Per-provider timeout (default: 300000)
|
||||
* --output table|json|markdown Output format (default: table)
|
||||
* --skip-unavailable Skip providers that fail available() check
|
||||
* (default: include them with unavailable marker)
|
||||
* --judge Run Anthropic SDK judge on outputs for quality score
|
||||
* (requires ANTHROPIC_API_KEY; adds ~$0.05 per call)
|
||||
* --dry-run Validate flags + resolve auth, don't invoke providers
|
||||
*
|
||||
* Examples:
|
||||
* gstack-model-benchmark --prompt "Write a haiku about databases" --models claude,gpt
|
||||
* gstack-model-benchmark ./test-prompt.txt --models claude,gpt,gemini --judge
|
||||
* gstack-model-benchmark --prompt "hi" --models claude,gpt,gemini --dry-run
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { runBenchmark, formatTable, formatJson, formatMarkdown, type BenchmarkInput } from '../test/helpers/benchmark-runner';
|
||||
import { ClaudeAdapter } from '../test/helpers/providers/claude';
|
||||
import { GptAdapter } from '../test/helpers/providers/gpt';
|
||||
import { GeminiAdapter } from '../test/helpers/providers/gemini';
|
||||
|
||||
/**
 * One factory per supported provider name. Each call builds a fresh adapter,
 * so nothing is constructed until a provider is actually requested.
 */
const ADAPTER_FACTORIES = {
  claude() {
    return new ClaudeAdapter();
  },
  gpt() {
    return new GptAdapter();
  },
  gemini() {
    return new GeminiAdapter();
  },
};

/** Report formats accepted by --output. */
type OutputFormat = 'table' | 'json' | 'markdown';
|
||||
|
||||
/**
 * Read the value of a command-line option from process.argv.
 *
 * Supports both `--name value` and `--name=value`. The first occurrence wins.
 *
 * @param name Option name including the leading dashes (e.g. `--models`).
 * @param def  Value returned when the option is absent, or when it is the
 *             last token on the command line and therefore has no value.
 * @returns The option's value, or `def`.
 */
function arg(name: string, def?: string): string | undefined {
  const prefix = `${name}=`;
  const idx = process.argv.findIndex(a => a === name || a.startsWith(prefix));
  if (idx < 0) return def;

  const token = process.argv[idx];
  // `--name=value` form: everything after the first '=' is the value.
  if (token !== name) return token.slice(prefix.length);

  // `--name value` form. When the option is the final token there is no
  // value to read; fall back to the default instead of leaking `undefined`
  // to callers that non-null-assert the result.
  return process.argv[idx + 1] ?? def;
}
|
||||
|
||||
/**
 * Report whether a boolean switch is present on the command line.
 * Only an exact token match counts (`--name=x` does not).
 */
function flag(name: string): boolean {
  return process.argv.some(token => token === name);
}
|
||||
|
||||
/**
 * Turn the comma-separated --models value into a de-duplicated provider list.
 *
 * Unknown names are reported on stderr and skipped. Order of first appearance
 * is kept. An empty/missing value, or one with no valid names, yields
 * `['claude']`.
 */
function parseProviders(s: string | undefined): Array<'claude' | 'gpt' | 'gemini'> {
  if (!s) return ['claude'];

  const chosen: Array<'claude' | 'gpt' | 'gemini'> = [];
  s.split(',').forEach(piece => {
    const candidate = piece.trim();
    if (candidate === '') return;
    switch (candidate) {
      case 'claude':
      case 'gpt':
      case 'gemini':
        if (!chosen.includes(candidate)) chosen.push(candidate);
        break;
      default:
        console.error(`WARN: unknown provider '${candidate}' — skipping. Valid: claude, gpt, gemini.`);
    }
  });

  return chosen.length > 0 ? chosen : ['claude'];
}
|
||||
|
||||
/**
 * Work out the prompt text to benchmark.
 *
 * Precedence: a non-empty `--prompt` value, then the positional argument.
 * A positional that names an existing path is read as UTF-8; otherwise the
 * positional itself is used as the prompt. With neither, prints an error
 * and exits with status 1.
 */
function resolvePrompt(positional: string | undefined): string {
  const inlinePrompt = arg('--prompt');
  if (inlinePrompt) return inlinePrompt;

  if (!positional) {
    console.error('ERROR: specify a prompt via positional path or --prompt "<text>"');
    process.exit(1);
  }

  // Existing path → file contents; anything else → literal prompt text.
  return fs.existsSync(positional) ? fs.readFileSync(positional, 'utf-8') : positional;
}
|
||||
|
||||
/**
 * CLI entry point: parse the command line, then either print a dry-run
 * report or run the benchmark (optionally judged) and write the formatted
 * report to stdout.
 *
 * Exits with status 1 when no prompt is supplied (via resolvePrompt) or when
 * --timeout-ms is not a positive integer.
 */
async function main(): Promise<void> {
  // Options that consume the next token as their value when written without
  // '='. Their values must not be mistaken for the positional prompt, e.g.
  // `--models claude ./prompt.txt` must pick "./prompt.txt", not "claude".
  const valueOptions = new Set(['--models', '--prompt', '--workdir', '--timeout-ms', '--output']);
  const cliArgs = process.argv.slice(2);
  let positional: string | undefined;
  for (let i = 0; i < cliArgs.length; i += 1) {
    const token = cliArgs[i];
    if (!token.startsWith('--')) {
      positional = token;
      break;
    }
    if (valueOptions.has(token)) i += 1; // skip this option's value
  }

  const prompt = resolvePrompt(positional);
  const providers = parseProviders(arg('--models'));
  const workdir = arg('--workdir', process.cwd()) ?? process.cwd();

  // Reject non-numeric / non-positive timeouts up front instead of handing
  // NaN to the benchmark runner.
  const timeoutMs = Number.parseInt(arg('--timeout-ms', '300000') ?? '', 10);
  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
    console.error('ERROR: --timeout-ms must be a positive integer (milliseconds)');
    process.exit(1);
  }

  // Unrecognized formats are not rejected here; the switch below falls back
  // to the table formatter for them.
  const output = (arg('--output', 'table') ?? 'table') as OutputFormat;
  const skipUnavailable = flag('--skip-unavailable');
  const doJudge = flag('--judge');
  const dryRun = flag('--dry-run');

  if (dryRun) {
    await dryRunReport({ prompt, providers, workdir, timeoutMs, output, doJudge });
    return;
  }

  const input: BenchmarkInput = {
    prompt,
    workdir,
    providers,
    timeoutMs,
    skipUnavailable,
  };

  const report = await runBenchmark(input);

  if (doJudge) {
    // The judge is loaded lazily and is best-effort: a failure to load or
    // run it is reported as a warning and the unjudged report is still printed.
    try {
      const { judgeEntries } = await import('../test/helpers/benchmark-judge');
      await judgeEntries(report);
    } catch (err) {
      console.error(`WARN: judge unavailable: ${(err as Error).message}`);
    }
  }

  let out: string;
  switch (output) {
    case 'json': out = formatJson(report); break;
    case 'markdown': out = formatMarkdown(report); break;
    case 'table':
    default: out = formatTable(report); break;
  }
  process.stdout.write(out + '\n');
}
|
||||
|
||||
/**
 * Print the resolved settings and each selected provider's availability
 * without sending any prompt. Providers are checked one after another; the
 * final line reports how many were not ready.
 */
async function dryRunReport(opts: {
  prompt: string;
  providers: Array<'claude' | 'gpt' | 'gemini'>;
  workdir: string;
  timeoutMs: number;
  output: OutputFormat;
  doJudge: boolean;
}): Promise<void> {
  const { prompt, providers, workdir, timeoutMs, output, doJudge } = opts;
  // Long prompts are cut to 80 characters for the summary line.
  const promptPreview = prompt.length > 80 ? prompt.slice(0, 80) + '…' : prompt;

  const lines: string[] = [
    '== gstack-model-benchmark --dry-run ==',
    ` prompt: ${promptPreview}`,
    ` providers: ${providers.join(', ')}`,
    ` workdir: ${workdir}`,
    ` timeout_ms: ${timeoutMs}`,
    ` output: ${output}`,
    ` judge: ${doJudge ? 'on (Anthropic SDK)' : 'off'}`,
    '',
    'Adapter availability:',
  ];

  let authFailures = 0;
  for (const name of providers) {
    const factory = ADAPTER_FACTORIES[name];
    if (!factory) {
      lines.push(` ${name}: UNKNOWN PROVIDER`);
      authFailures += 1;
      continue;
    }
    const adapter = factory();
    const check = await adapter.available();
    if (check.ok) {
      lines.push(` ${adapter.name}: OK`);
      continue;
    }
    lines.push(` ${adapter.name}: NOT READY — ${check.reason}`);
    authFailures += 1;
  }

  lines.push('', `(--dry-run — no prompts sent. ${authFailures} provider(s) unavailable.)`);
  process.stdout.write(`${lines.join('\n')}\n`);
}
|
||||
|
||||
// Run the CLI; any uncaught failure is reported and turned into exit status 1.
main().catch((failure) => {
  console.error('FATAL:', failure);
  process.exit(1);
});
|
||||
Executable
+293
@@ -0,0 +1,293 @@
|
||||
#!/usr/bin/env bun
|
||||
// gstack-taste-update — update the persistent taste profile at
|
||||
// ~/.gstack/projects/$SLUG/taste-profile.json
|
||||
//
|
||||
// Usage:
|
||||
// gstack-taste-update approved <variant-path> [--reason "<why>"]
|
||||
// gstack-taste-update rejected <variant-path> [--reason "<why>"]
|
||||
// gstack-taste-update show — print current profile summary
|
||||
// gstack-taste-update migrate — upgrade legacy approved.json to v1
|
||||
//
|
||||
// Schema v1 at ~/.gstack/projects/$SLUG/taste-profile.json:
|
||||
//
|
||||
// {
|
||||
// "version": 1,
|
||||
// "updated_at": "<ISO 8601>",
|
||||
// "dimensions": {
|
||||
// "fonts": { "approved": [...], "rejected": [...] },
|
||||
// "colors": { "approved": [...], "rejected": [...] },
|
||||
// "layouts": { "approved": [...], "rejected": [...] },
|
||||
// "aesthetics": { "approved": [...], "rejected": [...] }
|
||||
// },
|
||||
// "sessions": [ // last 50 only — truncated via decay
|
||||
// { "ts": "<ISO>", "action": "approved"|"rejected", "variant": "<path>", "reason": "<optional>" }
|
||||
// ]
|
||||
// }
|
||||
//
|
||||
// Each Preference entry:
|
||||
// { value: string, confidence: number (0-1), approved_count, rejected_count, last_seen }
|
||||
//
|
||||
// Confidence is computed with Laplace smoothing + 5% weekly decay at read time.
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
/** Root state directory: GSTACK_STATE_DIR if set, else `<HOME>/.gstack` (HOME falls back to '/'). */
const STATE_DIR = process.env.GSTACK_STATE_DIR
  ? process.env.GSTACK_STATE_DIR
  : path.join(process.env.HOME ? process.env.HOME : '/', '.gstack');

/** Version number written into every saved profile. */
const SCHEMA_VERSION = 1;

/** Maximum number of session records kept in a profile. */
const SESSION_CAP = 50;

/** Fraction of confidence lost per week since an entry was last seen. */
const DECAY_PER_WEEK = 0.05;

/** The taste dimensions tracked in a profile. */
type Dimension = 'fonts' | 'colors' | 'layouts' | 'aesthetics';

/** All dimensions, in the order they are processed and displayed. */
const DIMENSIONS: Array<Dimension> = ['fonts', 'colors', 'layouts', 'aesthetics'];

/** One remembered value within a dimension bucket. */
type Preference = {
  value: string;
  confidence: number;
  approved_count: number;
  rejected_count: number;
  /** ISO 8601 timestamp of the most recent update to this entry. */
  last_seen: string;
};

/** One approve/reject event as recorded in the profile's session log. */
type SessionRecord = {
  ts: string;
  action: 'approved' | 'rejected';
  variant: string;
  reason?: string;
};

/** On-disk shape of taste-profile.json. */
type TasteProfile = {
  version: number;
  updated_at: string;
  dimensions: { [D in Dimension]: { approved: Preference[]; rejected: Preference[] } };
  sessions: SessionRecord[];
};
|
||||
|
||||
/**
 * Derive the project slug: the base name of the enclosing git work tree's
 * top-level directory, or 'unknown' when the git command fails.
 */
function getSlug(): string {
  let repoRoot: string;
  try {
    // stderr is discarded so a non-repo directory stays quiet.
    repoRoot = execSync('git rev-parse --show-toplevel', { stdio: ['ignore', 'pipe', 'ignore'] })
      .toString()
      .trim();
  } catch {
    return 'unknown';
  }
  return path.basename(repoRoot);
}
|
||||
|
||||
/** Location of the taste profile for a project: `<STATE_DIR>/projects/<slug>/taste-profile.json`. */
function profilePath(slug: string): string {
  const projectDir = path.join(STATE_DIR, 'projects', slug);
  return path.join(projectDir, 'taste-profile.json');
}
|
||||
|
||||
/**
 * Build a brand-new profile at the current schema version: every dimension
 * has empty approved/rejected lists, there are no sessions, and `updated_at`
 * is the current time.
 */
function emptyProfile(): TasteProfile {
  // Each dimension gets its own arrays so they are never shared.
  const freshBuckets = (): { approved: Preference[]; rejected: Preference[] } => ({
    approved: [],
    rejected: [],
  });

  const profile: TasteProfile = {
    version: SCHEMA_VERSION,
    updated_at: new Date().toISOString(),
    dimensions: {
      fonts: freshBuckets(),
      colors: freshBuckets(),
      layouts: freshBuckets(),
      aesthetics: freshBuckets(),
    },
    sessions: [],
  };
  return profile;
}
|
||||
|
||||
/**
 * Read the stored profile for a project.
 *
 * - No file: returns an empty profile.
 * - File with no version, or a version below SCHEMA_VERSION: returns the
 *   result of migrate().
 * - Unreadable/unparseable file: warns on stderr and returns an empty profile.
 *
 * Nothing is written to disk here.
 */
function load(slug: string): TasteProfile {
  const file = profilePath(slug);
  if (!fs.existsSync(file)) return emptyProfile();

  try {
    const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
    const isCurrent = Boolean(parsed.version) && !(parsed.version < SCHEMA_VERSION);
    return isCurrent ? (parsed as TasteProfile) : migrate(parsed);
  } catch (err) {
    console.error(`WARN: could not parse ${file}:`, (err as Error).message);
    return emptyProfile();
  }
}
|
||||
|
||||
/**
 * Write a profile to its project file as pretty-printed JSON with a trailing
 * newline, creating parent directories as needed.
 *
 * Note: stamps `profile.updated_at` with the current time on the object
 * passed in.
 */
function save(slug: string, profile: TasteProfile): void {
  const target = profilePath(slug);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });

  profile.updated_at = new Date().toISOString();
  const serialized = `${JSON.stringify(profile, null, 2)}\n`;
  fs.writeFileSync(target, serialized);
}
|
||||
|
||||
/**
 * Convert a legacy profile (no version, or version < SCHEMA_VERSION) to the
 * current schema. Starts from an empty profile and carries over only what is
 * recognizably well-formed: a `sessions` array (last SESSION_CAP entries) and,
 * per known dimension, `approved` / `rejected` arrays. Anything else is
 * dropped, so an unrecognized legacy file becomes an empty-but-valid profile.
 */
function migrate(legacy: unknown): TasteProfile {
  const result = emptyProfile();
  if (!legacy || typeof legacy !== 'object') return result;

  const { sessions, dimensions } = legacy as Record<string, unknown>;

  if (Array.isArray(sessions)) {
    result.sessions = sessions.slice(-SESSION_CAP) as SessionRecord[];
  }

  if (!dimensions || typeof dimensions !== 'object') return result;
  const legacyDims = dimensions as Record<string, unknown>;

  for (const dim of DIMENSIONS) {
    const entry = legacyDims[dim];
    if (!entry || typeof entry !== 'object') continue;

    const { approved, rejected } = entry as Record<string, unknown>;
    if (Array.isArray(approved)) result.dimensions[dim].approved = approved as Preference[];
    if (Array.isArray(rejected)) result.dimensions[dim].rejected = rejected as Preference[];
  }
  return result;
}
|
||||
|
||||
/**
 * Apply DECAY_PER_WEEK compounding decay to every preference's confidence,
 * based on the time elapsed since its `last_seen`.
 *
 * Returns a deep copy (via a JSON round-trip); the input is neither mutated
 * nor persisted.
 *
 * Malformed entries are tolerated rather than producing NaN:
 * - a missing/unparseable or future `last_seen` means no decay is applied;
 * - a non-numeric stored confidence is treated as 0.
 *
 * @param profile Profile to read.
 * @returns A copy of `profile` with decayed confidence values.
 */
function applyDecay(profile: TasteProfile): TasteProfile {
  const WEEK_MS = 7 * 24 * 60 * 60 * 1000;
  const now = Date.now();
  const retention = 1 - DECAY_PER_WEEK;
  const decayed = JSON.parse(JSON.stringify(profile)) as TasteProfile;

  for (const dim of DIMENSIONS) {
    for (const bucket of ['approved', 'rejected'] as const) {
      for (const pref of decayed.dimensions[dim][bucket]) {
        const lastSeen = new Date(pref.last_seen).getTime();
        // Invalid dates parse to NaN; without this guard NaN would flow into
        // the confidence and show up as "NaN" in the summary.
        const weeks = Number.isFinite(lastSeen) ? Math.max(0, (now - lastSeen) / WEEK_MS) : 0;
        const base = typeof pref.confidence === 'number' && Number.isFinite(pref.confidence)
          ? pref.confidence
          : 0;
        pref.confidence = Math.max(0, base * Math.pow(retention, weeks));
      }
    }
  }
  return decayed;
}
|
||||
|
||||
/**
 * Pull per-dimension values out of a free-text reason.
 *
 * V1 has no access to real design tokens (the variant is only a path/name),
 * so callers describe them in the reason as labelled lists, e.g.
 * "fonts: X, Y; colors: Z". Labels are matched case-insensitively, a list
 * runs up to the next ';', and items are comma-separated and trimmed. If a
 * label appears more than once, the last occurrence wins. With no reason, or
 * no labels in it, the result is empty and only the session gets recorded.
 *
 * Future v2: parse the variant PNG's EXIF, or read an accompanying manifest
 * that design-shotgun writes next to each variant.
 */
function extractSignals(reason?: string): Partial<Record<Dimension, string[]>> {
  const signals: Partial<Record<Dimension, string[]>> = {};
  if (!reason) return signals;

  for (const match of reason.matchAll(/(fonts|colors|layouts|aesthetics):\s*([^;]+)/gi)) {
    const label = match[1];
    const rawList = match[2];
    const items = rawList
      .split(',')
      .map(item => item.trim())
      .filter(item => item.length > 0);
    signals[label.toLowerCase() as Dimension] = items;
  }
  return signals;
}
|
||||
|
||||
/**
 * Record one approve/reject observation of `value` in `list`.
 *
 * Finds the entry case-insensitively (creating it if needed), increments the
 * counter matching `action`, refreshes `last_seen`, and recomputes the
 * entry's confidence as
 *
 *     (observations matching `action`) / (total observations + 1)
 *
 * so a single observation gives 0.5 and repeated ones approach 1. The
 * numerator follows `action`; using `approved_count` unconditionally would
 * pin every entry in the rejected bucket to 0 confidence.
 *
 * If `opposite` already holds a well-supported entry for the same value
 * (at least 3 observations and confidence >= 0.6), a drift note is written
 * to stderr; both signals are kept.
 *
 * @param list     Bucket to update (mutated in place).
 * @param value    Preference value, e.g. a font or color name.
 * @param opposite The other bucket of the same dimension (read only).
 * @param action   Which kind of observation this is.
 * @returns The same `list` instance.
 */
function bumpPref(list: Preference[], value: string, opposite: Preference[], action: 'approved' | 'rejected'): Preference[] {
  const now = new Date().toISOString();
  const wanted = value.toLowerCase();

  let entry = list.find(p => p.value.toLowerCase() === wanted);
  if (!entry) {
    entry = { value, confidence: 0, approved_count: 0, rejected_count: 0, last_seen: now };
    list.push(entry);
  }

  if (action === 'approved') {
    entry.approved_count += 1;
  } else {
    entry.rejected_count += 1;
  }
  entry.last_seen = now;

  // Smoothed confidence in this bucket's own signal.
  const total = entry.approved_count + entry.rejected_count;
  const matching = action === 'approved' ? entry.approved_count : entry.rejected_count;
  entry.confidence = matching / (total + 1);

  // Flag conflict if the opposite bucket has a strong entry for this value.
  const opp = opposite.find(p => p.value.toLowerCase() === wanted);
  if (opp && opp.approved_count + opp.rejected_count >= 3 && opp.confidence >= 0.6) {
    console.error(`NOTE: taste drift — "${value}" previously ${action === 'approved' ? 'rejected' : 'approved'} with confidence ${opp.confidence.toFixed(2)}. Keep both signals; aggregate confidence will rebalance.`);
  }
  return list;
}
|
||||
|
||||
/**
 * Handle `approved` / `rejected`: fold the signals found in `reason` into the
 * matching bucket of each dimension, append a session record (always, even
 * when no signals were found), keep only the newest SESSION_CAP sessions,
 * save the profile, and print a confirmation line.
 */
function cmdUpdate(action: 'approved' | 'rejected', variant: string, reason?: string): void {
  const slug = getSlug();
  const profile = load(slug);
  const signals = extractSignals(reason);
  const otherAction = action === 'approved' ? 'rejected' : 'approved';

  for (const dim of DIMENSIONS) {
    const values = signals[dim];
    if (!values) continue;
    const buckets = profile.dimensions[dim];
    values.forEach(v => {
      bumpPref(buckets[action], v, buckets[otherAction], action);
    });
  }

  profile.sessions.push({ ts: new Date().toISOString(), action, variant, reason });
  // Oldest records fall off first once the cap is exceeded.
  profile.sessions = profile.sessions.slice(-SESSION_CAP);

  save(slug, profile);
  console.log(`${action}: ${variant} → ${profilePath(slug)}`);
}
|
||||
|
||||
/**
 * Handle `show`: print a summary of the current project's profile with decay
 * applied. For each dimension that has entries, lists up to three approved
 * and up to three rejected values, ranked by confidence × matching count.
 */
function cmdShow(): void {
  const slug = getSlug();
  const profile = applyDecay(load(slug));
  console.log(`taste-profile.json (slug: ${slug}, sessions: ${profile.sessions.length})`);

  // Sort a copy so the profile's own arrays keep their order.
  const topThree = (prefs: Preference[], countKey: 'approved_count' | 'rejected_count'): Preference[] =>
    [...prefs]
      .sort((a, b) => b.confidence * b[countKey] - a.confidence * a[countKey])
      .slice(0, 3);

  const printGroup = (heading: string, prefs: Preference[]): void => {
    if (prefs.length === 0) return;
    console.log(heading);
    prefs.forEach(p => {
      console.log(` ${p.value} — conf ${p.confidence.toFixed(2)} (+${p.approved_count}/-${p.rejected_count})`);
    });
  };

  for (const dim of DIMENSIONS) {
    const buckets = profile.dimensions[dim];
    const likes = topThree(buckets.approved, 'approved_count');
    const dislikes = topThree(buckets.rejected, 'rejected_count');
    if (likes.length === 0 && dislikes.length === 0) continue;

    console.log(`\n[${dim}]`);
    printGroup(' approved (decayed):', likes);
    printGroup(' rejected:', dislikes);
  }
}
|
||||
|
||||
/**
 * Handle `migrate`: load the profile (load() upgrades older schemas in
 * memory) and write it straight back, then report where it was saved.
 */
function cmdMigrate(): void {
  const slug = getSlug();
  save(slug, load(slug));
  console.log(`migrated taste profile to v${SCHEMA_VERSION} at ${profilePath(slug)}`);
}
|
||||
|
||||
// ─── CLI entry ────────────────────────────────────────────────
|
||||
|
||||
// Dispatch on the first CLI argument. Unknown or missing commands, and
// approved/rejected without a variant, print usage and exit with status 1.
const args = process.argv.slice(2);
const cmd = args[0];

if (cmd === 'approved' || cmd === 'rejected') {
  const variant = args[1];
  if (!variant) {
    console.error(`Usage: gstack-taste-update ${cmd} <variant-path> [--reason "<why>"]`);
    process.exit(1);
  }
  // The reason is whatever token follows `--reason`, if the flag is present.
  const reasonPos = args.indexOf('--reason');
  const reason = reasonPos < 0 ? undefined : args[reasonPos + 1];
  cmdUpdate(cmd as 'approved' | 'rejected', variant, reason);
} else if (cmd === 'show') {
  cmdShow();
} else if (cmd === 'migrate') {
  cmdMigrate();
} else {
  console.error('Usage: gstack-taste-update {approved|rejected|show|migrate} [args]');
  process.exit(1);
}
|
||||
Reference in New Issue
Block a user