fix: pre-landing review fixes (datamark, DRY, compact, coverage)

Addresses the pre-landing review findings (all INFORMATIONAL, no criticals):
- security: datamark resurfaced decision text at the render boundary
  (lib/gstack-decision.ts datamark() — neutralizes code fences, --- banners,
  <|role|>/</system> markers, control chars, newlines). Applied in
  gstack-decision-search human output so stored text can't masquerade as
  instructions in Context Recovery (codex hardening #3 / AC #7). --json stays raw.
- DRY: extract resolveSlug/gitBranch/flagValue to lib/bin-context.ts; both
  decision bins use it instead of duplicating the helpers.
- compact(): batch the archive append (one write, not N) and shrink the
  mid-compact crash window; simplify the opaque branch/issue ternary.
- coverage: learnings-log injection rejection (D2A wiring), search --recent/
  --scope + NaN-safe --recent, datamark-applied, unparseable lock body,
  compact-empty, corrupt-snapshot degrade.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-07 19:17:44 -07:00
parent 02eba57f3a
commit 55e7ed9fec
8 changed files with 164 additions and 47 deletions
+28
View File
@@ -0,0 +1,28 @@
/**
* bin-context — tiny shared helpers for non-interactive gstack bins that need the
* project slug, current branch, and argv flags. Extracted from the decision bins
* (gstack-decision-log / gstack-decision-search) so the slug/branch/flag plumbing
* lives in one audited place instead of being copy-pasted per bin.
*/
import { spawnSync } from "child_process";
/**
 * Spawn the `gstack-slug` helper and pull the project slug out of its stdout.
 *
 * The helper is run with no arguments; the first line of the form `SLUG=<value>`
 * supplies the slug (surrounding whitespace trimmed).
 *
 * @param slugBinPath - Path of the slug helper executable to spawn.
 * @returns The slug, or `"unknown"` when stdout is missing/empty or has no
 *   `SLUG=` line (e.g. the helper could not be started).
 */
export function resolveSlug(slugBinPath: string): string {
  const { stdout } = spawnSync(slugBinPath, { encoding: "utf-8" });
  const found = /^SLUG=(.+)$/m.exec(stdout || "");
  if (!found) {
    return "unknown";
  }
  return found[1].trim();
}
/**
 * Ask git for the name of the currently checked-out branch.
 *
 * Runs `git rev-parse --abbrev-ref HEAD` and trims its stdout.
 *
 * @returns The branch name, or `undefined` when git produced no output (for
 *   example outside a repository) or printed the literal `HEAD`, which is what
 *   it reports for a detached HEAD.
 */
export function gitBranch(): string | undefined {
  const result = spawnSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], { encoding: "utf-8" });
  const name = (result.stdout || "").trim();
  if (name === "" || name === "HEAD") {
    return undefined;
  }
  return name;
}
/**
 * Look up the argument that immediately follows a flag in an argv-style list.
 *
 * Only the first occurrence of `name` is considered, and the next element is
 * returned verbatim (even if it looks like another flag).
 *
 * @param args - Argument list to search.
 * @param name - Exact flag token to look for, e.g. `--scope`.
 * @returns The element after the flag, or `undefined` when the flag is absent
 *   or is the last element.
 */
export function flagValue(args: string[], name: string): string | undefined {
  const pos = args.indexOf(name);
  if (pos < 0) {
    return undefined;
  }
  return args[pos + 1];
}
+28 -4
View File
@@ -16,7 +16,7 @@
import { join } from "path";
import { homedir } from "os";
import { randomUUID } from "crypto";
import { writeFileSync, renameSync, existsSync, readFileSync } from "fs";
import { writeFileSync, renameSync, existsSync, readFileSync, appendFileSync } from "fs";
import { appendJsonl, readJsonl, hasInjection } from "./jsonl-store";
import { scan } from "./redact-engine";
@@ -65,6 +65,26 @@ export function decisionPaths(slug: string, gstackHome?: string): DecisionPaths
};
}
/**
 * Datamark resurfaced decision text so a stored string can't masquerade as
 * instructions or break out of the Context Recovery fence when it lands in agent
 * context (codex hardening #3: resurface = DATA, not instructions). Write-time
 * `hasInjection` is a denylist; this is the render-boundary defense-in-depth that
 * also covers `--all`/snapshot reads and records written before a pattern existed.
 *
 * Neutralizes, in this order:
 * - control characters and line breaks: C0 controls, DEL, C1 controls (incl. NEL)
 *   and the Unicode line/paragraph separators each become a single space
 *   (defensive — events are single-line);
 * - markdown code fences (runs of three or more backticks);
 * - `---` banner sentinels (runs of three or more hyphens become one em dash);
 * - `<|...|>` chat markers;
 * - `<system>` / `</system>`-style role tags, including variants padded with
 *   whitespace such as `</system >`.
 *
 * @param text - Stored decision text about to be rendered for humans/agents.
 * @returns The text with the sequences above defused; other content is unchanged.
 */
export function datamark(text: string): string {
  const ZWSP = "\u200b"; // zero-width space: breaks token recognition, near-invisible
  return (
    text
      // Line breaks/control chars go first so a tag split across lines
      // ("</system\n>") is already flattened when the role-tag pass runs.
      .replace(/[\u0000-\u001f\u007f-\u009f\u2028\u2029]/g, " ")
      .replace(/`{3,}/g, "'''") // neutralize markdown code fences
      .replace(/-{3,}/g, "\u2014") // neutralize `---` banner sentinels (em dash)
      .replace(/<\|/g, `<${ZWSP}|`) // neutralize <|im_start|>-style chat markers
      .replace(/\|>/g, `|${ZWSP}>`)
      // Role tags, tolerating whitespace around the slash and the name. The
      // ZWSP is inserted right after "<"; the rest of the tag is kept verbatim.
      .replace(
        /<\s*\/?\s*(?:system|user|assistant|tool)\s*>/gi,
        (tag) => `<${ZWSP}${tag.slice(1)}`,
      )
  );
}
/**
 * Outcome of validating a decision input (the return type of `validateDecide`).
 * Discriminated on `ok`: on success it carries the resulting `event`; on
 * failure it carries a human-readable `error` string instead.
 */
export type ValidateResult =
| { ok: true; event: DecisionEvent }
| { ok: false; error: string };
@@ -116,8 +136,8 @@ export function validateDecide(input: Partial<DecisionEvent>): ValidateResult {
rationale: input.rationale,
alternatives_considered: input.alternatives_considered,
scope,
branch: scope === "branch" ? input.branch : input.branch || undefined,
issue: scope === "issue" ? input.issue : input.issue || undefined,
branch: input.branch || undefined,
issue: input.issue || undefined,
date: input.date || new Date().toISOString(),
session: input.session,
source,
@@ -237,7 +257,11 @@ export function compact(paths: DecisionPaths): CompactResult {
const superseded = events.filter(
(e): e is DecisionEvent => e.kind === "decide" && !activeIds.has(e.id) && !redactedIds.has(e.id),
);
for (const e of superseded) appendJsonl(paths.archive, e);
// One batched append (not one open/write/close per event) — matches the atomic
// batched rewrite of the active log below and shrinks the mid-compact crash window.
if (superseded.length) {
appendFileSync(paths.archive, superseded.map((e) => JSON.stringify(e)).join("\n") + "\n", "utf-8");
}
const tmp = `${paths.log}.tmp.${process.pid}`;
writeFileSync(tmp, active.map((d) => JSON.stringify(d)).join("\n") + (active.length ? "\n" : ""), "utf-8");