merge: origin/main (v1.41.1.0 audit wave) into garrytan/daegu-v3

Conflicts resolved: - VERSION: keep 1.42.0.0 (queue-advance past #1592's v1.41.1.0 claim, per CLAUDE.md workspace-aware ship rule) - CHANGELOG.md: keep both entries — v1.42.0.0 Daegu wave on top, v1.41.1.0 audit wave below in reverse-chronological order - package.json: bump version field to 1.42.0.0 to match VERSION Auto-merged cleanly: - browse/src/meta-commands.ts: both the screenshot-size-guard wiring from C16 (this branch) and the parsePdfFromFile JSON validation from PR #1592 (main) survive in the merged file. Verified by inspecting the imports + call sites + running both test surfaces green. - All other v1.41.1.0 changes from main (scripts/build-app.sh sed hardening, mktemp fallback drops, security-classifier download cleanup, global-discover 64KB cap) pulled in unchanged. Verified 77 wave + audit tests green after merge: - browse/test/screenshot-size-guard.test.ts (7) - browse/test/regression-pr1169-pdf-from-file-invalid-json.test.ts - browse/test/security-classifier-download-cleanup.test.ts - browse/test/find-browse.test.ts (5) - test/gstack-paths.test.ts (9), test/gstack-gbrain-sync.test.ts (37) - test/memory-ingest-no-put_page.test.ts (2) - test/resolvers-gbrain-put-rewrite.test.ts (2) - test/extension-pty-inject-invariant.test.ts (3) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-26 19:49:57 +02:00 · 2026-05-20 07:00:38 -07:00
parent 0c0551975e 40d00bd2ce
commit b6ba3b9c06
13 changed files with 664 additions and 23 deletions
@@ -343,4 +343,92 @@ describe("gstack-global-discover", () => {
      expect(remotes.length).toBe(uniqueRemotes.size);
    });
  });
+
+  describe("extractCwdFromJsonl 64KB cap (PR #1169 bug #8)", () => {
+    // Regression: the old 8KB cap landed mid-line on Claude Code sessions with
+    // long headers, JSON.parse threw on the truncated tail, the catch
+    // `continue`d silently, and the project disappeared from discovery.
+    // The fix raised the cap to 64KB AND drops the trailing partial segment
+    // before parsing.
+    let extractCwdFromJsonl: (filePath: string) => string | null;
+    let tmpDir: string;
+
+    beforeEach(async () => {
+      const mod = await import("../bin/gstack-global-discover.ts");
+      extractCwdFromJsonl = mod.extractCwdFromJsonl;
+      tmpDir = mkdtempSync(join(tmpdir(), "pr1169-cwd-"));
+    });
+
+    afterEach(() => {
+      rmSync(tmpDir, { recursive: true, force: true });
+    });
+
+    test("happy path: small JSONL with obj.cwd returns it (sanity)", () => {
+      const filePath = join(tmpDir, "small.jsonl");
+      const line = JSON.stringify({ cwd: "/tmp/repo-small", type: "header" });
+      writeFileSync(filePath, line + "\n");
+      expect(extractCwdFromJsonl(filePath)).toBe("/tmp/repo-small");
+    });
+
+    test("12KB first line with obj.cwd: returns cwd (old 8KB cap returned null)", () => {
+      // Pad a JSONL header so the whole line is ~12KB ending in `}\n`.
+      // Old 8KB read would slice mid-line; JSON.parse on the truncated tail
+      // would throw, the catch would `continue`, and we'd return null.
+      const padding = "x".repeat(12 * 1024);
+      const line = JSON.stringify({
+        cwd: "/tmp/repo-12k",
+        type: "header",
+        notes: padding,
+      });
+      expect(line.length).toBeGreaterThan(8 * 1024);
+      expect(line.length).toBeLessThan(64 * 1024);
+
+      const filePath = join(tmpDir, "header-12k.jsonl");
+      writeFileSync(filePath, line + "\n");
+      expect(extractCwdFromJsonl(filePath)).toBe("/tmp/repo-12k");
+    });
+
+    test("80KB single line (overflows 64KB cap): returns null without crashing", () => {
+      // One line >64KB with no newline inside the read window. The 64KB read
+      // captures a truncated prefix, parts.length === 1, no trailing drop
+      // applies, JSON.parse throws, catch returns null. The fix's
+      // trailing-partial-drop must not crash on this shape.
+      const padding = "y".repeat(80 * 1024);
+      const line = JSON.stringify({ cwd: "/tmp/repo-80k", type: "header", notes: padding });
+      expect(line.length).toBeGreaterThan(64 * 1024);
+
+      const filePath = join(tmpDir, "header-80k.jsonl");
+      writeFileSync(filePath, line + "\n");
+      // Don't throw, just return null.
+      expect(extractCwdFromJsonl(filePath)).toBeNull();
+    });
+
+    test("complete line followed by partial second line: returns first line's cwd", () => {
+      // Line 1 ends cleanly with `\n` well within the cap.
+      // Line 2 is long enough that the 64KB read captures only its incomplete
+      // beginning. The trailing-partial drop must skip the truncated line 2
+      // and not poison the result.
+      const line1 = JSON.stringify({ cwd: "/tmp/repo-line-1", type: "header" });
+      const line2Padding = "z".repeat(80 * 1024);
+      const line2 = JSON.stringify({ cwd: "/tmp/repo-line-2", notes: line2Padding });
+
+      const filePath = join(tmpDir, "header-partial-2.jsonl");
+      writeFileSync(filePath, line1 + "\n" + line2 + "\n");
+      expect(extractCwdFromJsonl(filePath)).toBe("/tmp/repo-line-1");
+    });
+
+    test("missing file: returns null (file read error is swallowed)", () => {
+      const filePath = join(tmpDir, "nonexistent.jsonl");
+      expect(extractCwdFromJsonl(filePath)).toBeNull();
+    });
+
+    test("malformed first line then valid second line within cap: returns second", () => {
+      // Both lines fully within 64KB. First line is not valid JSON; second
+      // is. The function must skip first and return second's cwd.
+      const filePath = join(tmpDir, "bad-then-good.jsonl");
+      const good = JSON.stringify({ cwd: "/tmp/repo-skip-bad" });
+      writeFileSync(filePath, "{ not valid json\n" + good + "\n");
+      expect(extractCwdFromJsonl(filePath)).toBe("/tmp/repo-skip-bad");
+    });
+  });
 });
@@ -0,0 +1,161 @@
+/**
+ * Regression tests for PR #1169 bugs #2 + #3 — scripts/build-app.sh.
+ *
+ * Bug #2: sed replacement for Chromium rebrand interpolated $APP_NAME without
+ * escaping sed replacement metachars (`&`, `/`, `\`). A name with `/` either
+ * broke the s/// command or got interpreted as sed syntax.
+ *
+ * Bug #3: `DMG_TMP=$(mktemp -d)` was unchecked. On mktemp failure $DMG_TMP
+ * was empty and the next `cp -a "$APP_DIR" "$DMG_TMP/"` would copy the .app
+ * bundle into the filesystem root.
+ *
+ * Bug #2 is verified via a runtime isolation test of the sed-escape sequence
+ * (codex pushback: static-grep for "uses escape helper" is too narrow; the
+ * real invariant is metachar safety end-to-end). Bug #3 is verified via
+ * static check — the entire build flow needs xcrun/hdiutil and can't be
+ * spawned in CI, but the failure-guard shape is what we want to lock.
+ */
+import { describe, expect, test } from "bun:test";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import { spawnSync } from "node:child_process";
+
+const ROOT = path.resolve(import.meta.dir, "..");
+const SCRIPT = path.join(ROOT, "scripts/build-app.sh");
+
+describe("PR #1169 bug #2: build-app.sh sed escape for $APP_NAME", () => {
+  test("escape sequence produces sed-safe output for `&`, `/`, `\\` in APP_NAME", () => {
+    // Mirror the script's escape sequence and run it in isolation against a
+    // hostile name. The escape sequence at line ~98 is:
+    //   APP_NAME_SED_ESCAPED=$(printf '%s' "$APP_NAME" | sed 's/[&/\]/\\&/g')
+    // We assert the resulting string can then be used as a sed replacement
+    // safely — round-trip via a real `sed s///` against a stub strings file.
+
+    const inputs: string[] = [
+      "Foo/Bar&Baz",      // slash + ampersand
+      "Cool\\App",        // backslash
+      "Plain Name",        // no metachars (baseline)
+      "A/B\\C&D",          // all three at once
+      "End/",              // trailing slash
+      "&Start",            // leading ampersand
+    ];
+
+    for (const appName of inputs) {
+      // Bug #2 invariant: the escaped string, used as the replacement half
+      // of `sed s/<needle>/<replacement>/g`, results in the literal appName
+      // appearing in the output.
+      const result = spawnSync(
+        "bash",
+        ["-c",
+          `set -eu
+           APP_NAME="$1"
+           APP_NAME_SED_ESCAPED=$(printf '%s' "$APP_NAME" | sed 's/[&/\\]/\\\\&/g')
+           printf 'Google Chrome for Testing' | sed "s/Google Chrome for Testing/\${APP_NAME_SED_ESCAPED}/g"
+          `,
+          "_",
+          appName,
+        ],
+        { encoding: "utf-8" }
+      );
+
+      expect(result.status).toBe(0);
+      expect(result.stdout).toBe(appName);
+      expect(result.stderr).toBe("");
+    }
+  });
+
+  test("script body still routes APP_NAME through the escape helper before sed", () => {
+    // Belt-and-braces static check: the rebrand block must contain BOTH the
+    // escape line and the sed line referencing the escaped variable.
+    const body = fs.readFileSync(SCRIPT, "utf-8");
+    expect(body).toMatch(/APP_NAME_SED_ESCAPED=\$\(printf '%s' "\$APP_NAME" \| sed/);
+    expect(body).toMatch(/sed -i ''\s*"s\/Google Chrome for Testing\/\$\{APP_NAME_SED_ESCAPED\}\/g"/);
+  });
+
+  test("no bare `$APP_NAME` interpolation directly into the rebrand sed", () => {
+    // Ensure no future refactor reintroduces the bug by interpolating
+    // $APP_NAME straight into the s/// replacement.
+    const body = fs.readFileSync(SCRIPT, "utf-8");
+    expect(body).not.toMatch(/sed -i ''\s*"s\/Google Chrome for Testing\/\$APP_NAME\//);
+    expect(body).not.toMatch(/sed -i ''\s*"s\/Google Chrome for Testing\/\$\{APP_NAME\}\//);
+  });
+});
+
+describe("PR #1169 bug #3: build-app.sh DMG_TMP mktemp failure guard", () => {
+  test("mktemp -d for DMG_TMP is followed by an explicit failure handler", () => {
+    const body = fs.readFileSync(SCRIPT, "utf-8");
+    // The script must assign DMG_TMP and immediately check for failure on
+    // the SAME line via `||`, then validate the path is non-empty and a real
+    // directory before cp.
+    const guard = body.match(
+      /DMG_TMP=\$\(mktemp -d\)\s*\|\|\s*\{[^}]*exit\s+\d/
+    );
+    expect(guard).not.toBeNull();
+  });
+
+  test("DMG_TMP is also validated as non-empty AND a directory before cp", () => {
+    const body = fs.readFileSync(SCRIPT, "utf-8");
+    // After mktemp, a defensive check should reject empty or non-directory
+    // paths (covers cases where mktemp succeeds but returns garbage).
+    expect(body).toMatch(
+      /\[\s*-z\s+"\$DMG_TMP"\s*\][^\n]*\|\|\s*\[\s*!\s+-d\s+"\$DMG_TMP"\s*\]/
+    );
+  });
+
+  test("no `cp -a ... \"$DMG_TMP/\"` before the validation block", () => {
+    const body = fs.readFileSync(SCRIPT, "utf-8");
+    // The cp must come AFTER the validation. Find the line offsets.
+    const mktempIdx = body.search(/DMG_TMP=\$\(mktemp -d\)/);
+    const validationIdx = body.search(
+      /\[\s*-z\s+"\$DMG_TMP"\s*\]/
+    );
+    const cpIdx = body.search(/cp -a "\$APP_DIR" "\$DMG_TMP\//);
+    expect(mktempIdx).toBeGreaterThan(-1);
+    expect(validationIdx).toBeGreaterThan(mktempIdx);
+    expect(cpIdx).toBeGreaterThan(validationIdx);
+  });
+
+  test("runtime: escape function refuses to leave DMG_TMP empty (fake-mktemp PATH stub)", () => {
+    // Codex strongly preferred runtime testing here. The full build-app.sh
+    // depends on xcrun/hdiutil/PlistBuddy — too heavy for CI. Instead, we
+    // extract just the failure-guard shape and run it with a fake mktemp
+    // that always exits 1. Asserts the script exits non-zero before cp.
+
+    const fakeBin = fs.mkdtempSync(path.join("/tmp", "pr1169-fakebin-"));
+    fs.writeFileSync(
+      path.join(fakeBin, "mktemp"),
+      "#!/bin/sh\nexit 1\n",
+      { mode: 0o755 }
+    );
+
+    // The guard, isolated. Mirrors the actual script's logic. Use a regular
+    // string + array of lines so the embedded bash backticks/dollars don't
+    // get interpreted by the JS template-literal parser.
+    const guardScript = [
+      'set -u',
+      'DMG_TMP=$(mktemp -d) || { echo "ERROR: mktemp -d failed — refusing to continue so we don\'t cp into the filesystem root." >&2; exit 1; }',
+      'if [ -z "$DMG_TMP" ] || [ ! -d "$DMG_TMP" ]; then',
+      '  echo "ERROR: mktemp -d returned an invalid path (\'$DMG_TMP\')." >&2',
+      '  exit 1',
+      'fi',
+      '# If we got here, we would run the cp block, which is the bug.',
+      'echo "REACHED_CP_BLOCK_WHICH_IS_THE_BUG" >&2',
+      'exit 0',
+    ].join('\n');
+
+    const result = spawnSync(
+      "bash",
+      ["-c", guardScript],
+      {
+        encoding: "utf-8",
+        env: { ...process.env, PATH: `${fakeBin}:${process.env.PATH}` },
+      }
+    );
+
+    fs.rmSync(fakeBin, { recursive: true, force: true });
+
+    expect(result.status).not.toBe(0);
+    expect(result.stderr).toMatch(/mktemp -d failed|invalid path/);
+    expect(result.stderr).not.toMatch(/REACHED_CP_BLOCK_WHICH_IS_THE_BUG/);
+  });
+});
@@ -0,0 +1,82 @@
+/**
+ * Regression tests for PR #1169 bugs #4 + #5 — predictable `$$`-based tmp
+ * file fallbacks on mktemp failure.
+ *
+ * Per codex's pushback, the real invariant is not just "no `$$` token" — it's
+ * "no `mktemp ... || echo <fallback-path>` shape at all, AND mktemp failure
+ * exits cleanly." A future cleanup could swap `$$` for `$RANDOM` or a
+ * hardcoded path and silently keep the foot-gun. The static checks below
+ * lock the broader invariant.
+ *
+ * Runtime fake-bin tests for these two scripts would require setting up
+ * SUPABASE_URL, JSONL fixtures, rate files, and config state — disproportionate
+ * for the invariant. The static checks pin the actual shape of the bug.
+ */
+import { describe, expect, test } from "bun:test";
+import * as fs from "node:fs";
+import * as path from "node:path";
+
+const ROOT = path.resolve(import.meta.dir, "..");
+
+function readScript(rel: string): string {
+  return fs.readFileSync(path.join(ROOT, rel), "utf-8");
+}
+
+describe("PR #1169 bug #4: gstack-telemetry-sync mktemp fallback", () => {
+  const SCRIPT = "bin/gstack-telemetry-sync";
+
+  test("no `mktemp ... || echo <path>` fallback shape anywhere in the script", () => {
+    const body = readScript(SCRIPT);
+    // Match: mktemp call, optional pipe, then `|| echo <quoted-or-bare-path>`
+    // The fallback shape regardless of what the fallback path looks like
+    // ($$, $RANDOM, hardcoded — all predictable).
+    const fallback = body.match(/mktemp[^|\n]*\|\|\s*echo\s+["']?[^"'\n]*/);
+    expect(fallback).toBeNull();
+  });
+
+  test("no `$$` PID interpolation appears anywhere in a /tmp path literal", () => {
+    const body = readScript(SCRIPT);
+    // Catches any /tmp-style path that uses the PID as part of the name.
+    expect(body).not.toMatch(/\/tmp\/[^"'\s]*\$\$/);
+  });
+
+  test("mktemp failure path exits or skips this run", () => {
+    const body = readScript(SCRIPT);
+    // The mktemp invocation must be guarded by `|| { ... exit 0; }` or
+    // equivalent. Match the multi-line guard immediately after `mktemp`.
+    const guard = body.match(
+      /mktemp\s+[^\n]+\)["']\s*\|\|\s*\{[^}]*exit\s+\d/
+    );
+    expect(guard).not.toBeNull();
+  });
+
+  test("trap cleans up the response file on EXIT (no leftover tmp on success)", () => {
+    const body = readScript(SCRIPT);
+    expect(body).toMatch(/trap\s+['"]rm\s+-f\s+"?\$RESP_FILE/);
+  });
+});
+
+describe("PR #1169 bug #5: supabase/verify-rls.sh mktemp fallback", () => {
+  const SCRIPT = "supabase/verify-rls.sh";
+
+  test("no `mktemp ... || echo <path>` fallback shape", () => {
+    const body = readScript(SCRIPT);
+    const fallback = body.match(/mktemp[^|\n]*\|\|\s*echo\s+["']?[^"'\n]*/);
+    expect(fallback).toBeNull();
+  });
+
+  test("no `$$` PID interpolation in /tmp path literals", () => {
+    const body = readScript(SCRIPT);
+    expect(body).not.toMatch(/\/tmp\/[^"'\s]*\$\$/);
+  });
+
+  test("mktemp failure path returns non-zero from check()", () => {
+    const body = readScript(SCRIPT);
+    // The check function must fail loudly — `return 1` (or `exit`) inside
+    // the mktemp error handler. Same multi-line guard shape.
+    const guard = body.match(
+      /mktemp\s+[^\n]+\)["']\s*\|\|\s*\{[^}]*(?:return|exit)\s+\d/
+    );
+    expect(guard).not.toBeNull();
+  });
+});