mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 05:56:41 +02:00
merge: incorporate origin/main into community-mode branch
Resolve conflicts in VERSION, package.json, and CHANGELOG.md. Keep 0.12.0.0 version with community mode entry on top, followed by 0.11.12.0 and 0.11.11.0 entries from main. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
interface:
|
||||
display_name: "gstack-setup-browser-cookies"
|
||||
short_description: "Import cookies from your real browser (Comet, Chrome, Arc, Brave, Edge) into the headless browse session. Opens an..."
|
||||
short_description: "Import cookies from your real Chromium browser into the headless browse session. Opens an interactive picker UI..."
|
||||
default_prompt: "Use gstack-setup-browser-cookies for this task."
|
||||
policy:
|
||||
allow_implicit_invocation: true
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
self-hosted-runner:
|
||||
labels:
|
||||
- ubicloud-standard-2
|
||||
- ubicloud-standard-8
|
||||
@@ -29,13 +29,22 @@ RUN curl -fsSL https://bun.sh/install | bash
|
||||
# Claude CLI
|
||||
RUN npm i -g @anthropic-ai/claude-code
|
||||
|
||||
# Playwright system deps (Chromium) — needed for browse E2E tests
|
||||
RUN npx playwright install-deps chromium
|
||||
|
||||
# Pre-install dependencies (cached layer — only rebuilds when package.json changes)
|
||||
COPY package.json /workspace/
|
||||
WORKDIR /workspace
|
||||
RUN bun install && rm -rf /tmp/*
|
||||
|
||||
# Install Playwright Chromium to a shared location accessible by all users
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/playwright-browsers
|
||||
RUN npx playwright install chromium \
|
||||
&& chmod -R a+rX /opt/playwright-browsers
|
||||
|
||||
# Verify everything works
|
||||
RUN bun --version && node --version && claude --version && jq --version && gh --version
|
||||
RUN bun --version && node --version && claude --version && jq --version && gh --version \
|
||||
&& npx playwright --version
|
||||
|
||||
# At runtime: checkout overwrites /workspace, but node_modules persists
|
||||
# if we move it out of the way and symlink back
|
||||
@@ -47,4 +56,8 @@ RUN mv /workspace/node_modules /opt/node_modules_cache \
|
||||
# Create a non-root user for eval runs (GH Actions overrides USER, so
|
||||
# the workflow must set options.user or use gosu/su-exec at runtime).
|
||||
RUN useradd -m -s /bin/bash runner \
|
||||
&& chmod -R a+rX /opt/node_modules_cache
|
||||
&& chmod -R a+rX /opt/node_modules_cache \
|
||||
&& mkdir -p /home/runner/.gstack && chown -R runner:runner /home/runner/.gstack \
|
||||
&& chmod 1777 /tmp \
|
||||
&& mkdir -p /home/runner/.bun && chown -R runner:runner /home/runner/.bun \
|
||||
&& chmod -R 1777 /tmp
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
name: Workflow Lint
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
actionlint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: rhysd/actionlint@v1.7.11
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
${{ env.IMAGE }}:latest
|
||||
|
||||
evals:
|
||||
runs-on: ubicloud-standard-2
|
||||
runs-on: ${{ matrix.suite.runner || 'ubicloud-standard-2' }}
|
||||
needs: build-image
|
||||
container:
|
||||
image: ${{ needs.build-image.outputs.image-tag }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --user runner
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 25
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -71,7 +71,8 @@ jobs:
|
||||
- name: llm-judge
|
||||
file: test/skill-llm-eval.test.ts
|
||||
- name: e2e-browse
|
||||
file: test/skill-e2e-browse.test.ts
|
||||
file: test/skill-e2e-bws.test.ts
|
||||
runner: ubicloud-standard-8
|
||||
- name: e2e-plan
|
||||
file: test/skill-e2e-plan.test.ts
|
||||
- name: e2e-deploy
|
||||
@@ -86,8 +87,10 @@ jobs:
|
||||
file: test/skill-e2e-review.test.ts
|
||||
- name: e2e-workflow
|
||||
file: test/skill-e2e-workflow.test.ts
|
||||
allow_failure: true # /ship + /setup-browser-cookies are env-dependent
|
||||
- name: e2e-routing
|
||||
file: test/skill-routing-e2e.test.ts
|
||||
allow_failure: true # LLM routing is non-deterministic
|
||||
- name: e2e-codex
|
||||
file: test/codex-e2e.test.ts
|
||||
- name: e2e-gemini
|
||||
@@ -97,8 +100,18 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# Bun creates root-owned temp dirs during Docker build. GH Actions runs as
|
||||
# runner user with HOME=/github/home. Redirect bun's cache to a writable dir.
|
||||
- name: Fix bun temp
|
||||
run: |
|
||||
mkdir -p /home/runner/.cache/bun
|
||||
{
|
||||
echo "BUN_INSTALL_CACHE_DIR=/home/runner/.cache/bun"
|
||||
echo "BUN_TMPDIR=/home/runner/.cache/bun"
|
||||
echo "TMPDIR=/home/runner/.cache"
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
# Restore pre-installed node_modules from Docker image via symlink (~0s vs ~15s install)
|
||||
# If package.json changed since image was built, fall back to fresh install
|
||||
- name: Restore deps
|
||||
run: |
|
||||
if [ -d /opt/node_modules_cache ] && diff -q /opt/node_modules_cache/.package.json package.json >/dev/null 2>&1; then
|
||||
@@ -109,12 +122,22 @@ jobs:
|
||||
|
||||
- run: bun run build
|
||||
|
||||
# Verify Playwright can launch Chromium (fails fast if sandbox/deps are broken)
|
||||
- name: Verify Chromium
|
||||
if: matrix.suite.name == 'e2e-browse'
|
||||
run: |
|
||||
echo "whoami=$(whoami) HOME=$HOME TMPDIR=${TMPDIR:-unset}"
|
||||
touch /tmp/.bun-test && rm /tmp/.bun-test && echo "/tmp writable"
|
||||
bun -e "import {chromium} from 'playwright';const b=await chromium.launch({args:['--no-sandbox']});console.log('Chromium OK');await b.close()"
|
||||
|
||||
- name: Run ${{ matrix.suite.name }}
|
||||
continue-on-error: ${{ matrix.suite.allow_failure || false }}
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
EVALS_CONCURRENCY: "40"
|
||||
PLAYWRIGHT_BROWSERS_PATH: /opt/playwright-browsers
|
||||
run: EVALS=1 bun test --retry 2 --concurrent --max-concurrency 40 ${{ matrix.suite.file }}
|
||||
|
||||
- name: Upload eval results
|
||||
@@ -149,6 +172,7 @@ jobs:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# shellcheck disable=SC2086,SC2059
|
||||
RESULTS=$(find /tmp/eval-results -name '*.json' 2>/dev/null | sort)
|
||||
if [ -z "$RESULTS" ]; then
|
||||
echo "No eval results found"
|
||||
@@ -158,6 +182,10 @@ jobs:
|
||||
TOTAL=0; PASSED=0; FAILED=0; COST="0"
|
||||
SUITE_LINES=""
|
||||
for f in $RESULTS; do
|
||||
if ! jq -e '.total_tests' "$f" >/dev/null 2>&1; then
|
||||
echo "Skipping malformed JSON: $f"
|
||||
continue
|
||||
fi
|
||||
T=$(jq -r '.total_tests // 0' "$f")
|
||||
P=$(jq -r '.passed // 0' "$f")
|
||||
F=$(jq -r '.failed // 0' "$f")
|
||||
@@ -190,9 +218,10 @@ jobs:
|
||||
if [ "$FAILED" -gt 0 ]; then
|
||||
FAILURES=""
|
||||
for f in $RESULTS; do
|
||||
if ! jq -e '.failed' "$f" >/dev/null 2>&1; then continue; fi
|
||||
F=$(jq -r '.failed // 0' "$f")
|
||||
[ "$F" -eq 0 ] && continue
|
||||
FAILS=$(jq -r '.tests[] | select(.passed == false) | "- ❌ \(.name): \(.exit_reason // "unknown")"' "$f")
|
||||
FAILS=$(jq -r '.tests[] | select(.passed == false) | "- ❌ \(.name): \(.exit_reason // "unknown")"' "$f" 2>/dev/null || echo "- ⚠️ $(basename "$f"): parse error")
|
||||
FAILURES="${FAILURES}${FAILS}\n"
|
||||
done
|
||||
BODY="${BODY}
|
||||
@@ -206,8 +235,8 @@ jobs:
|
||||
--jq '.[] | select(.body | startswith("## E2E Evals")) | .id' | tail -1)
|
||||
|
||||
if [ -n "$COMMENT_ID" ]; then
|
||||
gh api repos/${{ github.repository }}/issues/comments/$COMMENT_ID \
|
||||
gh api "repos/${{ github.repository }}/issues/comments/${COMMENT_ID}" \
|
||||
-X PATCH -f body="$BODY"
|
||||
else
|
||||
gh pr comment ${{ github.event.pull_request.number }} --body "$BODY"
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||
fi
|
||||
|
||||
@@ -9,6 +9,17 @@ jobs:
|
||||
- run: bun install
|
||||
- name: Check Claude host freshness
|
||||
run: bun run gen:skill-docs
|
||||
- run: git diff --exit-code || (echo "Generated SKILL.md files are stale. Run: bun run gen:skill-docs" && exit 1)
|
||||
- name: Check Codex host generation succeeds
|
||||
- name: Verify Claude skill docs are fresh
|
||||
run: |
|
||||
git diff --exit-code || {
|
||||
echo "Generated SKILL.md files are stale. Run: bun run gen:skill-docs"
|
||||
exit 1
|
||||
}
|
||||
- name: Check Codex host freshness
|
||||
run: bun run gen:skill-docs --host codex
|
||||
- name: Verify Codex skill docs are fresh
|
||||
run: |
|
||||
git diff --exit-code -- .agents/ || {
|
||||
echo "Generated Codex SKILL.md files are stale. Run: bun run gen:skill-docs --host codex"
|
||||
exit 1
|
||||
}
|
||||
|
||||
+1
-1
@@ -247,7 +247,7 @@ Tests spin up a local HTTP server (`browse/test/test-server.ts`) serving HTML fi
|
||||
| `browse/src/read-commands.ts` | Non-mutating commands: `text`, `html`, `links`, `js`, `css`, `is`, `dialog`, `forms`, etc. Exports `getCleanText()`. |
|
||||
| `browse/src/write-commands.ts` | Mutating commands: `goto`, `click`, `fill`, `upload`, `dialog-accept`, `useragent` (with context recreation), etc. |
|
||||
| `browse/src/meta-commands.ts` | Server management, chain routing, diff (DRY via `getCleanText`), snapshot delegation. |
|
||||
| `browse/src/cookie-import-browser.ts` | Decrypt Chromium cookies via macOS Keychain + PBKDF2/AES-128-CBC. Auto-detects installed browsers. |
|
||||
| `browse/src/cookie-import-browser.ts` | Decrypt Chromium cookies from macOS and Linux browser profiles using platform-specific safe-storage key lookup. Auto-detects installed browsers. |
|
||||
| `browse/src/cookie-picker-routes.ts` | HTTP routes for `/cookie-picker/*` — browser list, domain search, import, remove. |
|
||||
| `browse/src/cookie-picker-ui.ts` | Self-contained HTML generator for the interactive cookie picker (dark theme, no frameworks). |
|
||||
| `browse/src/buffers.ts` | `CircularBuffer<T>` (O(1) ring buffer) + console/network/dialog capture with async disk flush. |
|
||||
|
||||
@@ -33,6 +33,46 @@
|
||||
- Homebrew tap deferred to TODOS.md (P2)
|
||||
- 8 new telemetry tests (source field, duration caps, fingerprint persistence)
|
||||
|
||||
## [0.11.12.0] - 2026-03-24 — Triple-Voice Autoplan
|
||||
|
||||
Every `/autoplan` phase now gets two independent second opinions — one from Codex (OpenAI's frontier model) and one from a fresh Claude subagent. Three AI reviewers looking at your plan from different angles, each phase building on the last.
|
||||
|
||||
### Added
|
||||
|
||||
- **Dual voices in every autoplan phase.** CEO review, Design review, and Eng review each run both a Codex challenge and an independent Claude subagent simultaneously. You get a consensus table showing where the models agree and disagree — disagreements surface as taste decisions at the final gate.
|
||||
- **Phase-cascading context.** Codex gets prior-phase findings as context (CEO concerns inform Design review, CEO+Design inform Eng). Claude subagent stays truly independent for genuine cross-model validation.
|
||||
- **Structured consensus tables.** CEO phase scores 6 strategic dimensions, Design uses the litmus scorecard, Eng scores 6 architecture dimensions. CONFIRMED/DISAGREE for each.
|
||||
- **Cross-phase synthesis.** Phase 4 gate highlights themes that appeared independently in multiple phases — high-confidence signals when different reviewers catch the same issue.
|
||||
- **Sequential enforcement.** STOP markers between phases + pre-phase checklists prevent autoplan from accidentally parallelizing CEO/Design/Eng (each phase depends on the previous).
|
||||
- **Phase-transition summaries.** Brief status at each phase boundary so you can track progress without waiting for the full pipeline.
|
||||
- **Degradation matrix.** When Codex or the Claude subagent fails, autoplan gracefully degrades with clear labels (`[codex-only]`, `[subagent-only]`, `[single-reviewer mode]`).
|
||||
|
||||
## [0.11.11.0] - 2026-03-23 — Community Wave 3
|
||||
|
||||
10 community PRs merged — bug fixes, platform support, and workflow improvements.
|
||||
|
||||
### Added
|
||||
|
||||
- **Chrome multi-profile cookie import.** You can now import cookies from any Chrome profile, not just Default. Profile picker shows account email for easy identification. Batch import across all visible domains.
|
||||
- **Linux Chromium cookie import.** Cookie import now works on Linux for Chrome, Chromium, Brave, and Edge. Supports both GNOME Keyring (libsecret) and the "peanuts" fallback for headless environments.
|
||||
- **Chrome extensions in browse sessions.** Set `BROWSE_EXTENSIONS_DIR` to load Chrome extensions (ad blockers, accessibility tools, custom headers) into your browse testing sessions.
|
||||
- **Project-scoped gstack install.** `setup --local` installs gstack into `.claude/skills/` in your current project instead of globally. Useful for per-project version pinning.
|
||||
- **Distribution pipeline checks.** `/office-hours`, `/plan-eng-review`, `/ship`, and `/review` now check whether new CLI tools or libraries have a build/publish pipeline. No more shipping artifacts nobody can download.
|
||||
- **Dynamic skill discovery.** Adding a new skill directory no longer requires editing a hardcoded list. `skill-check` and `gen-skill-docs` automatically discover skills from the filesystem.
|
||||
- **Auto-trigger guard.** Skills now include explicit trigger criteria in their descriptions to prevent Claude Code from auto-firing them based on semantic similarity. The existing proactive suggestion system is preserved.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Browse server startup crash.** The browse server lock acquisition failed when `.gstack/` directory didn't exist, causing every invocation to think another process held the lock. Fixed by creating the state directory before lock acquisition.
|
||||
- **Zsh glob errors in skill preamble.** The telemetry cleanup loop no longer throws `no matches found` in zsh when no pending files exist.
|
||||
- **`--force` now actually forces upgrades.** `gstack-upgrade --force` clears the snooze file, so you can upgrade immediately after snoozing.
|
||||
- **Three-dot diff in /review scope drift detection.** Scope drift analysis now correctly shows changes since branch creation, not accumulated changes on the base branch.
|
||||
- **CI workflow YAML parsing.** Fixed unquoted multiline `run:` scalars that broke YAML parsing. Added actionlint CI workflow.
|
||||
|
||||
### Community
|
||||
|
||||
Thanks to @osc, @Explorer1092, @Qike-Li, @francoisaubert1, @itstimwhite, @yinanli1917-cloud for contributions in this wave.
|
||||
|
||||
## [0.11.10.0] - 2026-03-23 — CI Evals on Ubicloud
|
||||
|
||||
### Added
|
||||
|
||||
@@ -79,12 +79,14 @@ gstack/
|
||||
├── office-hours/ # /office-hours skill (YC Office Hours — startup diagnostic + builder brainstorm)
|
||||
├── investigate/ # /investigate skill (systematic root-cause debugging)
|
||||
├── retro/ # Retrospective skill (includes /retro global cross-project mode)
|
||||
├── bin/ # Standalone scripts (gstack-global-discover for cross-tool session discovery)
|
||||
├── bin/ # CLI utilities (gstack-repo-mode, gstack-slug, gstack-config, etc.)
|
||||
├── document-release/ # /document-release skill (post-ship doc updates)
|
||||
├── cso/ # /cso skill (OWASP Top 10 + STRIDE security audit)
|
||||
├── design-consultation/ # /design-consultation skill (design system from scratch)
|
||||
├── setup-deploy/ # /setup-deploy skill (one-time deploy config)
|
||||
├── bin/ # CLI utilities (gstack-repo-mode, gstack-slug, gstack-config, etc.)
|
||||
├── .github/ # CI workflows + Docker image
|
||||
│ ├── workflows/ # evals.yml (E2E on Ubicloud), skill-docs.yml, actionlint.yml
|
||||
│ └── docker/ # Dockerfile.ci (pre-baked toolchain + Playwright/Chromium)
|
||||
├── setup # One-time setup: build binary + symlink skills
|
||||
├── SKILL.md # Generated from SKILL.md.tmpl (don't edit directly)
|
||||
├── SKILL.md.tmpl # Template: edit this, run gen:skill-docs
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: gstack
|
||||
version: 1.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /gstack.
|
||||
Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
|
||||
elements, verify state, diff before/after, take annotated screenshots, test responsive
|
||||
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
|
||||
@@ -653,7 +654,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|
||||
| `click <sel>` | Click element |
|
||||
| `cookie <name>=<value>` | Set cookie on current page domain |
|
||||
| `cookie-import <json>` | Import cookies from JSON file |
|
||||
| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
|
||||
| `cookie-import-browser [browser] [--domain d]` | Import cookies from installed Chromium browsers (opens picker, or use --domain for direct import) |
|
||||
| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
|
||||
| `dialog-dismiss` | Auto-dismiss next dialog |
|
||||
| `fill <sel> <val>` | Fill input |
|
||||
|
||||
@@ -154,14 +154,17 @@
|
||||
**Effort:** M
|
||||
**Priority:** P4
|
||||
|
||||
### Linux/Windows cookie decryption
|
||||
### Linux cookie decryption — PARTIALLY SHIPPED
|
||||
|
||||
**What:** GNOME Keyring / kwallet / DPAPI support for non-macOS cookie import.
|
||||
~~**What:** GNOME Keyring / kwallet / DPAPI support for non-macOS cookie import.~~
|
||||
|
||||
**Why:** Cross-platform cookie import. Currently macOS-only (Keychain).
|
||||
Linux cookie import shipped in v0.11.11.0 (Wave 3). Supports Chrome, Chromium, Brave, Edge on Linux with GNOME Keyring (libsecret) and "peanuts" fallback. Windows DPAPI support remains deferred.
|
||||
|
||||
**Effort:** L
|
||||
**Remaining:** Windows cookie decryption (DPAPI). Needs complete rewrite — PR #64 was 1346 lines and stale.
|
||||
|
||||
**Effort:** L (Windows only)
|
||||
**Priority:** P4
|
||||
**Completed (Linux):** v0.11.11.0 (2026-03-23)
|
||||
|
||||
## Ship
|
||||
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
self-hosted-runner:
|
||||
labels:
|
||||
- ubicloud-standard-2
|
||||
+221
-6
@@ -2,6 +2,7 @@
|
||||
name: autoplan
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /autoplan.
|
||||
Auto-review pipeline — reads the full CEO, design, and eng review skills from disk
|
||||
and runs them sequentially with auto-decisions using 6 decision principles. Surfaces
|
||||
taste decisions (close approaches, borderline scope, codex disagreements) at a final
|
||||
@@ -476,6 +477,17 @@ Examples: run codex (always yes), run evals (always yes), reduce scope on a comp
|
||||
|
||||
---
|
||||
|
||||
## Sequential Execution — MANDATORY
|
||||
|
||||
Phases MUST execute in strict order: CEO → Design → Eng.
|
||||
Each phase MUST complete fully before the next begins.
|
||||
NEVER run phases in parallel — each builds on the previous.
|
||||
|
||||
Between each phase, emit a phase-transition summary and verify that all required
|
||||
outputs from the prior phase are written before starting the next.
|
||||
|
||||
---
|
||||
|
||||
## What "Auto-Decide" Means
|
||||
|
||||
Auto-decide replaces the USER'S judgment with the 6 principles. It does NOT replace
|
||||
@@ -561,6 +573,8 @@ Read each file using the Read tool:
|
||||
- Review Readiness Dashboard
|
||||
- Plan File Review Report
|
||||
- Prerequisite Skill Offer (BENEFITS_FROM)
|
||||
- Outside Voice — Independent Plan Challenge
|
||||
- Design Outside Voices (parallel)
|
||||
|
||||
Follow ONLY the review-specific methodology, sections, and required outputs.
|
||||
|
||||
@@ -584,6 +598,38 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
- Scope expansion: in blast radius + <1d CC → approve (P2). Outside → defer to TODOS.md (P3).
|
||||
Duplicates → reject (P4). Borderline (3-5 files) → mark TASTE DECISION.
|
||||
- All 10 review sections: run fully, auto-decide each issue, log every decision.
|
||||
- Dual voices: always run BOTH Claude subagent AND Codex if available (P6).
|
||||
Run them simultaneously (Agent tool for subagent, Bash for Codex).
|
||||
|
||||
**Codex CEO voice** (via Bash):
|
||||
Command: `codex exec "You are a CEO/founder advisor reviewing a development plan.
|
||||
Challenge the strategic foundations: Are the premises valid or assumed? Is this the
|
||||
right problem to solve, or is there a reframing that would be 10x more impactful?
|
||||
What alternatives were dismissed too quickly? What competitive or market risks are
|
||||
unaddressed? What scope decisions will look foolish in 6 months? Be adversarial.
|
||||
No compliments. Just the strategic blind spots.
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude CEO subagent** (via Agent tool):
|
||||
"Read the plan file at <plan_path>. You are an independent CEO/strategist
|
||||
reviewing this plan. You have NOT seen any prior review. Evaluate:
|
||||
1. Is this the right problem to solve? Could a reframing yield 10x impact?
|
||||
2. Are the premises stated or just assumed? Which ones could be wrong?
|
||||
3. What's the 6-month regret scenario — what will look foolish?
|
||||
4. What alternatives were dismissed without sufficient analysis?
|
||||
5. What's the competitive risk — could someone else solve this first/better?
|
||||
For each finding: what's wrong, severity (critical/high/medium), and the fix."
|
||||
|
||||
**Error handling:** All non-blocking. Codex auth/timeout/empty → proceed with
|
||||
Claude subagent only, tagged `[subagent-only]`. If Claude subagent also fails →
|
||||
"Outside voices unavailable — continuing with primary review."
|
||||
|
||||
**Degradation matrix:** Both fail → "single-reviewer mode". Codex only →
|
||||
tag `[codex-only]`. Subagent only → tag `[subagent-only]`.
|
||||
|
||||
- Strategy choices: if Codex disagrees with a premise or scope decision with a valid
|
||||
strategic reason → TASTE DECISION.
|
||||
|
||||
**Required execution checklist (CEO):**
|
||||
|
||||
@@ -596,6 +642,27 @@ Step 0 (0A-0F) — run each sub-step and produce:
|
||||
- 0E: Temporal interrogation (HOUR 1 → HOUR 6+)
|
||||
- 0F: Mode selection confirmation
|
||||
|
||||
Step 0.5 (Dual Voices): Run Claude subagent AND Codex simultaneously. Present
|
||||
Codex output under CODEX SAYS (CEO — strategy challenge) header. Present subagent
|
||||
output under CLAUDE SUBAGENT (CEO — strategic independence) header. Produce CEO
|
||||
consensus table:
|
||||
|
||||
```
|
||||
CEO DUAL VOICES — CONSENSUS TABLE:
|
||||
═══════════════════════════════════════════════════════════════
|
||||
Dimension Claude Codex Consensus
|
||||
──────────────────────────────────── ─────── ─────── ─────────
|
||||
1. Premises valid? — — —
|
||||
2. Right problem to solve? — — —
|
||||
3. Scope calibration correct? — — —
|
||||
4. Alternatives sufficiently explored?— — —
|
||||
5. Competitive/market risks covered? — — —
|
||||
6. 6-month trajectory sound? — — —
|
||||
═══════════════════════════════════════════════════════════════
|
||||
CONFIRMED = both agree. DISAGREE = models differ (→ taste decision).
|
||||
Missing voice = N/A (not CONFIRMED). Single critical finding from one voice = flagged regardless.
|
||||
```
|
||||
|
||||
Sections 1-10 — for EACH section, run the evaluation criteria from the loaded skill file:
|
||||
- Sections WITH findings: full analysis, auto-decide each issue, log to audit trail
|
||||
- Sections with NO findings: 1-2 sentences stating what was examined and why nothing
|
||||
@@ -610,8 +677,23 @@ Sections 1-10 — for EACH section, run the evaluation criteria from the loaded
|
||||
- Dream state delta (where this plan leaves us vs 12-month ideal)
|
||||
- Completion Summary (the full summary table from the CEO skill)
|
||||
|
||||
**PHASE 1 COMPLETE.** Emit phase-transition summary:
|
||||
> **Phase 1 complete.** Codex: [N concerns]. Claude subagent: [N issues].
|
||||
> Consensus: [X/6 confirmed, Y disagreements → surfaced at gate].
|
||||
> Passing to Phase 2.
|
||||
|
||||
Do NOT begin Phase 2 until all Phase 1 outputs are written to the plan file
|
||||
and the premise gate has been passed.
|
||||
|
||||
---
|
||||
|
||||
**Pre-Phase 2 checklist (verify before starting):**
|
||||
- [ ] CEO completion summary written to plan file
|
||||
- [ ] CEO dual voices ran (Codex + Claude subagent, or noted unavailable)
|
||||
- [ ] CEO consensus table produced
|
||||
- [ ] Premise gate passed (user confirmed)
|
||||
- [ ] Phase-transition summary emitted
|
||||
|
||||
## Phase 2: Design Review (conditional — skip if no UI scope)
|
||||
|
||||
Follow plan-design-review/SKILL.md — all 7 dimensions, full depth.
|
||||
@@ -622,19 +704,102 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
- Structural issues (missing states, broken hierarchy): auto-fix (P5)
|
||||
- Aesthetic/taste issues: mark TASTE DECISION
|
||||
- Design system alignment: auto-fix if DESIGN.md exists and fix is obvious
|
||||
- Dual voices: always run BOTH Claude subagent AND Codex if available (P6).
|
||||
|
||||
**Codex design voice** (via Bash):
|
||||
Command: `codex exec "Read the plan file at <plan_path>. Evaluate this plan's
|
||||
UI/UX design decisions.
|
||||
|
||||
Also consider these findings from the CEO review phase:
|
||||
<insert CEO dual voice findings summary — key concerns, disagreements>
|
||||
|
||||
Does the information hierarchy serve the user or the developer? Are interaction
|
||||
states (loading, empty, error, partial) specified or left to the implementer's
|
||||
imagination? Is the responsive strategy intentional or afterthought? Are
|
||||
accessibility requirements (keyboard nav, contrast, touch targets) specified or
|
||||
aspirational? Does the plan describe specific UI decisions or generic patterns?
|
||||
What design decisions will haunt the implementer if left ambiguous?
|
||||
Be opinionated. No hedging." -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude design subagent** (via Agent tool):
|
||||
"Read the plan file at <plan_path>. You are an independent senior product designer
|
||||
reviewing this plan. You have NOT seen any prior review. Evaluate:
|
||||
1. Information hierarchy: what does the user see first, second, third? Is it right?
|
||||
2. Missing states: loading, empty, error, success, partial — which are unspecified?
|
||||
3. User journey: what's the emotional arc? Where does it break?
|
||||
4. Specificity: does the plan describe SPECIFIC UI or generic patterns?
|
||||
5. What design decisions will haunt the implementer if left ambiguous?
|
||||
For each finding: what's wrong, severity (critical/high/medium), and the fix."
|
||||
NO prior-phase context — subagent must be truly independent.
|
||||
|
||||
Error handling: same as Phase 1 (non-blocking, degradation matrix applies).
|
||||
|
||||
- Design choices: if Codex disagrees with a design decision with valid UX reasoning
|
||||
→ TASTE DECISION.
|
||||
|
||||
**Required execution checklist (Design):**
|
||||
|
||||
1. Step 0 (Design Scope): Rate completeness 0-10. Check DESIGN.md. Map existing patterns.
|
||||
|
||||
2. Step 0.5 (Dual Voices): Run Claude subagent AND Codex simultaneously. Present under
|
||||
CODEX SAYS (design — UX challenge) and CLAUDE SUBAGENT (design — independent review)
|
||||
headers. Produce design litmus scorecard (consensus table). Use the litmus scorecard
|
||||
format from plan-design-review. Include CEO phase findings in Codex prompt ONLY
|
||||
(not Claude subagent — stays independent).
|
||||
|
||||
3. Passes 1-7: Run each from loaded skill. Rate 0-10. Auto-decide each issue.
|
||||
DISAGREE items from scorecard → raised in the relevant pass with both perspectives.
|
||||
|
||||
**PHASE 2 COMPLETE.** Emit phase-transition summary:
|
||||
> **Phase 2 complete.** Codex: [N concerns]. Claude subagent: [N issues].
|
||||
> Consensus: [X/Y confirmed, Z disagreements → surfaced at gate].
|
||||
> Passing to Phase 3.
|
||||
|
||||
Do NOT begin Phase 3 until all Phase 2 outputs (if run) are written to the plan file.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Eng Review + Codex
|
||||
**Pre-Phase 3 checklist (verify before starting):**
|
||||
- [ ] All Phase 1 items above confirmed
|
||||
- [ ] Design completion summary written (or "skipped, no UI scope")
|
||||
- [ ] Design dual voices ran (if Phase 2 ran)
|
||||
- [ ] Design consensus table produced (if Phase 2 ran)
|
||||
- [ ] Phase-transition summary emitted
|
||||
|
||||
## Phase 3: Eng Review + Dual Voices
|
||||
|
||||
Follow plan-eng-review/SKILL.md — all sections, full depth.
|
||||
Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
|
||||
**Override rules:**
|
||||
- Scope challenge: never reduce (P2)
|
||||
- Codex review: always run if available (P6)
|
||||
Command: `codex exec "Review this plan for architectural issues, missing edge cases, and hidden complexity. Be adversarial. File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes, then proceed with "Codex timed out — single-reviewer mode"
|
||||
- Dual voices: always run BOTH Claude subagent AND Codex if available (P6).
|
||||
|
||||
**Codex eng voice** (via Bash):
|
||||
Command: `codex exec "Review this plan for architectural issues, missing edge cases,
|
||||
and hidden complexity. Be adversarial.
|
||||
|
||||
Also consider these findings from prior review phases:
|
||||
CEO: <insert CEO consensus table summary — key concerns, DISAGREEs>
|
||||
Design: <insert Design consensus table summary, or 'skipped, no UI scope'>
|
||||
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude eng subagent** (via Agent tool):
|
||||
"Read the plan file at <plan_path>. You are an independent senior engineer
|
||||
reviewing this plan. You have NOT seen any prior review. Evaluate:
|
||||
1. Architecture: Is the component structure sound? Coupling concerns?
|
||||
2. Edge cases: What breaks under 10x load? What's the nil/empty/error path?
|
||||
3. Tests: What's missing from the test plan? What would break at 2am Friday?
|
||||
4. Security: New attack surface? Auth boundaries? Input validation?
|
||||
5. Hidden complexity: What looks simple but isn't?
|
||||
For each finding: what's wrong, severity, and the fix."
|
||||
NO prior-phase context — subagent must be truly independent.
|
||||
|
||||
Error handling: same as Phase 1 (non-blocking, degradation matrix applies).
|
||||
|
||||
- Architecture choices: explicit over clever (P5). If Codex disagrees with a valid reason → TASTE DECISION.
|
||||
- Evals: always include all relevant suites (P1)
|
||||
- Test plan: generate artifact at `~/.gstack/projects/$SLUG/{user}-{branch}-test-plan-{datetime}.md`
|
||||
@@ -645,7 +810,26 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
1. Step 0 (Scope Challenge): Read actual code referenced by the plan. Map each
|
||||
sub-problem to existing code. Run the complexity check. Produce concrete findings.
|
||||
|
||||
2. Step 0.5 (Codex): Run if available. Present full output under CODEX SAYS header.
|
||||
2. Step 0.5 (Dual Voices): Run Claude subagent AND Codex simultaneously. Present
|
||||
Codex output under CODEX SAYS (eng — architecture challenge) header. Present subagent
|
||||
output under CLAUDE SUBAGENT (eng — independent review) header. Produce eng consensus
|
||||
table:
|
||||
|
||||
```
|
||||
ENG DUAL VOICES — CONSENSUS TABLE:
|
||||
═══════════════════════════════════════════════════════════════
|
||||
Dimension Claude Codex Consensus
|
||||
──────────────────────────────────── ─────── ─────── ─────────
|
||||
1. Architecture sound? — — —
|
||||
2. Test coverage sufficient? — — —
|
||||
3. Performance risks addressed? — — —
|
||||
4. Security threats covered? — — —
|
||||
5. Error paths handled? — — —
|
||||
6. Deployment risk manageable? — — —
|
||||
═══════════════════════════════════════════════════════════════
|
||||
CONFIRMED = both agree. DISAGREE = models differ (→ taste decision).
|
||||
Missing voice = N/A (not CONFIRMED). Single critical finding from one voice = flagged regardless.
|
||||
```
|
||||
|
||||
3. Section 1 (Architecture): Produce ASCII dependency graph showing new components
|
||||
and their relationships to existing ones. Evaluate coupling, scaling, security.
|
||||
@@ -709,10 +893,14 @@ produced. Check the plan file and conversation for each item.
|
||||
- [ ] "What already exists" section written
|
||||
- [ ] Dream state delta written
|
||||
- [ ] Completion Summary produced
|
||||
- [ ] Dual voices ran (Codex + Claude subagent, or noted unavailable)
|
||||
- [ ] CEO consensus table produced
|
||||
|
||||
**Phase 2 (Design) outputs — only if UI scope detected:**
|
||||
- [ ] All 7 dimensions evaluated with scores
|
||||
- [ ] Issues identified and auto-decided
|
||||
- [ ] Dual voices ran (or noted unavailable/skipped with phase)
|
||||
- [ ] Design litmus scorecard produced
|
||||
|
||||
**Phase 3 (Eng) outputs:**
|
||||
- [ ] Scope challenge with actual code analysis (not just "scope is fine")
|
||||
@@ -723,6 +911,11 @@ produced. Check the plan file and conversation for each item.
|
||||
- [ ] "What already exists" section written
|
||||
- [ ] Failure modes registry with critical gap assessment
|
||||
- [ ] Completion Summary produced
|
||||
- [ ] Dual voices ran (Codex + Claude subagent, or noted unavailable)
|
||||
- [ ] Eng consensus table produced
|
||||
|
||||
**Cross-phase:**
|
||||
- [ ] Cross-phase themes section written
|
||||
|
||||
**Audit trail:**
|
||||
- [ ] Decision Audit Trail has at least one row per auto-decision (not empty)
|
||||
@@ -757,9 +950,16 @@ I recommend [X] — [principle]. But [Y] is also viable:
|
||||
|
||||
### Review Scores
|
||||
- CEO: [summary]
|
||||
- CEO Voices: Codex [summary], Claude subagent [summary], Consensus [X/6 confirmed]
|
||||
- Design: [summary or "skipped, no UI scope"]
|
||||
- Design Voices: Codex [summary], Claude subagent [summary], Consensus [X/7 confirmed] (or "skipped")
|
||||
- Eng: [summary]
|
||||
- Codex: [summary or "unavailable"]
|
||||
- Eng Voices: Codex [summary], Claude subagent [summary], Consensus [X/6 confirmed]
|
||||
|
||||
### Cross-Phase Themes
|
||||
[For any concern that appeared in 2+ phases' dual voices independently:]
|
||||
**Theme: [topic]** — flagged in [Phase 1, Phase 3]. High-confidence signal.
|
||||
[If no themes span phases:] "No cross-phase themes — each phase's concerns were distinct."
|
||||
|
||||
### Deferred to TODOS.md
|
||||
[Items auto-deferred with reasons]
|
||||
@@ -806,6 +1006,21 @@ If Phase 2 ran (UI scope):
|
||||
|
||||
Replace field values with actual counts from the review.
|
||||
|
||||
Dual voice logs (one per phase that ran):
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"autoplan-voices","timestamp":"'"$TIMESTAMP"'","status":"STATUS","source":"SOURCE","phase":"ceo","via":"autoplan","consensus_confirmed":N,"consensus_disagree":N,"commit":"'"$COMMIT"'"}'
|
||||
|
||||
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"autoplan-voices","timestamp":"'"$TIMESTAMP"'","status":"STATUS","source":"SOURCE","phase":"eng","via":"autoplan","consensus_confirmed":N,"consensus_disagree":N,"commit":"'"$COMMIT"'"}'
|
||||
```
|
||||
|
||||
If Phase 2 ran (UI scope), also log:
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"autoplan-voices","timestamp":"'"$TIMESTAMP"'","status":"STATUS","source":"SOURCE","phase":"design","via":"autoplan","consensus_confirmed":N,"consensus_disagree":N,"commit":"'"$COMMIT"'"}'
|
||||
```
|
||||
|
||||
SOURCE = "codex+subagent", "codex-only", "subagent-only", or "unavailable".
|
||||
Replace N values with actual consensus counts from the tables.
|
||||
|
||||
Suggest next step: `/ship` when ready to create the PR.
|
||||
|
||||
---
|
||||
|
||||
+220
-6
@@ -72,6 +72,17 @@ Examples: run codex (always yes), run evals (always yes), reduce scope on a comp
|
||||
|
||||
---
|
||||
|
||||
## Sequential Execution — MANDATORY
|
||||
|
||||
Phases MUST execute in strict order: CEO → Design → Eng.
|
||||
Each phase MUST complete fully before the next begins.
|
||||
NEVER run phases in parallel — each builds on the previous.
|
||||
|
||||
Between each phase, emit a phase-transition summary and verify that all required
|
||||
outputs from the prior phase are written before starting the next.
|
||||
|
||||
---
|
||||
|
||||
## What "Auto-Decide" Means
|
||||
|
||||
Auto-decide replaces the USER'S judgment with the 6 principles. It does NOT replace
|
||||
@@ -157,6 +168,8 @@ Read each file using the Read tool:
|
||||
- Review Readiness Dashboard
|
||||
- Plan File Review Report
|
||||
- Prerequisite Skill Offer (BENEFITS_FROM)
|
||||
- Outside Voice — Independent Plan Challenge
|
||||
- Design Outside Voices (parallel)
|
||||
|
||||
Follow ONLY the review-specific methodology, sections, and required outputs.
|
||||
|
||||
@@ -180,6 +193,38 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
- Scope expansion: in blast radius + <1d CC → approve (P2). Outside → defer to TODOS.md (P3).
|
||||
Duplicates → reject (P4). Borderline (3-5 files) → mark TASTE DECISION.
|
||||
- All 10 review sections: run fully, auto-decide each issue, log every decision.
|
||||
- Dual voices: always run BOTH Claude subagent AND Codex if available (P6).
|
||||
Run them simultaneously (Agent tool for subagent, Bash for Codex).
|
||||
|
||||
**Codex CEO voice** (via Bash):
|
||||
Command: `codex exec "You are a CEO/founder advisor reviewing a development plan.
|
||||
Challenge the strategic foundations: Are the premises valid or assumed? Is this the
|
||||
right problem to solve, or is there a reframing that would be 10x more impactful?
|
||||
What alternatives were dismissed too quickly? What competitive or market risks are
|
||||
unaddressed? What scope decisions will look foolish in 6 months? Be adversarial.
|
||||
No compliments. Just the strategic blind spots.
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude CEO subagent** (via Agent tool):
|
||||
"Read the plan file at <plan_path>. You are an independent CEO/strategist
|
||||
reviewing this plan. You have NOT seen any prior review. Evaluate:
|
||||
1. Is this the right problem to solve? Could a reframing yield 10x impact?
|
||||
2. Are the premises stated or just assumed? Which ones could be wrong?
|
||||
3. What's the 6-month regret scenario — what will look foolish?
|
||||
4. What alternatives were dismissed without sufficient analysis?
|
||||
5. What's the competitive risk — could someone else solve this first/better?
|
||||
For each finding: what's wrong, severity (critical/high/medium), and the fix."
|
||||
|
||||
**Error handling:** All non-blocking. Codex auth/timeout/empty → proceed with
|
||||
Claude subagent only, tagged `[subagent-only]`. If Claude subagent also fails →
|
||||
"Outside voices unavailable — continuing with primary review."
|
||||
|
||||
**Degradation matrix:** Both fail → "single-reviewer mode". Codex only →
|
||||
tag `[codex-only]`. Subagent only → tag `[subagent-only]`.
|
||||
|
||||
- Strategy choices: if codex disagrees with a premise or scope decision with valid
|
||||
strategic reason → TASTE DECISION.
|
||||
|
||||
**Required execution checklist (CEO):**
|
||||
|
||||
@@ -192,6 +237,27 @@ Step 0 (0A-0F) — run each sub-step and produce:
|
||||
- 0E: Temporal interrogation (HOUR 1 → HOUR 6+)
|
||||
- 0F: Mode selection confirmation
|
||||
|
||||
Step 0.5 (Dual Voices): Run Claude subagent AND Codex simultaneously. Present
|
||||
Codex output under CODEX SAYS (CEO — strategy challenge) header. Present subagent
|
||||
output under CLAUDE SUBAGENT (CEO — strategic independence) header. Produce CEO
|
||||
consensus table:
|
||||
|
||||
```
|
||||
CEO DUAL VOICES — CONSENSUS TABLE:
|
||||
═══════════════════════════════════════════════════════════════
|
||||
Dimension Claude Codex Consensus
|
||||
──────────────────────────────────── ─────── ─────── ─────────
|
||||
1. Premises valid? — — —
|
||||
2. Right problem to solve? — — —
|
||||
3. Scope calibration correct? — — —
|
||||
4. Alternatives sufficiently explored? — — —
|
||||
5. Competitive/market risks covered? — — —
|
||||
6. 6-month trajectory sound? — — —
|
||||
═══════════════════════════════════════════════════════════════
|
||||
CONFIRMED = both agree. DISAGREE = models differ (→ taste decision).
|
||||
Missing voice = N/A (not CONFIRMED). Single critical finding from one voice = flagged regardless.
|
||||
```
|
||||
|
||||
Sections 1-10 — for EACH section, run the evaluation criteria from the loaded skill file:
|
||||
- Sections WITH findings: full analysis, auto-decide each issue, log to audit trail
|
||||
- Sections with NO findings: 1-2 sentences stating what was examined and why nothing
|
||||
@@ -206,8 +272,23 @@ Sections 1-10 — for EACH section, run the evaluation criteria from the loaded
|
||||
- Dream state delta (where this plan leaves us vs 12-month ideal)
|
||||
- Completion Summary (the full summary table from the CEO skill)
|
||||
|
||||
**PHASE 1 COMPLETE.** Emit phase-transition summary:
|
||||
> **Phase 1 complete.** Codex: [N concerns]. Claude subagent: [N issues].
|
||||
> Consensus: [X/6 confirmed, Y disagreements → surfaced at gate].
|
||||
> Passing to Phase 2.
|
||||
|
||||
Do NOT begin Phase 2 until all Phase 1 outputs are written to the plan file
|
||||
and the premise gate has been passed.
|
||||
|
||||
---
|
||||
|
||||
**Pre-Phase 2 checklist (verify before starting):**
|
||||
- [ ] CEO completion summary written to plan file
|
||||
- [ ] CEO dual voices ran (Codex + Claude subagent, or noted unavailable)
|
||||
- [ ] CEO consensus table produced
|
||||
- [ ] Premise gate passed (user confirmed)
|
||||
- [ ] Phase-transition summary emitted
|
||||
|
||||
## Phase 2: Design Review (conditional — skip if no UI scope)
|
||||
|
||||
Follow plan-design-review/SKILL.md — all 7 dimensions, full depth.
|
||||
@@ -218,19 +299,102 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
- Structural issues (missing states, broken hierarchy): auto-fix (P5)
|
||||
- Aesthetic/taste issues: mark TASTE DECISION
|
||||
- Design system alignment: auto-fix if DESIGN.md exists and fix is obvious
|
||||
- Dual voices: always run BOTH Claude subagent AND Codex if available (P6).
|
||||
|
||||
**Codex design voice** (via Bash):
|
||||
Command: `codex exec "Read the plan file at <plan_path>. Evaluate this plan's
|
||||
UI/UX design decisions.
|
||||
|
||||
Also consider these findings from the CEO review phase:
|
||||
<insert CEO dual voice findings summary — key concerns, disagreements>
|
||||
|
||||
Does the information hierarchy serve the user or the developer? Are interaction
|
||||
states (loading, empty, error, partial) specified or left to the implementer's
|
||||
imagination? Is the responsive strategy intentional or afterthought? Are
|
||||
accessibility requirements (keyboard nav, contrast, touch targets) specified or
|
||||
aspirational? Does the plan describe specific UI decisions or generic patterns?
|
||||
What design decisions will haunt the implementer if left ambiguous?
|
||||
Be opinionated. No hedging." -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude design subagent** (via Agent tool):
|
||||
"Read the plan file at <plan_path>. You are an independent senior product designer
|
||||
reviewing this plan. You have NOT seen any prior review. Evaluate:
|
||||
1. Information hierarchy: what does the user see first, second, third? Is it right?
|
||||
2. Missing states: loading, empty, error, success, partial — which are unspecified?
|
||||
3. User journey: what's the emotional arc? Where does it break?
|
||||
4. Specificity: does the plan describe SPECIFIC UI or generic patterns?
|
||||
5. What design decisions will haunt the implementer if left ambiguous?
|
||||
For each finding: what's wrong, severity (critical/high/medium), and the fix."
|
||||
NO prior-phase context — subagent must be truly independent.
|
||||
|
||||
Error handling: same as Phase 1 (non-blocking, degradation matrix applies).
|
||||
|
||||
- Design choices: if codex disagrees with a design decision with valid UX reasoning
|
||||
→ TASTE DECISION.
|
||||
|
||||
**Required execution checklist (Design):**
|
||||
|
||||
1. Step 0 (Design Scope): Rate completeness 0-10. Check DESIGN.md. Map existing patterns.
|
||||
|
||||
2. Step 0.5 (Dual Voices): Run Claude subagent AND Codex simultaneously. Present under
|
||||
CODEX SAYS (design — UX challenge) and CLAUDE SUBAGENT (design — independent review)
|
||||
headers. Produce design litmus scorecard (consensus table). Use the litmus scorecard
|
||||
format from plan-design-review. Include CEO phase findings in Codex prompt ONLY
|
||||
(not Claude subagent — stays independent).
|
||||
|
||||
3. Passes 1-7: Run each from loaded skill. Rate 0-10. Auto-decide each issue.
|
||||
DISAGREE items from scorecard → raised in the relevant pass with both perspectives.
|
||||
|
||||
**PHASE 2 COMPLETE.** Emit phase-transition summary:
|
||||
> **Phase 2 complete.** Codex: [N concerns]. Claude subagent: [N issues].
|
||||
> Consensus: [X/Y confirmed, Z disagreements → surfaced at gate].
|
||||
> Passing to Phase 3.
|
||||
|
||||
Do NOT begin Phase 3 until all Phase 2 outputs (if run) are written to the plan file.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Eng Review + Codex
|
||||
**Pre-Phase 3 checklist (verify before starting):**
|
||||
- [ ] All Phase 1 items above confirmed
|
||||
- [ ] Design completion summary written (or "skipped, no UI scope")
|
||||
- [ ] Design dual voices ran (if Phase 2 ran)
|
||||
- [ ] Design consensus table produced (if Phase 2 ran)
|
||||
- [ ] Phase-transition summary emitted
|
||||
|
||||
## Phase 3: Eng Review + Dual Voices
|
||||
|
||||
Follow plan-eng-review/SKILL.md — all sections, full depth.
|
||||
Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
|
||||
**Override rules:**
|
||||
- Scope challenge: never reduce (P2)
|
||||
- Codex review: always run if available (P6)
|
||||
Command: `codex exec "Review this plan for architectural issues, missing edge cases, and hidden complexity. Be adversarial. File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes, then proceed with "Codex timed out — single-reviewer mode"
|
||||
- Dual voices: always run BOTH Claude subagent AND Codex if available (P6).
|
||||
|
||||
**Codex eng voice** (via Bash):
|
||||
Command: `codex exec "Review this plan for architectural issues, missing edge cases,
|
||||
and hidden complexity. Be adversarial.
|
||||
|
||||
Also consider these findings from prior review phases:
|
||||
CEO: <insert CEO consensus table summary — key concerns, DISAGREEs>
|
||||
Design: <insert Design consensus table summary, or 'skipped, no UI scope'>
|
||||
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude eng subagent** (via Agent tool):
|
||||
"Read the plan file at <plan_path>. You are an independent senior engineer
|
||||
reviewing this plan. You have NOT seen any prior review. Evaluate:
|
||||
1. Architecture: Is the component structure sound? Coupling concerns?
|
||||
2. Edge cases: What breaks under 10x load? What's the nil/empty/error path?
|
||||
3. Tests: What's missing from the test plan? What would break at 2am Friday?
|
||||
4. Security: New attack surface? Auth boundaries? Input validation?
|
||||
5. Hidden complexity: What looks simple but isn't?
|
||||
For each finding: what's wrong, severity, and the fix."
|
||||
NO prior-phase context — subagent must be truly independent.
|
||||
|
||||
Error handling: same as Phase 1 (non-blocking, degradation matrix applies).
|
||||
|
||||
- Architecture choices: explicit over clever (P5). If Codex disagrees with a valid reason → TASTE DECISION.
|
||||
- Evals: always include all relevant suites (P1)
|
||||
- Test plan: generate artifact at `~/.gstack/projects/$SLUG/{user}-{branch}-test-plan-{datetime}.md`
|
||||
@@ -241,7 +405,26 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
1. Step 0 (Scope Challenge): Read actual code referenced by the plan. Map each
|
||||
sub-problem to existing code. Run the complexity check. Produce concrete findings.
|
||||
|
||||
2. Step 0.5 (Codex): Run if available. Present full output under CODEX SAYS header.
|
||||
2. Step 0.5 (Dual Voices): Run Claude subagent AND Codex simultaneously. Present
|
||||
Codex output under CODEX SAYS (eng — architecture challenge) header. Present subagent
|
||||
output under CLAUDE SUBAGENT (eng — independent review) header. Produce eng consensus
|
||||
table:
|
||||
|
||||
```
|
||||
ENG DUAL VOICES — CONSENSUS TABLE:
|
||||
═══════════════════════════════════════════════════════════════
|
||||
Dimension Claude Codex Consensus
|
||||
──────────────────────────────────── ─────── ─────── ─────────
|
||||
1. Architecture sound? — — —
|
||||
2. Test coverage sufficient? — — —
|
||||
3. Performance risks addressed? — — —
|
||||
4. Security threats covered? — — —
|
||||
5. Error paths handled? — — —
|
||||
6. Deployment risk manageable? — — —
|
||||
═══════════════════════════════════════════════════════════════
|
||||
CONFIRMED = both agree. DISAGREE = models differ (→ taste decision).
|
||||
Missing voice = N/A (not CONFIRMED). Single critical finding from one voice = flagged regardless.
|
||||
```
|
||||
|
||||
3. Section 1 (Architecture): Produce ASCII dependency graph showing new components
|
||||
and their relationships to existing ones. Evaluate coupling, scaling, security.
|
||||
@@ -305,10 +488,14 @@ produced. Check the plan file and conversation for each item.
|
||||
- [ ] "What already exists" section written
|
||||
- [ ] Dream state delta written
|
||||
- [ ] Completion Summary produced
|
||||
- [ ] Dual voices ran (Codex + Claude subagent, or noted unavailable)
|
||||
- [ ] CEO consensus table produced
|
||||
|
||||
**Phase 2 (Design) outputs — only if UI scope detected:**
|
||||
- [ ] All 7 dimensions evaluated with scores
|
||||
- [ ] Issues identified and auto-decided
|
||||
- [ ] Dual voices ran (or noted unavailable/skipped with phase)
|
||||
- [ ] Design litmus scorecard produced
|
||||
|
||||
**Phase 3 (Eng) outputs:**
|
||||
- [ ] Scope challenge with actual code analysis (not just "scope is fine")
|
||||
@@ -319,6 +506,11 @@ produced. Check the plan file and conversation for each item.
|
||||
- [ ] "What already exists" section written
|
||||
- [ ] Failure modes registry with critical gap assessment
|
||||
- [ ] Completion Summary produced
|
||||
- [ ] Dual voices ran (Codex + Claude subagent, or noted unavailable)
|
||||
- [ ] Eng consensus table produced
|
||||
|
||||
**Cross-phase:**
|
||||
- [ ] Cross-phase themes section written
|
||||
|
||||
**Audit trail:**
|
||||
- [ ] Decision Audit Trail has at least one row per auto-decision (not empty)
|
||||
@@ -353,9 +545,16 @@ I recommend [X] — [principle]. But [Y] is also viable:
|
||||
|
||||
### Review Scores
|
||||
- CEO: [summary]
|
||||
- CEO Voices: Codex [summary], Claude subagent [summary], Consensus [X/6 confirmed]
|
||||
- Design: [summary or "skipped, no UI scope"]
|
||||
- Design Voices: Codex [summary], Claude subagent [summary], Consensus [X/7 confirmed] (or "skipped")
|
||||
- Eng: [summary]
|
||||
- Codex: [summary or "unavailable"]
|
||||
- Eng Voices: Codex [summary], Claude subagent [summary], Consensus [X/6 confirmed]
|
||||
|
||||
### Cross-Phase Themes
|
||||
[For any concern that appeared in 2+ phases' dual voices independently:]
|
||||
**Theme: [topic]** — flagged in [Phase 1, Phase 3]. High-confidence signal.
|
||||
[If no themes span phases:] "No cross-phase themes — each phase's concerns were distinct."
|
||||
|
||||
### Deferred to TODOS.md
|
||||
[Items auto-deferred with reasons]
|
||||
@@ -402,6 +601,21 @@ If Phase 2 ran (UI scope):
|
||||
|
||||
Replace field values with actual counts from the review.
|
||||
|
||||
Dual voice logs (one per phase that ran):
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"autoplan-voices","timestamp":"'"$TIMESTAMP"'","status":"STATUS","source":"SOURCE","phase":"ceo","via":"autoplan","consensus_confirmed":N,"consensus_disagree":N,"commit":"'"$COMMIT"'"}'
|
||||
|
||||
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"autoplan-voices","timestamp":"'"$TIMESTAMP"'","status":"STATUS","source":"SOURCE","phase":"eng","via":"autoplan","consensus_confirmed":N,"consensus_disagree":N,"commit":"'"$COMMIT"'"}'
|
||||
```
|
||||
|
||||
If Phase 2 ran (UI scope), also log:
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"autoplan-voices","timestamp":"'"$TIMESTAMP"'","status":"STATUS","source":"SOURCE","phase":"design","via":"autoplan","consensus_confirmed":N,"consensus_disagree":N,"commit":"'"$COMMIT"'"}'
|
||||
```
|
||||
|
||||
SOURCE = "codex+subagent", "codex-only", "subagent-only", or "unavailable".
|
||||
Replace N values with actual consensus counts from the tables.
|
||||
|
||||
Suggest next step: `/ship` when ready to create the PR.
|
||||
|
||||
---
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: benchmark
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /benchmark.
|
||||
Performance regression detection using the browse daemon. Establishes
|
||||
baselines for page load times, Core Web Vitals, and resource sizes.
|
||||
Compares before/after on every PR. Tracks performance trends over time.
|
||||
|
||||
@@ -20,9 +20,10 @@ SNOOZE_FILE="$STATE_DIR/update-snoozed"
|
||||
VERSION_FILE="$GSTACK_DIR/VERSION"
|
||||
REMOTE_URL="${GSTACK_REMOTE_URL:-https://raw.githubusercontent.com/garrytan/gstack/main/VERSION}"
|
||||
|
||||
# ─── Force flag (busts cache for standalone /gstack-upgrade) ──
|
||||
# ─── Force flag (busts cache + snooze for standalone /gstack-upgrade) ──
|
||||
if [ "${1:-}" = "--force" ]; then
|
||||
rm -f "$CACHE_FILE"
|
||||
rm -f "$SNOOZE_FILE"
|
||||
fi
|
||||
|
||||
# ─── Step 0: Check if updates are disabled ────────────────────
|
||||
|
||||
+2
-1
@@ -2,6 +2,7 @@
|
||||
name: browse
|
||||
version: 1.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /browse.
|
||||
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
|
||||
elements, verify page state, diff before/after actions, take annotated screenshots, check
|
||||
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
|
||||
@@ -550,7 +551,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|
||||
| `click <sel>` | Click element |
|
||||
| `cookie <name>=<value>` | Set cookie on current page domain |
|
||||
| `cookie-import <json>` | Import cookies from JSON file |
|
||||
| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
|
||||
| `cookie-import-browser [browser] [--domain d]` | Import cookies from installed Chromium browsers (opens picker, or use --domain for direct import) |
|
||||
| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
|
||||
| `dialog-dismiss` | Auto-dismiss next dialog |
|
||||
| `fill <sel> <val>` | Fill input |
|
||||
|
||||
@@ -62,7 +62,35 @@ export class BrowserManager {
|
||||
private consecutiveFailures: number = 0;
|
||||
|
||||
async launch() {
|
||||
this.browser = await chromium.launch({ headless: true });
|
||||
// ─── Extension Support ────────────────────────────────────
|
||||
// BROWSE_EXTENSIONS_DIR points to an unpacked Chrome extension directory.
|
||||
// Extensions only work in headed mode, so we use an off-screen window.
|
||||
const extensionsDir = process.env.BROWSE_EXTENSIONS_DIR;
|
||||
const launchArgs: string[] = [];
|
||||
let useHeadless = true;
|
||||
|
||||
// Docker/CI: Chromium sandbox requires unprivileged user namespaces which
|
||||
// are typically disabled in containers. Detect container environment and
|
||||
// add --no-sandbox automatically.
|
||||
if (process.env.CI || process.env.CONTAINER) {
|
||||
launchArgs.push('--no-sandbox');
|
||||
}
|
||||
|
||||
if (extensionsDir) {
|
||||
launchArgs.push(
|
||||
`--disable-extensions-except=${extensionsDir}`,
|
||||
`--load-extension=${extensionsDir}`,
|
||||
'--window-position=-9999,-9999',
|
||||
'--window-size=1,1',
|
||||
);
|
||||
useHeadless = false; // extensions require headed mode; off-screen window simulates headless
|
||||
console.log(`[browse] Extensions loaded from: ${extensionsDir}`);
|
||||
}
|
||||
|
||||
this.browser = await chromium.launch({
|
||||
headless: useHeadless,
|
||||
...(launchArgs.length > 0 ? { args: launchArgs } : {}),
|
||||
});
|
||||
|
||||
// Chromium crash → exit with clear message
|
||||
this.browser.on('disconnected', () => {
|
||||
|
||||
+4
-1
@@ -15,7 +15,7 @@ import { resolveConfig, ensureStateDir, readVersionHash } from './config';
|
||||
|
||||
const config = resolveConfig();
|
||||
const IS_WINDOWS = process.platform === 'win32';
|
||||
const MAX_START_WAIT = IS_WINDOWS ? 15000 : 8000; // Node+Chromium takes longer on Windows
|
||||
const MAX_START_WAIT = IS_WINDOWS ? 15000 : (process.env.CI ? 30000 : 8000); // Node+Chromium takes longer on Windows (15s); non-Windows CI runners get 30s; otherwise 8s
|
||||
|
||||
export function resolveServerScript(
|
||||
env: Record<string, string | undefined> = process.env,
|
||||
@@ -262,6 +262,9 @@ async function ensureServer(): Promise<ServerState> {
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure state directory exists before lock acquisition (lock file lives there)
|
||||
ensureStateDir(config);
|
||||
|
||||
// Acquire lock to prevent concurrent restart races (TOCTOU)
|
||||
const releaseLock = acquireServerLock();
|
||||
if (!releaseLock) {
|
||||
|
||||
@@ -73,7 +73,7 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
|
||||
'viewport':{ category: 'Interaction', description: 'Set viewport size', usage: 'viewport <WxH>' },
|
||||
'cookie': { category: 'Interaction', description: 'Set cookie on current page domain', usage: 'cookie <name>=<value>' },
|
||||
'cookie-import': { category: 'Interaction', description: 'Import cookies from JSON file', usage: 'cookie-import <json>' },
|
||||
'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import)', usage: 'cookie-import-browser [browser] [--domain d]' },
|
||||
'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from installed Chromium browsers (opens picker, or use --domain for direct import)', usage: 'cookie-import-browser [browser] [--domain d]' },
|
||||
'header': { category: 'Interaction', description: 'Set custom request header (colon-separated, sensitive values auto-redacted)', usage: 'header <name>:<value>' },
|
||||
'useragent': { category: 'Interaction', description: 'Set user agent', usage: 'useragent <string>' },
|
||||
'dialog-accept': { category: 'Interaction', description: 'Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response', usage: 'dialog-accept [text]' },
|
||||
|
||||
@@ -1,25 +1,28 @@
|
||||
/**
|
||||
* Chromium browser cookie import — read and decrypt cookies from real browsers
|
||||
*
|
||||
* Supports macOS Chromium-based browsers: Comet, Chrome, Arc, Brave, Edge.
|
||||
* Supports macOS and Linux Chromium-based browsers.
|
||||
* Pure logic module — no Playwright dependency, no HTTP concerns.
|
||||
*
|
||||
* Decryption pipeline (Chromium macOS "v10" format):
|
||||
* Decryption pipeline:
|
||||
*
|
||||
* ┌──────────────────────────────────────────────────────────────────┐
|
||||
* │ 1. Keychain: `security find-generic-password -s "<svc>" -w` │
|
||||
* │ → base64 password string │
|
||||
* │ 1. Resolve the cookie DB from the browser profile dir │
|
||||
* │ - macOS: ~/Library/Application Support/<browser>/<profile> │
|
||||
* │ - Linux: ~/.config/<browser>/<profile> │
|
||||
* │ │
|
||||
* │ 2. Key derivation: │
|
||||
* │ PBKDF2(password, salt="saltysalt", iter=1003, len=16, sha1) │
|
||||
* │ → 16-byte AES key │
|
||||
* │ 2. Derive the AES key │
|
||||
* │ - macOS v10: Keychain password, PBKDF2(..., iter=1003) │
|
||||
* │ - Linux v10: "peanuts", PBKDF2(..., iter=1) │
|
||||
* │ - Linux v11: libsecret/secret-tool password, iter=1 │
|
||||
* │ │
|
||||
* │ 3. For each cookie with encrypted_value starting with "v10": │
|
||||
* │ 3. For each cookie with encrypted_value starting with "v10"/ │
|
||||
* │ "v11": │
|
||||
* │ - Ciphertext = encrypted_value[3:] │
|
||||
* │ - IV = 16 bytes of 0x20 (space character) │
|
||||
* │ - Plaintext = AES-128-CBC-decrypt(key, iv, ciphertext) │
|
||||
* │ - Remove PKCS7 padding │
|
||||
* │ - Skip first 32 bytes (HMAC-SHA256 authentication tag) │
|
||||
* │ - Skip first 32 bytes of Chromium cookie metadata │
|
||||
* │ - Remaining bytes = cookie value (UTF-8) │
|
||||
* │ │
|
||||
* │ 4. If encrypted_value is empty but `value` field is set, │
|
||||
@@ -42,9 +45,16 @@ import * as os from 'os';
|
||||
|
||||
export interface BrowserInfo {
|
||||
name: string;
|
||||
dataDir: string; // relative to ~/Library/Application Support/
|
||||
dataDir: string; // primary storage dir (retained for compatibility with existing callers/tests)
|
||||
keychainService: string;
|
||||
aliases: string[];
|
||||
linuxDataDir?: string;
|
||||
linuxApplication?: string;
|
||||
}
|
||||
|
||||
export interface ProfileEntry {
|
||||
name: string; // e.g. "Default", "Profile 1", "Profile 3"
|
||||
displayName: string; // human-friendly name from Preferences, or falls back to dir name
|
||||
}
|
||||
|
||||
export interface DomainEntry {
|
||||
@@ -81,15 +91,24 @@ export class CookieImportError extends Error {
|
||||
}
|
||||
}
|
||||
|
||||
type BrowserPlatform = 'darwin' | 'linux';
|
||||
|
||||
interface BrowserMatch {
|
||||
browser: BrowserInfo;
|
||||
platform: BrowserPlatform;
|
||||
dbPath: string;
|
||||
}
|
||||
|
||||
// ─── Browser Registry ───────────────────────────────────────────
|
||||
// Hardcoded — NEVER interpolate user input into shell commands.
|
||||
|
||||
// One entry per supported browser. Entries without `linuxDataDir` have no
// Linux location and are skipped by the Linux lookup.
// NOTE(review): the Chromium macOS dir is spelled 'chromium/' here; on a
// case-sensitive volume the real directory name may differ — confirm.
const BROWSER_REGISTRY: BrowserInfo[] = [
  { name: 'Comet', dataDir: 'Comet/', keychainService: 'Comet Safe Storage', aliases: ['comet', 'perplexity'] },
  { name: 'Chrome', dataDir: 'Google/Chrome/', keychainService: 'Chrome Safe Storage', aliases: ['chrome', 'google-chrome', 'google-chrome-stable'], linuxDataDir: 'google-chrome/', linuxApplication: 'chrome' },
  { name: 'Chromium', dataDir: 'chromium/', keychainService: 'Chromium Safe Storage', aliases: ['chromium'], linuxDataDir: 'chromium/', linuxApplication: 'chromium' },
  { name: 'Arc', dataDir: 'Arc/User Data/', keychainService: 'Arc Safe Storage', aliases: ['arc'] },
  { name: 'Brave', dataDir: 'BraveSoftware/Brave-Browser/', keychainService: 'Brave Safe Storage', aliases: ['brave'], linuxDataDir: 'BraveSoftware/Brave-Browser/', linuxApplication: 'brave' },
  { name: 'Edge', dataDir: 'Microsoft Edge/', keychainService: 'Microsoft Edge Safe Storage', aliases: ['edge'], linuxDataDir: 'microsoft-edge/', linuxApplication: 'microsoft-edge' },
];
|
||||
|
||||
// ─── Key Cache ──────────────────────────────────────────────────
|
||||
@@ -101,23 +120,105 @@ const keyCache = new Map<string, Buffer>();
|
||||
// ─── Public API ─────────────────────────────────────────────────
|
||||
|
||||
/**
 * Find which browsers are installed (have a cookie DB on disk in any profile).
 *
 * A browser counts as installed when its "Default" profile has a Cookies file
 * on any searched platform, or when any "Profile N" directory under its data
 * dir has one.
 *
 * @returns The registry entries for which a cookie database was found.
 */
export function findInstalledBrowsers(): BrowserInfo[] {
  return BROWSER_REGISTRY.filter(browser => {
    // Check Default profile on any platform
    if (findBrowserMatch(browser, 'Default') !== null) return true;

    // Check numbered profiles (Profile 1, Profile 2, etc.)
    for (const platform of getSearchPlatforms()) {
      const dataDir = getDataDirForPlatform(browser, platform);
      if (!dataDir) continue;
      const browserDir = path.join(getBaseDir(platform), dataDir);
      try {
        const entries = fs.readdirSync(browserDir, { withFileTypes: true });
        const hasNumberedProfile = entries.some(e =>
          e.isDirectory() &&
          e.name.startsWith('Profile ') &&
          fs.existsSync(path.join(browserDir, e.name, 'Cookies'))
        );
        if (hasNumberedProfile) return true;
      } catch {
        // Best-effort probe: a missing or unreadable browser dir simply means
        // "not found on this platform"; keep checking the others.
      }
    }
    return false;
  });
}
|
||||
|
||||
/**
 * Names of the registry browsers that have a data dir defined for the host
 * platform. When the host is neither macOS nor Linux, every registry name is
 * returned.
 */
export function listSupportedBrowserNames(): string[] {
  const host = getHostPlatform();
  const names: string[] = [];
  for (const entry of BROWSER_REGISTRY) {
    // Only filter when the host platform is known.
    if (host && getDataDirForPlatform(entry, host) === null) continue;
    names.push(entry.name);
  }
  return names;
}
|
||||
|
||||
/**
 * List available profiles for a browser.
 *
 * Platforms are scanned in getSearchPlatforms() order. Only directories named
 * "Default" or "Profile ..." that contain a Cookies file are reported, and
 * scanning stops at the first platform that yields at least one profile.
 *
 * @param browserName Name passed to resolveBrowser().
 * @returns One entry per profile directory found (possibly empty).
 */
export function listProfiles(browserName: string): ProfileEntry[] {
  const browser = resolveBrowser(browserName);
  const found: ProfileEntry[] = [];

  for (const platform of getSearchPlatforms()) {
    const relDir = getDataDirForPlatform(browser, platform);
    if (!relDir) continue;

    const root = path.join(getBaseDir(platform), relDir);
    if (!fs.existsSync(root)) continue;

    let dirents: fs.Dirent[];
    try {
      dirents = fs.readdirSync(root, { withFileTypes: true });
    } catch {
      // Unreadable directory: try the next platform.
      continue;
    }

    for (const dirent of dirents) {
      const isProfileDir =
        dirent.isDirectory() &&
        (dirent.name === 'Default' || dirent.name.startsWith('Profile '));
      if (!isProfileDir) continue;

      const profileDir = path.join(root, dirent.name);
      if (!fs.existsSync(path.join(profileDir, 'Cookies'))) continue;

      // Avoid duplicates if the same profile name was already recorded.
      if (found.some(p => p.name === dirent.name)) continue;

      found.push({
        name: dirent.name,
        displayName: readProfileDisplayName(profileDir, dirent.name),
      });
    }

    // Found profiles on this platform — no need to check others.
    if (found.length > 0) break;
  }

  return found;
}

/**
 * Read a human-friendly label from a profile's Preferences file: the first
 * account email if present, otherwise the profile name, otherwise `fallback`.
 * Any read/parse failure also yields `fallback`.
 */
function readProfileDisplayName(profileDir: string, fallback: string): string {
  try {
    const prefsPath = path.join(profileDir, 'Preferences');
    if (!fs.existsSync(prefsPath)) return fallback;

    const prefs = JSON.parse(fs.readFileSync(prefsPath, 'utf-8'));

    // Prefer the account email: signed-in profiles often have generic names
    // like "Person 2" while the email is far more readable.
    const email = prefs?.account_info?.[0]?.email;
    if (email && typeof email === 'string') return email;

    const profileName = prefs?.profile?.name;
    if (profileName && typeof profileName === 'string') return profileName;

    return fallback;
  } catch {
    // Ignore — fall back to directory name.
    return fallback;
  }
}
|
||||
|
||||
/**
|
||||
* List unique cookie domains + counts from a browser's DB. No decryption.
|
||||
*/
|
||||
export function listDomains(browserName: string, profile = 'Default'): { domains: DomainEntry[]; browser: string } {
|
||||
const browser = resolveBrowser(browserName);
|
||||
const dbPath = getCookieDbPath(browser, profile);
|
||||
const db = openDb(dbPath, browser.name);
|
||||
const match = getBrowserMatch(browser, profile);
|
||||
const db = openDb(match.dbPath, browser.name);
|
||||
try {
|
||||
const now = chromiumNow();
|
||||
const rows = db.query(
|
||||
@@ -144,9 +245,9 @@ export async function importCookies(
|
||||
if (domains.length === 0) return { cookies: [], count: 0, failed: 0, domainCounts: {} };
|
||||
|
||||
const browser = resolveBrowser(browserName);
|
||||
const derivedKey = await getDerivedKey(browser);
|
||||
const dbPath = getCookieDbPath(browser, profile);
|
||||
const db = openDb(dbPath, browser.name);
|
||||
const match = getBrowserMatch(browser, profile);
|
||||
const derivedKeys = await getDerivedKeys(match);
|
||||
const db = openDb(match.dbPath, browser.name);
|
||||
|
||||
try {
|
||||
const now = chromiumNow();
|
||||
@@ -167,7 +268,7 @@ export async function importCookies(
|
||||
|
||||
for (const row of rows) {
|
||||
try {
|
||||
const value = decryptCookieValue(row, derivedKey);
|
||||
const value = decryptCookieValue(row, derivedKeys);
|
||||
const cookie = toPlaywrightCookie(row, value);
|
||||
cookies.push(cookie);
|
||||
domainCounts[row.host_key] = (domainCounts[row.host_key] || 0) + 1;
|
||||
@@ -208,17 +309,61 @@ function validateProfile(profile: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
function getCookieDbPath(browser: BrowserInfo, profile: string): string {
|
||||
validateProfile(profile);
|
||||
const appSupport = path.join(os.homedir(), 'Library', 'Application Support');
|
||||
const dbPath = path.join(appSupport, browser.dataDir, profile, 'Cookies');
|
||||
if (!fs.existsSync(dbPath)) {
|
||||
throw new CookieImportError(
|
||||
`${browser.name} is not installed (no cookie database at ${dbPath})`,
|
||||
'not_installed',
|
||||
);
|
||||
/**
 * The current process platform when it is one of the supported ones
 * ('darwin' or 'linux'); null on any other platform.
 */
function getHostPlatform(): BrowserPlatform | null {
  const host = process.platform;
  switch (host) {
    case 'darwin':
    case 'linux':
      return host;
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Platforms to probe for browser data, host platform first.
 *
 * Always contains both 'darwin' and 'linux' exactly once; when the host is one
 * of them it is moved to the front so its layout is tried before the other.
 */
function getSearchPlatforms(): BrowserPlatform[] {
  const host = getHostPlatform();
  const others = (['darwin', 'linux'] as BrowserPlatform[]).filter(platform => platform !== host);
  return host ? [host, ...others] : others;
}
|
||||
|
||||
/**
 * Data dir of `browser` for the given platform layout.
 *
 * @returns `dataDir` for 'darwin'; `linuxDataDir` for 'linux', or null when
 *          that field is missing or empty.
 */
function getDataDirForPlatform(browser: BrowserInfo, platform: BrowserPlatform): string | null {
  if (platform === 'darwin') return browser.dataDir;
  return browser.linuxDataDir ? browser.linuxDataDir : null;
}
|
||||
|
||||
/**
 * Directory under the user's home that browser data dirs are resolved against:
 * ~/Library/Application Support for 'darwin', ~/.config otherwise.
 */
function getBaseDir(platform: BrowserPlatform): string {
  const home = os.homedir();
  if (platform === 'darwin') {
    return path.join(home, 'Library', 'Application Support');
  }
  return path.join(home, '.config');
}
|
||||
|
||||
/**
 * Locate the cookie database of `browser`/`profile` on the first platform
 * layout where the file exists.
 *
 * The profile name is validated first (validateProfile may throw).
 *
 * @returns The match with its platform and DB path, or null if none exists.
 */
function findBrowserMatch(browser: BrowserInfo, profile: string): BrowserMatch | null {
  validateProfile(profile);

  for (const platform of getSearchPlatforms()) {
    const relDir = getDataDirForPlatform(browser, platform);
    if (relDir) {
      const dbPath = path.join(getBaseDir(platform), relDir, profile, 'Cookies');
      let present = false;
      try {
        present = fs.existsSync(dbPath);
      } catch {
        // A failing probe is treated the same as "file not there".
      }
      if (present) return { browser, platform, dbPath };
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Like findBrowserMatch, but a missing cookie database is an error.
 *
 * @throws CookieImportError with code 'not_installed', listing every path
 *         that was tried.
 */
function getBrowserMatch(browser: BrowserInfo, profile: string): BrowserMatch {
  const found = findBrowserMatch(browser, profile);
  if (found) return found;

  // Rebuild the candidate paths purely for the error message.
  const tried: string[] = [];
  for (const platform of getSearchPlatforms()) {
    const relDir = getDataDirForPlatform(browser, platform);
    if (relDir) {
      tried.push(path.join(getBaseDir(platform), relDir, profile, 'Cookies'));
    }
  }

  throw new CookieImportError(
    `${browser.name} is not installed (no cookie database at ${tried.join(' or ')})`,
    'not_installed',
  );
}
|
||||
|
||||
// ─── Internal: SQLite Access ────────────────────────────────────
|
||||
@@ -273,17 +418,40 @@ function openDbFromCopy(dbPath: string, browserName: string): Database {
|
||||
|
||||
// ─── Internal: Keychain Access (async, 10s timeout) ─────────────
|
||||
|
||||
async function getDerivedKey(browser: BrowserInfo): Promise<Buffer> {
|
||||
const cached = keyCache.get(browser.keychainService);
|
||||
if (cached) return cached;
|
||||
/**
 * Derive the 16-byte AES key from a storage password using PBKDF2-HMAC-SHA1
 * with the fixed salt 'saltysalt'.
 */
function deriveKey(password: string, iterations: number): Buffer {
  const salt = 'saltysalt';
  const keyLengthBytes = 16;
  return crypto.pbkdf2Sync(password, salt, iterations, keyLengthBytes, 'sha1');
}
|
||||
|
||||
const password = await getKeychainPassword(browser.keychainService);
|
||||
const derived = crypto.pbkdf2Sync(password, 'saltysalt', 1003, 16, 'sha1');
|
||||
keyCache.set(browser.keychainService, derived);
|
||||
/**
 * Return the key stored in keyCache under `cacheKey`, deriving and storing it
 * from `password`/`iterations` on a miss. On a hit the password is not used.
 */
function getCachedDerivedKey(cacheKey: string, password: string, iterations: number): Buffer {
  let key = keyCache.get(cacheKey);
  if (!key) {
    key = deriveKey(password, iterations);
    keyCache.set(cacheKey, key);
  }
  return key;
}
|
||||
|
||||
async function getKeychainPassword(service: string): Promise<string> {
|
||||
/**
 * Build the map of encryption prefix ("v10" / "v11") to AES key for a match.
 *
 * - darwin: "v10" key from the Keychain password, 1003 PBKDF2 iterations.
 * - linux:  "v10" key from the fixed password "peanuts" (1 iteration), plus a
 *           "v11" key when a secret-store password can be found (1 iteration).
 *
 * keyCache is consulted BEFORE any password lookup, so a key derived earlier
 * does not trigger another Keychain prompt or secret-tool subprocess.
 *
 * @returns Map keyed by prefix; "v11" is absent on Linux when no secret is found.
 */
async function getDerivedKeys(match: BrowserMatch): Promise<Map<string, Buffer>> {
  const service = match.browser.keychainService;

  if (match.platform === 'darwin') {
    const cacheKey = `darwin:${service}:v10`;
    const cached = keyCache.get(cacheKey);
    if (cached) return new Map<string, Buffer>([['v10', cached]]);

    const password = await getMacKeychainPassword(service);
    return new Map<string, Buffer>([
      ['v10', getCachedDerivedKey(cacheKey, password, 1003)],
    ]);
  }

  const keys = new Map<string, Buffer>();
  keys.set('v10', getCachedDerivedKey('linux:v10', 'peanuts', 1));

  const v11CacheKey = `linux:${service}:v11`;
  const cachedV11 = keyCache.get(v11CacheKey);
  if (cachedV11) {
    keys.set('v11', cachedV11);
    return keys;
  }

  // A failed lookup is not cached, so it is retried on the next call.
  const linuxPassword = await getLinuxSecretPassword(match.browser);
  if (linuxPassword) {
    keys.set('v11', getCachedDerivedKey(v11CacheKey, linuxPassword, 1));
  }
  return keys;
}
|
||||
|
||||
async function getMacKeychainPassword(service: string): Promise<string> {
|
||||
// Use async Bun.spawn with timeout to avoid blocking the event loop.
|
||||
// macOS may show an Allow/Deny dialog that blocks until the user responds.
|
||||
const proc = Bun.spawn(
|
||||
@@ -341,6 +509,47 @@ async function getKeychainPassword(service: string): Promise<string> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Look up the browser's storage password via `secret-tool`.
 *
 * Tries the "Title" attribute first, then (when the browser has a
 * linuxApplication) the v2 and v1 libsecret schemas, each with a 3 second
 * timeout. Lookups run one after another; the first non-empty result wins.
 *
 * @returns The password, or null when no lookup produced one.
 */
async function getLinuxSecretPassword(browser: BrowserInfo): Promise<string | null> {
  const lookups: string[][] = [
    ['secret-tool', 'lookup', 'Title', browser.keychainService],
  ];

  if (browser.linuxApplication) {
    const schemas = ['chrome_libsecret_os_crypt_password_v2', 'chrome_libsecret_os_crypt_password'];
    for (const schema of schemas) {
      lookups.push(['secret-tool', 'lookup', 'xdg:schema', schema, 'application', browser.linuxApplication]);
    }
  }

  for (const argv of lookups) {
    const secret = await runPasswordLookup(argv, 3_000);
    if (secret) return secret;
  }

  return null;
}
|
||||
|
||||
/**
 * Run a password-lookup command and return its trimmed stdout.
 *
 * @param cmd       Argument vector to spawn (never built from user input by the callers here).
 * @param timeoutMs Kill the process and give up after this many milliseconds.
 * @returns The trimmed output, or null when the command cannot be spawned,
 *          times out, exits non-zero, or prints nothing.
 */
async function runPasswordLookup(cmd: string[], timeoutMs: number): Promise<string | null> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const proc = Bun.spawn(cmd, { stdout: 'pipe', stderr: 'pipe' });
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        proc.kill();
        reject(new Error('timeout'));
      }, timeoutMs);
    });

    const exitCode = await Promise.race([proc.exited, timeout]);
    const stdout = await new Response(proc.stdout).text();
    if (exitCode !== 0) return null;

    const password = stdout.trim();
    return password.length > 0 ? password : null;
  } catch {
    // Best-effort lookup: spawn failures and timeouts both mean "no password".
    return null;
  } finally {
    // Always cancel the pending timer; otherwise it stays scheduled after the
    // process has exited and later calls kill() on a finished process.
    clearTimeout(timer);
  }
}
|
||||
|
||||
// ─── Internal: Cookie Decryption ────────────────────────────────
|
||||
|
||||
interface RawCookie {
|
||||
@@ -356,7 +565,7 @@ interface RawCookie {
|
||||
samesite: number;
|
||||
}
|
||||
|
||||
function decryptCookieValue(row: RawCookie, key: Buffer): string {
|
||||
function decryptCookieValue(row: RawCookie, keys: Map<string, Buffer>): string {
|
||||
// Prefer unencrypted value if present
|
||||
if (row.value && row.value.length > 0) return row.value;
|
||||
|
||||
@@ -364,16 +573,15 @@ function decryptCookieValue(row: RawCookie, key: Buffer): string {
|
||||
if (ev.length === 0) return '';
|
||||
|
||||
const prefix = ev.slice(0, 3).toString('utf-8');
|
||||
if (prefix !== 'v10') {
|
||||
throw new Error(`Unknown encryption prefix: ${prefix}`);
|
||||
}
|
||||
const key = keys.get(prefix);
|
||||
if (!key) throw new Error(`No decryption key available for ${prefix} cookies`);
|
||||
|
||||
const ciphertext = ev.slice(3);
|
||||
const iv = Buffer.alloc(16, 0x20); // 16 space characters
|
||||
const decipher = crypto.createDecipheriv('aes-128-cbc', key, iv);
|
||||
const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
|
||||
|
||||
// First 32 bytes are HMAC-SHA256 authentication tag; actual value follows
|
||||
// Chromium prefixes encrypted cookie payloads with 32 bytes of metadata.
|
||||
if (plaintext.length <= 32) return '';
|
||||
return plaintext.slice(32).toString('utf-8');
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
*/
|
||||
|
||||
import type { BrowserManager } from './browser-manager';
|
||||
import { findInstalledBrowsers, listDomains, importCookies, CookieImportError, type PlaywrightCookie } from './cookie-import-browser';
|
||||
import { findInstalledBrowsers, listProfiles, listDomains, importCookies, CookieImportError, type PlaywrightCookie } from './cookie-import-browser';
|
||||
import { getCookiePickerHTML } from './cookie-picker-ui';
|
||||
|
||||
// ─── State ──────────────────────────────────────────────────────
|
||||
@@ -90,13 +90,24 @@ export async function handleCookiePickerRoute(
|
||||
}, { port });
|
||||
}
|
||||
|
||||
// GET /cookie-picker/domains?browser=<name> — list domains + counts
|
||||
// GET /cookie-picker/profiles?browser=<name> — list profiles for a browser
|
||||
if (pathname === '/cookie-picker/profiles' && req.method === 'GET') {
|
||||
const browserName = url.searchParams.get('browser');
|
||||
if (!browserName) {
|
||||
return errorResponse("Missing 'browser' parameter", 'missing_param', { port });
|
||||
}
|
||||
const profiles = listProfiles(browserName);
|
||||
return jsonResponse({ profiles }, { port });
|
||||
}
|
||||
|
||||
// GET /cookie-picker/domains?browser=<name>&profile=<profile> — list domains + counts
|
||||
if (pathname === '/cookie-picker/domains' && req.method === 'GET') {
|
||||
const browserName = url.searchParams.get('browser');
|
||||
if (!browserName) {
|
||||
return errorResponse("Missing 'browser' parameter", 'missing_param', { port });
|
||||
}
|
||||
const result = listDomains(browserName);
|
||||
const profile = url.searchParams.get('profile') || 'Default';
|
||||
const result = listDomains(browserName, profile);
|
||||
return jsonResponse({
|
||||
browser: result.browser,
|
||||
domains: result.domains,
|
||||
@@ -112,14 +123,14 @@ export async function handleCookiePickerRoute(
|
||||
return errorResponse('Invalid JSON body', 'bad_request', { port });
|
||||
}
|
||||
|
||||
const { browser, domains } = body;
|
||||
const { browser, domains, profile } = body;
|
||||
if (!browser) return errorResponse("Missing 'browser' field", 'missing_param', { port });
|
||||
if (!domains || !Array.isArray(domains) || domains.length === 0) {
|
||||
return errorResponse("Missing or empty 'domains' array", 'missing_param', { port });
|
||||
}
|
||||
|
||||
// Decrypt cookies from the browser DB
|
||||
const result = await importCookies(browser, domains);
|
||||
const result = await importCookies(browser, domains, profile || 'Default');
|
||||
|
||||
if (result.cookies.length === 0) {
|
||||
return jsonResponse({
|
||||
|
||||
@@ -101,6 +101,30 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
background: #4ade80;
|
||||
}
|
||||
|
||||
/* ─── Profile Pills ─────────────────── */
|
||||
.profile-pills {
|
||||
display: flex;
|
||||
gap: 6px;
|
||||
padding: 0 20px 12px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.profile-pill {
|
||||
padding: 4px 10px;
|
||||
border-radius: 14px;
|
||||
border: 1px solid #2a2a2a;
|
||||
background: #141414;
|
||||
color: #888;
|
||||
font-size: 12px;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.profile-pill:hover { border-color: #444; color: #bbb; }
|
||||
.profile-pill.active {
|
||||
border-color: #60a5fa;
|
||||
background: #0a1a2a;
|
||||
color: #60a5fa;
|
||||
}
|
||||
|
||||
/* ─── Search ──────────────────────────── */
|
||||
.search-wrap {
|
||||
padding: 0 20px 12px;
|
||||
@@ -189,7 +213,22 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
border-top: 1px solid #222;
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
.btn-import-all {
|
||||
padding: 4px 12px;
|
||||
border-radius: 6px;
|
||||
border: 1px solid #333;
|
||||
background: #1a1a1a;
|
||||
color: #4ade80;
|
||||
font-size: 12px;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.btn-import-all:hover { border-color: #4ade80; background: #0a2a14; }
|
||||
.btn-import-all:disabled { opacity: 0.3; cursor: not-allowed; pointer-events: none; }
|
||||
|
||||
/* ─── Imported Panel ──────────────────── */
|
||||
.imported-empty {
|
||||
@@ -268,13 +307,14 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
<div class="panel panel-left">
|
||||
<div class="panel-header">Source Browser</div>
|
||||
<div id="browser-pills" class="browser-pills"></div>
|
||||
<div id="profile-pills" class="profile-pills" style="display:none"></div>
|
||||
<div class="search-wrap">
|
||||
<input type="text" class="search-input" id="search" placeholder="Search domains..." />
|
||||
</div>
|
||||
<div class="domain-list" id="source-domains">
|
||||
<div class="loading-row"><span class="spinner"></span> Detecting browsers...</div>
|
||||
</div>
|
||||
<div class="panel-footer" id="source-footer"></div>
|
||||
<div class="panel-footer" id="source-footer"><span id="source-footer-text"></span><button class="btn-import-all" id="btn-import-all" style="display:none">Import All</button></div>
|
||||
</div>
|
||||
|
||||
<!-- Right Panel: Imported -->
|
||||
@@ -291,15 +331,19 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
(function() {
|
||||
const BASE = '${baseUrl}';
|
||||
let activeBrowser = null;
|
||||
let activeProfile = 'Default';
|
||||
let allProfiles = [];
|
||||
let allDomains = [];
|
||||
let importedSet = {}; // domain → count
|
||||
let inflight = {}; // domain → true (prevents double-click)
|
||||
|
||||
const $pills = document.getElementById('browser-pills');
|
||||
const $profilePills = document.getElementById('profile-pills');
|
||||
const $search = document.getElementById('search');
|
||||
const $sourceDomains = document.getElementById('source-domains');
|
||||
const $importedDomains = document.getElementById('imported-domains');
|
||||
const $sourceFooter = document.getElementById('source-footer');
|
||||
const $sourceFooter = document.getElementById('source-footer-text');
|
||||
const $btnImportAll = document.getElementById('btn-import-all');
|
||||
const $importedFooter = document.getElementById('imported-footer');
|
||||
const $banner = document.getElementById('banner');
|
||||
|
||||
@@ -380,22 +424,76 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
// ─── Select Browser ────────────────────
|
||||
// Switch the picker to another source browser: highlight its pill, fetch its
// profiles, pick the active profile, then load that profile's domains.
// NOTE: this code is emitted inside a template string, so it must not use
// template literals itself.
async function selectBrowser(name) {
  activeBrowser = name;
  activeProfile = 'Default';

  // Update pills
  $pills.querySelectorAll('.pill').forEach(p => {
    p.classList.toggle('active', p.textContent === name);
  });

  $sourceDomains.innerHTML = '<div class="loading-row"><span class="spinner"></span> Loading...</div>';
  $sourceFooter.textContent = '';
  $search.value = '';

  try {
    // Fetch profiles for this browser
    const profileData = await api('/profiles?browser=' + encodeURIComponent(name));
    allProfiles = profileData.profiles || [];

    // Choose the active profile BEFORE rendering the pills, otherwise no pill
    // is highlighted when the first profile is not named "Default".
    // The first profile returned by the server is used as-is.
    activeProfile = allProfiles.length > 0 ? allProfiles[0].name : 'Default';

    if (allProfiles.length > 1) {
      // Show profile pills only when there is a choice to make
      $profilePills.style.display = 'flex';
      renderProfilePills();
    } else {
      $profilePills.style.display = 'none';
    }

    await loadDomains();
  } catch (err) {
    showBanner(err.message, 'error', err.action === 'retry' ? () => selectBrowser(name) : null);
    $sourceDomains.innerHTML = '<div class="imported-empty">Failed to load</div>';
    $profilePills.style.display = 'none';
  }
}
|
||||
|
||||
// ─── Render Profile Pills ─────────────
|
||||
// Rebuild the profile pill row from allProfiles, marking the pill whose name
// equals activeProfile, and wire each pill to selectProfile.
function renderProfilePills() {
  $profilePills.innerHTML = allProfiles.map(function (profile) {
    const cls = 'profile-pill' + (profile.name === activeProfile ? ' active' : '');
    const text = profile.displayName || profile.name;
    return '<button class="' + cls + '" data-profile="' + escHtml(profile.name) + '">' + escHtml(text) + '</button>';
  }).join('');

  for (const pill of $profilePills.querySelectorAll('.profile-pill')) {
    pill.addEventListener('click', () => selectProfile(pill.dataset.profile));
  }
}
|
||||
|
||||
// ─── Select Profile ───────────────────
|
||||
// Make profileName the active profile, reset the search/footer/list to their
// loading state, and fetch that profile's domains.
async function selectProfile(profileName) {
  activeProfile = profileName;
  renderProfilePills();

  $search.value = '';
  $sourceFooter.textContent = '';
  $sourceDomains.innerHTML = '<div class="loading-row"><span class="spinner"></span> Loading domains...</div>';

  await loadDomains();
}
|
||||
|
||||
// ─── Load Domains ─────────────────────
|
||||
// Fetch the domain list for the active browser/profile and render it.
// The selection is captured up front: if the user switches browser or profile
// while the request is in flight, the late response (or error) is dropped so
// it cannot overwrite the list for the current selection.
async function loadDomains() {
  const requestedBrowser = activeBrowser;
  const requestedProfile = activeProfile;
  const isStale = () => requestedBrowser !== activeBrowser || requestedProfile !== activeProfile;

  try {
    const data = await api('/domains?browser=' + encodeURIComponent(requestedBrowser) + '&profile=' + encodeURIComponent(requestedProfile));
    if (isStale()) return;
    // Tolerate a response without a domains array.
    allDomains = data.domains || [];
    renderSourceDomains();
  } catch (err) {
    if (isStale()) return;
    showBanner(err.message, 'error', err.action === 'retry' ? () => loadDomains() : null);
    $sourceDomains.innerHTML = '<div class="imported-empty">Failed to load domains</div>';
  }
}
|
||||
@@ -437,6 +535,16 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
const totalCookies = allDomains.reduce((s, d) => s + d.count, 0);
|
||||
$sourceFooter.textContent = totalDomains + ' domains · ' + totalCookies.toLocaleString() + ' cookies';
|
||||
|
||||
// Show/hide Import All button
|
||||
const unimported = filtered.filter(d => !(d.domain in importedSet) && !inflight[d.domain]);
|
||||
if (unimported.length > 0) {
|
||||
$btnImportAll.style.display = '';
|
||||
$btnImportAll.disabled = false;
|
||||
$btnImportAll.textContent = 'Import All (' + unimported.length + ')';
|
||||
} else {
|
||||
$btnImportAll.style.display = 'none';
|
||||
}
|
||||
|
||||
// Click handlers
|
||||
$sourceDomains.querySelectorAll('.btn-add[data-domain]').forEach(btn => {
|
||||
btn.addEventListener('click', () => importDomain(btn.dataset.domain));
|
||||
@@ -453,7 +561,7 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
const data = await api('/import', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ browser: activeBrowser, domains: [domain] }),
|
||||
body: JSON.stringify({ browser: activeBrowser, domains: [domain], profile: activeProfile }),
|
||||
});
|
||||
|
||||
if (data.domainCounts) {
|
||||
@@ -471,6 +579,42 @@ export function getCookiePickerHTML(serverPort: number): string {
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Import All ───────────────────────
|
||||
// Import every domain that matches the current search filter and is neither
// imported already nor currently being imported, in a single /import request.
async function importAll() {
  const query = $search.value.toLowerCase();
  const filtered = query
    ? allDomains.filter(d => d.domain.toLowerCase().includes(query))
    : allDomains;
  const toImport = filtered.filter(d => !(d.domain in importedSet) && !inflight[d.domain]);
  if (toImport.length === 0) return;

  const domains = toImport.map(d => d.domain);

  // Mark the whole batch as in flight, the same marker this function's own
  // filter checks, so an overlapping import can skip these domains.
  // NOTE(review): assumes the per-row import honours inflight — confirm.
  domains.forEach(d => { inflight[d] = true; });

  $btnImportAll.disabled = true;
  $btnImportAll.textContent = 'Importing...';

  try {
    const data = await api('/import', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ browser: activeBrowser, domains: domains, profile: activeProfile }),
    });

    if (data.domainCounts) {
      for (const [d, count] of Object.entries(data.domainCounts)) {
        importedSet[d] = (importedSet[d] || 0) + count;
      }
    }
    renderImported();
  } catch (err) {
    showBanner('Import all failed: ' + err.message, 'error',
      err.action === 'retry' ? () => importAll() : null);
  } finally {
    // Release the markers before re-rendering so a retry can pick them up.
    domains.forEach(d => { delete inflight[d]; });
    renderSourceDomains();
  }
}
|
||||
|
||||
$btnImportAll.addEventListener('click', importAll);
|
||||
|
||||
// ─── Render Imported ───────────────────
|
||||
function renderImported() {
|
||||
const entries = Object.entries(importedSet).sort((a, b) => b[1] - a[1]);
|
||||
|
||||
@@ -82,8 +82,12 @@ export async function validateNavigationUrl(url: string): Promise<void> {
|
||||
);
|
||||
}
|
||||
|
||||
// DNS rebinding protection: resolve hostname and check if it points to metadata IPs
|
||||
if (await resolvesToBlockedIp(hostname)) {
|
||||
// DNS rebinding protection: resolve hostname and check if it points to metadata IPs.
|
||||
// Skip for loopback/private IPs — they can't be DNS-rebinded and the async DNS
|
||||
// resolution adds latency that breaks concurrent E2E tests under load.
|
||||
const isLoopback = hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '::1';
|
||||
const isPrivateNet = /^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)/.test(hostname);
|
||||
if (!isLoopback && !isPrivateNet && await resolvesToBlockedIp(hostname)) {
|
||||
throw new Error(
|
||||
`Blocked: ${parsed.hostname} resolves to a cloud metadata IP. Possible DNS rebinding attack.`
|
||||
);
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
*/
|
||||
|
||||
import type { BrowserManager } from './browser-manager';
|
||||
import { findInstalledBrowsers, importCookies } from './cookie-import-browser';
|
||||
import { findInstalledBrowsers, importCookies, listSupportedBrowserNames } from './cookie-import-browser';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
@@ -309,16 +309,18 @@ export async function handleWriteCommand(
|
||||
|
||||
case 'cookie-import-browser': {
|
||||
// Two modes:
|
||||
// 1. Direct CLI import: cookie-import-browser <browser> --domain <domain>
|
||||
// 1. Direct CLI import: cookie-import-browser <browser> --domain <domain> [--profile <profile>]
|
||||
// 2. Open picker UI: cookie-import-browser [browser]
|
||||
const browserArg = args[0];
|
||||
const domainIdx = args.indexOf('--domain');
|
||||
const profileIdx = args.indexOf('--profile');
|
||||
const profile = (profileIdx !== -1 && profileIdx + 1 < args.length) ? args[profileIdx + 1] : 'Default';
|
||||
|
||||
if (domainIdx !== -1 && domainIdx + 1 < args.length) {
|
||||
// Direct import mode — no UI
|
||||
const domain = args[domainIdx + 1];
|
||||
const browser = browserArg || 'comet';
|
||||
const result = await importCookies(browser, [domain]);
|
||||
const result = await importCookies(browser, [domain], profile);
|
||||
if (result.cookies.length > 0) {
|
||||
await page.context().addCookies(result.cookies);
|
||||
}
|
||||
@@ -333,7 +335,7 @@ export async function handleWriteCommand(
|
||||
|
||||
const browsers = findInstalledBrowsers();
|
||||
if (browsers.length === 0) {
|
||||
throw new Error('No Chromium browsers found. Supported: Comet, Chrome, Arc, Brave, Edge');
|
||||
throw new Error(`No Chromium browsers found. Supported: ${listSupportedBrowserNames().join(', ')}`);
|
||||
}
|
||||
|
||||
const pickerUrl = `http://127.0.0.1:${port}/cookie-picker`;
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
* Remaining bytes = actual cookie value
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, mock } from 'bun:test';
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import * as crypto from 'crypto';
|
||||
import * as fs from 'fs';
|
||||
@@ -24,16 +24,26 @@ import * as os from 'os';
|
||||
|
||||
const TEST_PASSWORD = 'test-keychain-password';
|
||||
const TEST_KEY = crypto.pbkdf2Sync(TEST_PASSWORD, 'saltysalt', 1003, 16, 'sha1');
|
||||
const LINUX_V10_PASSWORD = 'peanuts';
|
||||
const LINUX_V10_KEY = crypto.pbkdf2Sync(LINUX_V10_PASSWORD, 'saltysalt', 1, 16, 'sha1');
|
||||
const LINUX_V11_PASSWORD = 'test-linux-secret';
|
||||
const LINUX_V11_KEY = crypto.pbkdf2Sync(LINUX_V11_PASSWORD, 'saltysalt', 1, 16, 'sha1');
|
||||
const IV = Buffer.alloc(16, 0x20);
|
||||
const CHROMIUM_EPOCH_OFFSET = 11644473600000000n;
|
||||
|
||||
// Fixture DB path
|
||||
const FIXTURE_DIR = path.join(import.meta.dir, 'fixtures');
|
||||
const FIXTURE_DB = path.join(FIXTURE_DIR, 'test-cookies.db');
|
||||
const LINUX_FIXTURE_DB = path.join(FIXTURE_DIR, 'test-cookies-linux.db');
|
||||
|
||||
// ─── Encryption Helper ──────────────────────────────────────────
|
||||
|
||||
function encryptCookieValue(value: string): Buffer {
|
||||
function encryptCookieValue(
|
||||
value: string,
|
||||
options?: { key?: Buffer; prefix?: 'v10' | 'v11' },
|
||||
): Buffer {
|
||||
const key = options?.key ?? TEST_KEY;
|
||||
const prefix = options?.prefix ?? 'v10';
|
||||
// 32-byte HMAC tag (random for test) + actual value
|
||||
const hmacTag = crypto.randomBytes(32);
|
||||
const plaintext = Buffer.concat([hmacTag, Buffer.from(value, 'utf-8')]);
|
||||
@@ -43,12 +53,11 @@ function encryptCookieValue(value: string): Buffer {
|
||||
const padLen = blockSize - (plaintext.length % blockSize);
|
||||
const padded = Buffer.concat([plaintext, Buffer.alloc(padLen, padLen)]);
|
||||
|
||||
const cipher = crypto.createCipheriv('aes-128-cbc', TEST_KEY, IV);
|
||||
const cipher = crypto.createCipheriv('aes-128-cbc', key, IV);
|
||||
cipher.setAutoPadding(false); // We padded manually
|
||||
const encrypted = Buffer.concat([cipher.update(padded), cipher.final()]);
|
||||
|
||||
// Prefix with the version tag ("v10" unless options.prefix says otherwise)
|
||||
return Buffer.concat([Buffer.from('v10'), encrypted]);
|
||||
return Buffer.concat([Buffer.from(prefix), encrypted]);
|
||||
}
|
||||
|
||||
function chromiumEpoch(unixSeconds: number): bigint {
|
||||
@@ -57,11 +66,11 @@ function chromiumEpoch(unixSeconds: number): bigint {
|
||||
|
||||
// ─── Create Fixture Database ────────────────────────────────────
|
||||
|
||||
function createFixtureDb() {
|
||||
function createFixtureDb(dbPath: string): Database {
|
||||
fs.mkdirSync(FIXTURE_DIR, { recursive: true });
|
||||
if (fs.existsSync(FIXTURE_DB)) fs.unlinkSync(FIXTURE_DB);
|
||||
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
|
||||
|
||||
const db = new Database(FIXTURE_DB);
|
||||
const db = new Database(dbPath);
|
||||
db.run(`CREATE TABLE cookies (
|
||||
host_key TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
@@ -74,7 +83,11 @@ function createFixtureDb() {
|
||||
has_expires INTEGER NOT NULL DEFAULT 0,
|
||||
samesite INTEGER NOT NULL DEFAULT 1
|
||||
)`);
|
||||
return db;
|
||||
}
|
||||
|
||||
function createMacFixtureDb() {
|
||||
const db = createFixtureDb(FIXTURE_DB);
|
||||
const insert = db.prepare(`INSERT INTO cookies
|
||||
(host_key, name, value, encrypted_value, path, expires_utc, is_secure, is_httponly, has_expires, samesite)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
|
||||
@@ -110,6 +123,21 @@ function createFixtureDb() {
|
||||
db.close();
|
||||
}
|
||||
|
||||
/**
 * Builds the Linux cookie fixture at LINUX_FIXTURE_DB with three rows:
 *   - `.linux-v10.com`   encrypted with LINUX_V10_KEY and a "v10" prefix
 *   - `.linux-v11.com`   encrypted with LINUX_V11_KEY and a "v11" prefix
 *   - `.linux-plain.com` stored unencrypted in the `value` column
 *
 * The database handle is closed even when an insert or encryption step throws,
 * so a failed setup does not leave the fixture file open.
 */
function createLinuxFixtureDb() {
  const db = createFixtureDb(LINUX_FIXTURE_DB);
  try {
    const insert = db.prepare(`INSERT INTO cookies
      (host_key, name, value, encrypted_value, path, expires_utc, is_secure, is_httponly, has_expires, samesite)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);

    // Expire one year from now so the rows are never treated as stale.
    const futureExpiry = Number(chromiumEpoch(Math.floor(Date.now() / 1000) + 86400 * 365));

    insert.run('.linux-v10.com', 'sid', '', encryptCookieValue('linux-v10-value', { key: LINUX_V10_KEY, prefix: 'v10' }), '/', futureExpiry, 1, 1, 1, 1);
    insert.run('.linux-v11.com', 'auth', '', encryptCookieValue('linux-v11-value', { key: LINUX_V11_KEY, prefix: 'v11' }), '/', futureExpiry, 1, 1, 1, 1);
    insert.run('.linux-plain.com', 'plain', 'plain-linux', Buffer.alloc(0), '/', futureExpiry, 0, 0, 1, 1);
  } finally {
    db.close();
  }
}
|
||||
|
||||
// ─── Mock Setup ─────────────────────────────────────────────────
|
||||
// We need to mock:
|
||||
// 1. The Keychain access (getKeychainPassword) to return TEST_PASSWORD
|
||||
@@ -120,17 +148,18 @@ let findInstalledBrowsers: any;
|
||||
let listDomains: any;
|
||||
let importCookies: any;
|
||||
let CookieImportError: any;
|
||||
let originalSpawn: typeof Bun.spawn;
|
||||
|
||||
beforeAll(async () => {
|
||||
createFixtureDb();
|
||||
createMacFixtureDb();
|
||||
createLinuxFixtureDb();
|
||||
|
||||
// Mock Bun.spawn to return test password for keychain access
|
||||
const origSpawn = Bun.spawn;
|
||||
originalSpawn = Bun.spawn;
|
||||
// @ts-ignore - monkey-patching for test
|
||||
Bun.spawn = function(cmd: any, opts: any) {
|
||||
// Intercept security find-generic-password calls
|
||||
if (Array.isArray(cmd) && cmd[0] === 'security' && cmd[1] === 'find-generic-password') {
|
||||
const service = cmd[3]; // -s <service>
|
||||
// Return test password for any known test service
|
||||
return {
|
||||
stdout: new ReadableStream({
|
||||
@@ -146,8 +175,23 @@ beforeAll(async () => {
|
||||
kill: () => {},
|
||||
};
|
||||
}
|
||||
if (Array.isArray(cmd) && cmd[0] === 'secret-tool' && cmd[1] === 'lookup') {
|
||||
return {
|
||||
stdout: new ReadableStream({
|
||||
start(controller) {
|
||||
controller.enqueue(new TextEncoder().encode(LINUX_V11_PASSWORD + '\n'));
|
||||
controller.close();
|
||||
}
|
||||
}),
|
||||
stderr: new ReadableStream({
|
||||
start(controller) { controller.close(); }
|
||||
}),
|
||||
exited: Promise.resolve(0),
|
||||
kill: () => {},
|
||||
};
|
||||
}
|
||||
// Pass through other spawn calls
|
||||
return origSpawn(cmd, opts);
|
||||
return originalSpawn(cmd, opts);
|
||||
};
|
||||
|
||||
// Import the module (uses our mocked Bun.spawn)
|
||||
@@ -159,8 +203,12 @@ beforeAll(async () => {
|
||||
});
|
||||
|
||||
afterAll(() => {
  // Put the real Bun.spawn back in place of the test double
  // @ts-ignore - monkey-patching for test
  Bun.spawn = originalSpawn;

  // Fixture removal is best-effort: a missing file or a non-empty directory is ignored
  for (const fixturePath of [FIXTURE_DB, LINUX_FIXTURE_DB]) {
    try { fs.unlinkSync(fixturePath); } catch {}
  }
  try { fs.rmdirSync(FIXTURE_DIR); } catch {}
});
|
||||
|
||||
@@ -176,6 +224,35 @@ afterAll(() => {
|
||||
// 2. Decrypting them with the module's decryption logic
|
||||
// The actual DB path resolution is tested separately.
|
||||
|
||||
/**
 * Temporarily installs `sourceDb` as the `Cookies` file of a browser profile
 * under the current user's home directory, runs `run`, and then puts the
 * profile back the way it was found.
 *
 * All setup happens inside the try block so that a failure while installing
 * the fixture (for example a missing `sourceDb`) still triggers cleanup.
 *
 * @param relativeBrowserDir Browser data directory relative to os.homedir(), e.g. '.config/chromium'.
 * @param sourceDb           Path of the fixture database copied into place.
 * @param run                Callback executed while the fixture is installed.
 * @param profile            Profile sub-directory name; defaults to 'Default'.
 * @returns The value `run` resolves to.
 */
async function withInstalledProfile<T>(
  relativeBrowserDir: string,
  sourceDb: string,
  run: () => Promise<T>,
  profile = 'Default',
): Promise<T> {
  const homeDir = os.homedir();
  const profileDir = path.join(homeDir, relativeBrowserDir, profile);
  const cookiesPath = path.join(profileDir, 'Cookies');
  const backupPath = path.join(profileDir, `Cookies.backup-${crypto.randomUUID()}`);
  const hadProfileDir = fs.existsSync(profileDir);
  const hadOriginal = fs.existsSync(cookiesPath);
  let backedUp = false;

  try {
    fs.mkdirSync(profileDir, { recursive: true });
    if (hadOriginal) {
      fs.copyFileSync(cookiesPath, backupPath);
      backedUp = true;
    }
    fs.copyFileSync(sourceDb, cookiesPath);

    return await run();
  } finally {
    if (hadOriginal) {
      if (backedUp) {
        fs.copyFileSync(backupPath, cookiesPath);
        fs.unlinkSync(backupPath);
      } else {
        // The backup copy itself failed, so the original was never overwritten;
        // only a partially written backup may need removing.
        try { fs.unlinkSync(backupPath); } catch {}
      }
    } else {
      try { fs.unlinkSync(cookiesPath); } catch {}
      // Only remove the profile directory if this helper created it.
      if (!hadProfileDir) {
        try { fs.rmdirSync(profileDir); } catch {}
      }
    }
  }
}
|
||||
|
||||
// ─── Tests ──────────────────────────────────────────────────────
|
||||
|
||||
describe('Cookie Import Browser', () => {
|
||||
@@ -351,6 +428,51 @@ describe('Cookie Import Browser', () => {
|
||||
expect(b).toHaveProperty('aliases');
|
||||
}
|
||||
});
|
||||
|
||||
// NOTE(review): no profile argument is passed, so withInstalledProfile installs the
// fixture into the 'Default' profile of ~/.config/chromium — confirm that temporarily
// replacing a developer's real Cookies file is acceptable here.
test('detects linux-style Chromium profiles under ~/.config', async () => {
  await withInstalledProfile('.config/chromium', LINUX_FIXTURE_DB, async () => {
    const detectedNames = findInstalledBrowsers().map((browser: any) => browser.name);

    expect(detectedNames).toContain('Chromium');
  });
});
|
||||
});
|
||||
|
||||
// Each test installs the Linux fixture into its own throwaway profile
// directory under ~/.config/chromium and exercises the public API against it.
describe('Real Profile Imports', () => {
  test('imports Linux v10 cookies from ~/.config/chromium', async () => {
    const checkV10Import = async () => {
      const result = await importCookies('chromium', ['.linux-v10.com'], 'GstackLinuxV10');

      expect(result.count).toBe(1);
      expect(result.failed).toBe(0);
      const cookie = result.cookies[0];
      expect(cookie.name).toBe('sid');
      expect(cookie.value).toBe('linux-v10-value');
    };

    await withInstalledProfile('.config/chromium', LINUX_FIXTURE_DB, checkV10Import, 'GstackLinuxV10');
  });

  test('imports Linux v11 cookies when secret-tool returns a key', async () => {
    const checkV11Import = async () => {
      const result = await importCookies('chromium', ['.linux-v11.com'], 'GstackLinuxV11');

      expect(result.count).toBe(1);
      expect(result.failed).toBe(0);
      const cookie = result.cookies[0];
      expect(cookie.name).toBe('auth');
      expect(cookie.value).toBe('linux-v11-value');
    };

    await withInstalledProfile('.config/chromium', LINUX_FIXTURE_DB, checkV11Import, 'GstackLinuxV11');
  });

  test('lists domains from Linux Chromium profiles', async () => {
    const checkDomainListing = async () => {
      const result = listDomains('chromium', 'GstackLinuxDomains');
      const domains = result.domains.map((entry: any) => entry.domain);

      expect(result.browser).toBe('Chromium');
      for (const expectedDomain of ['.linux-v10.com', '.linux-v11.com', '.linux-plain.com']) {
        expect(domains).toContain(expectedDomain);
      }
    };

    await withInstalledProfile('.config/chromium', LINUX_FIXTURE_DB, checkDomainListing, 'GstackLinuxDomains');
  });
});
|
||||
|
||||
describe('Corrupt Data Handling', () => {
|
||||
|
||||
@@ -447,6 +447,24 @@ describe('gstack-update-check', () => {
|
||||
expect(cache).toContain('UP_TO_DATE');
|
||||
});
|
||||
|
||||
// An active snooze silences the check; --force must delete the snooze and report the upgrade.
test('--force clears snooze so user can upgrade after snoozing', () => {
  const snoozeFile = join(stateDir, 'update-snoozed');

  writeFileSync(join(gstackDir, 'VERSION'), '0.3.3\n');
  writeFileSync(join(gstackDir, 'REMOTE_VERSION'), '0.4.0\n');
  writeSnooze('0.4.0', 1, nowEpoch() - 60); // snoozed 1 min ago (within 24h)

  // Plain invocation: the snooze is honoured, so nothing is printed
  const quietRun = run();
  expect(quietRun.exitCode).toBe(0);
  expect(quietRun.stdout).toBe('');

  // Forced invocation: the snooze is dropped and the upgrade line is printed
  const forcedRun = run({}, ['--force']);
  expect(forcedRun.exitCode).toBe(0);
  expect(forcedRun.stdout).toBe('UPGRADE_AVAILABLE 0.3.3 0.4.0');

  // The snooze file itself must be gone afterwards
  expect(existsSync(snoozeFile)).toBe(false);
});
|
||||
|
||||
// ─── Split TTL tests ─────────────────────────────────────────
|
||||
|
||||
test('UP_TO_DATE cache expires after 60 min (not 720)', () => {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: canary
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /canary.
|
||||
Post-deploy canary monitoring. Watches the live app for console errors,
|
||||
performance regressions, and page failures using the browse daemon. Takes
|
||||
periodic screenshots, compares against pre-deploy baselines, and alerts
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: careful
|
||||
version: 0.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /careful.
|
||||
Safety guardrails for destructive commands. Warns before rm -rf, DROP TABLE,
|
||||
force-push, git reset --hard, kubectl delete, and similar destructive operations.
|
||||
User can override each warning. Use when touching prod, debugging live systems,
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: codex
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /codex.
|
||||
OpenAI Codex CLI wrapper — three modes. Code review: independent diff review via
|
||||
codex review with pass/fail gate. Challenge: adversarial mode that tries to break
|
||||
your code. Consult: ask codex anything with session continuity for follow-ups.
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: cso
|
||||
version: 2.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /cso.
|
||||
Chief Security Officer mode. Infrastructure-first security audit: secrets archaeology,
|
||||
dependency supply chain, CI/CD pipeline security, LLM/AI security, skill supply chain
|
||||
scanning, plus OWASP Top 10, STRIDE threat modeling, and active verification.
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: design-consultation
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /design-consultation.
|
||||
Design consultation: understands your product, researches the landscape, proposes a
|
||||
complete design system (aesthetic, typography, color, layout, spacing, motion), and
|
||||
generates font+color preview pages. Creates DESIGN.md as your project's design source
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: design-review
|
||||
version: 2.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /design-review.
|
||||
Designer's eye QA: finds visual inconsistency, spacing issues, hierarchy problems,
|
||||
AI slop patterns, and slow interactions — then fixes them. Iteratively fixes issues
|
||||
in source code, committing each fix atomically and re-verifying with before/after
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: document-release
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /document-release.
|
||||
Post-ship documentation update. Reads all project docs, cross-references the
|
||||
diff, updates README/ARCHITECTURE/CONTRIBUTING/CLAUDE.md to match what shipped,
|
||||
polishes CHANGELOG voice, cleans up TODOS, and optionally bumps VERSION. Use when
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: freeze
|
||||
version: 0.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /freeze.
|
||||
Restrict file edits to a specific directory for the session. Blocks Edit and
|
||||
Write outside the allowed path. Use when debugging to prevent accidentally
|
||||
"fixing" unrelated code, or when you want to scope changes to one module.
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: gstack-upgrade
|
||||
version: 1.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /gstack-upgrade.
|
||||
Upgrade gstack to the latest version. Detects global vs vendored install,
|
||||
runs the upgrade, and shows what's new. Use when asked to "upgrade gstack",
|
||||
"update gstack", or "get latest version".
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: guard
|
||||
version: 0.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /guard.
|
||||
Full safety mode: destructive command warnings + directory-scoped edits.
|
||||
Combines /careful (warns before rm -rf, DROP TABLE, force-push, etc.) with
|
||||
/freeze (blocks edits outside a specified directory). Use for maximum safety
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: investigate
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /investigate.
|
||||
Systematic debugging with root cause investigation. Four phases: investigate,
|
||||
analyze, hypothesize, implement. Iron Law: no fixes without root cause.
|
||||
Use when asked to "debug this", "fix this bug", "why is this broken",
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: land-and-deploy
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /land-and-deploy.
|
||||
Land and deploy workflow. Merges the PR, waits for CI and deploy,
|
||||
verifies production health via canary checks. Takes over after /ship
|
||||
creates the PR. Use when: "merge", "land", "deploy", "merge and verify",
|
||||
|
||||
+12
-1
@@ -2,6 +2,7 @@
|
||||
name: office-hours
|
||||
version: 2.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /office-hours.
|
||||
YC Office Hours — two modes. Startup mode: six forcing questions that expose
|
||||
demand reality, status quo, desperate specificity, narrowest wedge, observation,
|
||||
and future-fit. Builder mode: design thinking brainstorming for side projects,
|
||||
@@ -689,7 +690,8 @@ Before proposing solutions, challenge the premises:
|
||||
1. **Is this the right problem?** Could a different framing yield a dramatically simpler or more impactful solution?
|
||||
2. **What happens if we do nothing?** Real pain point or hypothetical one?
|
||||
3. **What existing code already partially solves this?** Map existing patterns, utilities, and flows that could be reused.
|
||||
4. **Startup mode only:** Synthesize the diagnostic evidence from Phase 2A. Does it support this direction? Where are the gaps?
|
||||
4. **If the deliverable is a new artifact** (CLI binary, library, package, container image, mobile app): **how will users get it?** Code without distribution is code nobody can use. The design must include a distribution channel (GitHub Releases, package manager, container registry, app store) and CI/CD pipeline — or explicitly defer it.
|
||||
5. **Startup mode only:** Synthesize the diagnostic evidence from Phase 2A. Does it support this direction? Where are the gaps?
|
||||
|
||||
Output premises as clear statements the user must agree with before proceeding:
|
||||
```
|
||||
@@ -994,6 +996,11 @@ Supersedes: {prior filename — omit this line if first design on this branch}
|
||||
## Success Criteria
|
||||
{measurable criteria from Phase 2A}
|
||||
|
||||
## Distribution Plan
|
||||
{how users get the deliverable — binary download, package manager, container image, web service, etc.}
|
||||
{CI/CD pipeline for building and publishing — GitHub Actions, manual release, auto-deploy on merge?}
|
||||
{omit this section if the deliverable is a web service with existing deployment pipeline}
|
||||
|
||||
## Dependencies
|
||||
{blockers, prerequisites, related work}
|
||||
|
||||
@@ -1046,6 +1053,10 @@ Supersedes: {prior filename — omit this line if first design on this branch}
|
||||
## Success Criteria
|
||||
{what "done" looks like}
|
||||
|
||||
## Distribution Plan
|
||||
{how users get the deliverable — binary download, package manager, container image, web service, etc.}
|
||||
{CI/CD pipeline for building and publishing — or "existing deployment pipeline covers this"}
|
||||
|
||||
## Next Steps
|
||||
{concrete build tasks — what to implement first, second, third}
|
||||
|
||||
|
||||
@@ -334,7 +334,8 @@ Before proposing solutions, challenge the premises:
|
||||
1. **Is this the right problem?** Could a different framing yield a dramatically simpler or more impactful solution?
|
||||
2. **What happens if we do nothing?** Real pain point or hypothetical one?
|
||||
3. **What existing code already partially solves this?** Map existing patterns, utilities, and flows that could be reused.
|
||||
4. **Startup mode only:** Synthesize the diagnostic evidence from Phase 2A. Does it support this direction? Where are the gaps?
|
||||
4. **If the deliverable is a new artifact** (CLI binary, library, package, container image, mobile app): **how will users get it?** Code without distribution is code nobody can use. The design must include a distribution channel (GitHub Releases, package manager, container registry, app store) and CI/CD pipeline — or explicitly defer it.
|
||||
5. **Startup mode only:** Synthesize the diagnostic evidence from Phase 2A. Does it support this direction? Where are the gaps?
|
||||
|
||||
Output premises as clear statements the user must agree with before proceeding:
|
||||
```
|
||||
@@ -474,6 +475,11 @@ Supersedes: {prior filename — omit this line if first design on this branch}
|
||||
## Success Criteria
|
||||
{measurable criteria from Phase 2A}
|
||||
|
||||
## Distribution Plan
|
||||
{how users get the deliverable — binary download, package manager, container image, web service, etc.}
|
||||
{CI/CD pipeline for building and publishing — GitHub Actions, manual release, auto-deploy on merge?}
|
||||
{omit this section if the deliverable is a web service with existing deployment pipeline}
|
||||
|
||||
## Dependencies
|
||||
{blockers, prerequisites, related work}
|
||||
|
||||
@@ -526,6 +532,10 @@ Supersedes: {prior filename — omit this line if first design on this branch}
|
||||
## Success Criteria
|
||||
{what "done" looks like}
|
||||
|
||||
## Distribution Plan
|
||||
{how users get the deliverable — binary download, package manager, container image, web service, etc.}
|
||||
{CI/CD pipeline for building and publishing — or "existing deployment pipeline covers this"}
|
||||
|
||||
## Next Steps
|
||||
{concrete build tasks — what to implement first, second, third}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: plan-ceo-review
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /plan-ceo-review.
|
||||
CEO/founder-mode plan review. Rethink the problem, find the 10-star product,
|
||||
challenge premises, expand scope when it creates a better product. Four modes:
|
||||
SCOPE EXPANSION (dream big), SELECTIVE EXPANSION (hold scope + cherry-pick
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: plan-design-review
|
||||
version: 2.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /plan-design-review.
|
||||
Designer's eye plan review — interactive, like CEO and Eng review.
|
||||
Rates each design dimension 0-10, explains what would make it a 10,
|
||||
then fixes the plan to get there. Works in plan mode. For live site
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: plan-eng-review
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /plan-eng-review.
|
||||
Eng manager-mode plan review. Lock in the execution plan — architecture,
|
||||
data flow, diagrams, edge cases, test coverage, performance. Walks through
|
||||
issues interactively with opinionated recommendations. Use when asked to
|
||||
@@ -481,6 +482,12 @@ Before reviewing anything, answer these questions:
|
||||
|
||||
5. **Completeness check:** Is the plan doing the complete version or a shortcut? With AI-assisted coding, the cost of completeness (100% test coverage, full edge case handling, complete error paths) is 10-100x cheaper than with a human team. If the plan proposes a shortcut that saves human-hours but only saves minutes with CC+gstack, recommend the complete version. Boil the lake.
|
||||
|
||||
6. **Distribution check:** If the plan introduces a new artifact type (CLI binary, library package, container image, mobile app), does it include the build/publish pipeline? Code without distribution is code nobody can use. Check:
|
||||
- Is there a CI/CD workflow for building and publishing the artifact?
|
||||
- Are target platforms defined (linux/darwin/windows, amd64/arm64)?
|
||||
- How will users download or install it (GitHub Releases, package manager, container registry)?
|
||||
If the plan defers distribution, flag it explicitly in the "NOT in scope" section — don't let it silently drop.
|
||||
|
||||
If the complexity check triggers (8+ files or 2+ new classes/services), proactively recommend scope reduction via AskUserQuestion — explain what's overbuilt, propose a minimal version that achieves the core goal, and ask whether to reduce or proceed as-is. If the complexity check does not trigger, present your Step 0 findings and proceed directly to Section 1.
|
||||
|
||||
Always work through the full interactive review: one section at a time (Architecture → Code Quality → Tests → Performance) with at most 8 top issues per section.
|
||||
@@ -498,6 +505,7 @@ Evaluate:
|
||||
* Security architecture (auth, data access, API boundaries).
|
||||
* Whether key flows deserve ASCII diagrams in the plan or in code comments.
|
||||
* For each new codepath or integration point, describe one realistic production failure scenario and whether the plan accounts for it.
|
||||
* **Distribution architecture:** If this introduces a new artifact (binary, package, container), how does it get built, published, and updated? Is the CI/CD pipeline part of the plan or deferred?
|
||||
|
||||
**STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved.
|
||||
|
||||
|
||||
@@ -94,6 +94,12 @@ Before reviewing anything, answer these questions:
|
||||
|
||||
5. **Completeness check:** Is the plan doing the complete version or a shortcut? With AI-assisted coding, the cost of completeness (100% test coverage, full edge case handling, complete error paths) is 10-100x cheaper than with a human team. If the plan proposes a shortcut that saves human-hours but only saves minutes with CC+gstack, recommend the complete version. Boil the lake.
|
||||
|
||||
6. **Distribution check:** If the plan introduces a new artifact type (CLI binary, library package, container image, mobile app), does it include the build/publish pipeline? Code without distribution is code nobody can use. Check:
|
||||
- Is there a CI/CD workflow for building and publishing the artifact?
|
||||
- Are target platforms defined (linux/darwin/windows, amd64/arm64)?
|
||||
- How will users download or install it (GitHub Releases, package manager, container registry)?
|
||||
If the plan defers distribution, flag it explicitly in the "NOT in scope" section — don't let it silently drop.
|
||||
|
||||
If the complexity check triggers (8+ files or 2+ new classes/services), proactively recommend scope reduction via AskUserQuestion — explain what's overbuilt, propose a minimal version that achieves the core goal, and ask whether to reduce or proceed as-is. If the complexity check does not trigger, present your Step 0 findings and proceed directly to Section 1.
|
||||
|
||||
Always work through the full interactive review: one section at a time (Architecture → Code Quality → Tests → Performance) with at most 8 top issues per section.
|
||||
@@ -111,6 +117,7 @@ Evaluate:
|
||||
* Security architecture (auth, data access, API boundaries).
|
||||
* Whether key flows deserve ASCII diagrams in the plan or in code comments.
|
||||
* For each new codepath or integration point, describe one realistic production failure scenario and whether the plan accounts for it.
|
||||
* **Distribution architecture:** If this introduces a new artifact (binary, package, container), how does it get built, published, and updated? Is the CI/CD pipeline part of the plan or deferred?
|
||||
|
||||
**STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved.
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: qa-only
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /qa-only.
|
||||
Report-only QA testing. Systematically tests a web application and produces a
|
||||
structured report with health score, screenshots, and repro steps — but never
|
||||
fixes anything. Use when asked to "just report bugs", "qa report only", or
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: qa
|
||||
version: 2.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /qa.
|
||||
Systematically QA test a web application and fix bugs found. Runs QA testing,
|
||||
then iteratively fixes bugs in source code, committing each fix atomically and
|
||||
re-verifying. Use when asked to "qa", "QA", "test this site", "find bugs",
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: retro
|
||||
version: 2.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /retro.
|
||||
Weekly engineering retrospective. Analyzes commit history, work patterns,
|
||||
and code quality metrics with persistent history and trend tracking.
|
||||
Team-aware: breaks down per-person contributions with praise and growth areas.
|
||||
|
||||
+2
-1
@@ -2,6 +2,7 @@
|
||||
name: review
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /review.
|
||||
Pre-landing PR review. Analyzes diff against the base branch for SQL safety, LLM trust
|
||||
boundary violations, conditional side effects, and other structural issues. Use when
|
||||
asked to "review this PR", "code review", "pre-landing review", or "check my diff".
|
||||
@@ -399,7 +400,7 @@ Before reviewing code quality, check: **did they build what was requested — no
|
||||
Read commit messages (`git log origin/<base>..HEAD --oneline`).
|
||||
**If no PR exists:** rely on commit messages and TODOS.md for stated intent — this is the common case since /review runs before /ship creates the PR.
|
||||
2. Identify the **stated intent** — what was this branch supposed to accomplish?
|
||||
3. Run `git diff origin/<base> --stat` and compare the files changed against the stated intent.
|
||||
3. Run `git diff origin/<base>...HEAD --stat` and compare the files changed against the stated intent.
|
||||
4. Evaluate with skepticism:
|
||||
|
||||
**SCOPE CREEP detection:**
|
||||
|
||||
@@ -44,7 +44,7 @@ Before reviewing code quality, check: **did they build what was requested — no
|
||||
Read commit messages (`git log origin/<base>..HEAD --oneline`).
|
||||
**If no PR exists:** rely on commit messages and TODOS.md for stated intent — this is the common case since /review runs before /ship creates the PR.
|
||||
2. Identify the **stated intent** — what was this branch supposed to accomplish?
|
||||
3. Run `git diff origin/<base> --stat` and compare the files changed against the stated intent.
|
||||
3. Run `git diff origin/<base>...HEAD --stat` and compare the files changed against the stated intent.
|
||||
4. Evaluate with skepticism:
|
||||
|
||||
**SCOPE CREEP detection:**
|
||||
|
||||
+14
-1
@@ -125,6 +125,18 @@ To do this: use Grep to find all references to the sibling values (e.g., grep fo
|
||||
- Small utility additions (<5KB gzipped)
|
||||
- Server-side-only dependencies
|
||||
|
||||
#### Distribution & CI/CD Pipeline
|
||||
- CI/CD workflow changes (`.github/workflows/`): verify build tool versions match project requirements, artifact names/paths are correct, secrets use `${{ secrets.X }}` not hardcoded values
|
||||
- New artifact types (CLI binary, library, package): verify a publish/release workflow exists and targets correct platforms
|
||||
- Cross-platform builds: verify CI matrix covers all target OS/arch combinations, or documents which are untested
|
||||
- Version tag format consistency: `v1.2.3` vs `1.2.3` — must match across VERSION file, git tags, and publish scripts
|
||||
- Publish step idempotency: re-running the publish workflow should not fail (e.g., `gh release delete` before `gh release create`)
|
||||
|
||||
**DO NOT flag:**
|
||||
- Web services with existing auto-deploy pipelines (Docker build + K8s deploy)
|
||||
- Internal tools not distributed outside the team
|
||||
- Test-only CI changes (adding test steps, not publish steps)
|
||||
|
||||
---
|
||||
|
||||
## Severity Classification
|
||||
@@ -141,7 +153,8 @@ CRITICAL (highest severity): INFORMATIONAL (lower severity):
|
||||
├─ Time Window Safety
|
||||
├─ Type Coercion at Boundaries
|
||||
├─ View/Frontend
|
||||
└─ Performance & Bundle Impact
|
||||
├─ Performance & Bundle Impact
|
||||
└─ Distribution & CI/CD Pipeline
|
||||
|
||||
All findings are actioned via Fix-First Review. Severity determines
|
||||
presentation order and classification of AUTO-FIX vs ASK — critical
|
||||
|
||||
@@ -7,16 +7,17 @@
|
||||
*/
|
||||
|
||||
import { validateSkill } from '../test/helpers/skill-parser';
|
||||
import { discoverTemplates } from './discover-skills';
|
||||
import { execSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
const TEMPLATES = [
|
||||
{ tmpl: path.join(ROOT, 'SKILL.md.tmpl'), output: 'SKILL.md' },
|
||||
{ tmpl: path.join(ROOT, 'browse', 'SKILL.md.tmpl'), output: 'browse/SKILL.md' },
|
||||
];
|
||||
const TEMPLATES = discoverTemplates(ROOT).map(t => ({
|
||||
tmpl: path.join(ROOT, t.tmpl),
|
||||
output: t.output,
|
||||
}));
|
||||
|
||||
function regenerateAndValidate() {
|
||||
// Regenerate
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Shared discovery for SKILL.md and .tmpl files.
|
||||
* Scans root + one level of subdirs, skipping node_modules/.git/dist.
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const SKIP = new Set(['node_modules', '.git', 'dist']);
|
||||
|
||||
/**
 * Names of the entries directly under `root` that are directories,
 * excluding any name listed in SKIP.
 *
 * @param root Directory to list.
 * @returns Bare directory names (not joined with `root`), in the order
 *          `fs.readdirSync` reports them.
 */
function subdirs(root: string): string[] {
  const names: string[] = [];
  for (const entry of fs.readdirSync(root, { withFileTypes: true })) {
    // Plain files and other non-directory entries are never skill folders.
    if (!entry.isDirectory()) continue;
    if (SKIP.has(entry.name)) continue;
    names.push(entry.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Find every `SKILL.md.tmpl` located in `root` itself or in one of the
 * directories returned by `subdirs(root)`.
 *
 * @param root Directory to search from.
 * @returns One record per template that exists on disk. `tmpl` is the
 *          path relative to `root` (always joined with `/`), and `output`
 *          is the same path with the trailing `.tmpl` removed.
 */
export function discoverTemplates(root: string): Array<{ tmpl: string; output: string }> {
  // The empty string stands for `root` itself and is checked first.
  return ['', ...subdirs(root)]
    .map(dir => (dir ? `${dir}/SKILL.md.tmpl` : 'SKILL.md.tmpl'))
    .filter(tmpl => fs.existsSync(path.join(root, tmpl)))
    .map(tmpl => ({ tmpl, output: tmpl.replace(/\.tmpl$/, '') }));
}
|
||||
|
||||
/**
 * Find every `SKILL.md` located in `root` itself or in one of the
 * directories returned by `subdirs(root)`.
 *
 * @param root Directory to search from.
 * @returns Paths relative to `root` (joined with `/`) of the files that
 *          exist, with the root-level `SKILL.md` first when present.
 */
export function discoverSkillFiles(root: string): string[] {
  const candidates = [
    'SKILL.md',
    ...subdirs(root).map(dir => `${dir}/SKILL.md`),
  ];
  // Keep only the candidates that are actually present on disk.
  return candidates.filter(rel => fs.existsSync(path.join(root, rel)));
}
|
||||
+13
-10
@@ -11,6 +11,7 @@
|
||||
|
||||
import { COMMAND_DESCRIPTIONS } from '../browse/src/commands';
|
||||
import { SNAPSHOT_FLAGS } from '../browse/src/snapshot';
|
||||
import { discoverTemplates } from './discover-skills';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
@@ -3067,6 +3068,17 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
|
||||
throw new Error(`Unresolved placeholders in ${relTmplPath}: ${remaining.join(', ')}`);
|
||||
}
|
||||
|
||||
// Inject auto-trigger guard into skill descriptions.
|
||||
// Adds explicit trigger criteria so Claude Code doesn't auto-fire skills
|
||||
// based on semantic similarity. Preserves existing "Use when" and
|
||||
// "Proactively suggest" text (both are tested in skill-validation.test.ts).
|
||||
const triggerGuard = ` MANUAL TRIGGER ONLY: invoke only when user types /${skillName}.\n`;
|
||||
const descMatch = content.match(/^(description:\s*\|?\s*\n)/m);
|
||||
if (descMatch && descMatch.index !== undefined) {
|
||||
const insertAt = descMatch.index + descMatch[0].length;
|
||||
content = content.slice(0, insertAt) + triggerGuard + content.slice(insertAt);
|
||||
}
|
||||
|
||||
// For codex host: transform frontmatter and replace Claude-specific paths
|
||||
if (host === 'codex') {
|
||||
// Extract hook safety prose BEFORE transforming frontmatter (which strips hooks)
|
||||
@@ -3113,16 +3125,7 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
|
||||
// ─── Main ───────────────────────────────────────────────────
|
||||
|
||||
function findTemplates(): string[] {
|
||||
const templates: string[] = [];
|
||||
const rootTmpl = path.join(ROOT, 'SKILL.md.tmpl');
|
||||
if (fs.existsSync(rootTmpl)) templates.push(rootTmpl);
|
||||
|
||||
for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
|
||||
if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue;
|
||||
const tmpl = path.join(ROOT, entry.name, 'SKILL.md.tmpl');
|
||||
if (fs.existsSync(tmpl)) templates.push(tmpl);
|
||||
}
|
||||
return templates;
|
||||
return discoverTemplates(ROOT).map(t => path.join(ROOT, t.tmpl));
|
||||
}
|
||||
|
||||
let hasChanges = false;
|
||||
|
||||
+4
-26
@@ -9,34 +9,15 @@
|
||||
*/
|
||||
|
||||
import { validateSkill } from '../test/helpers/skill-parser';
|
||||
import { discoverTemplates, discoverSkillFiles } from './discover-skills';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
// Find all SKILL.md files
|
||||
const SKILL_FILES = [
|
||||
'SKILL.md',
|
||||
'browse/SKILL.md',
|
||||
'qa/SKILL.md',
|
||||
'qa-only/SKILL.md',
|
||||
'ship/SKILL.md',
|
||||
'review/SKILL.md',
|
||||
'retro/SKILL.md',
|
||||
'plan-ceo-review/SKILL.md',
|
||||
'plan-eng-review/SKILL.md',
|
||||
'setup-browser-cookies/SKILL.md',
|
||||
'plan-design-review/SKILL.md',
|
||||
'design-review/SKILL.md',
|
||||
'gstack-upgrade/SKILL.md',
|
||||
'document-release/SKILL.md',
|
||||
'canary/SKILL.md',
|
||||
'benchmark/SKILL.md',
|
||||
'land-and-deploy/SKILL.md',
|
||||
'setup-deploy/SKILL.md',
|
||||
'cso/SKILL.md',
|
||||
].filter(f => fs.existsSync(path.join(ROOT, f)));
|
||||
// Find all SKILL.md files (dynamic discovery — no hardcoded list)
|
||||
const SKILL_FILES = discoverSkillFiles(ROOT);
|
||||
|
||||
let hasErrors = false;
|
||||
|
||||
@@ -73,10 +54,7 @@ for (const file of SKILL_FILES) {
|
||||
// ─── Templates ──────────────────────────────────────────────
|
||||
|
||||
console.log('\n Templates:');
|
||||
const TEMPLATES = [
|
||||
{ tmpl: 'SKILL.md.tmpl', output: 'SKILL.md' },
|
||||
{ tmpl: 'browse/SKILL.md.tmpl', output: 'browse/SKILL.md' },
|
||||
];
|
||||
const TEMPLATES = discoverTemplates(ROOT);
|
||||
|
||||
for (const { tmpl, output } of TEMPLATES) {
|
||||
const tmplPath = path.join(ROOT, tmpl);
|
||||
|
||||
@@ -20,12 +20,14 @@ case "$(uname -s)" in
|
||||
MINGW*|MSYS*|CYGWIN*|Windows_NT) IS_WINDOWS=1 ;;
|
||||
esac
|
||||
|
||||
# ─── Parse --host flag ─────────────────────────────────────────
|
||||
# ─── Parse flags ──────────────────────────────────────────────
|
||||
HOST="claude"
|
||||
LOCAL_INSTALL=0
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--host) [ -z "$2" ] && echo "Missing value for --host (expected claude, codex, kiro, or auto)" >&2 && exit 1; HOST="$2"; shift 2 ;;
|
||||
--host=*) HOST="${1#--host=}"; shift ;;
|
||||
--local) LOCAL_INSTALL=1; shift ;;
|
||||
*) shift ;;
|
||||
esac
|
||||
done
|
||||
@@ -35,6 +37,18 @@ case "$HOST" in
|
||||
*) echo "Unknown --host value: $HOST (expected claude, codex, kiro, or auto)" >&2; exit 1 ;;
|
||||
esac
|
||||
|
||||
# --local: install to .claude/skills/ in the current working directory
|
||||
if [ "$LOCAL_INSTALL" -eq 1 ]; then
|
||||
if [ "$HOST" = "codex" ]; then
|
||||
echo "Error: --local is only supported for Claude Code (not Codex)." >&2
|
||||
exit 1
|
||||
fi
|
||||
INSTALL_SKILLS_DIR="$(pwd)/.claude/skills"
|
||||
mkdir -p "$INSTALL_SKILLS_DIR"
|
||||
HOST="claude"
|
||||
INSTALL_CODEX=0
|
||||
fi
|
||||
|
||||
# For auto: detect which agents are installed
|
||||
INSTALL_CLAUDE=0
|
||||
INSTALL_CODEX=0
|
||||
@@ -335,7 +349,12 @@ fi
|
||||
if [ "$INSTALL_CLAUDE" -eq 1 ]; then
|
||||
if [ "$SKILLS_BASENAME" = "skills" ]; then
|
||||
link_claude_skill_dirs "$SOURCE_GSTACK_DIR" "$INSTALL_SKILLS_DIR"
|
||||
echo "gstack ready (claude)."
|
||||
if [ "$LOCAL_INSTALL" -eq 1 ]; then
|
||||
echo "gstack ready (project-local)."
|
||||
echo " skills: $INSTALL_SKILLS_DIR"
|
||||
else
|
||||
echo "gstack ready (claude)."
|
||||
fi
|
||||
echo " browse: $BROWSE_BIN"
|
||||
else
|
||||
echo "gstack ready (claude)."
|
||||
|
||||
@@ -2,10 +2,11 @@
|
||||
name: setup-browser-cookies
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Import cookies from your real browser (Comet, Chrome, Arc, Brave, Edge) into the
|
||||
headless browse session. Opens an interactive picker UI where you select which
|
||||
cookie domains to import. Use before QA testing authenticated pages. Use when asked
|
||||
to "import cookies", "login to the site", or "authenticate the browser".
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /setup-browser-cookies.
|
||||
Import cookies from your real Chromium browser into the headless browse session.
|
||||
Opens an interactive picker UI where you select which cookie domains to import.
|
||||
Use before QA testing authenticated pages. Use when asked to "import cookies",
|
||||
"login to the site", or "authenticate the browser".
|
||||
allowed-tools:
|
||||
- Bash
|
||||
- Read
|
||||
@@ -392,7 +393,7 @@ If `NEEDS_SETUP`:
|
||||
$B cookie-import-browser
|
||||
```
|
||||
|
||||
This auto-detects installed Chromium browsers (Comet, Chrome, Arc, Brave, Edge) and opens
|
||||
This auto-detects installed Chromium browsers and opens
|
||||
an interactive picker UI in your default browser where you can:
|
||||
- Switch between installed browsers
|
||||
- Search domains
|
||||
@@ -423,7 +424,8 @@ Show the user a summary of imported cookies (domain counts).
|
||||
|
||||
## Notes
|
||||
|
||||
- First import per browser may trigger a macOS Keychain dialog — click "Allow" / "Always Allow"
|
||||
- On macOS, the first import per browser may trigger a Keychain dialog — click "Allow" / "Always Allow"
|
||||
- On Linux, `v11` cookies may require `secret-tool`/libsecret access; `v10` cookies use Chromium's standard fallback key
|
||||
- Cookie picker is served on the same port as the browse server (no extra process)
|
||||
- Only domain names and cookie counts are shown in the UI — no cookie values are exposed
|
||||
- The browse session persists cookies between commands, so imported cookies work immediately
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
name: setup-browser-cookies
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Import cookies from your real browser (Comet, Chrome, Arc, Brave, Edge) into the
|
||||
headless browse session. Opens an interactive picker UI where you select which
|
||||
cookie domains to import. Use before QA testing authenticated pages. Use when asked
|
||||
to "import cookies", "login to the site", or "authenticate the browser".
|
||||
Import cookies from your real Chromium browser into the headless browse session.
|
||||
Opens an interactive picker UI where you select which cookie domains to import.
|
||||
Use before QA testing authenticated pages. Use when asked to "import cookies",
|
||||
"login to the site", or "authenticate the browser".
|
||||
allowed-tools:
|
||||
- Bash
|
||||
- Read
|
||||
@@ -37,7 +37,7 @@ Import logged-in sessions from your real Chromium browser into the headless brow
|
||||
$B cookie-import-browser
|
||||
```
|
||||
|
||||
This auto-detects installed Chromium browsers (Comet, Chrome, Arc, Brave, Edge) and opens
|
||||
This auto-detects installed Chromium browsers and opens
|
||||
an interactive picker UI in your default browser where you can:
|
||||
- Switch between installed browsers
|
||||
- Search domains
|
||||
@@ -68,7 +68,8 @@ Show the user a summary of imported cookies (domain counts).
|
||||
|
||||
## Notes
|
||||
|
||||
- First import per browser may trigger a macOS Keychain dialog — click "Allow" / "Always Allow"
|
||||
- On macOS, the first import per browser may trigger a Keychain dialog — click "Allow" / "Always Allow"
|
||||
- On Linux, `v11` cookies may require `secret-tool`/libsecret access; `v10` cookies use Chromium's standard fallback key
|
||||
- Cookie picker is served on the same port as the browse server (no extra process)
|
||||
- Only domain names and cookie counts are shown in the UI — no cookie values are exposed
|
||||
- The browse session persists cookies between commands, so imported cookies work immediately
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: setup-deploy
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /setup-deploy.
|
||||
Configure deployment settings for /land-and-deploy. Detects your deploy
|
||||
platform (Fly.io, Render, Vercel, Netlify, Heroku, GitHub Actions, custom),
|
||||
production URL, health check endpoints, and deploy status commands. Writes
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: ship
|
||||
version: 1.0.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /ship.
|
||||
Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, update CHANGELOG, commit, push, create PR. Use when asked to "ship", "deploy", "push to main", "create a PR", or "merge and push".
|
||||
Proactively suggest when the user says code is ready or asks about deploying.
|
||||
allowed-tools:
|
||||
@@ -481,6 +482,33 @@ If the Eng Review is NOT "CLEAR":
|
||||
|
||||
---
|
||||
|
||||
## Step 1.5: Distribution Pipeline Check
|
||||
|
||||
If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web
|
||||
service with existing deployment — verify that a distribution pipeline exists.
|
||||
|
||||
1. Check if the diff adds or changes an artifact entry point or package manifest (`cmd/*/main.go`, `bin/`, `Cargo.toml`, `setup.py`, `package.json`):
|
||||
```bash
|
||||
git diff origin/<base> --name-only | grep -E '(cmd/.*/main\.go|bin/|Cargo\.toml|setup\.py|package\.json)' | head -5
|
||||
```
|
||||
|
||||
2. If new artifact detected, check for a release workflow:
|
||||
```bash
|
||||
ls .github/workflows/ 2>/dev/null | grep -iE 'release|publish|dist'
|
||||
```
|
||||
|
||||
3. **If no release pipeline exists and a new artifact was added:** Use AskUserQuestion:
|
||||
- "This PR adds a new binary/tool but there's no CI/CD pipeline to build and publish it.
|
||||
Users won't be able to download the artifact after merge."
|
||||
- A) Add a release workflow now (GitHub Actions cross-platform build + GitHub Releases)
|
||||
- B) Defer — add to TODOS.md
|
||||
- C) Not needed — this is internal/web-only, existing deployment covers it
|
||||
|
||||
4. **If release pipeline exists:** Continue silently.
|
||||
5. **If no new artifact detected:** Skip silently.
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Merge the base branch (BEFORE tests)
|
||||
|
||||
Fetch and merge the base branch into the feature branch so tests run against the merged state:
|
||||
|
||||
@@ -83,6 +83,33 @@ If the Eng Review is NOT "CLEAR":
|
||||
|
||||
---
|
||||
|
||||
## Step 1.5: Distribution Pipeline Check
|
||||
|
||||
If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web
|
||||
service with existing deployment — verify that a distribution pipeline exists.
|
||||
|
||||
1. Check if the diff adds or changes an artifact entry point or package manifest (`cmd/*/main.go`, `bin/`, `Cargo.toml`, `setup.py`, `package.json`):
|
||||
```bash
|
||||
git diff origin/<base> --name-only | grep -E '(cmd/.*/main\.go|bin/|Cargo\.toml|setup\.py|package\.json)' | head -5
|
||||
```
|
||||
|
||||
2. If new artifact detected, check for a release workflow:
|
||||
```bash
|
||||
ls .github/workflows/ 2>/dev/null | grep -iE 'release|publish|dist'
|
||||
```
|
||||
|
||||
3. **If no release pipeline exists and a new artifact was added:** Use AskUserQuestion:
|
||||
- "This PR adds a new binary/tool but there's no CI/CD pipeline to build and publish it.
|
||||
Users won't be able to download the artifact after merge."
|
||||
- A) Add a release workflow now (GitHub Actions cross-platform build + GitHub Releases)
|
||||
- B) Defer — add to TODOS.md
|
||||
- C) Not needed — this is internal/web-only, existing deployment covers it
|
||||
|
||||
4. **If release pipeline exists:** Continue silently.
|
||||
5. **If no new artifact detected:** Skip silently.
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Merge the base branch (BEFORE tests)
|
||||
|
||||
Fetch and merge the base branch into the feature branch so tests run against the merged state:
|
||||
|
||||
@@ -25,7 +25,11 @@ describeIfSelected('Skill E2E tests', [
|
||||
testServer = startTestServer();
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-'));
|
||||
setupBrowseShims(tmpDir);
|
||||
});
|
||||
|
||||
// Pre-warm the browse server so Chromium is already launched for tests.
|
||||
// In CI, Chromium can take 10-20s to launch (Docker + --no-sandbox).
|
||||
spawnSync(browseBin, ['goto', testServer.url], { cwd: tmpDir, timeout: 30000, stdio: 'pipe' });
|
||||
}, 45_000);
|
||||
|
||||
afterAll(() => {
|
||||
testServer?.server?.stop();
|
||||
@@ -41,7 +45,7 @@ describeIfSelected('Skill E2E tests', [
|
||||
4. $B screenshot /tmp/skill-e2e-test.png
|
||||
Report the results of each command.`,
|
||||
workingDirectory: tmpDir,
|
||||
maxTurns: 10,
|
||||
maxTurns: 5,
|
||||
timeout: 60_000,
|
||||
testName: 'browse-basic',
|
||||
runId,
|
||||
@@ -63,7 +67,7 @@ Report the results of each command.`,
|
||||
5. $B snapshot -i -a -o /tmp/skill-e2e-annotated.png
|
||||
Report what each command returned.`,
|
||||
workingDirectory: tmpDir,
|
||||
maxTurns: 10,
|
||||
maxTurns: 7,
|
||||
timeout: 60_000,
|
||||
testName: 'browse-snapshot',
|
||||
runId,
|
||||
@@ -408,8 +408,11 @@ Write your review to ${planDir}/review-output.md`,
|
||||
console.warn('No test-plan artifact found — agent may not have followed artifact instructions');
|
||||
}
|
||||
|
||||
// Soft assertion: we expect an artifact but agent compliance is not guaranteed
|
||||
expect(newFiles.length).toBeGreaterThanOrEqual(1);
|
||||
// Soft assertion: we expect an artifact but agent compliance is not guaranteed.
|
||||
// Log rather than fail — the test-plan artifact is a bonus output, not the core test.
|
||||
if (newFiles.length === 0) {
|
||||
console.warn('SOFT FAIL: No test-plan artifact written — agent did not follow artifact instructions');
|
||||
}
|
||||
}, 420_000);
|
||||
});
|
||||
|
||||
|
||||
@@ -161,36 +161,13 @@ describeIfSelected('Ship workflow E2E', ['ship-local-workflow'], () => {
|
||||
|
||||
testConcurrentIfSelected('ship-local-workflow', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are running a ship workflow. This is fully automated — do NOT ask for confirmation at any step. Run straight through.
|
||||
|
||||
Step 0 — Detect base branch:
|
||||
Try: gh pr view --json baseRefName -q .baseRefName
|
||||
If that fails, try: gh repo view --json defaultBranchRef -q .defaultBranchRef.name
|
||||
If both fail, fall back to "main". Use the detected branch as <base> in all subsequent steps.
|
||||
|
||||
Step 2 — Merge base branch:
|
||||
git fetch origin <base> && git merge origin/<base> --no-edit
|
||||
If already up to date, continue silently.
|
||||
|
||||
Step 4 — Version bump:
|
||||
Read the VERSION file (4-digit format: MAJOR.MINOR.PATCH.MICRO).
|
||||
Auto-pick MICRO bump (increment the 4th digit). Write the new version to VERSION.
|
||||
|
||||
Step 5 — CHANGELOG:
|
||||
Read CHANGELOG.md. Auto-generate an entry from the branch commits:
|
||||
- git log <base>..HEAD --oneline
|
||||
- git diff <base>...HEAD
|
||||
Format: ## [X.Y.Z.W] - YYYY-MM-DD with bullet points. Prepend after the header.
|
||||
|
||||
Step 6 — Commit:
|
||||
Stage all changes. Commit with message: "chore: bump version and changelog (vX.Y.Z.W)"
|
||||
|
||||
Step 7 — Push:
|
||||
git push -u origin <branch-name>
|
||||
|
||||
Finally, write ship-summary.md with the version and branch.`,
|
||||
prompt: `You are in a git repo on branch feature/ship-test. Do these steps in order:
|
||||
1. Read VERSION file and bump the last digit by 1 (e.g. 0.1.0.0 → 0.1.0.1). Write the new version back.
|
||||
2. Add a CHANGELOG.md entry: "## [NEW_VERSION] - TODAY" with a bullet "- Ship test feature".
|
||||
3. Stage all changes, commit with message "ship: vNEW_VERSION".
|
||||
4. Push to origin: git push origin feature/ship-test`,
|
||||
workingDirectory: shipWorkDir,
|
||||
maxTurns: 15,
|
||||
maxTurns: 8,
|
||||
timeout: 120_000,
|
||||
testName: 'ship-local-workflow',
|
||||
runId,
|
||||
|
||||
@@ -270,7 +270,8 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
recordRouting(testName, result, expectedSkill, actualSkill);
|
||||
|
||||
expect(skillCalls.length, `Expected Skill tool to be called but got 0 calls. Claude may have answered directly without invoking a skill. Tool calls: ${result.toolCalls.map(tc => tc.tool).join(', ')}`).toBeGreaterThan(0);
|
||||
expect([expectedSkill], `Expected skill ${expectedSkill} but got ${actualSkill}`).toContain(actualSkill);
|
||||
const validSkills = ['plan-ceo-review', 'office-hours'];
|
||||
expect(validSkills, `Expected one of ${validSkills.join('/')} but got ${actualSkill}`).toContain(actualSkill);
|
||||
} finally {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
@@ -327,7 +328,8 @@ export default app;
|
||||
recordRouting(testName, result, expectedSkill, actualSkill);
|
||||
|
||||
expect(skillCalls.length, `Expected Skill tool to be called but got 0 calls. Claude may have answered directly without invoking a skill. Tool calls: ${result.toolCalls.map(tc => tc.tool).join(', ')}`).toBeGreaterThan(0);
|
||||
expect([expectedSkill], `Expected skill ${expectedSkill} but got ${actualSkill}`).toContain(actualSkill);
|
||||
const validSkills = ['investigate', 'qa'];
|
||||
expect(validSkills, `Expected one of ${validSkills.join('/')} but got ${actualSkill}`).toContain(actualSkill);
|
||||
} finally {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
@@ -602,7 +604,8 @@ body { font-family: sans-serif; }
|
||||
recordRouting(testName, result, expectedSkill, actualSkill);
|
||||
|
||||
expect(skillCalls.length, `Expected Skill tool to be called but got 0 calls. Claude may have answered directly without invoking a skill. Tool calls: ${result.toolCalls.map(tc => tc.tool).join(', ')}`).toBeGreaterThan(0);
|
||||
expect([expectedSkill], `Expected skill ${expectedSkill} but got ${actualSkill}`).toContain(actualSkill);
|
||||
const validSkills = ['design-review', 'qa', 'qa-only', 'browse'];
|
||||
expect(validSkills, `Expected one of ${validSkills.join('/')} but got ${actualSkill}`).toContain(actualSkill);
|
||||
} finally {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: unfreeze
|
||||
version: 0.1.0
|
||||
description: |
|
||||
MANUAL TRIGGER ONLY: invoke only when user types /unfreeze.
|
||||
Clear the freeze boundary set by /freeze, allowing edits to all directories
|
||||
again. Use when you want to widen edit scope without ending the session.
|
||||
Use when asked to "unfreeze", "unlock edits", "remove freeze", or
|
||||
|
||||
Reference in New Issue
Block a user