mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/design
Resolves conflicts in CHANGELOG.md and VERSION (keep 0.5.0). Regenerated all SKILL.md files after merge. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,27 @@
|
||||
- Added `~/.gstack-dev/plans/` as a local plans directory for long-range vision docs (not checked in). CLAUDE.md and TODOS.md updated.
|
||||
- Added `/setup-design-md` to TODOS.md (P2) for interactive DESIGN.md creation from scratch.
|
||||
|
||||
## 0.4.5 — 2026-03-16
|
||||
|
||||
- **Review findings now actually get fixed, not just listed.** `/review` and `/ship` used to print informational findings (dead code, test gaps, N+1 queries) and then ignore them. Now every finding gets action: obvious mechanical fixes are applied automatically, and genuinely ambiguous issues are batched into a single question instead of 8 separate prompts. You see `[AUTO-FIXED] file:line Problem → what was done` for each auto-fix.
|
||||
- **You control the line between "just fix it" and "ask me first."** Dead code, stale comments, N+1 queries get auto-fixed. Security issues, race conditions, design decisions get surfaced for your call. The classification lives in one place (`review/checklist.md`) so both `/review` and `/ship` stay in sync.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **`$B js "const x = await fetch(...); return x.status"` now works.** The `js` command used to wrap everything as an expression — so `const`, semicolons, and multi-line code all broke. It now detects statements and uses a block wrapper, just like `eval` already did.
|
||||
- **Clicking a dropdown option no longer hangs forever.** If an agent sees `@e3 [option] "Admin"` in a snapshot and runs `click @e3`, gstack now auto-selects that option instead of hanging on an impossible Playwright click. The right thing just happens.
|
||||
- **When click is the wrong tool, gstack tells you.** Clicking an `<option>` via CSS selector used to time out with a cryptic Playwright error. Now you get: `"Use 'browse select' instead of 'click' for dropdown options."`
|
||||
|
||||
### For contributors
|
||||
|
||||
- Gate Classification → Severity Classification rename (severity determines presentation order, not whether you see a prompt).
|
||||
- Fix-First Heuristic section added to `review/checklist.md` — the canonical AUTO-FIX vs ASK classification.
|
||||
- New validation test: `Fix-First Heuristic exists in checklist and is referenced by review + ship`.
|
||||
- Extracted `needsBlockWrapper()` and `wrapForEvaluate()` helpers in `read-commands.ts` — shared by both `js` and `eval` commands (DRY).
|
||||
- Added `getRefRole()` to `BrowserManager` — exposes ARIA role for ref selectors without changing `resolveRef` return type.
|
||||
- Click handler auto-routes `[role=option]` refs to `selectOption()` via parent `<select>`, with DOM `tagName` check to avoid blocking custom listbox components.
|
||||
- 6 new tests: multi-line js, semicolons, statement keywords, simple expressions, option auto-routing, CSS option error guidance.
|
||||
|
||||
## 0.4.4 — 2026-03-16
|
||||
|
||||
- **New releases detected in under an hour, not half a day.** The update check cache was set to 12 hours, which meant you could be stuck on an old version all day while new releases dropped. Now "you're up to date" expires after 60 minutes, so you'll see upgrades within the hour. "Upgrade available" still nags for 12 hours (that's the point).
|
||||
|
||||
@@ -208,6 +208,15 @@ export class BrowserManager {
|
||||
return { selector };
|
||||
}
|
||||
|
||||
/**
 * Look up the ARIA role recorded for a snapshot ref ("@e…" or "@c…").
 * Returns null for plain CSS selectors and for refs missing from the map.
 */
getRefRole(selector: string): string | null {
  const isSnapshotRef = selector.startsWith('@e') || selector.startsWith('@c');
  if (!isSnapshotRef) {
    return null;
  }
  const refKey = selector.slice(1); // drop the leading '@'
  return this.refMap.get(refKey)?.role ?? null;
}
|
||||
|
||||
/** Number of entries currently stored in the ref map. */
getRefCount(): number {
  return this.refMap.size;
}
|
||||
|
||||
@@ -17,6 +17,24 @@ function hasAwait(code: string): boolean {
|
||||
return /\bawait\b/.test(stripped);
|
||||
}
|
||||
|
||||
/** Detect whether code needs a block wrapper {…} vs expression wrapper (…) inside an async IIFE. */
|
||||
function needsBlockWrapper(code: string): boolean {
|
||||
const trimmed = code.trim();
|
||||
if (trimmed.split('\n').length > 1) return true;
|
||||
if (/\b(const|let|var|function|class|return|throw|if|for|while|switch|try)\b/.test(trimmed)) return true;
|
||||
if (trimmed.includes(';')) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Wrap code for page.evaluate(), using async IIFE with block or expression body as needed. */
|
||||
function wrapForEvaluate(code: string): string {
|
||||
if (!hasAwait(code)) return code;
|
||||
const trimmed = code.trim();
|
||||
return needsBlockWrapper(trimmed)
|
||||
? `(async()=>{\n${code}\n})()`
|
||||
: `(async()=>(${trimmed}))()`;
|
||||
}
|
||||
|
||||
// Security: Path validation to prevent path traversal attacks
|
||||
const SAFE_DIRECTORIES = ['/tmp', process.cwd()];
|
||||
|
||||
@@ -124,7 +142,7 @@ export async function handleReadCommand(
|
||||
case 'js': {
|
||||
const expr = args[0];
|
||||
if (!expr) throw new Error('Usage: browse js <expression>');
|
||||
const wrapped = hasAwait(expr) ? `(async()=>(${expr}))()` : expr;
|
||||
const wrapped = wrapForEvaluate(expr);
|
||||
const result = await page.evaluate(wrapped);
|
||||
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
|
||||
}
|
||||
@@ -135,14 +153,8 @@ export async function handleReadCommand(
|
||||
validateReadPath(filePath);
|
||||
if (!fs.existsSync(filePath)) throw new Error(`File not found: ${filePath}`);
|
||||
const code = fs.readFileSync(filePath, 'utf-8');
|
||||
if (hasAwait(code)) {
|
||||
const trimmed = code.trim();
|
||||
const isSingleExpr = trimmed.split('\n').length === 1;
|
||||
const wrapped = isSingleExpr ? `(async()=>(${trimmed}))()` : `(async()=>{\n${code}\n})()`;
|
||||
const result = await page.evaluate(wrapped);
|
||||
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
|
||||
}
|
||||
const result = await page.evaluate(code);
|
||||
const wrapped = wrapForEvaluate(code);
|
||||
const result = await page.evaluate(wrapped);
|
||||
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
|
||||
}
|
||||
|
||||
|
||||
@@ -44,11 +44,48 @@ export async function handleWriteCommand(
|
||||
case 'click': {
|
||||
const selector = args[0];
|
||||
if (!selector) throw new Error('Usage: browse click <selector>');
|
||||
|
||||
// Auto-route: if ref points to a real <option> inside a <select>, use selectOption
|
||||
const role = bm.getRefRole(selector);
|
||||
if (role === 'option') {
|
||||
const resolved = await bm.resolveRef(selector);
|
||||
if ('locator' in resolved) {
|
||||
const optionInfo = await resolved.locator.evaluate(el => {
|
||||
if (el.tagName !== 'OPTION') return null; // custom [role=option], not real <option>
|
||||
const option = el as HTMLOptionElement;
|
||||
const select = option.closest('select');
|
||||
if (!select) return null;
|
||||
return { value: option.value, text: option.text };
|
||||
});
|
||||
if (optionInfo) {
|
||||
await resolved.locator.locator('xpath=ancestor::select').selectOption(optionInfo.value, { timeout: 5000 });
|
||||
return `Selected "${optionInfo.text}" (auto-routed from click on <option>) → now at ${page.url()}`;
|
||||
}
|
||||
// Real <option> with no parent <select> or custom [role=option] — fall through to normal click
|
||||
}
|
||||
}
|
||||
|
||||
const resolved = await bm.resolveRef(selector);
|
||||
if ('locator' in resolved) {
|
||||
await resolved.locator.click({ timeout: 5000 });
|
||||
} else {
|
||||
await page.click(resolved.selector, { timeout: 5000 });
|
||||
try {
|
||||
if ('locator' in resolved) {
|
||||
await resolved.locator.click({ timeout: 5000 });
|
||||
} else {
|
||||
await page.click(resolved.selector, { timeout: 5000 });
|
||||
}
|
||||
} catch (err: any) {
|
||||
// Enhanced error guidance: clicking <option> elements always fails (not visible / timeout)
|
||||
const isOption = 'locator' in resolved
|
||||
? await resolved.locator.evaluate(el => el.tagName === 'OPTION').catch(() => false)
|
||||
: await page.evaluate(
|
||||
(sel: string) => document.querySelector(sel)?.tagName === 'OPTION',
|
||||
(resolved as { selector: string }).selector
|
||||
).catch(() => false);
|
||||
if (isOption) {
|
||||
throw new Error(
|
||||
`Cannot click <option> elements. Use 'browse select <parent-select> <value>' instead of 'click' for dropdown options.`
|
||||
);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
// Wait briefly for any navigation/DOM update
|
||||
await page.waitForLoadState('domcontentloaded').catch(() => {});
|
||||
|
||||
@@ -198,6 +198,27 @@ describe('Inspection', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('js handles multi-line with await', async () => {
|
||||
const code = 'const x = await Promise.resolve(42);\nreturn x;';
|
||||
const result = await handleReadCommand('js', [code], bm);
|
||||
expect(result).toBe('42');
|
||||
});
|
||||
|
||||
test('js handles await with semicolons', async () => {
|
||||
const result = await handleReadCommand('js', ['const x = await Promise.resolve(5); return x + 1;'], bm);
|
||||
expect(result).toBe('6');
|
||||
});
|
||||
|
||||
test('js handles await with statement keywords', async () => {
|
||||
const result = await handleReadCommand('js', ['const res = await Promise.resolve("ok"); return res;'], bm);
|
||||
expect(result).toBe('ok');
|
||||
});
|
||||
|
||||
test('js still works for simple expressions', async () => {
|
||||
const result = await handleReadCommand('js', ['1 + 2'], bm);
|
||||
expect(result).toBe('3');
|
||||
});
|
||||
|
||||
test('css returns computed property', async () => {
|
||||
const result = await handleReadCommand('css', ['h1', 'color'], bm);
|
||||
// Navy color
|
||||
@@ -247,6 +268,36 @@ describe('Interaction', () => {
|
||||
expect(val).toBe('admin');
|
||||
});
|
||||
|
||||
test('click on option ref auto-routes to selectOption', async () => {
|
||||
await handleWriteCommand('goto', [baseUrl + '/forms.html'], bm);
|
||||
// Reset select to default
|
||||
await handleReadCommand('js', ['document.querySelector("#role").value = ""'], bm);
|
||||
const snap = await handleMetaCommand('snapshot', [], bm, async () => {});
|
||||
// Find an option ref (e.g., "Admin" option)
|
||||
const optionLine = snap.split('\n').find((l: string) => l.includes('[option]') && l.includes('"Admin"'));
|
||||
expect(optionLine).toBeDefined();
|
||||
const refMatch = optionLine!.match(/@(e\d+)/);
|
||||
expect(refMatch).toBeDefined();
|
||||
const ref = `@${refMatch![1]}`;
|
||||
const result = await handleWriteCommand('click', [ref], bm);
|
||||
expect(result).toContain('auto-routed');
|
||||
expect(result).toContain('Selected');
|
||||
// Verify the select value actually changed
|
||||
const val = await handleReadCommand('js', ['document.querySelector("#role").value'], bm);
|
||||
expect(val).toBe('admin');
|
||||
});
|
||||
|
||||
test('click CSS selector on option gives helpful error', async () => {
|
||||
await handleWriteCommand('goto', [baseUrl + '/forms.html'], bm);
|
||||
try {
|
||||
await handleWriteCommand('click', ['option[value="admin"]'], bm);
|
||||
expect(true).toBe(false); // Should not reach here
|
||||
} catch (err: any) {
|
||||
expect(err.message).toContain('select');
|
||||
expect(err.message).toContain('option');
|
||||
}
|
||||
}, 15000);
|
||||
|
||||
test('hover works', async () => {
|
||||
const result = await handleWriteCommand('hover', ['h1'], bm);
|
||||
expect(result).toContain('Hovered');
|
||||
|
||||
+47
-8
@@ -157,14 +157,53 @@ Follow the output format specified in the checklist. Respect the suppressions
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Output findings
|
||||
## Step 5: Fix-First Review
|
||||
|
||||
**Always output ALL findings** — both critical and informational. The user must see every issue.
|
||||
**Every finding gets action — not just critical ones.**
|
||||
|
||||
- If CRITICAL issues found: output all findings, then for EACH critical issue use a separate AskUserQuestion with the problem, then `RECOMMENDATION: Choose A because [one-line reason]`, then options (A: Fix it now, B: Acknowledge, C: False positive — skip).
|
||||
After all critical questions are answered, output a summary of what the user chose for each issue. If the user chose A (fix) on any issue, apply the recommended fixes. If only B/C were chosen, no action needed.
|
||||
- If only non-critical issues found: output findings. No further action needed.
|
||||
- If no issues found: output `Pre-Landing Review: No issues found.`
|
||||
Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
|
||||
|
||||
### Step 5a: Classify each finding
|
||||
|
||||
For each finding, classify as AUTO-FIX or ASK per the Fix-First Heuristic in
|
||||
checklist.md. Critical findings lean toward ASK; informational findings lean
|
||||
toward AUTO-FIX.
|
||||
|
||||
### Step 5b: Auto-fix all AUTO-FIX items
|
||||
|
||||
Apply each fix directly. For each one, output a one-line summary:
|
||||
`[AUTO-FIXED] [file:line] Problem → what you did`
|
||||
|
||||
### Step 5c: Batch-ask about ASK items
|
||||
|
||||
If there are ASK items remaining, present them in ONE AskUserQuestion:
|
||||
|
||||
- List each item with a number, the severity label, the problem, and a recommended fix
|
||||
- For each item, provide options: A) Fix as recommended, B) Skip
|
||||
- Include an overall RECOMMENDATION
|
||||
|
||||
Example format:
|
||||
```
|
||||
I auto-fixed 5 issues. 2 need your input:
|
||||
|
||||
1. [CRITICAL] app/models/post.rb:42 — Race condition in status transition
|
||||
Fix: Add `WHERE status = 'draft'` to the UPDATE
|
||||
→ A) Fix B) Skip
|
||||
|
||||
2. [INFORMATIONAL] app/services/generator.rb:88 — LLM output not type-checked before DB write
|
||||
Fix: Add JSON schema validation
|
||||
→ A) Fix B) Skip
|
||||
|
||||
RECOMMENDATION: Fix both — #1 is a real race condition, #2 prevents silent data corruption.
|
||||
```
|
||||
|
||||
If 3 or fewer ASK items, you may use individual AskUserQuestion calls instead of batching.
|
||||
|
||||
### Step 5d: Apply user-approved fixes
|
||||
|
||||
Apply fixes for items where the user chose "Fix." Output what was fixed.
|
||||
|
||||
If no ASK items exist (everything was AUTO-FIX), skip the question entirely.
|
||||
|
||||
### Greptile comment resolution
|
||||
|
||||
@@ -174,7 +213,7 @@ After outputting your own findings, if Greptile comments were classified in Step
|
||||
|
||||
Before replying to any comment, run the **Escalation Detection** algorithm from greptile-triage.md to determine whether to use Tier 1 (friendly) or Tier 2 (firm) reply templates.
|
||||
|
||||
1. **VALID & ACTIONABLE comments:** These are already included in your CRITICAL findings — they follow the same AskUserQuestion flow (A: Fix it now, B: Acknowledge, C: False positive). If the user chooses A (fix), reply using the **Fix reply template** from greptile-triage.md (include inline diff + explanation). If the user chooses C (false positive), reply using the **False Positive reply template** (include evidence + suggested re-rank), save to both per-project and global greptile-history.
|
||||
1. **VALID & ACTIONABLE comments:** These are included in your findings — they follow the Fix-First flow (auto-fixed if mechanical, batched into ASK if not) (A: Fix it now, B: Acknowledge, C: False positive). If the user chooses A (fix), reply using the **Fix reply template** from greptile-triage.md (include inline diff + explanation). If the user chooses C (false positive), reply using the **False Positive reply template** (include evidence + suggested re-rank), save to both per-project and global greptile-history.
|
||||
|
||||
2. **FALSE POSITIVE comments:** Present each one via AskUserQuestion:
|
||||
- Show the Greptile comment: file:line (or [top-level]) + body summary + permalink URL
|
||||
@@ -223,7 +262,7 @@ If no documentation files exist, skip this step silently.
|
||||
## Important Rules
|
||||
|
||||
- **Read the FULL diff before commenting.** Do not flag issues already addressed in the diff.
|
||||
- **Read-only by default.** Only modify files if the user explicitly chooses "Fix it now" on a critical issue. Never commit, push, or create PRs.
|
||||
- **Fix-first, not read-only.** AUTO-FIX items are applied directly. ASK items are only applied after user approval. Never commit, push, or create PRs — that's /ship's job.
|
||||
- **Be terse.** One line problem, one line fix. No preamble.
|
||||
- **Only flag real problems.** Skip anything that's fine.
|
||||
- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence. Never post vague replies.
|
||||
|
||||
+47
-8
@@ -75,14 +75,53 @@ Follow the output format specified in the checklist. Respect the suppressions
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Output findings
|
||||
## Step 5: Fix-First Review
|
||||
|
||||
**Always output ALL findings** — both critical and informational. The user must see every issue.
|
||||
**Every finding gets action — not just critical ones.**
|
||||
|
||||
- If CRITICAL issues found: output all findings, then for EACH critical issue use a separate AskUserQuestion with the problem, then `RECOMMENDATION: Choose A because [one-line reason]`, then options (A: Fix it now, B: Acknowledge, C: False positive — skip).
|
||||
After all critical questions are answered, output a summary of what the user chose for each issue. If the user chose A (fix) on any issue, apply the recommended fixes. If only B/C were chosen, no action needed.
|
||||
- If only non-critical issues found: output findings. No further action needed.
|
||||
- If no issues found: output `Pre-Landing Review: No issues found.`
|
||||
Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
|
||||
|
||||
### Step 5a: Classify each finding
|
||||
|
||||
For each finding, classify as AUTO-FIX or ASK per the Fix-First Heuristic in
|
||||
checklist.md. Critical findings lean toward ASK; informational findings lean
|
||||
toward AUTO-FIX.
|
||||
|
||||
### Step 5b: Auto-fix all AUTO-FIX items
|
||||
|
||||
Apply each fix directly. For each one, output a one-line summary:
|
||||
`[AUTO-FIXED] [file:line] Problem → what you did`
|
||||
|
||||
### Step 5c: Batch-ask about ASK items
|
||||
|
||||
If there are ASK items remaining, present them in ONE AskUserQuestion:
|
||||
|
||||
- List each item with a number, the severity label, the problem, and a recommended fix
|
||||
- For each item, provide options: A) Fix as recommended, B) Skip
|
||||
- Include an overall RECOMMENDATION
|
||||
|
||||
Example format:
|
||||
```
|
||||
I auto-fixed 5 issues. 2 need your input:
|
||||
|
||||
1. [CRITICAL] app/models/post.rb:42 — Race condition in status transition
|
||||
Fix: Add `WHERE status = 'draft'` to the UPDATE
|
||||
→ A) Fix B) Skip
|
||||
|
||||
2. [INFORMATIONAL] app/services/generator.rb:88 — LLM output not type-checked before DB write
|
||||
Fix: Add JSON schema validation
|
||||
→ A) Fix B) Skip
|
||||
|
||||
RECOMMENDATION: Fix both — #1 is a real race condition, #2 prevents silent data corruption.
|
||||
```
|
||||
|
||||
If 3 or fewer ASK items, you may use individual AskUserQuestion calls instead of batching.
|
||||
|
||||
### Step 5d: Apply user-approved fixes
|
||||
|
||||
Apply fixes for items where the user chose "Fix." Output what was fixed.
|
||||
|
||||
If no ASK items exist (everything was AUTO-FIX), skip the question entirely.
|
||||
|
||||
### Greptile comment resolution
|
||||
|
||||
@@ -92,7 +131,7 @@ After outputting your own findings, if Greptile comments were classified in Step
|
||||
|
||||
Before replying to any comment, run the **Escalation Detection** algorithm from greptile-triage.md to determine whether to use Tier 1 (friendly) or Tier 2 (firm) reply templates.
|
||||
|
||||
1. **VALID & ACTIONABLE comments:** These are already included in your CRITICAL findings — they follow the same AskUserQuestion flow (A: Fix it now, B: Acknowledge, C: False positive). If the user chooses A (fix), reply using the **Fix reply template** from greptile-triage.md (include inline diff + explanation). If the user chooses C (false positive), reply using the **False Positive reply template** (include evidence + suggested re-rank), save to both per-project and global greptile-history.
|
||||
1. **VALID & ACTIONABLE comments:** These are included in your findings — they follow the Fix-First flow (auto-fixed if mechanical, batched into ASK if not) (A: Fix it now, B: Acknowledge, C: False positive). If the user chooses A (fix), reply using the **Fix reply template** from greptile-triage.md (include inline diff + explanation). If the user chooses C (false positive), reply using the **False Positive reply template** (include evidence + suggested re-rank), save to both per-project and global greptile-history.
|
||||
|
||||
2. **FALSE POSITIVE comments:** Present each one via AskUserQuestion:
|
||||
- Show the Greptile comment: file:line (or [top-level]) + body summary + permalink URL
|
||||
@@ -141,7 +180,7 @@ If no documentation files exist, skip this step silently.
|
||||
## Important Rules
|
||||
|
||||
- **Read the FULL diff before commenting.** Do not flag issues already addressed in the diff.
|
||||
- **Read-only by default.** Only modify files if the user explicitly chooses "Fix it now" on a critical issue. Never commit, push, or create PRs.
|
||||
- **Fix-first, not read-only.** AUTO-FIX items are applied directly. ASK items are only applied after user approval. Never commit, push, or create PRs — that's /ship's job.
|
||||
- **Be terse.** One line problem, one line fix. No preamble.
|
||||
- **Only flag real problems.** Skip anything that's fine.
|
||||
- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence. Never post vague replies.
|
||||
|
||||
+42
-9
@@ -5,21 +5,23 @@
|
||||
Review the `git diff origin/main` output for the issues listed below. Be specific — cite `file:line` and suggest fixes. Skip anything that's fine. Only flag real problems.
|
||||
|
||||
**Two-pass review:**
|
||||
- **Pass 1 (CRITICAL):** Run SQL & Data Safety and LLM Output Trust Boundary first. These can block `/ship`.
|
||||
- **Pass 2 (INFORMATIONAL):** Run all remaining categories. These are included in the PR body but do not block.
|
||||
- **Pass 1 (CRITICAL):** Run SQL & Data Safety and LLM Output Trust Boundary first. Highest severity.
|
||||
- **Pass 2 (INFORMATIONAL):** Run all remaining categories. Lower severity but still actioned.
|
||||
|
||||
All findings get action via Fix-First Review: obvious mechanical fixes are applied automatically,
|
||||
genuinely ambiguous issues are batched into a single user question.
|
||||
|
||||
**Output format:**
|
||||
|
||||
```
|
||||
Pre-Landing Review: N issues (X critical, Y informational)
|
||||
|
||||
**CRITICAL** (blocking /ship):
|
||||
- [file:line] Problem description
|
||||
Fix: suggested fix
|
||||
**AUTO-FIXED:**
|
||||
- [file:line] Problem → fix applied
|
||||
|
||||
**Issues** (non-blocking):
|
||||
**NEEDS INPUT:**
|
||||
- [file:line] Problem description
|
||||
Fix: suggested fix
|
||||
Recommended fix: suggested fix
|
||||
```
|
||||
|
||||
If no issues found: `Pre-Landing Review: No issues found.`
|
||||
@@ -102,10 +104,10 @@ To do this: use Grep to find all references to the sibling values (e.g., grep fo
|
||||
|
||||
---
|
||||
|
||||
## Gate Classification
|
||||
## Severity Classification
|
||||
|
||||
```
|
||||
CRITICAL (blocks /ship): INFORMATIONAL (in PR body):
|
||||
CRITICAL (highest severity): INFORMATIONAL (lower severity):
|
||||
├─ SQL & Data Safety ├─ Conditional Side Effects
|
||||
├─ Race Conditions & Concurrency ├─ Magic Numbers & String Coupling
|
||||
├─ LLM Output Trust Boundary ├─ Dead Code & Consistency
|
||||
@@ -115,10 +117,41 @@ CRITICAL (blocks /ship): INFORMATIONAL (in PR body):
|
||||
├─ Time Window Safety
|
||||
├─ Type Coercion at Boundaries
|
||||
└─ View/Frontend
|
||||
|
||||
All findings are actioned via Fix-First Review. Severity determines
|
||||
presentation order and classification of AUTO-FIX vs ASK — critical
|
||||
findings lean toward ASK (they're riskier), informational findings
|
||||
lean toward AUTO-FIX (they're more mechanical).
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Fix-First Heuristic
|
||||
|
||||
This heuristic is referenced by both `/review` and `/ship`. It determines whether
|
||||
the agent auto-fixes a finding or asks the user.
|
||||
|
||||
```
|
||||
AUTO-FIX (agent fixes without asking): ASK (needs human judgment):
|
||||
├─ Dead code / unused variables ├─ Security (auth, XSS, injection)
|
||||
├─ N+1 queries (missing .includes()) ├─ Race conditions
|
||||
├─ Stale comments contradicting code ├─ Design decisions
|
||||
├─ Magic numbers → named constants ├─ Large fixes (>20 lines)
|
||||
├─ Missing LLM output validation ├─ Enum completeness
|
||||
├─ Version/path mismatches ├─ Removing functionality
|
||||
├─ Variables assigned but never read └─ Anything changing user-visible
|
||||
└─ Inline styles, O(n*m) view lookups behavior
|
||||
```
|
||||
|
||||
**Rule of thumb:** If the fix is mechanical and a senior engineer would apply it
|
||||
without discussion, it's AUTO-FIX. If reasonable engineers could disagree about
|
||||
the fix, it's ASK.
|
||||
|
||||
**Critical findings default toward ASK** (they're inherently riskier).
|
||||
**Informational findings default toward AUTO-FIX** (they're more mechanical).
|
||||
|
||||
---
|
||||
|
||||
## Suppressions — DO NOT flag these
|
||||
|
||||
- "X is redundant with Y" when the redundancy is harmless and aids readability (e.g., `present?` redundant with `length > 20`)
|
||||
|
||||
+18
-11
@@ -107,7 +107,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
|
||||
- On the base branch (abort)
|
||||
- Merge conflicts that can't be auto-resolved (stop, show conflicts)
|
||||
- Test failures (stop, show failures)
|
||||
- Pre-landing review finds CRITICAL issues and user chooses to fix (not acknowledge or skip)
|
||||
- Pre-landing review finds ASK items that need user judgment
|
||||
- MINOR or MAJOR version bump needed (ask — see Step 4)
|
||||
- Greptile review comments that need user decision (complex fixes, false positives)
|
||||
- TODOS.md missing and user wants to create one (ask — see Step 5.5)
|
||||
@@ -120,6 +120,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
|
||||
- Commit message approval (auto-commit)
|
||||
- Multi-file changesets (auto-split into bisectable commits)
|
||||
- TODOS.md completed-item detection (auto-mark)
|
||||
- Auto-fixable review findings (dead code, N+1, stale comments — fixed automatically)
|
||||
|
||||
---
|
||||
|
||||
@@ -243,19 +244,25 @@ Review the diff for structural issues that tests don't catch.
|
||||
- **Pass 1 (CRITICAL):** SQL & Data Safety, LLM Output Trust Boundary
|
||||
- **Pass 2 (INFORMATIONAL):** All remaining categories
|
||||
|
||||
4. **Always output ALL findings** — both critical and informational. The user must see every issue found.
|
||||
4. **Classify each finding as AUTO-FIX or ASK** per the Fix-First Heuristic in
|
||||
checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX.
|
||||
|
||||
5. Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
|
||||
5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
|
||||
`[AUTO-FIXED] [file:line] Problem → what you did`
|
||||
|
||||
6. **If CRITICAL issues found:** For EACH critical issue, use a separate AskUserQuestion with:
|
||||
- The problem (`file:line` + description)
|
||||
- `RECOMMENDATION: Choose A because [one-line reason]`
|
||||
- Options: A) Fix it now, B) Acknowledge and ship anyway, C) It's a false positive — skip
|
||||
After resolving all critical issues: if the user chose A (fix) on any issue, apply the recommended fixes, then commit only the fixed files by name (`git add <fixed-files> && git commit -m "fix: apply pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test with the fixes applied. If the user chose only B (acknowledge) or C (false positive) on all issues, continue with Step 4.
|
||||
6. **If ASK items remain,** present them in ONE AskUserQuestion:
|
||||
- List each with number, severity, problem, recommended fix
|
||||
- Per-item options: A) Fix B) Skip
|
||||
- Overall RECOMMENDATION
|
||||
- If 3 or fewer ASK items, you may use individual AskUserQuestion calls instead
|
||||
|
||||
7. **If only non-critical issues found:** Output them and continue. They will be included in the PR body at Step 8.
|
||||
7. **After all fixes (auto + user-approved):**
|
||||
- If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test.
|
||||
- If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4.
|
||||
|
||||
8. **If no issues found:** Output `Pre-Landing Review: No issues found.` and continue.
|
||||
8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
|
||||
|
||||
If no issues found: `Pre-Landing Review: No issues found.`
|
||||
|
||||
Save the review output — it goes into the PR body in Step 8.
|
||||
|
||||
@@ -488,7 +495,7 @@ EOF
|
||||
- **Never skip tests.** If tests fail, stop.
|
||||
- **Never skip the pre-landing review.** If checklist.md is unreadable, stop.
|
||||
- **Never force push.** Use regular `git push` only.
|
||||
- **Never ask for confirmation** except for MINOR/MAJOR version bumps and CRITICAL review findings (one AskUserQuestion per critical issue with fix recommendation).
|
||||
- **Never ask for confirmation** except for MINOR/MAJOR version bumps and pre-landing review ASK items (batched into at most one AskUserQuestion).
|
||||
- **Always use the 4-digit version format** from the VERSION file.
|
||||
- **Date format in CHANGELOG:** `YYYY-MM-DD`
|
||||
- **Split commits for bisectability** — each commit = one logical change.
|
||||
|
||||
+18
-11
@@ -25,7 +25,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
|
||||
- On the base branch (abort)
|
||||
- Merge conflicts that can't be auto-resolved (stop, show conflicts)
|
||||
- Test failures (stop, show failures)
|
||||
- Pre-landing review finds CRITICAL issues and user chooses to fix (not acknowledge or skip)
|
||||
- Pre-landing review finds ASK items that need user judgment
|
||||
- MINOR or MAJOR version bump needed (ask — see Step 4)
|
||||
- Greptile review comments that need user decision (complex fixes, false positives)
|
||||
- TODOS.md missing and user wants to create one (ask — see Step 5.5)
|
||||
@@ -38,6 +38,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
|
||||
- Commit message approval (auto-commit)
|
||||
- Multi-file changesets (auto-split into bisectable commits)
|
||||
- TODOS.md completed-item detection (auto-mark)
|
||||
- Auto-fixable review findings (dead code, N+1, stale comments — fixed automatically)
|
||||
|
||||
---
|
||||
|
||||
@@ -161,19 +162,25 @@ Review the diff for structural issues that tests don't catch.
|
||||
- **Pass 1 (CRITICAL):** SQL & Data Safety, LLM Output Trust Boundary
|
||||
- **Pass 2 (INFORMATIONAL):** All remaining categories
|
||||
|
||||
4. **Always output ALL findings** — both critical and informational. The user must see every issue found.
|
||||
4. **Classify each finding as AUTO-FIX or ASK** per the Fix-First Heuristic in
|
||||
checklist.md. Critical findings lean toward ASK; informational findings lean toward AUTO-FIX.
|
||||
|
||||
5. Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
|
||||
5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
|
||||
`[AUTO-FIXED] file:line Problem → what was done`
|
||||
|
||||
6. **If CRITICAL issues found:** For EACH critical issue, use a separate AskUserQuestion with:
|
||||
- The problem (`file:line` + description)
|
||||
- `RECOMMENDATION: Choose A because [one-line reason]`
|
||||
- Options: A) Fix it now, B) Acknowledge and ship anyway, C) It's a false positive — skip
|
||||
After resolving all critical issues: if the user chose A (fix) on any issue, apply the recommended fixes, then commit only the fixed files by name (`git add <fixed-files> && git commit -m "fix: apply pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test with the fixes applied. If the user chose only B (acknowledge) or C (false positive) on all issues, continue with Step 4.
|
||||
6. **If ASK items remain,** present them in ONE AskUserQuestion:
|
||||
- List each with number, severity, problem, recommended fix
|
||||
- Per-item options: A) Fix B) Skip
|
||||
- Overall RECOMMENDATION
|
||||
- If there are 3 or fewer ASK items, you may use individual AskUserQuestion calls instead
|
||||
|
||||
7. **If only non-critical issues found:** Output them and continue. They will be included in the PR body at Step 8.
|
||||
7. **After all fixes (auto + user-approved):**
|
||||
- If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test.
|
||||
- If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4.
|
||||
|
||||
8. **If no issues found:** Output `Pre-Landing Review: No issues found.` and continue.
|
||||
8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
|
||||
|
||||
If no issues found: `Pre-Landing Review: No issues found.`
|
||||
|
||||
Save the review output — it goes into the PR body in Step 8.
|
||||
|
||||
@@ -406,7 +413,7 @@ EOF
|
||||
- **Never skip tests.** If tests fail, stop.
|
||||
- **Never skip the pre-landing review.** If checklist.md is unreadable, stop.
|
||||
- **Never force push.** Use regular `git push` only.
|
||||
- **Never ask for confirmation** except for MINOR/MAJOR version bumps and CRITICAL review findings (one AskUserQuestion per critical issue with fix recommendation).
|
||||
- **Never ask for confirmation** except for MINOR/MAJOR version bumps and pre-landing review ASK items (batched into at most one AskUserQuestion).
|
||||
- **Always use the 4-digit version format** from the VERSION file.
|
||||
- **Date format in CHANGELOG:** `YYYY-MM-DD`
|
||||
- **Split commits for bisectability** — each commit = one logical change.
|
||||
|
||||
@@ -593,8 +593,8 @@ describe('Enum & Value Completeness in review checklist', () => {
|
||||
expect(checklist).toContain('allowlist');
|
||||
});
|
||||
|
||||
test('Enum & Value Completeness is in the gate classification as CRITICAL', () => {
|
||||
const gateSection = checklist.slice(checklist.indexOf('## Gate Classification'));
|
||||
test('Enum & Value Completeness is in the severity classification as CRITICAL', () => {
|
||||
const gateSection = checklist.slice(checklist.indexOf('## Severity Classification'));
|
||||
// The ASCII art has CRITICAL on the left and INFORMATIONAL on the right
|
||||
// Enum & Value Completeness should appear on a line with the CRITICAL tree (├─ or └─)
|
||||
const enumLine = gateSection.split('\n').find(l => l.includes('Enum & Value Completeness'));
|
||||
@@ -602,6 +602,19 @@ describe('Enum & Value Completeness in review checklist', () => {
|
||||
// It's on the left (CRITICAL) side — starts with ├─ or └─
|
||||
expect(enumLine!.trimStart().startsWith('├─') || enumLine!.trimStart().startsWith('└─')).toBe(true);
|
||||
});
|
||||
|
||||
test('Fix-First Heuristic exists in checklist and is referenced by review + ship', () => {
|
||||
expect(checklist).toContain('## Fix-First Heuristic');
|
||||
expect(checklist).toContain('AUTO-FIX');
|
||||
expect(checklist).toContain('ASK');
|
||||
|
||||
const reviewSkill = fs.readFileSync(path.join(ROOT, 'review/SKILL.md'), 'utf-8');
|
||||
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship/SKILL.md'), 'utf-8');
|
||||
expect(reviewSkill).toContain('AUTO-FIX');
|
||||
expect(reviewSkill).toContain('[AUTO-FIXED]');
|
||||
expect(shipSkill).toContain('AUTO-FIX');
|
||||
expect(shipSkill).toContain('[AUTO-FIXED]');
|
||||
});
|
||||
});
|
||||
|
||||
// --- Part 7: Planted-bug fixture validation (A4) ---
|
||||
|
||||
Reference in New Issue
Block a user