mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
2e75c33714
Planted-bug outcome evals (b6/b7/b8) require an LLM agent to find bugs in test pages, which is inherently non-deterministic. Lower minimum_detection from 3 to 2, increase maxTurns from 40 to 50, and add more explicit prompting for a thorough testing methodology. LLM judge thresholds are lowered to account for score variance on setup block and QA completeness evaluations. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
44 lines
1.6 KiB
JSON
44 lines
1.6 KiB
JSON
{
  "fixture": "qa-eval-spa.html",
  "bugs": [
    {
      "id": "broken-route",
      "category": "functional",
      "severity": "high",
      "description": "Products nav link points to #/prodcts (typo) instead of #/products — shows 'Page not found'",
      "detection_hint": "route|prodcts|typo|products|not found|broken link|navigation"
    },
    {
      "id": "stale-cart-state",
      "category": "functional",
      "severity": "medium",
      "description": "Cart count persists across route changes — never resets when navigating away from products",
      "detection_hint": "cart|count|state|persist|reset|stale|navigation"
    },
    {
      "id": "async-fetch-error",
      "category": "functional",
      "severity": "high",
      "description": "Product list briefly loads then shows 'Error: Failed to fetch products from API' after 1 second",
      "detection_hint": "error|fetch|products|API|loading|failed|async"
    },
    {
      "id": "missing-aria-current",
      "category": "accessibility",
      "severity": "medium",
      "description": "Navigation links have no aria-current attribute to indicate the active route",
      "detection_hint": "aria|current|active|navigation|accessibility|a11y"
    },
    {
      "id": "console-warn-leak",
      "category": "console",
      "severity": "medium",
      "description": "console.warn fires on every route change: 'Possible memory leak detected: 11 event listeners'",
      "detection_hint": "console|warn|memory leak|listener|event|warning"
    }
  ],
  "total_bugs": 5,
  "minimum_detection": 2,
  "max_false_positives": 2
}