Files
gstack/design/src/evolve.ts
T
Matteo Hertel bd3a6c68b2 fix(design): bump image-gen timeout to 240s + pin gpt-image-2
The design binary calls /v1/responses (gpt-4o + image_generation tool,
quality:high, 1536x1024) but aborted the request after a hardcoded 120s.
That class of request consistently takes ~140-160s end-to-end, so every
generate/variants/evolve/iterate call aborted before the image returned.

In /design-shotgun this cascades: Step 3c launches N parallel agents,
each calling `$D generate`, each aborts at 120s and retries, all fail,
the comparison board never opens — the skill appears to hang indefinitely.

Reproduced the exact API call with a longer budget: HTTP 200, valid
image, 143.5s. A real /design-shotgun run after the patch generated 3
variants in parallel at 150.0s / 161.0s / 152.1s, all exit 0 — note the
161s case, which a naive 150s bump would still have failed.

- Bump AbortController timeout 120_000 -> 240_000 in generate.ts,
  variants.ts, evolve.ts, iterate.ts (both call sites)
- Pin the image_generation tool to model "gpt-image-2"

design/test/variants-retry-after.test.ts: 5 pass, 0 fail. The
feedback-roundtrip.test.ts failures are a pre-existing browse-module
breakage (session.clearLoadedHtml undefined), unrelated to this change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 09:56:05 -07:00

152 lines
5.3 KiB
TypeScript

/**
* Screenshot-to-Mockup Evolution.
* Takes a screenshot of the live site and generates a mockup showing
* how it SHOULD look based on a design brief.
* Starts from reality, not a blank canvas.
*/
import fs from "fs";
import path from "path";
import { requireApiKey } from "./auth";
/** Inputs for a single `evolve` run. */
export interface EvolveOptions {
  /** Path to the current site screenshot; the file is read from disk and base64-encoded. */
  screenshot: string;
  /** What to change ("make it calmer", "fix the hierarchy"); inserted into the generation prompt as the requested changes. */
  brief: string;
  /** Output path for the evolved mockup; missing parent directories are created before writing. */
  output: string;
}
/**
 * Produce an evolved mockup from a screenshot of the current UI plus a brief.
 *
 * Runs in two passes. The screenshot is first turned into a written
 * description by `analyzeScreenshot`; that description and the brief are then
 * combined into a prompt and sent to the Responses API with the
 * `image_generation` tool. The returned base64 image is decoded and written to
 * `options.output`. Progress lines go to stderr and a JSON summary of the run
 * goes to stdout.
 *
 * @param options Screenshot path, change brief, and output path.
 * @throws Error when the generation request returns a non-OK status (with a
 *   dedicated message for the 403 "organization must be verified" case) or
 *   when the response contains no image payload. Errors from reading the
 *   screenshot, from `analyzeScreenshot`, and from the aborted request
 *   (240 s budget) propagate unchanged.
 */
export async function evolve(options: EvolveOptions): Promise<void> {
  const apiKey = requireApiKey();
  const screenshotBase64 = fs.readFileSync(options.screenshot, { encoding: "base64" });

  console.error(`Evolving ${options.screenshot} with: "${options.brief}"`);
  const startedAt = Date.now();

  // Pass 1: the image_generation tool is not handed the screenshot itself, so
  // the current design is first converted into a textual description.
  const description = await analyzeScreenshot(apiKey, screenshotBase64);
  console.error(` Analyzed current design: ${description.slice(0, 100)}...`);

  // Pass 2: ask for a new mockup based on that description plus the brief.
  const prompt = composeEvolvePrompt(description, options.brief);

  const aborter = new AbortController();
  const abortTimer = setTimeout(() => aborter.abort(), 240_000);

  try {
    const requestBody = {
      model: "gpt-4o",
      input: prompt,
      tools: [{ type: "image_generation", model: "gpt-image-2", size: "1536x1024", quality: "high" }],
    };

    const response = await fetch("https://api.openai.com/v1/responses", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(requestBody),
      signal: aborter.signal,
    });

    if (!response.ok) {
      const errorText = await response.text();
      const needsOrgVerification =
        response.status === 403 && errorText.includes("organization must be verified");

      if (needsOrgVerification) {
        const guidance = [
          "OpenAI organization verification required.",
          "Go to https://platform.openai.com/settings/organization to verify.",
          "After verification, wait up to 15 minutes for access to propagate.",
        ];
        throw new Error(guidance.join("\n"));
      }

      throw new Error(`API error (${response.status}): ${errorText.slice(0, 300)}`);
    }

    const payload = await response.json() as any;
    const generated = payload.output?.find((entry: any) => entry.type === "image_generation_call");
    if (!generated?.result) {
      throw new Error("No image data in response");
    }

    // Persist the decoded image, creating the destination directory if needed.
    fs.mkdirSync(path.dirname(options.output), { recursive: true });
    const imageBytes = Buffer.from(generated.result, "base64");
    fs.writeFileSync(options.output, imageBytes);

    const seconds = ((Date.now() - startedAt) / 1000).toFixed(1);
    const kilobytes = (imageBytes.length / 1024).toFixed(0);
    console.error(`Generated (${seconds}s, ${kilobytes}KB) → ${options.output}`);

    // Machine-readable summary on stdout; everything else above went to stderr.
    const summary = {
      outputPath: options.output,
      sourceScreenshot: options.screenshot,
      brief: options.brief,
    };
    console.log(JSON.stringify(summary, null, 2));
  } finally {
    clearTimeout(abortTimer);
  }
}

/** Assemble the generation prompt from the current-design description and the requested changes. */
function composeEvolvePrompt(currentDesign: string, requestedChanges: string): string {
  const lines = [
    "Generate a pixel-perfect UI mockup that is an improved version of an existing design.",
    "",
    "CURRENT DESIGN (what exists now):",
    currentDesign,
    "",
    "REQUESTED CHANGES:",
    requestedChanges,
    "",
    "Generate a new mockup that keeps the existing layout structure but applies the requested changes.",
    "The result should look like a real production UI. All text must be readable.",
    "1536x1024 pixels.",
  ];
  return lines.join("\n");
}
/**
 * Guess the MIME type of a base64-encoded image from its leading characters.
 *
 * The prefixes are the base64 encodings of the PNG, JPEG, GIF ("GIF8") and
 * RIFF (the container WebP uses) magic bytes. Anything unrecognised is
 * labelled `image/png`.
 */
function sniffImageMimeType(imageBase64: string): string {
  const signatures: ReadonlyArray<readonly [string, string]> = [
    ["iVBORw0KGgo", "image/png"],
    ["/9j/", "image/jpeg"],
    ["R0lGOD", "image/gif"],
    ["UklGR", "image/webp"],
  ];
  const match = signatures.find(([prefix]) => imageBase64.startsWith(prefix));
  return match ? match[1] : "image/png";
}

/**
 * Analyze a screenshot to produce a detailed description for re-generation.
 *
 * Sends the image as a data URL to the Chat Completions API together with a
 * fixed "describe this UI" instruction, and returns the model's trimmed reply.
 * The request is aborted after 30 s.
 *
 * This is best-effort for HTTP-level failures: a non-OK status, or a reply
 * with no usable text, yields the placeholder "Unable to analyze screenshot"
 * instead of throwing. Network errors and the abort itself are not caught here
 * and propagate to the caller.
 *
 * @param apiKey OpenAI API key used as the Bearer token.
 * @param imageBase64 Base64-encoded image bytes (no `data:` prefix).
 * @returns The description text, or the placeholder string described above.
 */
async function analyzeScreenshot(apiKey: string, imageBase64: string): Promise<string> {
  const fallback = "Unable to analyze screenshot";

  // Label the data URL with the image's real type rather than assuming PNG.
  // NOTE(review): whether the API tolerates a mislabelled data URL is not
  // visible from here; labelling it correctly avoids depending on that.
  const mimeType = sniffImageMimeType(imageBase64);

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 30_000);
  try {
    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-4o",
        messages: [{
          role: "user",
          content: [
            {
              type: "image_url",
              image_url: { url: `data:${mimeType};base64,${imageBase64}` },
            },
            {
              type: "text",
              text: "Describe this UI in detail for re-creation. Include: overall layout structure, color scheme (hex values), typography (sizes, weights), specific text content visible, spacing between elements, alignment patterns, and any decorative elements. Be precise enough that someone could recreate this UI from your description alone. 200 words max.",
            },
          ],
        }],
        max_tokens: 400,
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      // Stay best-effort, but record the status on stderr so the failure is diagnosable.
      console.error(` Screenshot analysis failed (HTTP ${response.status}); using fallback description`);
      return fallback;
    }

    // Only the fields read below are described; the rest of the payload is ignored.
    const data = (await response.json()) as {
      choices?: Array<{ message?: { content?: string | null } }>;
    };
    // `||` (not `??`) is deliberate: an empty reply should also become the placeholder.
    return data.choices?.[0]?.message?.content?.trim() || fallback;
  } finally {
    clearTimeout(timeout);
  }
}