feat: mockup diffing + design intent verification

New commands:
- $D diff --before old.png --after new.png: visual diff using GPT-4o vision.
  Returns differences by area with severity (high/medium/low) and a
  matchScore (0-100).
- $D verify --mockup approved.png --screenshot live.png: compares a live site
  screenshot against the approved design mockup. Passes if matchScore >= 70
  and there are no high-severity differences.

Used by /design-review to close the design loop:
design -> implement -> verify visually.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-06 13:45:35 +02:00 · 2026-03-26 22:17:12 -06:00
parent 9c1b7096a8
commit 10b843e3a2
2 changed files with 133 additions and 3 deletions
@@ -20,6 +20,7 @@ import { variants } from "./variants";
 import { iterate } from "./iterate";
 import { resolveApiKey, saveApiKey } from "./auth";
 import { extractDesignLanguage, updateDesignMd } from "./memory";
+import { diffMockups, verifyAgainstMockup } from "./diff";

 function parseArgs(argv: string[]): { command: string; flags: Record<string, string | boolean> } {
  const args = argv.slice(2); // skip bun/node and script path
@@ -178,10 +179,35 @@ async function main(): Promise<void> {
      break;
    }

-    case "diff":
+    case "diff": {
+      const before = flags.before as string;
+      const after = flags.after as string;
+      if (!before || !after) {
+        console.error("--before and --after are required");
+        process.exit(1);
+      }
+      console.error(`Comparing ${before} vs ${after}...`);
+      const diffResult = await diffMockups(before, after);
+      console.log(JSON.stringify(diffResult, null, 2));
+      break;
+    }
+
+    case "verify": {
+      const mockup = flags.mockup as string;
+      const screenshot = flags.screenshot as string;
+      if (!mockup || !screenshot) {
+        console.error("--mockup and --screenshot are required");
+        process.exit(1);
+      }
+      console.error(`Verifying implementation against approved mockup...`);
+      const verifyResult = await verifyAgainstMockup(mockup, screenshot);
+      console.error(`Match: ${verifyResult.matchScore}/100 — ${verifyResult.pass ? "PASS" : "FAIL"}`);
+      console.log(JSON.stringify(verifyResult, null, 2));
+      break;
+    }
+
    case "evolve":
-    case "verify":
-      console.error(`Command '${command}' will be implemented in Commit 7+.`);
+      console.error(`Command 'evolve' will be implemented in Commit 8.`);
      process.exit(1);
      break;
  }
@@ -0,0 +1,104 @@
+/**
+ * Visual diff between two mockups using GPT-4o vision.
+ * Identifies what changed between design iterations or between
+ * an approved mockup and the live implementation.
+ */
+
+import fs from "fs";
+import { requireApiKey } from "./auth";
+
+export interface DiffResult { // Structured outcome of one visual comparison, as requested from the model in diffMockups.
+  differences: { area: string; description: string; severity: string }[]; // one entry per differing UI area; the prompt asks for severity "high" | "medium" | "low", but the type does not enforce it
+  summary: string; // one-sentence overall assessment ("Diff unavailable" when the API call fails)
+  matchScore: number; // 0-100, how closely they match (100 = identical); diffMockups returns -1 when the API call fails
+}
+
+/**
+ * Fallback result used whenever no usable comparison could be obtained.
+ * matchScore is -1 so it can never satisfy a ">= threshold" pass gate.
+ */
+function unavailableDiff(): DiffResult {
+  return { differences: [], summary: "Diff unavailable", matchScore: -1 };
+}
+
+/**
+ * Coerce the model's parsed JSON into a well-formed DiffResult.
+ *
+ * The model output is untrusted: fields may be missing or have the wrong
+ * type. Callers index into `differences` and compare `matchScore`, so the
+ * shape is repaired here instead of being asserted with a cast.
+ *
+ * @param raw - Value produced by JSON.parse on the model's reply.
+ * @returns A DiffResult whose `differences` is always an array of string
+ *   fields (severity lower-cased), `summary` is a string, and `matchScore`
+ *   is a number in 0-100, or -1 when the model gave no usable score.
+ */
+function normalizeDiffResult(raw: unknown): DiffResult {
+  if (typeof raw !== "object" || raw === null) return unavailableDiff();
+  const record = raw as Record<string, unknown>;
+
+  const rawDifferences: unknown[] = Array.isArray(record.differences) ? record.differences : [];
+  const differences = rawDifferences
+    .filter((entry): entry is Record<string, unknown> => typeof entry === "object" && entry !== null)
+    .map((entry) => ({
+      area: typeof entry.area === "string" ? entry.area : "",
+      description: typeof entry.description === "string" ? entry.description : "",
+      // Lower-cased so that "High" and "high" are treated the same downstream.
+      severity: typeof entry.severity === "string" ? entry.severity.trim().toLowerCase() : "",
+    }));
+
+  // Accept a number or a numeric string; anything else means "no score".
+  const rawScore = record.matchScore;
+  let score = Number.NaN;
+  if (typeof rawScore === "number") {
+    score = rawScore;
+  } else if (typeof rawScore === "string" && rawScore.trim() !== "") {
+    score = Number(rawScore);
+  }
+
+  return {
+    differences,
+    summary: typeof record.summary === "string" ? record.summary : "",
+    matchScore: Number.isFinite(score) ? Math.min(100, Math.max(0, score)) : -1,
+  };
+}
+
+/**
+ * Compare two images and describe the visual differences.
+ *
+ * Reads both files, sends them base64-encoded to the OpenAI chat completions
+ * endpoint (model "gpt-4o") and parses the JSON reply.
+ *
+ * @param beforePath - Path of the first image (BEFORE / design intent).
+ * @param afterPath - Path of the second image (AFTER / implementation).
+ * @returns The normalized comparison. When the API answers with a non-OK
+ *   status, an empty reply, or content that is not valid JSON, the problem is
+ *   logged to stderr and a `{ differences: [], summary: "Diff unavailable",
+ *   matchScore: -1 }` result is returned instead.
+ * @throws If either file cannot be read, or if the request fails at the
+ *   network level or is aborted by the 60 s timeout. Whatever requireApiKey
+ *   throws also propagates (its behavior is defined in ./auth).
+ */
+export async function diffMockups(
+  beforePath: string,
+  afterPath: string,
+): Promise<DiffResult> {
+  const apiKey = requireApiKey();
+  const beforeData = fs.readFileSync(beforePath).toString("base64");
+  const afterData = fs.readFileSync(afterPath).toString("base64");
+
+  // Abort the request (including reading the body) if it takes over 60 s.
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), 60_000);
+
+  try {
+    const response = await fetch("https://api.openai.com/v1/chat/completions", {
+      method: "POST",
+      headers: {
+        "Authorization": `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: "gpt-4o",
+        messages: [{
+          role: "user",
+          content: [
+            {
+              type: "text",
+              text: `Compare these two UI images. The first is the BEFORE (or design intent), the second is the AFTER (or actual implementation). Return valid JSON only:
+
+{
+  "differences": [
+    {"area": "header", "description": "Font size changed from ~32px to ~24px", "severity": "high"},
+    ...
+  ],
+  "summary": "one sentence overall assessment",
+  "matchScore": 85
+}
+
+severity: "high" = noticeable to any user, "medium" = visible on close inspection, "low" = minor/pixel-level.
+matchScore: 100 = identical, 0 = completely different.
+Focus on layout, typography, colors, spacing, and element presence/absence. Ignore rendering differences (anti-aliasing, sub-pixel).`,
+            },
+            // NOTE: both images are labelled image/png regardless of the file's actual format.
+            {
+              type: "image_url",
+              image_url: { url: `data:image/png;base64,${beforeData}` },
+            },
+            {
+              type: "image_url",
+              image_url: { url: `data:image/png;base64,${afterData}` },
+            },
+          ],
+        }],
+        max_tokens: 600,
+        response_format: { type: "json_object" },
+      }),
+      signal: controller.signal,
+    });
+
+    if (!response.ok) {
+      const error = await response.text();
+      console.error(`Diff API error (${response.status}): ${error.slice(0, 200)}`);
+      return unavailableDiff();
+    }
+
+    const data = (await response.json()) as
+      | { choices?: { message?: { content?: unknown } }[] }
+      | null;
+    const rawContent = data?.choices?.[0]?.message?.content;
+    const content = typeof rawContent === "string" ? rawContent.trim() : "";
+    if (!content) {
+      // JSON.parse("") would throw an unhelpful SyntaxError; report it like an API failure.
+      console.error("Diff API returned an empty response");
+      return unavailableDiff();
+    }
+
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(content);
+    } catch {
+      // The reply can be invalid JSON (for example if it was cut off by the max_tokens limit).
+      console.error(`Diff API returned non-JSON content: ${content.slice(0, 200)}`);
+      return unavailableDiff();
+    }
+    return normalizeDiffResult(parsed);
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+/**
+ * Verify a live implementation against an approved design mockup.
+ * Combines diff with a pass/fail gate.
+ *
+ * @param mockupPath - Path of the approved mockup image (sent as BEFORE).
+ * @param screenshotPath - Path of the live screenshot image (sent as AFTER).
+ * @returns `pass` is true only when the score is a finite number >= 70 and no
+ *   difference has severity "high" (compared case-insensitively).
+ *   `matchScore` is the numeric score, or -1 when the diff carried no usable
+ *   number. `diff` is the result of diffMockups, returned unmodified.
+ * @throws Whatever diffMockups throws.
+ */
+export async function verifyAgainstMockup(
+  mockupPath: string,
+  screenshotPath: string,
+): Promise<{ pass: boolean; matchScore: number; diff: DiffResult }> {
+  const diff = await diffMockups(mockupPath, screenshotPath);
+
+  // The diff originates from model output, so do not trust its shape:
+  // a missing `differences` array must not crash the gate.
+  const differences = Array.isArray(diff.differences) ? diff.differences : [];
+  const hasHighSeverity = differences.some(
+    (d) => typeof d?.severity === "string" && d.severity.trim().toLowerCase() === "high",
+  );
+
+  // A missing or non-numeric score can never pass; report it as -1 instead of NaN/undefined.
+  const score = Number(diff.matchScore);
+  const matchScore = Number.isFinite(score) ? score : -1;
+
+  // Pass if matchScore >= 70 and no high-severity differences
+  const pass = matchScore >= 70 && !hasHighSeverity;
+
+  return { pass, matchScore, diff };
+}