fix(design): honor Retry-After header in variants 429 handler

Closes #1244.

The 429 handler in `generateVariant` discarded the `Retry-After` response
header and fell straight through to a local exponential schedule (2s/4s/8s).
In image-generation batches, that burns retry attempts inside the provider's
cooldown window and the request never recovers.

Now we parse `Retry-After` per RFC 7231 — both delta-seconds (`Retry-After: 5`)
and HTTP-date (`Retry-After: Fri, 31 Dec 1999 23:59:59 GMT`). Honored waits
are capped at 60s to bound stalls from hostile or buggy headers. Delta-seconds
are validated as digits-only (rejects `2abc`). When `Retry-After` is honored
(including 0 / past-date "retry now"), the next iteration's leading exponential
sleep is skipped so we don't double-wait. Invalid or missing headers fall
through to the existing exponential schedule unchanged.

Behavior matrix:

| Header                          | Behavior                                  |
|---------------------------------|-------------------------------------------|
| Retry-After: 5                  | wait 5s, skip leading on next attempt     |
| Retry-After: 999999             | capped to 60s, skip leading               |
| Retry-After: 2abc               | invalid, fall through to exponential      |
| Retry-After: 0                  | wait 0, skip leading (retry immediately)  |
| Retry-After: <past HTTP-date>   | wait 0, skip leading                      |
| Retry-After: <future date>      | wait diff capped at 60s, skip leading     |
| no header                       | fall through to existing exponential      |

`generateVariant` now accepts an optional `fetchFn` parameter (defaults to
`globalThis.fetch`) so tests can inject a stub. Production call sites are
unchanged.

Tests cover five of the behavior rows above (delta-seconds, future HTTP-date,
invalid value, no header, and `Retry-After: 0`), asserting both the 1st-to-2nd
call timing gap and call counts. All five pass in ~8s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Stefan Neamtu
2026-05-06 12:01:52 +02:00
parent 19e699ab9b
commit 4bdb02070f
2 changed files with 166 additions and 3 deletions
+133
View File
@@ -0,0 +1,133 @@
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
import fs from "fs";
import os from "os";
import path from "path";
import { generateVariant } from "../src/variants";
// 1x1 transparent PNG, base64 — valid bytes that fs.writeFileSync can write.
// successResponse() returns this string as the `result` field of its single
// `image_generation_call` output, so the stubbed success carries real image
// data (presumably what generateVariant decodes and writes to outputPath —
// confirm against src/variants).
const TINY_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII=";
/**
 * Builds the stubbed "generation succeeded" reply: HTTP 200 with a JSON body
 * holding one `image_generation_call` output whose `result` is the tiny PNG.
 *
 * A fresh `Response` is created on every call.
 */
function successResponse(): Response {
  const payload = {
    output: [{ type: "image_generation_call", result: TINY_PNG_BASE64 }],
  };
  const init = {
    status: 200,
    headers: { "Content-Type": "application/json" },
  };
  return new Response(JSON.stringify(payload), init);
}
/**
 * Builds a stubbed HTTP 429 reply with the body "rate limited".
 *
 * @param retryAfter Raw `Retry-After` header value to attach. When omitted
 *   (`undefined`) the response carries no `Retry-After` header at all; any
 *   string, including the empty string, is handed to the `Response`
 *   constructor as the header value.
 * @returns A new 429 `Response`.
 */
function rateLimited(retryAfter?: string): Response {
  const headers: Record<string, string> =
    retryAfter === undefined ? {} : { "Retry-After": retryAfter };
  return new Response("rate limited", { status: 429, headers });
}
/** One observed invocation of the stub fetch built by `makeStubFetch`. */
interface CallRecord {
/** `Date.now()` reading (epoch milliseconds) taken when the stub was invoked. */
ts: number;
}
/**
 * Creates a fetch replacement that replays canned responses in order.
 *
 * Every invocation first appends a timestamp record to `calls`, then resolves
 * with the next unused entry of `responses`. Once the entries run out, the
 * returned promise rejects with an error naming the 1-based call number.
 *
 * @param responses Replies to hand out, one per call, in order.
 * @param calls Caller-owned log that receives one record per invocation,
 *   including invocations that end up rejecting.
 * @returns A function typed like `globalThis.fetch`; its arguments are ignored.
 */
function makeStubFetch(
  responses: Response[],
  calls: CallRecord[],
): typeof globalThis.fetch {
  let served = 0;
  const stub = async (_input: any, _init?: any) => {
    // Record the call before deciding the outcome so failed calls are logged too.
    calls.push({ ts: Date.now() });
    const next = responses[served];
    if (next) {
      served += 1;
      return next;
    }
    throw new Error(`stub fetch: no response for call ${served + 1}`);
  };
  return stub as typeof globalThis.fetch;
}
/**
 * Checks how `generateVariant` paces its retry after a single 429.
 *
 * Every test feeds a stub fetch that answers 429 first and 200 second, then
 * asserts that exactly two calls were made and that the wall-clock gap
 * between them matches the expected wait: the honored `Retry-After` value,
 * or the 2s leading exponential delay when the header is absent or invalid.
 */
describe("generateVariant Retry-After handling", () => {
  let tmpDir: string;
  let outputPath: string;

  // Each test gets its own scratch directory for the output file.
  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "variants-retry-after-"));
    outputPath = path.join(tmpDir, "variant.png");
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  /**
   * Runs `generateVariant` against a stub that replies 429 (with the given
   * `Retry-After` value, or no such header when omitted) and then 200.
   *
   * @returns The `generateVariant` result and the per-call timestamp log.
   */
  async function runAfterRateLimit(retryAfter?: string) {
    const calls: CallRecord[] = [];
    const fetchFn = makeStubFetch([rateLimited(retryAfter), successResponse()], calls);
    const result = await generateVariant(
      "fake-key", "prompt", outputPath, "1024x1024", "high", fetchFn,
    );
    return { result, calls };
  }

  test("delta-seconds: honors Retry-After: 1 with no extra leading exponential", async () => {
    const { result, calls } = await runAfterRateLimit("1");
    expect(result.success).toBe(true);
    expect(calls.length).toBe(2);
    const gap = calls[1].ts - calls[0].ts;
    // Honored ~1s; should NOT add the 2s leading exponential on top
    expect(gap).toBeGreaterThanOrEqual(900);
    expect(gap).toBeLessThan(1700);
  });

  test("HTTP-date: honors a future date with no extra leading exponential", async () => {
    // HTTP-date has one-second resolution (toUTCString drops milliseconds), so
    // a raw `now + 3000` could serialize up to ~1s early and undershoot the
    // 2500ms floor below. Rounding the target up to the next whole second
    // keeps the advertised wait between roughly 3s and 4s.
    const targetMs = Math.ceil((Date.now() + 3000) / 1000) * 1000;
    const future = new Date(targetMs).toUTCString();
    const { result, calls } = await runAfterRateLimit(future);
    expect(result.success).toBe(true);
    expect(calls.length).toBe(2);
    const gap = calls[1].ts - calls[0].ts;
    expect(gap).toBeGreaterThanOrEqual(2500);
    expect(gap).toBeLessThan(4500);
  });

  test("invalid Retry-After (alphanumeric): falls through to exponential", async () => {
    const { result, calls } = await runAfterRateLimit("2abc");
    expect(result.success).toBe(true);
    expect(calls.length).toBe(2);
    const gap = calls[1].ts - calls[0].ts;
    // Falls through to existing 2s exponential leading delay
    expect(gap).toBeGreaterThanOrEqual(1800);
    expect(gap).toBeLessThan(3000);
  });

  test("no Retry-After header: falls through to exponential", async () => {
    const { result, calls } = await runAfterRateLimit();
    expect(result.success).toBe(true);
    expect(calls.length).toBe(2);
    const gap = calls[1].ts - calls[0].ts;
    // Same 2s leading exponential delay as the invalid-header case
    expect(gap).toBeGreaterThanOrEqual(1800);
    expect(gap).toBeLessThan(3000);
  });

  test("Retry-After: 0 retries immediately, skips leading exponential", async () => {
    const { result, calls } = await runAfterRateLimit("0");
    expect(result.success).toBe(true);
    expect(calls.length).toBe(2);
    const gap = calls[1].ts - calls[0].ts;
    // No honored wait and no leading exponential: the retry is near-instant
    expect(gap).toBeLessThan(500);
  });
});