fix(design): honor Retry-After header in variants 429 handler

Closes #1244.

The 429 handler in `generateVariant` discarded the `Retry-After` response
header and fell straight through to a local exponential schedule (2s/4s/8s).
In image-generation batches, that burns retry attempts inside the provider's
cooldown window and the request never recovers.

Now we parse `Retry-After` per RFC 7231 — both delta-seconds (`Retry-After: 5`)
and HTTP-date (`Retry-After: Fri, 31 Dec 1999 23:59:59 GMT`). Honored waits
are capped at 60s to bound stalls from hostile or buggy headers. Delta-seconds
are validated as digits-only (rejects `2abc`). When `Retry-After` is honored
(including 0 / past-date "retry now"), the next iteration's leading exponential
sleep is skipped so we don't double-wait. Invalid or missing headers fall
through to the existing exponential schedule unchanged.

Behavior matrix:

| Header                          | Behavior                                  |
|---------------------------------|-------------------------------------------|
| Retry-After: 5                  | wait 5s, skip leading on next attempt     |
| Retry-After: 999999             | capped to 60s, skip leading               |
| Retry-After: 2abc               | invalid, fall through to exponential      |
| Retry-After: 0                  | wait 0, skip leading (retry immediately)  |
| Retry-After: <past HTTP-date>   | wait 0, skip leading                      |
| Retry-After: <future date>      | wait diff capped at 60s, skip leading     |
| no header                       | fall through to existing exponential      |

`generateVariant` now accepts an optional `fetchFn` parameter (defaults to
`globalThis.fetch`) so tests can inject a stub. Production call sites are
unchanged.

Tests cover five behavior buckets from the matrix above, asserting both the
timing gap between the 1st and 2nd calls and the call counts. All five pass in ~8s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Stefan Neamtu
2026-05-06 12:01:52 +02:00
parent 19e699ab9b
commit 4bdb02070f
2 changed files with 166 additions and 3 deletions
+33 -3
View File
@@ -31,30 +31,37 @@ const STYLE_VARIATIONS = [
/**
* Generate a single variant with retry on 429.
*
* Exported for testability. Pass `fetchFn` to inject a stubbed fetch in tests;
* production code uses the global fetch by default.
*/
async function generateVariant(
export async function generateVariant(
apiKey: string,
prompt: string,
outputPath: string,
size: string,
quality: string,
fetchFn: typeof globalThis.fetch = globalThis.fetch,
): Promise<{ path: string; success: boolean; error?: string }> {
const maxRetries = 3;
const MAX_RETRY_AFTER_MS = 60_000; // cap honored Retry-After to bound stalls
let lastError = "";
let skipLeadingDelay = false;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
if (attempt > 0) {
if (attempt > 0 && !skipLeadingDelay) {
// Exponential backoff: 2s, 4s, 8s
const delay = Math.pow(2, attempt) * 1000;
console.error(` Rate limited, retrying in ${delay / 1000}s...`);
await new Promise(r => setTimeout(r, delay));
}
skipLeadingDelay = false;
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 120_000);
try {
const response = await fetch("https://api.openai.com/v1/responses", {
const response = await fetchFn("https://api.openai.com/v1/responses", {
method: "POST",
headers: {
"Authorization": `Bearer ${apiKey}`,
@@ -72,6 +79,29 @@ async function generateVariant(
if (response.status === 429) {
lastError = "Rate limited (429)";
const retryAfter = response.headers.get("retry-after");
if (retryAfter) {
const trimmed = retryAfter.trim();
let waitMs: number | null = null;
if (/^\d+$/.test(trimmed)) {
// delta-seconds (RFC 7231)
waitMs = Math.min(Number.parseInt(trimmed, 10) * 1000, MAX_RETRY_AFTER_MS);
} else {
// HTTP-date (RFC 7231)
const dateMs = Date.parse(trimmed);
if (!Number.isNaN(dateMs)) {
waitMs = Math.min(Math.max(0, dateMs - Date.now()), MAX_RETRY_AFTER_MS);
}
}
if (waitMs !== null) {
if (waitMs > 0) {
await new Promise(resolve => setTimeout(resolve, waitMs));
}
// Honored Retry-After (incl. 0 / past date "retry now") — skip the
// next iteration's leading exponential sleep so we don't double-wait.
skipLeadingDelay = true;
}
}
continue;
}