From b3c1030e5c7b7bbdda34d6b1d9a40470dafb3f5c Mon Sep 17 00:00:00 2001 From: tdurieux Date: Sun, 3 May 2026 20:31:32 +0200 Subject: [PATCH] fix: revalidate cached files when anonymization options change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Files were being served with Cache-Control: max-age=18144000 (210 days) keyed only on the upstream ?v= query parameter. Editing the term list left the same URL serving stale anonymized bytes — visible to users in regular tabs but not in incognito. The "it works in incognito" workaround reported in #439 was a symptom of exactly this stale cache entry. Switch to ETag-based revalidation that fingerprints both the upstream sha and the saved anonymization options, with Cache-Control: private, no-cache, must-revalidate. Browsers now revalidate on every request and get a 304 when nothing has changed, or fresh content as soon as the terms or the image/link/etc. options are updated. Fixes #439. --- src/server/routes/file-etag.ts | 16 ++++++++++++++ src/server/routes/file.ts | 19 +++++++++++------ test/file-etag.test.js | 39 ++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 src/server/routes/file-etag.ts create mode 100644 test/file-etag.test.js diff --git a/src/server/routes/file-etag.ts b/src/server/routes/file-etag.ts new file mode 100644 index 0000000..90ad240 --- /dev/null +++ b/src/server/routes/file-etag.ts @@ -0,0 +1,16 @@ +import { createHash } from "crypto"; + +// Build an ETag that fingerprints both the upstream content (?v=) and +// the anonymization config the user has saved. Without the config part, the +// browser kept serving content anonymized under an older term list — see +// #439 (anonymization "doesn't work" in regular tabs but works in incognito). +export function fileETag( + versionParam: string | undefined, + options: unknown +): string { + const h = createHash("sha1"); + h.update(versionParam || ""); + h.update("|"); + h.update(JSON.stringify(options ?? 
null)); + return `"f-${h.digest("hex")}"`; +} diff --git a/src/server/routes/file.ts b/src/server/routes/file.ts index b20fbac..69157d4 100644 --- a/src/server/routes/file.ts +++ b/src/server/routes/file.ts @@ -2,6 +2,7 @@ import * as express from "express"; import AnonymizedFile from "../../core/AnonymizedFile"; import AnonymousError from "../../core/AnonymousError"; import { getRepo, handleError } from "./route-utils"; +import { fileETag } from "./file-etag"; export const router = express.Router(); @@ -51,12 +52,18 @@ router.get( anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1) ); } - if (req.query.v) { - // cache the file for a month - res.header("Cache-Control", "max-age=18144000"); - } else { - // cache the file for 5min - res.header("Cache-Control", "max-age=300"); + // ?v= can parse to an array/object (e.g. ?v=a&v=b); hashing that would + // throw, so anything that is not a plain string counts as "no version". + const version = typeof req.query.v === "string" ? req.query.v : undefined; + const etag = fileETag(version, repo.model.options); + res.header("ETag", etag); + // Always revalidate: the old 210-day max-age was keyed only on the + // upstream sha, so term-list edits kept serving stale bytes. req.fresh + // (not a raw string compare) matches If-None-Match lists, weak W/ tags + // and "*", and honours a client's Cache-Control: no-cache reload. + res.header("Cache-Control", "private, no-cache, must-revalidate"); + if (req.fresh) { + return res.status(304).end(); } await f.send(res); await repo.countView(); diff --git a/test/file-etag.test.js b/test/file-etag.test.js new file mode 100644 index 0000000..ecd78f8 --- /dev/null +++ b/test/file-etag.test.js @@ -0,0 +1,39 @@ +const { expect } = require("chai"); +require("ts-node/register/transpile-only"); +const { fileETag } = require("../src/server/routes/file-etag"); + +describe("fileETag", function () { + it("changes when the upstream sha changes", function () { + const opts = { terms: ["alice"] }; + expect(fileETag("sha1", opts)).to.not.equal(fileETag("sha2", opts)); + }); + + // #439 — without folding the anonymization options into the ETag, editing + // the term list left the same URL serving stale anonymized bytes. 
+ it("changes when the anonymization terms change", function () { + const a = fileETag("sha1", { terms: ["alice"] }); + const b = fileETag("sha1", { terms: ["alice", "bob"] }); + expect(a).to.not.equal(b); + }); + + it("changes when an anonymization toggle changes", function () { + const a = fileETag("sha1", { terms: ["alice"], image: true }); + const b = fileETag("sha1", { terms: ["alice"], image: false }); + expect(a).to.not.equal(b); + }); + + it("is stable for the same inputs", function () { + const opts = { terms: ["alice", "bob"], image: true }; + expect(fileETag("sha1", opts)).to.equal(fileETag("sha1", opts)); + }); + + it("treats missing version like an empty string", function () { + const opts = { terms: [] }; + expect(fileETag(undefined, opts)).to.equal(fileETag("", opts)); + }); + + it("returns a quoted opaque tag", function () { + const tag = fileETag("sha1", { terms: [] }); + expect(tag).to.match(/^"f-[0-9a-f]{40}"$/); + }); +});