fix: revalidate cached files when anonymization options change

Files were being served with Cache-Control: max-age=18144000 (210 days)
keyed only on the upstream ?v=<sha>. Editing the term list left the
same URL serving stale anonymized bytes — visible to users in regular
tabs but not in incognito. The "it works in incognito" workaround
reported in #439 is exactly this symptom.

Switch to ETag-based revalidation that fingerprints both the upstream
sha and the saved anonymization options, with Cache-Control:
no-cache, must-revalidate. Browsers now revalidate on every request and
get a 304 when nothing has changed, or fresh content as soon as the term
list or any other option (image, link, etc.) is updated.

Fixes #439.
This commit is contained in:
tdurieux
2026-05-03 20:31:32 +02:00
parent b316d18bd8
commit b3c1030e5c
3 changed files with 68 additions and 6 deletions
+16
View File
@@ -0,0 +1,16 @@
import { createHash } from "crypto";
/**
 * Serialize a value to JSON with object keys sorted at every nesting level,
 * so two structurally equal values always yield the same string regardless
 * of property insertion order. Array order is preserved because it can be
 * meaningful. `toJSON()` hooks are honoured, since JSON.stringify applies
 * them before handing each value to the replacer.
 *
 * Falls back to "null" when the value has no JSON representation
 * (undefined, a function, a symbol).
 */
function canonicalJson(value: unknown): string {
  const sortKeys = (_key: string, v: unknown): unknown => {
    if (v === null || typeof v !== "object" || Array.isArray(v)) {
      return v;
    }
    const source = v as Record<string, unknown>;
    // Object.fromEntries defines own data properties, so a key such as
    // "__proto__" cannot hijack the prototype of the sorted copy.
    return Object.fromEntries(
      Object.keys(source)
        .sort()
        .map((k): [string, unknown] => [k, source[k]])
    );
  };
  // The lib typing says `string`, but JSON.stringify really returns
  // undefined for values that cannot be serialized.
  const json: string | undefined = JSON.stringify(value ?? null, sortKeys);
  return json ?? "null";
}

/**
 * Build an ETag that fingerprints both the upstream content (?v=<sha>) and
 * the anonymization config the user has saved. Without the config part, the
 * browser kept serving content anonymized under an older term list — see
 * #439 (anonymization "doesn't work" in regular tabs but works in incognito).
 *
 * @param versionParam Value of the `v` query parameter; a missing value is
 *   treated like an empty string.
 * @param options Saved anonymization options; any JSON-serializable value.
 *   Property order does not affect the result.
 * @returns A quoted opaque tag of the form `"f-<40 hex chars>"`.
 */
export function fileETag(
  versionParam: string | undefined,
  options: unknown
): string {
  // Callers obtain the version from a query string through a type assertion,
  // so at runtime it may be an array or object (e.g. `?v=a&v=b`). Feeding
  // that straight to Hash.update() would throw; serialize it instead.
  const rawVersion: unknown = versionParam;
  let version = "";
  if (typeof rawVersion === "string") {
    version = rawVersion;
  } else if (rawVersion) {
    version = canonicalJson(rawVersion);
  }

  const h = createHash("sha1");
  h.update(version);
  h.update("|");
  h.update(canonicalJson(options));
  return `"f-${h.digest("hex")}"`;
}
+13 -6
View File
@@ -2,6 +2,7 @@ import * as express from "express";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import { getRepo, handleError } from "./route-utils";
import { fileETag } from "./file-etag";
export const router = express.Router();
@@ -51,12 +52,18 @@ router.get(
anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
);
}
if (req.query.v) {
// cache the file for a month
res.header("Cache-Control", "max-age=18144000");
} else {
// cache the file for 5min
res.header("Cache-Control", "max-age=300");
const etag = fileETag(
req.query.v as string | undefined,
repo.model.options
);
res.header("ETag", etag);
// Force the browser to revalidate every time. The previous 210-day
// max-age was keyed only on the upstream sha, so editing the
// anonymization term list left old anonymizations cached under the
// same URL.
res.header("Cache-Control", "private, no-cache, must-revalidate");
if (req.headers["if-none-match"] === etag) {
return res.status(304).end();
}
await f.send(res);
await repo.countView();
+39
View File
@@ -0,0 +1,39 @@
const { expect } = require("chai");
require("ts-node/register/transpile-only");
const { fileETag } = require("../src/server/routes/file-etag");
// Unit tests for fileETag: the tag must react to every input that affects
// the served bytes, and stay fixed otherwise.
describe("fileETag", () => {
  it("changes when the upstream sha changes", () => {
    const options = { terms: ["alice"] };
    const forFirstSha = fileETag("sha1", options);
    const forSecondSha = fileETag("sha2", options);
    expect(forFirstSha).to.not.equal(forSecondSha);
  });

  // Regression guard for #439: when the options were not part of the ETag,
  // editing the term list kept the old anonymized bytes under the same URL.
  it("changes when the anonymization terms change", () => {
    const before = fileETag("sha1", { terms: ["alice"] });
    const after = fileETag("sha1", { terms: ["alice", "bob"] });
    expect(before).to.not.equal(after);
  });

  it("changes when an anonymization toggle changes", () => {
    const enabled = fileETag("sha1", { terms: ["alice"], image: true });
    const disabled = fileETag("sha1", { terms: ["alice"], image: false });
    expect(enabled).to.not.equal(disabled);
  });

  it("is stable for the same inputs", () => {
    const options = { terms: ["alice", "bob"], image: true };
    const firstCall = fileETag("sha1", options);
    const secondCall = fileETag("sha1", options);
    expect(firstCall).to.equal(secondCall);
  });

  it("treats missing version like an empty string", () => {
    const options = { terms: [] };
    const withoutVersion = fileETag(undefined, options);
    const withEmptyVersion = fileETag("", options);
    expect(withoutVersion).to.equal(withEmptyVersion);
  });

  it("returns a quoted opaque tag", () => {
    // "f-" prefix followed by a 40-character lowercase hex digest, in quotes.
    const quotedTagPattern = /^"f-[0-9a-f]{40}"$/;
    expect(fileETag("sha1", { terms: [] })).to.match(quotedTagPattern);
  });
});