Files
anonymous_github/src/config.ts
T
Thomas Durieux e4ffd74068 Security hardening + gist UI fixes (#731)
* security: harden against XSS, ReDoS, path traversal, and injection

Defensive fixes across the server, storage, and viewer:

- XSS (CWE-79): sanitise rendered notebooks with DOMPurify, escape file
  names interpolated into AngularJS expressions (escapeNgString), set
  Mermaid securityLevel to 'strict', and stop urlRel2abs from returning
  javascript:/vbscript:/data:text/html URLs.
- Path traversal / zip-slip (CWE-22/23/24): validate URL-derived path
  components before they reach the storage layer (file/webview routes +
  StorageBase.assertSafePath) and sanitise zip entry names on extract for
  both the filesystem and S3 backends.
- ReDoS (CWE-1333): escape anonymization terms with catastrophic
  backtracking shapes to literals instead of compiling them as regexes.
- Secret hardening (CWE-798): require SESSION_SECRET / OAuth creds / DB
  password in production; in development, fall back to a random SESSION_SECRET.
- Rate-limit spoofing (CWE-290): derive request.ip via trust-proxy hop
  count instead of the client-settable cf-connecting-ip header.
- NoSQL injection (CWE-943): allow only plain field paths as admin sort keys.
- Reject malformed streamer requests missing required string fields.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(ui): make gists reachable/visible and clarify the ZIP button

- Gist & PR routes now accept a trailing slash (/gist/:id/:path*?), so the
  dashboard links (which end in "/") resolve to the gist/PR page instead of
  falling through to the 404 route (#725).
- Gist viewer picks the default tab after content loads, defaulting to
  "files" when files exist; previously the ng-init ran before the async
  load and a files-only gist rendered blank under the hidden comments tab.
- Explorer toolbar: relabel ZIP to "Full repo ZIP" with a tooltip, and add
  tooltips to Raw/Download clarifying they apply to the current file (#721).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix: report SAML-enforced orgs clearly instead of "token expired"

When a repo's organization enforces SAML SSO, GitHub returns a 403 whose
message differs from the OAuth-App-restriction case. That 403 fell through
to the generic handler and surfaced as "token_expired", pushing users to
re-login when the real fix is authorizing their token for the org. Detect
the "SAML enforcement" message and raise a dedicated, actionable error
instead (#379, #550).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* security: catch nested quantified groups in ReDoS guard and backslash path traversal

- hasCatastrophicBacktracking now scans across nested parens ([\s\S]*?)
  so shapes like ((a+))+ are detected; comment reframed as a heuristic
  backstop rather than a proof.
- file route path-traversal check now rejects backslash separators and a
  leading backslash, covering Windows-style "..\" payloads (CWE-22/25).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* chore(dev): track dev-proxy script, ignore .DS_Store and .claude/

scripts/dev-proxy.js is referenced by the "dev:ui" npm script but was
never committed, breaking the command on a fresh clone. Add it and
ignore local-only macOS/Claude Code files.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 13:50:55 +02:00

145 lines
4.2 KiB
TypeScript

import { resolve } from "path";
import { randomBytes } from "crypto";
/**
 * Shape of the application configuration object exported by this module.
 * Every key can be overridden by an environment variable of the same name.
 */
interface Config {
  // --- Server -------------------------------------------------------------
  PORT: number;
  APP_HOSTNAME: string;
  /** NOTE(review): presumably the trust-proxy hop count — confirm with the server setup. */
  TRUST_PROXY: number;
  RATE_LIMIT: number;

  // --- Sessions and OAuth -------------------------------------------------
  /** Secret protecting session cookies; a known value allows session forgery. */
  SESSION_SECRET: string;
  /** OAuth client credentials. */
  CLIENT_ID: string;
  CLIENT_SECRET: string;
  AUTH_CALLBACK: string;
  GITHUB_TOKEN: string;

  // --- Quotas and size limits ---------------------------------------------
  DEFAULT_QUOTA: number;
  MAX_FILE_FOLDER: number;
  /** Expressed in bytes. */
  MAX_FILE_SIZE: number;
  /** Expressed in kilobytes. */
  MAX_REPO_SIZE: number;
  /** Expressed in kilobytes. */
  AUTO_DOWNLOAD_REPO_SIZE: number;
  /** Expressed in kilobytes. */
  FREE_DOWNLOAD_REPO_SIZE: number;

  // --- Features -----------------------------------------------------------
  /** Whether downloading repositories and files is allowed. */
  ENABLE_DOWNLOAD: boolean;
  STREAMER_ENTRYPOINT: string | null;
  ANONYMIZATION_MASK: string;
  additionalExtensions: string[];

  // --- Database and Redis -------------------------------------------------
  DB_USERNAME: string;
  DB_PASSWORD: string;
  DB_HOSTNAME: string;
  REDIS_HOSTNAME: string;
  REDIS_PORT: number;

  // --- Storage ------------------------------------------------------------
  /** Selects the storage backend. */
  STORAGE: "filesystem" | "s3";
  FOLDER: string;
  /** S3 settings; null when not configured. */
  S3_BUCKET: string | null;
  S3_CLIENT_ID: string | null;
  S3_CLIENT_SECRET: string | null;
  S3_ENDPOINT: string | null;
  S3_REGION: string | null;
}
/**
 * Built-in defaults. Environment variables with the same name replace these
 * values once the module has finished loading.
 */
const config: Config = {
  // Never ship a predictable session secret: anyone who knows it can forge
  // session cookies. Left empty here and resolved after the environment has
  // been read (random in development, mandatory in production).
  SESSION_SECRET: "",
  // Placeholder OAuth credentials.
  CLIENT_ID: "CLIENT_ID",
  CLIENT_SECRET: "CLIENT_SECRET",
  GITHUB_TOKEN: "",
  // 2 * 8 * 1024^3
  DEFAULT_QUOTA: 16 * 1024 ** 3,
  MAX_FILE_FOLDER: 1000,
  // bytes: 100MB
  MAX_FILE_SIZE: 100 * 1024 ** 2,
  // kilobytes: 60MB
  MAX_REPO_SIZE: 60000,
  // kilobytes: 150kb
  AUTO_DOWNLOAD_REPO_SIZE: 150,
  // kilobytes: 150kb
  FREE_DOWNLOAD_REPO_SIZE: 150,
  ENABLE_DOWNLOAD: true,
  AUTH_CALLBACK: "http://localhost:5000/github/auth",
  ANONYMIZATION_MASK: "XXXX",
  PORT: 5000,
  TRUST_PROXY: 1,
  RATE_LIMIT: 350,
  APP_HOSTNAME: "anonymous.4open.science",
  // Placeholder database credentials.
  DB_USERNAME: "admin",
  DB_PASSWORD: "password",
  DB_HOSTNAME: "mongodb",
  REDIS_HOSTNAME: "redis",
  REDIS_PORT: 6379,
  // Sibling "repositories" directory, one level above this module.
  FOLDER: resolve(__dirname, "..", "repositories"),
  additionalExtensions: [
    "license",
    "dockerfile",
    "sbt",
    "ipynb",
    "gp",
    "out",
    "sol",
    "in",
    "jsonl",
    "ndjson",
  ],
  STORAGE: "filesystem",
  STREAMER_ENTRYPOINT: null,
  // S3 is unconfigured by default.
  S3_BUCKET: null,
  S3_CLIENT_ID: null,
  S3_CLIENT_SECRET: null,
  S3_ENDPOINT: null,
  S3_REGION: null,
};
// Overlay environment variables onto the defaults. Only variables whose name
// is an own key of `config` are considered, and each value is coerced to the
// type of the default it replaces (number, boolean, otherwise raw string).
for (const conf of Object.keys(process.env)) {
  const configRecord = config as unknown as Record<string, unknown>;
  const envValue = process.env[conf];
  // Own-property check: a variable named e.g. `toString` or `constructor`
  // must not match members inherited from Object.prototype.
  if (
    envValue === undefined ||
    !Object.prototype.hasOwnProperty.call(configRecord, conf)
  ) {
    continue;
  }
  const currentValue = configRecord[conf];
  if (currentValue === undefined) {
    continue;
  }
  if (typeof currentValue === "number") {
    // Number("") and Number("  ") evaluate to 0, so a blank variable such as
    // `PORT=` would silently zero the setting. Treat blank input like any
    // other unparsable value and keep the default.
    const parsed = envValue.trim() === "" ? NaN : Number(envValue);
    if (Number.isNaN(parsed)) {
      // eslint-disable-next-line no-console
      console.warn(
        `Ignoring ${conf}: ${JSON.stringify(envValue)} is not a number; keeping ${currentValue}`
      );
    } else {
      configRecord[conf] = parsed;
    }
  } else if (typeof currentValue === "boolean") {
    // Only the exact strings "true" and "1" enable a flag.
    configRecord[conf] = envValue === "true" || envValue === "1";
  } else {
    // Strings, null defaults and any other default take the raw value.
    configRecord[conf] = envValue;
  }
}
// Post-environment hardening (CWE-798): secrets that are still unset or still
// hold a predictable value at this point must not be used as-is.
const isProduction = process.env.NODE_ENV === "production";

// SESSION_SECRET is usable only when it is non-empty and differs from the
// literal placeholder "SESSION_SECRET"; a known value allows session forgery.
const hasUsableSessionSecret =
  Boolean(config.SESSION_SECRET) && config.SESSION_SECRET !== "SESSION_SECRET";

// Production refuses to boot without a real secret.
if (!hasUsableSessionSecret && isProduction) {
  throw new Error(
    "SESSION_SECRET must be set to a strong random value in production"
  );
}

// Outside production, substitute a per-process random secret so the app still
// starts without relying on a guessable value.
if (!hasUsableSessionSecret) {
  config.SESSION_SECRET = randomBytes(32).toString("hex");
  // eslint-disable-next-line no-console
  console.warn(
    "SESSION_SECRET not set — generated a random development secret. " +
      "Sessions will not persist across restarts. Set SESSION_SECRET in production."
  );
}
// In production, abort startup when the OAuth credentials or the database
// password still equal the placeholder values that ship with the defaults.
if (isProduction) {
  // Setting name -> placeholder value that must not reach production.
  const placeholders: Record<string, string> = {
    CLIENT_ID: "CLIENT_ID",
    CLIENT_SECRET: "CLIENT_SECRET",
    DB_PASSWORD: "password",
  };
  const settings = config as unknown as Record<string, unknown>;
  // Settings are checked in declaration order; the first offender is reported.
  const offender = Object.keys(placeholders).find(
    (name) => settings[name] === placeholders[name]
  );
  if (offender !== undefined) {
    throw new Error(
      `${offender} is using its insecure default value; set it via the environment in production`
    );
  }
}
export default config;