Sanitize markdown HTML output with DOMPurify to prevent XSS (#658)

This commit is contained in:
Thomas Durieux
2026-04-15 04:22:38 +02:00
committed by GitHub
parent b2d77faa6c
commit 812f8b6314
6 changed files with 1299 additions and 3 deletions
+1028
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -58,6 +58,7 @@
"express-slow-down": "^2.0.1",
"got": "^11.8.6",
"inquirer": "^8.2.6",
"isomorphic-dompurify": "^3.8.0",
"istextorbinary": "^9.5.0",
"marked": "^5.1.2",
"mime-types": "^2.1.35",
@@ -86,6 +87,7 @@
"@types/passport": "^1.0.16",
"@types/passport-github2": "^1.2.9",
"@types/unzip-stream": "^0.3.4",
"chai": "^4.5.0",
"gulp": "^5.0.0",
"gulp-clean-css": "^4.3.0",
"gulp-concat": "^2.6.1",
+1 -1
View File
File diff suppressed because one or more lines are too long
+1 -1
View File
@@ -182,5 +182,5 @@ function renderMD(md, baseUrlValue) {
throwOnError: false,
})
);
return marked.parse(md, { renderer });
return DOMPurify.sanitize(marked.parse(md, { renderer }));
}
+2 -1
View File
@@ -4,6 +4,7 @@ import * as path from "path";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import * as marked from "marked";
import DOMPurify from "isomorphic-dompurify";
import { streamToString } from "../../core/anonymize-utils";
import { IFile } from "../../core/model/files/files.types";
@@ -113,7 +114,7 @@ async function webView(req: express.Request, res: express.Response) {
}
if (f.extension() == "md") {
const content = await streamToString(await f.anonymizedContent());
const body = marked.marked(content, { headerIds: false, mangle: false });
const body = DOMPurify.sanitize(marked.marked(content, { headerIds: false, mangle: false }));
const html = `<!DOCTYPE html><html><head><title>Content</title></head><link rel="stylesheet" href="/css/all.min.css" /><body><div class="container p-3 file-content markdown-body">${body}<div></body></html>`;
res.contentType("text/html").send(html);
} else {
+265
View File
@@ -0,0 +1,265 @@
const { expect } = require("chai");
const { marked } = require("marked");
const DOMPurify = require("isomorphic-dompurify");
/**
 * Render a markdown string to HTML and sanitize the result.
 *
 * Reproduces the server-side pipeline used for `.md` files in webview.ts:
 *   DOMPurify.sanitize(marked.marked(content, { headerIds: false, mangle: false }))
 *
 * @param markdown markdown source text to render
 * @returns the value returned by DOMPurify.sanitize for the rendered HTML
 */
function renderAndSanitize(markdown) {
  return DOMPurify.sanitize(
    marked(markdown, { headerIds: false, mangle: false })
  );
}
describe("Markdown sanitization", () => {
  /** Assert that none of the given substrings occur in the rendered output. */
  const expectAbsent = (rendered, ...needles) => {
    for (const needle of needles) {
      expect(rendered).to.not.include(needle);
    }
  };

  /** Assert that every one of the given substrings occurs in the rendered output. */
  const expectPresent = (rendered, ...needles) => {
    for (const needle of needles) {
      expect(rendered).to.include(needle);
    }
  };

  // === <script> injection ===
  describe("removes script tags", () => {
    it("strips inline <script> tags", () => {
      const out = renderAndSanitize('<script>alert("xss")</script>');
      expectAbsent(out, "<script", "alert(");
    });

    it("strips script tags with src attribute", () => {
      const out = renderAndSanitize(
        '<script src="https://evil.com/xss.js"></script>'
      );
      expectAbsent(out, "<script", "evil.com");
    });

    it("strips script tags embedded in markdown", () => {
      const out = renderAndSanitize(
        "# Hello\n\n<script>document.cookie</script>\n\nWorld"
      );
      expectAbsent(out, "<script");
      expectPresent(out, "Hello", "World");
    });
  });

  // === inline on* event handler attributes ===
  describe("removes event handlers", () => {
    it("strips onerror handler on img", () => {
      const out = renderAndSanitize('<img src=x onerror="alert(1)">');
      expectAbsent(out, "onerror");
    });

    it("strips onload handler on img", () => {
      const out = renderAndSanitize('<img src="valid.png" onload="alert(1)">');
      expectAbsent(out, "onload");
    });

    it("strips onmouseover handler on a tag", () => {
      const out = renderAndSanitize(
        '<a href="#" onmouseover="alert(1)">hover me</a>'
      );
      expectAbsent(out, "onmouseover");
      expectPresent(out, "hover me");
    });

    it("strips onfocus handler on input", () => {
      const out = renderAndSanitize('<input onfocus="alert(1)" autofocus>');
      expectAbsent(out, "onfocus");
    });
  });

  // === javascript: scheme in URLs ===
  describe("removes javascript: URLs", () => {
    it("strips javascript: href in anchor", () => {
      const out = renderAndSanitize('<a href="javascript:alert(1)">click</a>');
      expectAbsent(out, "javascript:");
    });

    it("strips javascript: href in markdown link syntax", () => {
      const out = renderAndSanitize("[click](javascript:alert(1))");
      expectAbsent(out, "javascript:");
    });
  });

  // === iframe / object / embed / form ===
  describe("removes dangerous elements", () => {
    it("strips iframe", () => {
      const out = renderAndSanitize('<iframe src="https://evil.com"></iframe>');
      expectAbsent(out, "<iframe");
    });

    it("strips object tag", () => {
      const out = renderAndSanitize('<object data="malware.swf"></object>');
      expectAbsent(out, "<object");
    });

    it("strips embed tag", () => {
      const out = renderAndSanitize('<embed src="malware.swf">');
      expectAbsent(out, "<embed");
    });

    it("strips form action with javascript: URL", () => {
      const out = renderAndSanitize(
        '<form action="javascript:alert(1)"><input type="submit"></form>'
      );
      expectAbsent(out, "javascript:");
    });
  });

  // === SVG payloads ===
  describe("removes SVG-based XSS", () => {
    it("strips svg with onload", () => {
      const out = renderAndSanitize('<svg onload="alert(1)">');
      expectAbsent(out, "onload");
    });

    it("strips svg with embedded script", () => {
      const out = renderAndSanitize("<svg><script>alert(1)</script></svg>");
      expectAbsent(out, "<script");
    });
  });

  // === data: scheme in URLs ===
  describe("removes data: URL attacks", () => {
    it("strips data:text/html href", () => {
      const out = renderAndSanitize(
        '<a href="data:text/html,<script>alert(1)</script>">click</a>'
      );
      expectAbsent(out, "data:text/html");
    });
  });

  // === <style> payloads ===
  describe("removes style-based attacks", () => {
    it("strips style tags with expressions", () => {
      const out = renderAndSanitize(
        "<style>body { background: url('javascript:alert(1)') }</style>"
      );
      expectAbsent(out, "javascript:");
    });
  });

  // === ordinary markdown must still render ===
  describe("preserves safe markdown content", () => {
    it("preserves headings", () => {
      const out = renderAndSanitize("# Heading 1\n## Heading 2");
      expectPresent(out, "<h1>", "Heading 1", "<h2>");
    });

    it("preserves paragraphs", () => {
      const out = renderAndSanitize("Hello world\n\nSecond paragraph");
      expectPresent(out, "<p>", "Hello world");
    });

    it("preserves bold and italic", () => {
      const out = renderAndSanitize("**bold** and *italic*");
      expectPresent(out, "<strong>bold</strong>", "<em>italic</em>");
    });

    it("preserves links", () => {
      const out = renderAndSanitize("[example](https://example.com)");
      expectPresent(out, "https://example.com", "example");
    });

    it("preserves images", () => {
      const out = renderAndSanitize("![alt](https://example.com/img.png)");
      expectPresent(out, "<img", "https://example.com/img.png");
    });

    it("preserves code blocks", () => {
      const out = renderAndSanitize("```js\nconsole.log('hi')\n```");
      expectPresent(out, "<code", "console.log");
    });

    it("preserves inline code", () => {
      const out = renderAndSanitize("Use `npm install` to install");
      expectPresent(out, "<code>npm install</code>");
    });

    it("preserves unordered lists", () => {
      const out = renderAndSanitize("- item 1\n- item 2\n- item 3");
      expectPresent(out, "<ul>", "<li>", "item 1");
    });

    it("preserves ordered lists", () => {
      const out = renderAndSanitize("1. first\n2. second");
      expectPresent(out, "<ol>", "first");
    });

    it("preserves blockquotes", () => {
      const out = renderAndSanitize("> This is a quote");
      expectPresent(out, "<blockquote>", "This is a quote");
    });

    it("preserves tables", () => {
      const out = renderAndSanitize("| A | B |\n|---|---|\n| 1 | 2 |");
      expectPresent(out, "<table>", "<th>", "<td>");
    });

    it("preserves horizontal rules", () => {
      const out = renderAndSanitize("---");
      expectPresent(out, "<hr");
    });
  });

  // === hostile and benign content in the same document ===
  describe("handles mixed content", () => {
    it("strips malicious parts while keeping safe parts", () => {
      const out = renderAndSanitize(
        '# Title\n\nSafe paragraph.\n\n<script>alert("xss")</script>\n\n**Bold text**'
      );
      expectAbsent(out, "<script");
      expectPresent(out, "Title", "Safe paragraph", "<strong>Bold text</strong>");
    });

    it("strips event handlers from otherwise-safe tags", () => {
      const out = renderAndSanitize(
        '<img src="photo.jpg" alt="photo" onerror="alert(1)">'
      );
      expectAbsent(out, "onerror");
      expectPresent(out, "photo.jpg", 'alt="photo"');
    });
  });
});