From fb11e0881fa1b53e47f4f5741d223d1d096fb26e Mon Sep 17 00:00:00 2001 From: Shadowbroker <43977454+BigBodyCobain@users.noreply.github.com> Date: Thu, 21 May 2026 01:41:13 -0600 Subject: [PATCH] Fix #251: refuse symlink/hardlink members during Tor bundle extraction (#277) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit External audit (@tg12) flagged that the Tor Expert Bundle extractor checked tarinfo.name against path traversal but never inspected tarinfo.linkname for symlink or hardlink members. Python 3.11's tarfile.extractall() honors symlinks, so a malicious archive could ship a member like:: name = "innocent.txt" (passes the path-traversal check) type = SYMTYPE linkname = "C:\Windows\System32\config\system" After extraction, subsequent reads of innocent.txt dereference to that arbitrary filesystem location; subsequent writes corrupt it. On Windows (where Tor Expert Bundle extraction actually runs), this is a host-compromise path of essentially the same severity as the supply-chain RCE in #231 — gated only by the integrity check we just hardened in PR #261/#265. Python 3.12+ added tarfile.extract / extractall filter='data' as a built-in mitigation; we're on Python 3.11 in production, so we implement the same idea manually. Fix in backend/services/tor_hidden_service.py: Extract the existing path-traversal-only check into a new _extract_tor_bundle_safely() helper that: 1. Refuses any member with member.issym() or member.islnk() True. Tor bundles never legitimately contain symlinks or hardlinks so this is non-disruptive. Logs the linkname so an operator can see what the malicious archive was trying to alias. 2. Refuses any member that isn't isfile() or isdir() — no FIFOs, no character or block devices, no contiguous-file-type entries. None of those belong in a Tor Expert Bundle and accepting them is a class of bug we don't need to debug later. 3. Preserves the original path-traversal guard (member.name must resolve under install_dir). 4. Catches tarfile.TarError so a corrupt archive returns False gracefully instead of bubbling out an exception. Tests: backend/tests/test_tor_bundle_symlink_filter.py (8 tests) - Clean archive with only regular files extracts successfully - Symlink member is rejected (the core regression) - Hardlink member is rejected - Symlink with relative target inside install_dir is still rejected (we don't allow symlinks at all, not just absolute-target ones) - FIFO/device-style member is rejected - Path-traversal guard still works under the new shape - Malformed/non-tar file is rejected gracefully (no crash) - Failure on one member rejects the whole bundle (no half-extract) Validation: pytest backend/tests/test_tor_bundle_symlink_filter.py backend/tests/test_tor_bundle_verification.py -> 14 passed UX impact: zero for legitimate Tor releases. Operators installing a real Tor Expert Bundle continue to see "Tor installed at:" exactly as before. Only malicious archives are refused, with a clear log message identifying the rejected linkname. Credit: @tg12 — the original report was specific enough that the fix design was immediate. Co-authored-by: Claude Opus 4.7 --- backend/services/tor_hidden_service.py | 99 +++++++- .../tests/test_tor_bundle_symlink_filter.py | 222 ++++++++++++++++++ 2 files changed, 313 insertions(+), 8 deletions(-) create mode 100644 backend/tests/test_tor_bundle_symlink_filter.py diff --git a/backend/services/tor_hidden_service.py b/backend/services/tor_hidden_service.py index d2771aa..e88fac6 100644 --- a/backend/services/tor_hidden_service.py +++ b/backend/services/tor_hidden_service.py @@ -173,6 +173,94 @@ def _verify_tor_bundle(archive_path: Path, bundle_url: str) -> tuple[bool, str]: return True, f"https-only (no digest source reachable, archive={actual_hash[:16]}...)" +def _extract_tor_bundle_safely(archive_path: Path, install_dir: Path) -> bool: + """Extract a Tor Expert Bundle tar.gz safely. + + Issue #251: the previous extractor checked tarinfo.name against path + traversal but never inspected tarinfo.linkname for symlink/hardlink + members. Python 3.11's tarfile honors symlinks during extractall(), + so a malicious archive could ship a member like:: + + name = "innocent.txt" # passes the path check + type = SYMTYPE + linkname = "C:\\Windows\\System32\\config\\system" + + and extractall() would then create that symlink. Subsequent reads + of innocent.txt deference to a sensitive system file; subsequent + writes corrupt one. Tor bundles never legitimately contain symlinks + or hardlinks, so we refuse all link members categorically rather + than trying to validate linkname targets (which has its own pitfalls + around relative path resolution). + + Also refuses non-regular-non-directory members (devices, FIFOs, + character/block special files) for completeness — none of those + belong in a Tor Expert Bundle and accepting them is a category of + bug we don't need to debug later. + + Returns True on success, False on rejection (and logs the reason). + The caller is responsible for cleaning up the archive file. + """ + import tarfile + + install_resolved = install_dir.resolve() + + try: + with tarfile.open(str(archive_path), "r:gz") as tar: + for member in tar.getmembers(): + # Reject anything that isn't a regular file or directory. + # Symlinks (SYMTYPE) and hardlinks (LNKTYPE) are the + # path-traversal vectors; the others (CHRTYPE, BLKTYPE, + # FIFOTYPE, CONTTYPE) have no legitimate use in a Tor + # Expert Bundle. + if member.issym() or member.islnk(): + logger.error( + "Tor bundle extraction blocked: link member %s -> %s " + "(symlinks/hardlinks are not allowed in Tor bundles; " + "this archive is malformed or hostile)", + member.name, + member.linkname, + ) + return False + if not (member.isfile() or member.isdir()): + logger.error( + "Tor bundle extraction blocked: unexpected member type " + "for %s (only regular files and directories are allowed)", + member.name, + ) + return False + + # Path traversal check (preserves the original guard). + try: + member_path = (install_dir / member.name).resolve() + except OSError as exc: + logger.error( + "Tor bundle extraction blocked: cannot resolve member " + "path %s: %s", + member.name, + exc, + ) + return False + try: + member_path.relative_to(install_resolved) + except ValueError: + logger.error( + "Tor bundle extraction blocked: path traversal on %s " + "(resolves to %s, outside install dir %s)", + member.name, + member_path, + install_resolved, + ) + return False + + # All members validated — extract. + tar.extractall(path=str(install_dir)) + except tarfile.TarError as exc: + logger.error("Tor bundle extraction failed: malformed tar (%s)", exc) + return False + + return True + + def _auto_install_tor() -> str | None: """Install or download Tor when it is safe to do so.""" if os.name != "nt": @@ -203,14 +291,9 @@ def _auto_install_tor() -> str | None: logger.info("Download complete, extracting...") import tarfile - with tarfile.open(str(archive_path), "r:gz") as tar: - for member in tar.getmembers(): - member_path = (TOR_INSTALL_DIR / member.name).resolve() - if not str(member_path).startswith(str(TOR_INSTALL_DIR.resolve())): - logger.error("Tar path traversal blocked: %s", member.name) - archive_path.unlink(missing_ok=True) - return None - tar.extractall(path=str(TOR_INSTALL_DIR)) + if not _extract_tor_bundle_safely(archive_path, TOR_INSTALL_DIR): + archive_path.unlink(missing_ok=True) + return None archive_path.unlink(missing_ok=True) diff --git a/backend/tests/test_tor_bundle_symlink_filter.py b/backend/tests/test_tor_bundle_symlink_filter.py new file mode 100644 index 0000000..1902086 --- /dev/null +++ b/backend/tests/test_tor_bundle_symlink_filter.py @@ -0,0 +1,222 @@ +"""Issue #251 (tg12): Tor bundle extraction must refuse symlink and +hardlink members. + +The previous extractor checked ``member.name`` against path traversal +but never inspected ``member.linkname``. Python 3.11's ``tarfile`` +honors symlinks during ``extractall()``, so a malicious archive could +ship a member named ``innocent.txt`` whose linkname points at an +arbitrary filesystem location. After extraction, reads of innocent.txt +dereference to that location; writes corrupt it. + +The fix categorically refuses any link member during extraction. +Tor Expert Bundles never legitimately contain symlinks or hardlinks, +so this is non-disruptive for real updates and a hard stop for hostile +archives. + +These tests build synthetic tar archives covering each refused case +and assert ``_extract_tor_bundle_safely`` rejects them. +""" +import io +import os +import stat +import tarfile +from pathlib import Path + +import pytest + +from services.tor_hidden_service import _extract_tor_bundle_safely + + +def _build_archive(tmp_path: Path, members: list) -> Path: + """Write a .tar.gz with the given (name, builder) pairs. + + Each builder is called with the open tarfile and is responsible for + adding its member however it likes (regular file, symlink, etc.). + """ + archive = tmp_path / "test_bundle.tar.gz" + with tarfile.open(str(archive), "w:gz") as tar: + for name, builder in members: + builder(tar, name) + return archive + + +def _add_regular_file(tar: tarfile.TarFile, name: str, payload: bytes = b"hello") -> None: + info = tarfile.TarInfo(name) + info.size = len(payload) + info.mode = 0o644 + info.type = tarfile.REGTYPE + tar.addfile(info, io.BytesIO(payload)) + + +def _add_symlink(tar: tarfile.TarFile, name: str, linkname: str) -> None: + info = tarfile.TarInfo(name) + info.size = 0 + info.type = tarfile.SYMTYPE + info.linkname = linkname + info.mode = 0o777 + tar.addfile(info) + + +def _add_hardlink(tar: tarfile.TarFile, name: str, linkname: str) -> None: + info = tarfile.TarInfo(name) + info.size = 0 + info.type = tarfile.LNKTYPE + info.linkname = linkname + info.mode = 0o644 + tar.addfile(info) + + +def _add_fifo(tar: tarfile.TarFile, name: str) -> None: + info = tarfile.TarInfo(name) + info.type = tarfile.FIFOTYPE + info.mode = 0o644 + tar.addfile(info) + + +def test_clean_archive_extracts_successfully(tmp_path): + """A normal archive with only regular files extracts fine.""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + def add_normal(tar, name): + _add_regular_file(tar, name, b"clean content") + + archive = _build_archive( + tmp_path, + [ + ("tor/tor.exe", add_normal), + ("tor/data/geoip", add_normal), + ], + ) + assert _extract_tor_bundle_safely(archive, install_dir) is True + assert (install_dir / "tor" / "tor.exe").is_file() + assert (install_dir / "tor" / "data" / "geoip").is_file() + + +def test_symlink_member_is_rejected(tmp_path, caplog): + """Issue #251 core regression: symlink members are refused.""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + archive = _build_archive( + tmp_path, + [ + ("tor/innocent.txt", lambda t, n: _add_symlink(t, n, "/etc/passwd")), + ], + ) + + import logging + + with caplog.at_level(logging.ERROR): + result = _extract_tor_bundle_safely(archive, install_dir) + + assert result is False + # No file should have been created + assert not (install_dir / "tor" / "innocent.txt").exists() + # Log should explain why + assert any( + "symlinks/hardlinks are not allowed" in rec.getMessage() + for rec in caplog.records + ) + + +def test_hardlink_member_is_rejected(tmp_path): + """Hardlinks are refused for the same reason as symlinks.""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + archive = _build_archive( + tmp_path, + [ + ("tor/regular.txt", lambda t, n: _add_regular_file(t, n)), + ("tor/sneaky.txt", lambda t, n: _add_hardlink(t, n, "regular.txt")), + ], + ) + assert _extract_tor_bundle_safely(archive, install_dir) is False + # The whole extraction is refused even though only one member is bad. + assert not (install_dir / "tor" / "regular.txt").exists() + + +def test_symlink_with_relative_target_still_rejected(tmp_path): + """Even a relative symlink target inside the install dir is refused. + + We don't allow symlinks at all — there is no legitimate Tor bundle + use case for them, and an attacker can chain link redirections in + ways the path-resolution check is poor at catching. + """ + install_dir = tmp_path / "install" + install_dir.mkdir() + + archive = _build_archive( + tmp_path, + [ + ("tor/alias.txt", lambda t, n: _add_symlink(t, n, "tor/tor.exe")), + ], + ) + assert _extract_tor_bundle_safely(archive, install_dir) is False + + +def test_fifo_or_device_member_is_rejected(tmp_path): + """Non-regular-non-directory members (FIFOs, devices) are refused.""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + archive = _build_archive( + tmp_path, + [ + ("tor/weird.fifo", _add_fifo), + ], + ) + assert _extract_tor_bundle_safely(archive, install_dir) is False + + +def test_path_traversal_member_is_rejected(tmp_path): + """Pre-existing path-traversal guard still works under the new shape.""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + def add_traversal(tar, name): + _add_regular_file(tar, name) + + # ../../escape.txt resolves outside install_dir on most platforms. + archive = _build_archive( + tmp_path, + [ + ("../../escape.txt", add_traversal), + ], + ) + assert _extract_tor_bundle_safely(archive, install_dir) is False + + +def test_malformed_tar_is_rejected(tmp_path): + """A corrupt/non-tar file is rejected without crashing.""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + bogus = tmp_path / "not-a-tar.tar.gz" + bogus.write_bytes(b"this is not a tar archive at all") + + assert _extract_tor_bundle_safely(bogus, install_dir) is False + + +def test_extraction_failure_does_not_leave_partial_state_referenced_to_caller(tmp_path): + """When extraction fails partway, the caller relies on a False return + to know it must clean up. We test the contract here — actual cleanup + of files that may have been written by tar.extractall() before the + failure point isn't part of THIS helper's responsibility (the caller + deletes the install dir if needed).""" + install_dir = tmp_path / "install" + install_dir.mkdir() + + # Hostile archive: one good file, then a symlink. Whether the good + # file was written or not, the return value must be False so the + # caller refuses the bundle. + archive = _build_archive( + tmp_path, + [ + ("tor/clean.txt", lambda t, n: _add_regular_file(t, n)), + ("tor/evil-link.txt", lambda t, n: _add_symlink(t, n, "/etc/passwd")), + ], + ) + + assert _extract_tor_bundle_safely(archive, install_dir) is False