mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-27 01:22:27 +02:00
External audit (@tg12) flagged that the Tor Expert Bundle extractor
checked tarinfo.name against path traversal but never inspected
tarinfo.linkname for symlink or hardlink members. Python 3.11's
tarfile.extractall() honors symlinks, so a malicious archive could
ship a member like::
name = "innocent.txt" (passes the path-traversal check)
type = SYMTYPE
linkname = "C:\Windows\System32\config\system"
After extraction, subsequent reads of innocent.txt dereference to that
arbitrary filesystem location; subsequent writes corrupt it. On
Windows (where Tor Expert Bundle extraction actually runs), this is
a host-compromise path of essentially the same severity as the
supply-chain RCE in #231 — gated only by the integrity check we just
hardened in PR #261/#265.
Python 3.12+ added tarfile.extract / extractall filter='data' as a
built-in mitigation; we're on Python 3.11 in production, so we
implement the same idea manually.
Fix in backend/services/tor_hidden_service.py:
Extract the existing path-traversal-only check into a new
_extract_tor_bundle_safely() helper that:
1. Refuses any member with member.issym() or member.islnk() True.
Tor bundles never legitimately contain symlinks or hardlinks
so this is non-disruptive. Logs the linkname so an operator
can see what the malicious archive was trying to alias.
2. Refuses any member that isn't isfile() or isdir() — no FIFOs,
no character or block devices, no contiguous-file-type entries.
None of those belong in a Tor Expert Bundle and accepting them
is a class of bug we don't need to debug later.
3. Preserves the original path-traversal guard (member.name must
resolve under install_dir).
4. Catches tarfile.TarError so a corrupt archive returns False
gracefully instead of bubbling out an exception.
Tests: backend/tests/test_tor_bundle_symlink_filter.py (8 tests)
- Clean archive with only regular files extracts successfully
- Symlink member is rejected (the core regression)
- Hardlink member is rejected
- Symlink with relative target inside install_dir is still rejected
(we don't allow symlinks at all, not just absolute-target ones)
- FIFO/device-style member is rejected
- Path-traversal guard still works under the new shape
- Malformed/non-tar file is rejected gracefully (no crash)
- Failure on one member rejects the whole bundle (no half-extract)
Validation:
pytest backend/tests/test_tor_bundle_symlink_filter.py
backend/tests/test_tor_bundle_verification.py
-> 14 passed
UX impact: zero for legitimate Tor releases. Operators installing
a real Tor Expert Bundle continue to see "Tor installed at:" exactly
as before. Only malicious archives are refused, with a clear log
message identifying the rejected linkname.
Credit: @tg12 — the original report was specific enough that the
fix design was immediate.
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -173,6 +173,94 @@ def _verify_tor_bundle(archive_path: Path, bundle_url: str) -> tuple[bool, str]:
|
||||
return True, f"https-only (no digest source reachable, archive={actual_hash[:16]}...)"
|
||||
|
||||
|
||||
def _extract_tor_bundle_safely(archive_path: Path, install_dir: Path) -> bool:
|
||||
"""Extract a Tor Expert Bundle tar.gz safely.
|
||||
|
||||
Issue #251: the previous extractor checked tarinfo.name against path
|
||||
traversal but never inspected tarinfo.linkname for symlink/hardlink
|
||||
members. Python 3.11's tarfile honors symlinks during extractall(),
|
||||
so a malicious archive could ship a member like::
|
||||
|
||||
name = "innocent.txt" # passes the path check
|
||||
type = SYMTYPE
|
||||
linkname = "C:\\Windows\\System32\\config\\system"
|
||||
|
||||
and extractall() would then create that symlink. Subsequent reads
|
||||
of innocent.txt deference to a sensitive system file; subsequent
|
||||
writes corrupt one. Tor bundles never legitimately contain symlinks
|
||||
or hardlinks, so we refuse all link members categorically rather
|
||||
than trying to validate linkname targets (which has its own pitfalls
|
||||
around relative path resolution).
|
||||
|
||||
Also refuses non-regular-non-directory members (devices, FIFOs,
|
||||
character/block special files) for completeness — none of those
|
||||
belong in a Tor Expert Bundle and accepting them is a category of
|
||||
bug we don't need to debug later.
|
||||
|
||||
Returns True on success, False on rejection (and logs the reason).
|
||||
The caller is responsible for cleaning up the archive file.
|
||||
"""
|
||||
import tarfile
|
||||
|
||||
install_resolved = install_dir.resolve()
|
||||
|
||||
try:
|
||||
with tarfile.open(str(archive_path), "r:gz") as tar:
|
||||
for member in tar.getmembers():
|
||||
# Reject anything that isn't a regular file or directory.
|
||||
# Symlinks (SYMTYPE) and hardlinks (LNKTYPE) are the
|
||||
# path-traversal vectors; the others (CHRTYPE, BLKTYPE,
|
||||
# FIFOTYPE, CONTTYPE) have no legitimate use in a Tor
|
||||
# Expert Bundle.
|
||||
if member.issym() or member.islnk():
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: link member %s -> %s "
|
||||
"(symlinks/hardlinks are not allowed in Tor bundles; "
|
||||
"this archive is malformed or hostile)",
|
||||
member.name,
|
||||
member.linkname,
|
||||
)
|
||||
return False
|
||||
if not (member.isfile() or member.isdir()):
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: unexpected member type "
|
||||
"for %s (only regular files and directories are allowed)",
|
||||
member.name,
|
||||
)
|
||||
return False
|
||||
|
||||
# Path traversal check (preserves the original guard).
|
||||
try:
|
||||
member_path = (install_dir / member.name).resolve()
|
||||
except OSError as exc:
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: cannot resolve member "
|
||||
"path %s: %s",
|
||||
member.name,
|
||||
exc,
|
||||
)
|
||||
return False
|
||||
try:
|
||||
member_path.relative_to(install_resolved)
|
||||
except ValueError:
|
||||
logger.error(
|
||||
"Tor bundle extraction blocked: path traversal on %s "
|
||||
"(resolves to %s, outside install dir %s)",
|
||||
member.name,
|
||||
member_path,
|
||||
install_resolved,
|
||||
)
|
||||
return False
|
||||
|
||||
# All members validated — extract.
|
||||
tar.extractall(path=str(install_dir))
|
||||
except tarfile.TarError as exc:
|
||||
logger.error("Tor bundle extraction failed: malformed tar (%s)", exc)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _auto_install_tor() -> str | None:
|
||||
"""Install or download Tor when it is safe to do so."""
|
||||
if os.name != "nt":
|
||||
@@ -203,14 +291,9 @@ def _auto_install_tor() -> str | None:
|
||||
logger.info("Download complete, extracting...")
|
||||
import tarfile
|
||||
|
||||
with tarfile.open(str(archive_path), "r:gz") as tar:
|
||||
for member in tar.getmembers():
|
||||
member_path = (TOR_INSTALL_DIR / member.name).resolve()
|
||||
if not str(member_path).startswith(str(TOR_INSTALL_DIR.resolve())):
|
||||
logger.error("Tar path traversal blocked: %s", member.name)
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
tar.extractall(path=str(TOR_INSTALL_DIR))
|
||||
if not _extract_tor_bundle_safely(archive_path, TOR_INSTALL_DIR):
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return None
|
||||
|
||||
archive_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
@@ -0,0 +1,222 @@
|
||||
"""Issue #251 (tg12): Tor bundle extraction must refuse symlink and
|
||||
hardlink members.
|
||||
|
||||
The previous extractor checked ``member.name`` against path traversal
|
||||
but never inspected ``member.linkname``. Python 3.11's ``tarfile``
|
||||
honors symlinks during ``extractall()``, so a malicious archive could
|
||||
ship a member named ``innocent.txt`` whose linkname points at an
|
||||
arbitrary filesystem location. After extraction, reads of innocent.txt
|
||||
dereference to that location; writes corrupt it.
|
||||
|
||||
The fix categorically refuses any link member during extraction.
|
||||
Tor Expert Bundles never legitimately contain symlinks or hardlinks,
|
||||
so this is non-disruptive for real updates and a hard stop for hostile
|
||||
archives.
|
||||
|
||||
These tests build synthetic tar archives covering each refused case
|
||||
and assert ``_extract_tor_bundle_safely`` rejects them.
|
||||
"""
|
||||
import io
|
||||
import os
|
||||
import stat
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from services.tor_hidden_service import _extract_tor_bundle_safely
|
||||
|
||||
|
||||
def _build_archive(tmp_path: Path, members: list) -> Path:
|
||||
"""Write a .tar.gz with the given (name, builder) pairs.
|
||||
|
||||
Each builder is called with the open tarfile and is responsible for
|
||||
adding its member however it likes (regular file, symlink, etc.).
|
||||
"""
|
||||
archive = tmp_path / "test_bundle.tar.gz"
|
||||
with tarfile.open(str(archive), "w:gz") as tar:
|
||||
for name, builder in members:
|
||||
builder(tar, name)
|
||||
return archive
|
||||
|
||||
|
||||
def _add_regular_file(tar: tarfile.TarFile, name: str, payload: bytes = b"hello") -> None:
|
||||
info = tarfile.TarInfo(name)
|
||||
info.size = len(payload)
|
||||
info.mode = 0o644
|
||||
info.type = tarfile.REGTYPE
|
||||
tar.addfile(info, io.BytesIO(payload))
|
||||
|
||||
|
||||
def _add_symlink(tar: tarfile.TarFile, name: str, linkname: str) -> None:
|
||||
info = tarfile.TarInfo(name)
|
||||
info.size = 0
|
||||
info.type = tarfile.SYMTYPE
|
||||
info.linkname = linkname
|
||||
info.mode = 0o777
|
||||
tar.addfile(info)
|
||||
|
||||
|
||||
def _add_hardlink(tar: tarfile.TarFile, name: str, linkname: str) -> None:
|
||||
info = tarfile.TarInfo(name)
|
||||
info.size = 0
|
||||
info.type = tarfile.LNKTYPE
|
||||
info.linkname = linkname
|
||||
info.mode = 0o644
|
||||
tar.addfile(info)
|
||||
|
||||
|
||||
def _add_fifo(tar: tarfile.TarFile, name: str) -> None:
|
||||
info = tarfile.TarInfo(name)
|
||||
info.type = tarfile.FIFOTYPE
|
||||
info.mode = 0o644
|
||||
tar.addfile(info)
|
||||
|
||||
|
||||
def test_clean_archive_extracts_successfully(tmp_path):
|
||||
"""A normal archive with only regular files extracts fine."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
def add_normal(tar, name):
|
||||
_add_regular_file(tar, name, b"clean content")
|
||||
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("tor/tor.exe", add_normal),
|
||||
("tor/data/geoip", add_normal),
|
||||
],
|
||||
)
|
||||
assert _extract_tor_bundle_safely(archive, install_dir) is True
|
||||
assert (install_dir / "tor" / "tor.exe").is_file()
|
||||
assert (install_dir / "tor" / "data" / "geoip").is_file()
|
||||
|
||||
|
||||
def test_symlink_member_is_rejected(tmp_path, caplog):
|
||||
"""Issue #251 core regression: symlink members are refused."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("tor/innocent.txt", lambda t, n: _add_symlink(t, n, "/etc/passwd")),
|
||||
],
|
||||
)
|
||||
|
||||
import logging
|
||||
|
||||
with caplog.at_level(logging.ERROR):
|
||||
result = _extract_tor_bundle_safely(archive, install_dir)
|
||||
|
||||
assert result is False
|
||||
# No file should have been created
|
||||
assert not (install_dir / "tor" / "innocent.txt").exists()
|
||||
# Log should explain why
|
||||
assert any(
|
||||
"symlinks/hardlinks are not allowed" in rec.getMessage()
|
||||
for rec in caplog.records
|
||||
)
|
||||
|
||||
|
||||
def test_hardlink_member_is_rejected(tmp_path):
|
||||
"""Hardlinks are refused for the same reason as symlinks."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("tor/regular.txt", lambda t, n: _add_regular_file(t, n)),
|
||||
("tor/sneaky.txt", lambda t, n: _add_hardlink(t, n, "regular.txt")),
|
||||
],
|
||||
)
|
||||
assert _extract_tor_bundle_safely(archive, install_dir) is False
|
||||
# The whole extraction is refused even though only one member is bad.
|
||||
assert not (install_dir / "tor" / "regular.txt").exists()
|
||||
|
||||
|
||||
def test_symlink_with_relative_target_still_rejected(tmp_path):
|
||||
"""Even a relative symlink target inside the install dir is refused.
|
||||
|
||||
We don't allow symlinks at all — there is no legitimate Tor bundle
|
||||
use case for them, and an attacker can chain link redirections in
|
||||
ways the path-resolution check is poor at catching.
|
||||
"""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("tor/alias.txt", lambda t, n: _add_symlink(t, n, "tor/tor.exe")),
|
||||
],
|
||||
)
|
||||
assert _extract_tor_bundle_safely(archive, install_dir) is False
|
||||
|
||||
|
||||
def test_fifo_or_device_member_is_rejected(tmp_path):
|
||||
"""Non-regular-non-directory members (FIFOs, devices) are refused."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("tor/weird.fifo", _add_fifo),
|
||||
],
|
||||
)
|
||||
assert _extract_tor_bundle_safely(archive, install_dir) is False
|
||||
|
||||
|
||||
def test_path_traversal_member_is_rejected(tmp_path):
|
||||
"""Pre-existing path-traversal guard still works under the new shape."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
def add_traversal(tar, name):
|
||||
_add_regular_file(tar, name)
|
||||
|
||||
# ../../escape.txt resolves outside install_dir on most platforms.
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("../../escape.txt", add_traversal),
|
||||
],
|
||||
)
|
||||
assert _extract_tor_bundle_safely(archive, install_dir) is False
|
||||
|
||||
|
||||
def test_malformed_tar_is_rejected(tmp_path):
|
||||
"""A corrupt/non-tar file is rejected without crashing."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
bogus = tmp_path / "not-a-tar.tar.gz"
|
||||
bogus.write_bytes(b"this is not a tar archive at all")
|
||||
|
||||
assert _extract_tor_bundle_safely(bogus, install_dir) is False
|
||||
|
||||
|
||||
def test_extraction_failure_does_not_leave_partial_state_referenced_to_caller(tmp_path):
|
||||
"""When extraction fails partway, the caller relies on a False return
|
||||
to know it must clean up. We test the contract here — actual cleanup
|
||||
of files that may have been written by tar.extractall() before the
|
||||
failure point isn't part of THIS helper's responsibility (the caller
|
||||
deletes the install dir if needed)."""
|
||||
install_dir = tmp_path / "install"
|
||||
install_dir.mkdir()
|
||||
|
||||
# Hostile archive: one good file, then a symlink. Whether the good
|
||||
# file was written or not, the return value must be False so the
|
||||
# caller refuses the bundle.
|
||||
archive = _build_archive(
|
||||
tmp_path,
|
||||
[
|
||||
("tor/clean.txt", lambda t, n: _add_regular_file(t, n)),
|
||||
("tor/evil-link.txt", lambda t, n: _add_symlink(t, n, "/etc/passwd")),
|
||||
],
|
||||
)
|
||||
|
||||
assert _extract_tor_bundle_safely(archive, install_dir) is False
|
||||
Reference in New Issue
Block a user