From 0bf754673a710f5d245e9bbbabbdd8d0fc3f421d Mon Sep 17 00:00:00 2001
From: xiaolai
Date: Sat, 25 Apr 2026 06:28:03 +0800
Subject: [PATCH] fix: constrain cross-reference resolution to repo root (#91)

`(file_path.parent / link_path).resolve()` could silently follow a crafted
`../../outside/path` link that escapes the repository root. Paths that
resolve outside the repo root are now skipped rather than checked for
existence.

Co-authored-by: Claude Code
---
 scripts/check_cross_references.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/scripts/check_cross_references.py b/scripts/check_cross_references.py
index a496590..a02efc7 100644
--- a/scripts/check_cross_references.py
+++ b/scripts/check_cross_references.py
@@ -61,6 +61,7 @@ def strip_code_blocks(content: str) -> str:
 
 def main() -> int:
     errors: list[str] = []
+    repo_root = Path().resolve()
     for file_path in iter_md_files():
         content = file_path.read_text(encoding="utf-8")
 
@@ -68,12 +69,13 @@ def main() -> int:
         # from documentation examples inside code fences.
         scannable = strip_code_blocks(content)
 
-        # Relative .md links must resolve
-        errors.extend(
-            f"{file_path}: broken cross-reference → '{link_path}'"
-            for link_path in re.findall(r"\[[^\]]+\]\(([^)#]+\.md)[^)]*\)", scannable)
-            if not (file_path.parent / link_path).resolve().exists()
-        )
+        # Relative .md links must resolve and must stay within the repo root
+        for link_path in re.findall(r"\[[^\]]+\]\(([^)#]+\.md)[^)]*\)", scannable):
+            resolved = (file_path.parent / link_path).resolve()
+            if not resolved.is_relative_to(repo_root):
+                continue
+            if not resolved.exists():
+                errors.append(f"{file_path}: broken cross-reference → '{link_path}'")
 
         # In-page anchors must match a real heading
         anchors = re.findall(r"\[[^\]]+\]\(#([^)]+)\)", scannable)