fix: constrain cross-reference resolution to repo root (#91)

Previously, `(file_path.parent / link_path).resolve()` was checked for
existence even when a crafted link such as `../../outside/path.md`
resolved to a location outside the repository root. Links that resolve
outside the repo root are now skipped (not reported as broken) rather
than checked for existence.

Co-authored-by: Claude Code <noreply@anthropic.com>
This commit is contained in:
xiaolai
2026-04-25 06:28:03 +08:00
committed by GitHub
parent 6740288030
commit 0bf754673a
+8 -6
View File
@@ -61,6 +61,7 @@ def strip_code_blocks(content: str) -> str:
def main() -> int:
errors: list[str] = []
repo_root = Path().resolve()
for file_path in iter_md_files():
content = file_path.read_text(encoding="utf-8")
@@ -68,12 +69,13 @@ def main() -> int:
# from documentation examples inside code fences.
scannable = strip_code_blocks(content)
# Relative .md links must resolve
errors.extend(
f"{file_path}: broken cross-reference → '{link_path}'"
for link_path in re.findall(r"\[[^\]]+\]\(([^)#]+\.md)[^)]*\)", scannable)
if not (file_path.parent / link_path).resolve().exists()
)
# Relative .md links must resolve and must stay within the repo root
for link_path in re.findall(r"\[[^\]]+\]\(([^)#]+\.md)[^)]*\)", scannable):
resolved = (file_path.parent / link_path).resolve()
if not resolved.is_relative_to(repo_root):
continue
if not resolved.exists():
errors.append(f"{file_path}: broken cross-reference → '{link_path}'")
# In-page anchors must match a real heading
anchors = re.findall(r"\[[^\]]+\]\(#([^)]+)\)", scannable)