mirror of
https://github.com/luongnv89/claude-howto.git
synced 2026-04-26 09:56:01 +02:00
89e89d4aa3
Add Chinese (Simplified) translations for all documentation, organized under a dedicated zh/ directory that mirrors the English folder structure. Co-authored-by: tanqingkuang <tanqingkuang@users.noreply.github.com> Translations originally contributed by @tanqingkuang in #45. Restructured from *-CN.md suffix pattern into zh/ directory to prevent the EPUB builder (scripts/build_epub.py collect_folder_files) from picking up Chinese files via glob("*.md") inside module folders.
115 lines
3.8 KiB
Python
115 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Validate cross-references, anchors, and code fences in Markdown files."""
|
|
|
|
import re
|
|
import sys
|
|
import unicodedata
|
|
from pathlib import Path
|
|
|
|
# Directory names to skip: a Markdown file is ignored when any component of
# its path equals one of these names.
IGNORE_DIRS = {
    ".agents",
    ".git",
    ".venv",
    "blog-posts",
    "node_modules",
    "openspec",
    "prompts",
}
# File names to skip regardless of the directory they live in.
IGNORE_FILES = {"README.backup.md"}
|
|
|
|
|
|
def iter_md_files():
    """Yield each ``*.md`` path under the current directory that is not ignored.

    A path is skipped when its file name is listed in ``IGNORE_FILES`` or when
    any of its path components is listed in ``IGNORE_DIRS``.
    """
    for candidate in Path().rglob("*.md"):
        if candidate.name in IGNORE_FILES:
            continue
        # isdisjoint() is False as soon as one path component is an ignored name.
        if not IGNORE_DIRS.isdisjoint(candidate.parts):
            continue
        yield candidate
|
|
|
|
|
|
def heading_to_anchor(heading: str) -> str:
    """Convert a Markdown heading's text into a GitHub-style anchor slug.

    Steps, in order:
      1. Drop emoji and emoji-adjacent code points (see the character class).
      2. Lowercase, then drop everything that is not a Unicode word
         character, whitespace, or a hyphen (accented letters are kept).
      3. Turn each space into a hyphen.
      4. Trim trailing hyphens only; leading hyphens are kept so that an
         emoji-prefixed heading keeps the hyphen left behind by the space
         that followed the emoji.
    """
    emoji_class = (
        r"[\U0001F000-\U0001FFFF"  # Supplementary Multilingual Plane symbols
        r"\U00002702-\U000027B0"  # Dingbats
        r"\U0000FE00-\U0000FE0F"  # Variation selectors
        r"\U0000200D"  # Zero-width joiner
        r"\U000000A9\U000000AE"  # (C) (R)
        r"\U00002000-\U0000206F"  # General punctuation (some emoji-adjacent)
        r"]"
    )
    without_emoji = re.sub(emoji_class, "", heading)
    lowered = without_emoji.lower()
    slug = re.sub(r"[^\w\s-]", "", lowered, flags=re.UNICODE)
    return slug.replace(" ", "-").rstrip("-")
|
|
|
|
|
|
def strip_code_blocks(content: str) -> str:
    """Return *content* with fenced code blocks and inline code spans removed.

    Links that appear inside code are documentation examples, so they are
    removed before the text is scanned for links.
    """
    removal_passes = (
        # Fenced blocks first: opening ``` line through the next ```.
        (r"```[^\n]*\n.*?```", re.DOTALL),
        # Then single-line inline spans delimited by one backtick each side.
        (r"`[^`\n]+`", 0),
    )
    for pattern, flags in removal_passes:
        content = re.sub(pattern, "", content, flags=flags)
    return content
|
|
|
|
|
|
def main() -> int:
    """Validate all Markdown files and print a report.

    Checks performed:
      * relative links to ``.md`` files resolve to an existing path;
      * in-page ``#anchor`` links match a heading of the same file;
      * lines starting with a code fence (three backticks) come in pairs;
      * every directory matching ``01-*`` .. ``10-*`` contains a README.md.

    Returns:
        0 when no problem was found, 1 otherwise (used as the exit status).
    """
    errors = []
    md_count = 0  # counted during the scan so the tree is walked only once

    for file_path in iter_md_files():
        md_count += 1
        content = file_path.read_text(encoding="utf-8")
        # Strip code blocks before scanning for links/anchors to avoid false
        # positives from documentation examples inside code fences.
        scannable = strip_code_blocks(content)

        # Relative .md links must resolve
        for link_path in re.findall(r"\[[^\]]+\]\(([^)#]+\.md)[^)]*\)", scannable):
            # External targets (scheme://... or protocol-relative //host/...)
            # are not files in this repository; checking them on disk would
            # always report them as broken.
            if re.match(r"(?:[A-Za-z][A-Za-z0-9+.-]*:)?//", link_path):
                continue
            if not (file_path.parent / link_path).resolve().exists():
                errors.append(f"{file_path}: broken cross-reference → '{link_path}'")

        # In-page anchors must match a real heading
        anchors = re.findall(r"\[[^\]]+\]\(#([^)]+)\)", scannable)
        if anchors:
            # NOTE: headings are read from the raw content (inline code in a
            # heading must survive), so heading-like lines inside code fences
            # are accepted as anchor targets too.
            headings = re.findall(r"^#{1,6}\s+(.+)$", content, re.MULTILINE)
            valid_anchors = set()
            occurrences = {}
            for h in headings:
                slug = heading_to_anchor(h)
                repeat = occurrences.get(slug, 0)
                occurrences[slug] = repeat + 1
                # GitHub disambiguates repeated headings by appending
                # -1, -2, ... to the second, third, ... occurrence.
                valid_anchors.add(f"{slug}-{repeat}" if repeat else slug)
            errors.extend(
                f"{file_path}: broken anchor → '#{anchor}'"
                for anchor in anchors
                if anchor not in valid_anchors
            )

        # Unmatched code fences (only count fences at start of line)
        if len(re.findall(r"^```", content, re.MULTILINE)) % 2 != 0:
            errors.append(f"{file_path}: unmatched code fences")

    # All numbered lesson dirs must have README.md
    for i in range(1, 11):
        errors.extend(
            f"{d}: missing README.md"
            for d in Path().glob(f"{i:02d}-*")
            if d.is_dir() and not (d / "README.md").exists()
        )

    if errors:
        print("❌ Cross-reference errors:")
        for e in errors:
            print(f" - {e}")
        return 1

    print(f"✅ All cross-references valid ({md_count} files checked)")
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|