mirror of
https://github.com/luongnv89/claude-howto.git
synced 2026-04-21 21:45:58 +02:00
699fb39a46
* ci: shift-left quality gates — add mypy to pre-commit, fix CI failures - Add mypy pre-commit hook (mirrors-mypy v1.13.0) so type checks run locally - Add [tool.mypy] config to scripts/pyproject.toml with overrides for untyped libs (ebooklib, sync_translations) - Add mypy>=1.8.0 to requirements-dev.txt - Fix CI test.yml: remove continue-on-error: true from lint/security/type-check jobs (was silently swallowing failures) - Fix CI bandit -c path: pyproject.toml → scripts/pyproject.toml - Fix CI mypy command: use --config-file scripts/pyproject.toml - Fix CI build-epub: add type-check to needs, fix if: success() → !failure() && !cancelled() - Fix ruff errors in sync_translations.py (RUF013 implicit Optional, SIM102 nested if) - Fix mypy errors: add list[str] annotations to errors vars in check_cross_references.py and check_links.py * fix(ci): install mmdc in build-epub job and correct return type annotation - Add npm install step for @mermaid-js/mermaid-cli before Build EPUB to fix CI failure (mmdc not found error) - Fix check_translation_status() return type from list[dict] to tuple[list[dict], list[dict]] to match the actual return value * fix(ci): pass --no-sandbox to Puppeteer in build-epub CI job mmdc (Mermaid CLI) uses Puppeteer/Chromium which requires --no-sandbox in the GitHub Actions sandboxed environment. Add --puppeteer-config flag to build_epub.py that passes a Puppeteer JSON config file to mmdc via -p, and use it in the CI workflow to inject the no-sandbox args.
114 lines
3.8 KiB
Python
114 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Validate cross-references, anchors, and code fences in Markdown files."""
|
|
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Directory names that are never scanned. A Markdown file is skipped when any
# component of its path equals one of these names.
IGNORE_DIRS = {
    # tooling / vendored trees
    ".agents",
    ".git",
    ".venv",
    "node_modules",
    # content directories excluded from validation
    "blog-posts",
    "openspec",
    "prompts",
}

# File names (compared against the final path component only) that are skipped.
IGNORE_FILES = {"README.backup.md"}
|
|
|
|
|
|
def iter_md_files():
    """Yield every ``*.md`` path below the current working directory.

    A path is skipped when any of its components is listed in ``IGNORE_DIRS``
    or when its file name is listed in ``IGNORE_FILES``. Paths are yielded
    relative to the working directory, in whatever order ``Path.rglob``
    produces them.
    """
    for candidate in Path().rglob("*.md"):
        # Any path component naming an ignored directory excludes the file.
        if not IGNORE_DIRS.isdisjoint(candidate.parts):
            continue
        if candidate.name in IGNORE_FILES:
            continue
        yield candidate
|
|
|
|
|
|
def heading_to_anchor(heading: str) -> str:
    """Convert Markdown heading text into its GitHub-style anchor slug.

    The transformation approximates GitHub's anchor generation:

    1. Emoji and emoji-adjacent code points are removed.
    2. The text is lowercased and every character that is not a Unicode word
       character, whitespace, or a hyphen is dropped (so letters with
       diacritics, e.g. Vietnamese, survive).
    3. Each space becomes a hyphen.
    4. Trailing hyphens are stripped. Leading hyphens are kept on purpose:
       an emoji-prefixed heading such as ``"🎯 Goals"`` maps to ``"-goals"``.

    Args:
        heading: The heading text without the leading ``#`` markers.

    Returns:
        The anchor string, without the leading ``#``.
    """
    emoji_class = (
        r"[\U0001F000-\U0001FFFF"  # Supplementary Multilingual Plane symbols
        r"\U00002702-\U000027B0"  # Dingbats
        r"\U0000FE00-\U0000FE0F"  # Variation selectors
        r"\U0000200D"  # Zero-width joiner
        r"\U000000A9\U000000AE"  # (C) (R)
        r"\U00002000-\U0000206F"  # General punctuation (some emoji-adjacent)
        r"]"
    )
    without_emoji = re.sub(emoji_class, "", heading)

    # Keep Unicode word characters, whitespace and hyphens; drop the rest.
    lowered = without_emoji.lower()
    kept = re.sub(r"[^\w\s-]", "", lowered, flags=re.UNICODE)

    # Only literal spaces are turned into hyphens.
    hyphenated = "-".join(kept.split(" "))
    return hyphenated.rstrip("-")
|
|
|
|
|
|
def strip_code_blocks(content: str) -> str:
    """Return *content* with fenced code blocks and inline code spans removed.

    Links that only appear inside code examples should not be validated, so
    the text is stripped in two passes: first triple-backtick fenced blocks
    (the opening fence line through the next triple backtick), then
    single-line inline spans delimited by single backticks.
    """
    fenced_block = r"```[^\n]*\n.*?```"
    inline_span = r"`[^`\n]+`"

    # Fenced blocks must go first so their backticks are not mistaken for
    # inline-span delimiters.
    without_fences = re.sub(fenced_block, "", content, flags=re.DOTALL)
    return re.sub(inline_span, "", without_fences)
|
|
|
|
|
|
def _valid_anchors(content: str) -> set[str]:
    """Return the anchors GitHub would generate for the headings in *content*.

    Repeated headings are disambiguated the way GitHub does it: the first
    occurrence keeps the bare anchor, later ones get ``-1``, ``-2``, ...
    appended.

    NOTE: headings are read from the raw content (not the code-stripped text)
    so that inline code inside a heading still contributes to its anchor. As a
    consequence, ``# comment`` lines inside fenced code blocks are also
    treated as headings, which can only make the check more permissive.
    """
    occurrences: dict[str, int] = {}
    anchors: set[str] = set()
    for heading in re.findall(r"^#{1,6}\s+(.+)$", content, re.MULTILINE):
        base = heading_to_anchor(heading)
        repeat = occurrences.get(base, 0)
        anchors.add(base if repeat == 0 else f"{base}-{repeat}")
        occurrences[base] = repeat + 1
    return anchors


def _check_file(file_path: Path) -> list[str]:
    """Validate one Markdown file and return its error messages.

    Checks, in order: relative ``.md`` links resolve to an existing path,
    in-page ``#anchor`` links match a heading, and the number of
    line-initial code fences is even.
    """
    content = file_path.read_text(encoding="utf-8")
    # Strip code blocks before scanning for links/anchors to avoid false
    # positives from documentation examples inside code fences.
    scannable = strip_code_blocks(content)
    problems: list[str] = []

    # Relative .md links must resolve.
    for link_path in re.findall(r"\[[^\]]+\]\(([^)#]+\.md)[^)]*\)", scannable):
        # A leading "scheme:" marks an absolute URL (e.g. https://...); such
        # links are not files in this repository and cannot be resolved here.
        if re.match(r"[A-Za-z][A-Za-z0-9+.-]*:", link_path):
            continue
        if not (file_path.parent / link_path).resolve().exists():
            problems.append(f"{file_path}: broken cross-reference → '{link_path}'")

    # In-page anchors must match a real heading.
    anchors = re.findall(r"\[[^\]]+\]\(#([^)]+)\)", scannable)
    if anchors:
        valid = _valid_anchors(content)
        problems.extend(
            f"{file_path}: broken anchor → '#{anchor}'"
            for anchor in anchors
            if anchor not in valid
        )

    # Unmatched code fences (only count fences at start of line).
    if len(re.findall(r"^```", content, re.MULTILINE)) % 2 != 0:
        problems.append(f"{file_path}: unmatched code fences")

    return problems


def main() -> int:
    """Run all Markdown checks and report the result.

    Every file from ``iter_md_files()`` is validated, then each numbered
    lesson directory (``01-*`` through ``10-*``) in the working directory is
    required to contain a ``README.md``.

    Returns:
        ``1`` if any error was found (errors are printed), otherwise ``0``.
    """
    errors: list[str] = []

    # Count files while checking them so the tree is only walked once.
    md_count = 0
    for file_path in iter_md_files():
        md_count += 1
        errors.extend(_check_file(file_path))

    # All numbered lesson dirs must have README.md.
    for i in range(1, 11):
        errors.extend(
            f"{d}: missing README.md"
            for d in Path().glob(f"{i:02d}-*")
            if d.is_dir() and not (d / "README.md").exists()
        )

    if errors:
        print("❌ Cross-reference errors:")
        for e in errors:
            print(f" - {e}")
        return 1

    print(f"✅ All cross-references valid ({md_count} files checked)")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate main()'s status (0 = all valid, 1 = errors) as the exit code.
    exit_code = main()
    sys.exit(exit_code)
|