mirror of
https://github.com/luongnv89/claude-howto.git
synced 2026-05-23 10:19:41 +02:00
553a319934
Catches mechanical rendering bugs that look fine in a diff but render wrong on GitHub / in the EPUB build: inner backticks inside inline-code spans (PR #114), unescaped pipes in table cells (PR #115), stray `$ARGUMENTS` outside code, and unmatched fences. Scope: `**/README.md` across tutorial modules + translations (ja, uk, vi, zh). Excludes dev tooling dirs (`.claude`, `.venv`, `openspec`, …). Rule registry: each rule is `(Path, content) -> list[str]`. Adding a new rule = one function + one registry entry + one test fixture. - scripts/check_markdown_rendering.py: 4 rules, CommonMark-aware code- span consumption, blockquote-fence-aware masking - scripts/tests/test_check_markdown_rendering.py: 20 unit tests, positive + negative per rule, currency-false-positive guard - .pre-commit-config.yaml: new global `markdown-rendering` hook
256 lines
8.3 KiB
Python
256 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Validate Markdown rendering correctness in tutorial README files.
|
|
|
|
Catches mechanical rendering bugs that look fine in a diff but render wrong
|
|
on GitHub / in the EPUB build: inner backticks in inline code, unescaped
|
|
pipes in table cells, stray $ARGUMENTS / $N outside code, mismatched fences.
|
|
|
|
Scope: `**/README.md` across the repo (tutorial modules + ja/uk/vi/zh
|
|
translations). Excludes `.venv`, `node_modules`, `.git`, `blog-posts`,
|
|
`.agents`, `.claude`, and `openspec` — these are not tutorial output.
|
|
|
|
Each rule is a function `(file_path, content) -> list[str]`. To add a new
|
|
rule: write the function, append `(name, fn)` to `RULES`, add fixtures in
|
|
`tests/test_check_markdown_rendering.py`.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
from collections.abc import Callable
|
|
from pathlib import Path
|
|
|
|
IGNORE_DIRS = {
|
|
".venv",
|
|
"node_modules",
|
|
".git",
|
|
"blog-posts",
|
|
"openspec",
|
|
"prompts",
|
|
".agents",
|
|
".claude",
|
|
}
|
|
|
|
Rule = Callable[[Path, str], list[str]]
|
|
|
|
|
|
def iter_readme_files() -> list[Path]:
|
|
return [
|
|
f
|
|
for f in Path().rglob("README.md")
|
|
if not any(part in IGNORE_DIRS for part in f.parts)
|
|
]
|
|
|
|
|
|
_FENCE_RE = re.compile(r"^ {0,3}(?:>\s*)*```")
|
|
|
|
|
|
def _mask_fenced_blocks(content: str) -> str:
|
|
"""Replace fenced code blocks with blank lines so line numbers survive.
|
|
|
|
Triple-backtick fences only. Recognises fences inside blockquotes
|
|
(``` > ``` ``` ```), matching how GitHub renders them. Lines inside
|
|
fences become empty so subsequent rules see no code content but still
|
|
report accurate line numbers.
|
|
"""
|
|
out: list[str] = []
|
|
in_fence = False
|
|
for line in content.split("\n"):
|
|
if _FENCE_RE.match(line):
|
|
in_fence = not in_fence
|
|
out.append("")
|
|
continue
|
|
out.append("" if in_fence else line)
|
|
return "\n".join(out)
|
|
|
|
|
|
def _strip_inline_code(line: str) -> str:
|
|
"""Remove inline code spans from a single line.
|
|
|
|
Strips double-backtick spans first (they may contain single backticks),
|
|
then single-backtick spans. HTML comments are also stripped so rules
|
|
that scan prose ignore explanatory comments.
|
|
"""
|
|
line = re.sub(r"<!--.*?-->", "", line)
|
|
line = re.sub(r"``[^`]+``", "", line)
|
|
line = re.sub(r"`[^`\n]+`", "", line)
|
|
return line
|
|
|
|
|
|
def _consume_code_spans(line: str) -> str:
|
|
"""Remove well-formed inline code spans from a single line.
|
|
|
|
Follows CommonMark inline-code semantics: a run of N backticks opens a
|
|
span; the span closes at the next matching run of N backticks. Spans
|
|
may nest different run-lengths. Anything left after consumption is a
|
|
structural mismatch (an unmatched opening run).
|
|
"""
|
|
out: list[str] = []
|
|
i = 0
|
|
n = len(line)
|
|
while i < n:
|
|
if line[i] != "`":
|
|
out.append(line[i])
|
|
i += 1
|
|
continue
|
|
run_start = i
|
|
while i < n and line[i] == "`":
|
|
i += 1
|
|
run_len = i - run_start
|
|
scan = i
|
|
while scan < n:
|
|
if line[scan] == "`":
|
|
close_start = scan
|
|
while scan < n and line[scan] == "`":
|
|
scan += 1
|
|
if scan - close_start == run_len:
|
|
i = scan
|
|
break
|
|
else:
|
|
scan += 1
|
|
else:
|
|
out.append(line[run_start:i])
|
|
return "".join(out)
|
|
|
|
|
|
def rule_backtick_in_inline_code(file_path: Path, content: str) -> list[str]:
|
|
"""Flag inline code spans that contain a literal backtick.
|
|
|
|
Bug pattern shipped in PR #114: `` `!`command`` `` — a single-backtick
|
|
span containing an inner backtick. The canonical fix is the
|
|
``` `` `text` `` ``` idiom (double-backticks + space padding).
|
|
|
|
Detection: consume well-formed code spans left-to-right per line. Any
|
|
leftover backtick is an unmatched opener — a structural mismatch that
|
|
renders wrong on GitHub.
|
|
|
|
Skips fenced code blocks.
|
|
"""
|
|
masked = _mask_fenced_blocks(content)
|
|
errors: list[str] = []
|
|
for lineno, line in enumerate(masked.split("\n"), start=1):
|
|
if "`" in _consume_code_spans(line):
|
|
errors.append(
|
|
f"{file_path}:{lineno}: backtick-in-inline-code — "
|
|
"use `` `text` `` (double-backtick + space) to display a literal backtick"
|
|
)
|
|
return errors
|
|
|
|
|
|
def rule_unescaped_pipe_in_table(file_path: Path, content: str) -> list[str]:
|
|
"""Flag table rows whose unescaped pipe count differs from the header.
|
|
|
|
A table row is `|...|...|`. The header row sets the column count; body
|
|
rows must have the same count. A bare `|` in a cell (e.g. `[color|default]`)
|
|
inflates the count and breaks rendering. The fix is `\\|`.
|
|
|
|
Pipes inside inline-code spans are ignored (they need escaping too, but
|
|
that's a separate idiom). The separator row `|---|---|` is ignored.
|
|
"""
|
|
masked = _mask_fenced_blocks(content)
|
|
errors: list[str] = []
|
|
lines = masked.split("\n")
|
|
|
|
def cell_count(line: str) -> int:
|
|
scannable = _strip_inline_code(line.strip())
|
|
return len(re.findall(r"(?<!\\)\|", scannable))
|
|
|
|
i = 0
|
|
while i < len(lines):
|
|
line = lines[i].strip()
|
|
if (
|
|
line.startswith("|")
|
|
and line.endswith("|")
|
|
and i + 1 < len(lines)
|
|
and re.match(r"^\s*\|[\s\-:|]+\|\s*$", lines[i + 1])
|
|
):
|
|
header_pipes = cell_count(line)
|
|
j = i + 2
|
|
while j < len(lines):
|
|
row = lines[j].strip()
|
|
if not (row.startswith("|") and row.endswith("|")):
|
|
break
|
|
if cell_count(row) != header_pipes:
|
|
errors.append(
|
|
f"{file_path}:{j + 1}: unescaped-pipe-in-table — "
|
|
"row cell count differs from header; escape literal `|` as `\\|`"
|
|
)
|
|
j += 1
|
|
i = j
|
|
else:
|
|
i += 1
|
|
return errors
|
|
|
|
|
|
def rule_stray_argument_placeholder(file_path: Path, content: str) -> list[str]:
|
|
"""Flag `$ARGUMENTS` appearing in prose outside code.
|
|
|
|
Tutorial snippets reference these inside fenced code blocks or inline
|
|
code (`` `$ARGUMENTS` ``). When `$ARGUMENTS` appears in bare prose it
|
|
usually means a tutorial snippet bled into the surrounding paragraph.
|
|
|
|
Single-digit `$N` was considered but dropped: shell-arg `$0`..`$9` is
|
|
syntactically indistinguishable from prose currency (`$5 today`,
|
|
`$1,000`, `$5/month`) without semantic context, and currency is
|
|
legitimate translator content. `$ARGUMENTS` is unambiguous.
|
|
"""
|
|
masked = _mask_fenced_blocks(content)
|
|
errors: list[str] = []
|
|
pattern = re.compile(r"\$ARGUMENTS\b")
|
|
for lineno, line in enumerate(masked.split("\n"), start=1):
|
|
stripped = _strip_inline_code(line)
|
|
if pattern.search(stripped):
|
|
errors.append(
|
|
f"{file_path}:{lineno}: stray-argument-placeholder — "
|
|
"wrap `$ARGUMENTS` in backticks or move into a code fence"
|
|
)
|
|
return errors
|
|
|
|
|
|
def rule_unmatched_fence(file_path: Path, content: str) -> list[str]:
|
|
"""Flag files with an odd number of triple-backtick fences.
|
|
|
|
Redundant safety net — `scripts/check_cross_references.py` also
|
|
enforces this. Kept here so the validator stays self-contained.
|
|
"""
|
|
fences = re.findall(r"^\s*```", content, re.MULTILINE)
|
|
if len(fences) % 2 != 0:
|
|
return [
|
|
f"{file_path}:1: unmatched-fence — "
|
|
"odd number of triple-backtick fences in file"
|
|
]
|
|
return []
|
|
|
|
|
|
RULES: list[tuple[str, Rule]] = [
|
|
("backtick-in-inline-code", rule_backtick_in_inline_code),
|
|
("unescaped-pipe-in-table", rule_unescaped_pipe_in_table),
|
|
("stray-argument-placeholder", rule_stray_argument_placeholder),
|
|
("unmatched-fence", rule_unmatched_fence),
|
|
]
|
|
|
|
|
|
def main() -> int:
|
|
errors: list[str] = []
|
|
files = iter_readme_files()
|
|
for file_path in files:
|
|
content = file_path.read_text(encoding="utf-8")
|
|
for _name, rule in RULES:
|
|
errors.extend(rule(file_path, content))
|
|
|
|
if errors:
|
|
print("❌ Markdown rendering errors:")
|
|
for e in errors:
|
|
print(f" - {e}")
|
|
return 1
|
|
|
|
print(
|
|
f"✅ Markdown rendering clean ({len(files)} README files, {len(RULES)} rules)"
|
|
)
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|