mirror of
https://github.com/luongnv89/claude-howto.git
synced 2026-05-23 10:19:41 +02:00
3557d791f5
* feat(scripts): add static website generator from markdown sources (#85) Generate an elegant, mobile-friendly static site from the existing tutorial markdown files. The markdown remains the single source of truth — `scripts/build_website.py` reads from the same `.md` files the EPUB builder uses, rewrites cross-references to site URLs, and rewrites references to non-markdown repo files (`.json`, `.sh`, `.py`) to GitHub blob URLs so users can jump to the source on github.com. Highlights: - Reuses the chapter ordering convention from `build_epub.py` - Anchor algorithm mirrors `check_cross_references.heading_to_anchor` for parity with the validator - Mermaid renders client-side via `mermaid.js` (no pre-render step) - Tailwind CSS via CDN; light/dark theme toggle; sidebar nav; in-page TOC; prev/next page navigation; mobile responsive - 27 unit + smoke tests covering anchors, link rewriting (including `<source srcset>` inside `<picture>`), Mermaid handling, and a full end-to-end build - GitHub Pages deploy workflow at `.github/workflows/pages.yml` Closes #85 * fix(website): use relative URLs in sidebar nav and avoid INDEX.html collision Two bugs found by local browser dogfooding: 1. **Sidebar nav broke from deep pages.** `build_navigation` emitted raw `output_url` values (site-root-relative) which made every sidebar link 404 from any page below the root. Moved the call inside the per-page render loop so each page gets nav links computed relative to its own URL — `01-slash-commands/index.html` from the root, `../01-slash-commands/...` from a depth-1 page, `../../01-slash-commands/...` from depth-2. 2. **`INDEX.md` overwrote `index.html`.** On case-insensitive filesystems (macOS/Windows), `INDEX.html` and `index.html` are the same file, so `INDEX.md` clobbered the rendered `README.md`. Added `_disambiguate_url` that detects case-insensitive collisions and suffixes the colliding page with its source stem (`INDEX-index.html`). Added 2 tests; full suite stays at 83 passed. 
* fix(scripts): skip URLs with port in localhost/127.0.0.1 skip list `check_links.is_skipped()` did an exact-match comparison against the host, so `http://localhost:8080` (used in scripts/README.md as a preview example) was not skipped and CI's link check tried to fetch it, which fails on the GitHub runner. Strip the port before comparing. * chore(scripts): drop vestigial mypy ignore_errors for build_website The override silenced all mypy errors for build_website, making the "mypy: clean" claim technically vacuous. Removing it shows mypy is actually clean — 0 issues on build_website after type annotations were added during PR review. * feat(website): self-host Tailwind, Mermaid, and Inter fonts Drop all third-party CDN dependencies from rendered pages. The site previously loaded Tailwind from cdn.tailwindcss.com (Play CDN — JIT compile in browser, marked not-for-production), Mermaid from cdn.jsdelivr.net, and Inter/JetBrains Mono from fonts.googleapis.com. Replace with a vendored toolchain: - scripts/vendor_assets.py downloads the Tailwind standalone CLI (Go binary, no Node toolchain), Mermaid's UMD bundle, and Google Fonts CSS + WOFF2 files. Cached under scripts/.vendor-cache/ (gitignored), refetched only when missing. - Tailwind compiles a per-build site/assets/tailwind.css with only the utility classes actually used by the rendered HTML. - Mermaid and font files land in site/assets/vendor/ and load via relative URLs. - Tailwind config + entry CSS live in scripts/website_templates/ alongside the Jinja template. - build_website grows a skip_vendor flag so the smoke test runs offline. - pre-commit mypy hook gets types-Markdown so it can resolve the same imports as the project venv. Verification: 86/86 pytest pass, ruff/mypy/bandit clean, full build produces a working site with zero external requests (verified in a headless browser — no console errors, no failed network calls, Mermaid diagrams render). 
* fix(website): use tree URLs for repo directory links (#85) * fix(website): include additional top-level docs (#85)
129 lines
4.0 KiB
Python
129 lines
4.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Check external URLs in Markdown files are reachable."""
|
|
|
|
import re
|
|
import sys
|
|
import urllib.error
|
|
import urllib.request
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from pathlib import Path
|
|
|
|
# Directory names to exclude: a Markdown file is not scanned when any component
# of its path matches one of these.
IGNORE_DIRS = {
    ".agents",
    ".claude",
    ".git",
    ".venv",
    "blog-posts",
    "node_modules",
    "openspec",
    "prompts",
}

# Timeout value handed to every outgoing request.
TIMEOUT = 10
|
|
# Hosts that are never fetched. A URL is skipped when its host equals one of
# these entries or is a subdomain of one.
SKIP_DOMAINS = {
    # Badge and chart services
    "shields.io",
    "img.shields.io",
    "star-history.com",
    "api.star-history.com",
    # Placeholder, local, and internal hosts
    "example.com",
    "my-webhook.example.com",
    "localhost",
    "127.0.0.1",
    "git.internal",
    # Wikipedia rejects HEAD requests, and GET is unreliable in CI without network
    "en.wikipedia.org",
    "wikipedia.org",
    # The GitHub API needs authentication; anonymous calls to protected
    # endpoints come back as 404
    "api.github.com",
    # Claude Code native-binary download host: the directory listing answers 404
    # because the installer fetches artifacts by their full filename path
    "downloads.claude.ai",
}

# Host suffixes that mark a URL as a placeholder or internal address.
SKIP_DOMAIN_SUFFIXES = (".example.com", ".example.org", ".internal")

# Substrings of template URLs that are intentionally non-resolvable.
SKIP_URL_PATTERNS = {
    "docs.example.com",
    "github.com/org/",
    "github.com/user/",
    "github.com/your-org/",
}

# Matches http(s) URLs; the match may include trailing Markdown punctuation,
# which callers are expected to trim.
URL_RE = re.compile(r"https?://[a-zA-Z0-9][a-zA-Z0-9\-._~:/?#\[\]@!$&'()*+,;=%]+")
|
|
|
|
|
|
def _url_host(url: str) -> str:
    """Return the lower-cased host of *url*, or "" when no host can be parsed.

    Userinfo (``user@``), the port, and anything from the first ``/``, ``?`` or
    ``#`` onwards are excluded from the result.
    """
    # Local import so this block does not depend on the file's import section.
    from urllib.parse import urlsplit

    try:
        return urlsplit(url).hostname or ""
    except ValueError:
        # urlsplit rejects malformed bracketed (IPv6-style) hosts.
        return ""


def is_skipped(url: str) -> bool:
    """Return True when *url* must not be fetched by the link checker.

    A URL is skipped when:
      * no host can be parsed from it (treated as malformed),
      * its host equals, or is a subdomain of, an entry in ``SKIP_DOMAINS``,
      * its host ends with one of ``SKIP_DOMAIN_SUFFIXES``, or
      * the URL contains one of the ``SKIP_URL_PATTERNS`` substrings.

    The host comparison is case-insensitive and ignores port and userinfo, so
    ``http://LOCALHOST:8080`` and ``http://user@localhost/`` are both skipped.
    """
    domain = _url_host(url)
    if not domain:
        return True  # malformed URL
    if any(skip == domain or domain.endswith("." + skip) for skip in SKIP_DOMAINS):
        return True
    if domain.endswith(tuple(SKIP_DOMAIN_SUFFIXES)):
        return True
    return any(pattern in url for pattern in SKIP_URL_PATTERNS)
|
|
|
|
|
|
def check_url(url: str) -> tuple[str, bool, str]:
    """Probe *url* with a HEAD request and report the outcome.

    Returns a ``(url, ok, reason)`` tuple. URLs rejected by ``is_skipped`` are
    reported as ok with reason ``"skipped"`` and are never fetched. HTTP
    401/403/405/429 responses count as ok; every other failure is reported
    with ``ok`` set to False and a short reason.
    """
    if is_skipped(url):
        return url, True, "skipped"

    # Statuses that usually mean "server is up but refuses bots or HEAD".
    tolerated_statuses = (401, 403, 405, 429)

    try:
        head_request = urllib.request.Request(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            method="HEAD",
        )
        with urllib.request.urlopen(head_request, timeout=TIMEOUT):  # nosec B310
            pass
    except urllib.error.HTTPError as http_err:
        status = http_err.code
        if status not in tolerated_statuses:
            return url, False, f"HTTP {status}"
        return url, True, f"http {status} (ignored)"
    except Exception as exc:
        # Anything else (DNS failure, timeout, TLS error, ...) is a dead link.
        return url, False, str(exc)
    else:
        return url, True, "ok"
|
|
|
|
|
|
def _strip_trailing_markup(raw_url: str) -> str:
    """Trim Markdown/punctuation residue from a regex match and drop the fragment.

    The URL regex over-captures characters from link syntax such as
    ``[text](https://url/)`` or ``**https://url)**``. A trailing ``)`` is only
    removed while the URL has more closing than opening parentheses, so URLs
    that legitimately end in a balanced pair (``.../wiki/Foo_(bar)``) survive.
    """
    trailing = ">*_`':.,;"
    url = raw_url
    while url:
        last = url[-1]
        if last == ")":
            if url.count(")") <= url.count("("):
                break  # balanced: the paren is part of the URL
        elif last not in trailing:
            break
        url = url[:-1]
    return url.split("#")[0]


def main(strict: bool = False) -> int:
    """Check every external URL found in Markdown files under the current directory.

    Files whose path contains a component listed in ``IGNORE_DIRS`` are not
    scanned. Each distinct URL is checked once via ``check_url``; a dead URL is
    reported once per file that references it.

    Args:
        strict: When True, dead links make the function return 1. When False,
            dead links are printed but the return value stays 0.

    Returns:
        Process exit code: 1 only when dead links were found and *strict* is
        set, otherwise 0.
    """
    urls: dict[str, list[str]] = {}  # url -> [file, ...]

    md_files = [
        f
        for f in Path().rglob("*.md")
        if not any(part in IGNORE_DIRS for part in f.parts)
    ]

    for file_path in md_files:
        # Decode explicitly as UTF-8 instead of the locale default, which
        # fails on non-ASCII Markdown on platforms such as Windows. URLs are
        # ASCII, so replacing undecodable bytes cannot alter a match.
        content = file_path.read_text(encoding="utf-8", errors="replace")
        cleaned = (_strip_trailing_markup(raw) for raw in URL_RE.findall(content))
        # dict.fromkeys de-duplicates while keeping first-seen order, so a URL
        # repeated inside one file is reported for that file only once.
        for clean_url in dict.fromkeys(cleaned):
            urls.setdefault(clean_url, []).append(str(file_path))

    if not urls:
        print("✅ No external URLs found")
        return 0

    errors: list[str] = []
    with ThreadPoolExecutor(max_workers=10) as pool:
        futures = {pool.submit(check_url, url): url for url in urls}
        for future in as_completed(futures):
            url, ok, reason = future.result()
            if not ok:
                errors.extend(f"{f}: dead link → {url} ({reason})" for f in urls[url])

    if errors:
        print("❌ Dead links found:")
        for e in sorted(errors):
            print(f" - {e}")
        # In non-strict mode (pre-commit), report but don't block the commit.
        # Set LINK_CHECK_STRICT=1 (as CI does) to enforce failures.
        return 1 if strict else 0

    print(f"✅ All external URLs reachable ({len(urls)} checked)")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    import os

    # Strict mode is opt-in: only the exact value "1" turns dead links into a
    # failing exit status.
    strict_mode = os.environ.get("LINK_CHECK_STRICT") == "1"
    exit_code = main(strict=strict_mode)
    sys.exit(exit_code)
|