diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1a404a7..cf92f43 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,6 +17,14 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v4 + - name: Install Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Mermaid CLI + run: npm install -g @mermaid-js/mermaid-cli + - name: Build EPUB run: uv run scripts/build_epub.py diff --git a/scripts/build_epub.py b/scripts/build_epub.py index ae33f4c..ea93398 100755 --- a/scripts/build_epub.py +++ b/scripts/build_epub.py @@ -1,6 +1,6 @@ #!/usr/bin/env -S uv run --script # /// script -# dependencies = ["ebooklib", "markdown", "beautifulsoup4", "httpx", "pillow", "tenacity"] +# dependencies = ["ebooklib", "markdown", "beautifulsoup4", "pillow"] # /// """ Build an EPUB from the Claude How-To markdown files. @@ -17,8 +17,7 @@ Usage: --root, -r Root directory containing markdown files (default: repo root) --output, -o Output EPUB file path (default: /claude-howto-guide.epub) --verbose, -v Enable verbose logging - --timeout Timeout for API requests in seconds (default: 30) - --max-concurrent Maximum concurrent API requests (default: 10) + --mmdc-path Path to mmdc binary (default: mmdc from PATH) The script uses inline script dependencies (PEP 723), so uv will automatically install required packages in an isolated environment. @@ -28,7 +27,7 @@ Output: Features: - Organizes chapters by folder structure (01-slash-commands, etc.) - - Renders Mermaid diagrams as PNG images via Kroki.io API (async concurrent) + - Renders Mermaid diagrams as PNG images via local mmdc CLI (no network required) - Generates a cover image from the project logo - Converts internal markdown links to EPUB chapter references - Handles SVG images by replacing with styled placeholders @@ -36,36 +35,29 @@ Features: Requirements: - uv (recommended) or Python 3.10+ with dependencies installed - - Internet connection for Mermaid diagram rendering + - @mermaid-js/mermaid-cli installed globally: npm install -g @mermaid-js/mermaid-cli - Repository structure with markdown files and claude-howto-logo.png """ from __future__ import annotations import argparse -import asyncio -import base64 import html import logging import os import re +import shutil +import subprocess # nosec B404 import sys -import zlib +import tempfile from dataclasses import dataclass, field from io import BytesIO from pathlib import Path -import httpx import markdown from bs4 import BeautifulSoup from ebooklib import epub from PIL import Image, ImageDraw, ImageFont -from tenacity import ( - retry, - retry_if_exception_type, - stop_after_attempt, - wait_exponential, -) # ============================================================================= # Custom Exceptions @@ -129,11 +121,8 @@ class EPUBConfig: cover_title_color: tuple[int, int, int] = (78, 205, 196) cover_subtitle_color: tuple[int, int, int] = (168, 178, 209) - # Network Settings - kroki_base_url: str = "https://kroki.io" - request_timeout: float = 30.0 - max_retries: int = 3 - max_concurrent_requests: int = 10 + # Local rendering settings + mmdc_path: str = "mmdc" # Font paths (platform-specific) title_font_paths: list[str] = field( @@ -246,7 +235,7 @@ def validate_inputs(config: EPUBConfig, logger: logging.Logger) -> None: # ============================================================================= -# Mermaid Rendering (Async with Retry) +# Mermaid Rendering (Local mmdc) # ============================================================================= @@ -263,7 +252,7 @@ def sanitize_mermaid(mermaid_code: str) -> str: class MermaidRenderer: - """Async renderer for Mermaid diagrams via Kroki.io API.""" + """Renders Mermaid diagrams locally via the mmdc CLI (no network required).""" def __init__( self, config: EPUBConfig, state: BuildState, logger: logging.Logger @@ -271,100 +260,85 @@ class MermaidRenderer: self.config = config self.state = state self.logger = logger - self._semaphore: asyncio.Semaphore | None = None - async def _fetch_single( - self, client: httpx.AsyncClient, mermaid_code: str, index: int - ) -> tuple[str, tuple[bytes, str]]: - """Fetch a single Mermaid diagram with retry logic.""" + def _resolve_mmdc(self) -> str: + """Resolve the mmdc binary path, raising if not found.""" + mmdc = shutil.which(self.config.mmdc_path) or self.config.mmdc_path + if not shutil.which(mmdc): + raise MermaidRenderError( + f"mmdc not found at '{self.config.mmdc_path}'. " + "Install it with: npm install -g @mermaid-js/mermaid-cli" + ) + return mmdc + + def _render_one( + self, mmdc: str, mermaid_code: str, index: int + ) -> tuple[bytes, str]: + """Render a single Mermaid diagram to PNG bytes using mmdc.""" cache_key = mermaid_code.strip() - - # Check cache first if cache_key in self.state.mermaid_cache: self.logger.debug(f"Cache hit for diagram {index}") - return cache_key, self.state.mermaid_cache[cache_key] + return self.state.mermaid_cache[cache_key] - # Rate limit with semaphore - assert self._semaphore is not None - async with self._semaphore: - result = await self._fetch_with_retry(client, mermaid_code, index) - if result is None: + with tempfile.TemporaryDirectory() as tmpdir: + input_file = Path(tmpdir) / "diagram.mmd" + output_file = Path(tmpdir) / "diagram.png" + input_file.write_text(mermaid_code, encoding="utf-8") + + try: + result = subprocess.run( # nosec B603 + [ + mmdc, + "-i", + str(input_file), + "-o", + str(output_file), + "-b", + "white", + ], + capture_output=True, + text=True, + check=False, + timeout=60, + ) + except subprocess.TimeoutExpired as exc: raise MermaidRenderError( - f"Failed to render Mermaid diagram {index} after {self.config.max_retries} attempts" - ) - return cache_key, result + f"mmdc timed out rendering diagram {index} (60s limit)" + ) from exc - @retry( - stop=stop_after_attempt(3), - wait=wait_exponential(multiplier=1, min=1, max=10), - retry=retry_if_exception_type((httpx.TimeoutException, httpx.NetworkError)), - reraise=True, - ) - async def _fetch_with_retry( - self, client: httpx.AsyncClient, mermaid_code: str, index: int - ) -> tuple[bytes, str] | None: - """Fetch diagram with retry logic.""" - try: - compressed = zlib.compress(mermaid_code.encode("utf-8"), level=9) - encoded = base64.urlsafe_b64encode(compressed).decode("ascii") - url = f"{self.config.kroki_base_url}/mermaid/png/{encoded}" - - self.logger.debug(f"Fetching diagram {index}...") - response = await client.get(url, timeout=self.config.request_timeout) - - if response.status_code == 200: - self.state.mermaid_counter += 1 - img_name = f"mermaid_{self.state.mermaid_counter}.png" - result = (response.content, img_name) - cache_key = mermaid_code.strip() - self.state.mermaid_cache[cache_key] = result - self.logger.info(f"Rendered diagram {index} -> {img_name}") - return result - else: - self.logger.warning( - f"Kroki API returned {response.status_code} for diagram {index}" - ) + if result.returncode != 0: raise MermaidRenderError( - f"Kroki API returned {response.status_code} for diagram {index}" + f"mmdc failed for diagram {index}: {result.stderr.strip()}" ) - except httpx.TimeoutException: - self.logger.warning(f"Timeout fetching diagram {index}, will retry...") - raise - except httpx.NetworkError as e: - self.logger.warning( - f"Network error for diagram {index}: {e}, will retry..." - ) - raise + if not output_file.exists(): + raise MermaidRenderError(f"mmdc produced no output for diagram {index}") - async def render_all( + png_bytes = output_file.read_bytes() + + self.state.mermaid_counter += 1 + img_name = f"mermaid_{self.state.mermaid_counter}.png" + entry = (png_bytes, img_name) + self.state.mermaid_cache[cache_key] = entry + self.logger.info(f"Rendered diagram {index} -> {img_name}") + return entry + + def render_all( self, diagrams: list[tuple[int, str]] ) -> dict[str, tuple[bytes, str]]: - """Render all Mermaid diagrams concurrently.""" - self._semaphore = asyncio.Semaphore(self.config.max_concurrent_requests) + """Render all Mermaid diagrams using local mmdc.""" + mmdc = self._resolve_mmdc() results: dict[str, tuple[bytes, str]] = {} - async with httpx.AsyncClient( - follow_redirects=True, - limits=httpx.Limits(max_connections=self.config.max_concurrent_requests), - timeout=httpx.Timeout(self.config.request_timeout), - ) as client: - tasks = [ - self._fetch_single(client, sanitize_mermaid(code), idx) - for idx, code in diagrams - ] + self.logger.info(f"Rendering {len(diagrams)} Mermaid diagrams locally...") + for idx, code in diagrams: + sanitized = sanitize_mermaid(code) + cache_key = sanitized.strip() + data = self._render_one(mmdc, sanitized, idx) + results[cache_key] = data - self.logger.info(f"Fetching {len(tasks)} Mermaid diagrams concurrently...") - - # Use gather with return_exceptions=False for strict mode - completed = await asyncio.gather(*tasks) - - for cache_key, data in completed: - results[cache_key] = data - - success_count = len(results) self.logger.info( - f"Successfully rendered {success_count}/{len(diagrams)} diagrams" + f"Successfully rendered {len(results)} unique diagrams ({len(diagrams)} total blocks)" ) return results @@ -702,7 +676,7 @@ def handle_svg_image( svg_path = (root_path / src).resolve() if not svg_path.is_file(): logger.warning(f"SVG file not found: {src}") - return f'

[SVG not found: {html.escape(src)}]

' + return f"

[SVG not found: {html.escape(src)}]

" svg_key = str(svg_path) @@ -907,12 +881,12 @@ def create_stylesheet() -> epub.EpubItem: ) -async def build_epub_async( +def build_epub_async( config: EPUBConfig, logger: logging.Logger, state: BuildState | None = None, ) -> Path: - """Build EPUB asynchronously with concurrent diagram fetching.""" + """Build EPUB with local Mermaid diagram rendering.""" state = state or BuildState() state.reset() # Ensure clean state @@ -940,14 +914,14 @@ async def build_epub_async( collector = ChapterCollector(config.root_path, state) chapter_infos = collector.collect_all_chapters(get_chapter_order()) - # Extract and pre-fetch all Mermaid diagrams + # Extract and render all Mermaid diagrams locally logger.info("Extracting Mermaid diagrams...") md_files = [(ch.file_path, ch.file_title) for ch in chapter_infos] all_diagrams = extract_all_mermaid_blocks(md_files, logger) if all_diagrams: renderer = MermaidRenderer(config, state, logger) - await renderer.render_all(all_diagrams) + renderer.render_all(all_diagrams) # Process chapters logger.info("Processing chapters...") @@ -1039,7 +1013,7 @@ def create_epub(root_path: Path, output_path: Path, verbose: bool = False) -> Pa """Synchronous wrapper for backward compatibility.""" logger = setup_logging(verbose) config = EPUBConfig(root_path=root_path, output_path=output_path) - return asyncio.run(build_epub_async(config, logger)) + return build_epub_async(config, logger) # ============================================================================= @@ -1070,16 +1044,10 @@ def main() -> int: "--verbose", "-v", action="store_true", help="Enable verbose logging" ) parser.add_argument( - "--timeout", - type=float, - default=30.0, - help="Timeout for API requests in seconds (default: 30)", - ) - parser.add_argument( - "--max-concurrent", - type=int, - default=10, - help="Maximum concurrent API requests (default: 10)", + "--mmdc-path", + type=str, + default="mmdc", + help="Path to mmdc binary (default: mmdc from PATH)", ) parser.add_argument( "--lang", @@ -1116,12 +1084,11 @@ def main() -> int: output_path=output, language=language, title=title, - request_timeout=args.timeout, - max_concurrent_requests=args.max_concurrent, + mmdc_path=args.mmdc_path, ) try: - result = asyncio.run(build_epub_async(config, logger)) + result = build_epub_async(config, logger) print(f"Successfully created: {result}") return 0 except EPUBBuildError as e: diff --git a/scripts/tests/test_build_epub.py b/scripts/tests/test_build_epub.py index 44533ab..9d0cf4c 100644 --- a/scripts/tests/test_build_epub.py +++ b/scripts/tests/test_build_epub.py @@ -4,7 +4,7 @@ from __future__ import annotations import logging from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -14,6 +14,8 @@ from build_epub import ( BuildState, ChapterCollector, EPUBConfig, + MermaidRenderer, + MermaidRenderError, ValidationError, create_chapter_html, extract_all_mermaid_blocks, @@ -92,9 +94,7 @@ class TestEPUBConfig: assert config.title == "Claude Code How-To Guide" assert config.language == "en" assert config.author == "Claude Code Community" - assert config.request_timeout == 30.0 - assert config.max_concurrent_requests == 10 - assert config.max_retries == 3 + assert config.mmdc_path == "mmdc" def test_custom_values(self, tmp_path: Path) -> None: """Test that custom values override defaults.""" @@ -102,12 +102,10 @@ class TestEPUBConfig: root_path=tmp_path, output_path=tmp_path / "out.epub", title="Custom Title", - request_timeout=60.0, - max_concurrent_requests=5, + mmdc_path="/usr/local/bin/mmdc", ) assert config.title == "Custom Title" - assert config.request_timeout == 60.0 - assert config.max_concurrent_requests == 5 + assert config.mmdc_path == "/usr/local/bin/mmdc" # ============================================================================= @@ -375,6 +373,110 @@ class TestLogging: assert logger.name == "epub_builder" +# ============================================================================= +# MermaidRenderer Tests +# ============================================================================= + + +class TestMermaidRenderer: + """Tests for local MermaidRenderer.""" + + def _make_renderer( + self, tmp_path: Path, state: BuildState, logger: logging.Logger + ) -> MermaidRenderer: + config = EPUBConfig( + root_path=tmp_path, + output_path=tmp_path / "out.epub", + mmdc_path="mmdc", + ) + return MermaidRenderer(config, state, logger) + + def test_render_all_success( + self, tmp_path: Path, state: BuildState, logger: logging.Logger + ) -> None: + """Test that render_all uses mmdc and caches results.""" + fake_png = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 + renderer = self._make_renderer(tmp_path, state, logger) + + with ( + patch("shutil.which", return_value="/usr/bin/mmdc"), + patch("subprocess.run") as mock_run, + patch("pathlib.Path.exists", return_value=True), + patch("pathlib.Path.read_bytes", return_value=fake_png), + ): + mock_run.return_value = MagicMock(returncode=0, stderr="") + results = renderer.render_all([(1, "graph TD\n A --> B")]) + + assert len(results) == 1 + png_bytes, img_name = next(iter(results.values())) + assert png_bytes == fake_png + assert img_name.startswith("mermaid_") + assert img_name.endswith(".png") + + def test_render_all_mmdc_not_found( + self, tmp_path: Path, state: BuildState, logger: logging.Logger + ) -> None: + """Test that missing mmdc raises MermaidRenderError.""" + renderer = self._make_renderer(tmp_path, state, logger) + + with ( + patch("shutil.which", return_value=None), + pytest.raises(MermaidRenderError, match="mmdc not found"), + ): + renderer.render_all([(1, "graph TD\n A --> B")]) + + def test_render_all_mmdc_failure( + self, tmp_path: Path, state: BuildState, logger: logging.Logger + ) -> None: + """Test that mmdc non-zero exit raises MermaidRenderError.""" + renderer = self._make_renderer(tmp_path, state, logger) + + with ( + patch("shutil.which", return_value="/usr/bin/mmdc"), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value = MagicMock(returncode=1, stderr="parse error") + with pytest.raises(MermaidRenderError, match="mmdc failed"): + renderer.render_all([(1, "graph TD\n A --> B")]) + + def test_render_all_deduplication( + self, tmp_path: Path, state: BuildState, logger: logging.Logger + ) -> None: + """Test that identical diagrams are only rendered once.""" + fake_png = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100 + renderer = self._make_renderer(tmp_path, state, logger) + same_code = "graph TD\n A --> B" + + with ( + patch("shutil.which", return_value="/usr/bin/mmdc"), + patch("subprocess.run") as mock_run, + patch("pathlib.Path.exists", return_value=True), + patch("pathlib.Path.read_bytes", return_value=fake_png), + ): + mock_run.return_value = MagicMock(returncode=0, stderr="") + results = renderer.render_all([(1, same_code), (2, same_code)]) + + # mmdc should only be called once for duplicate diagrams + assert mock_run.call_count == 1 + # Both entries map to the same cached result + assert len(results) == 1 + + def test_render_all_timeout( + self, tmp_path: Path, state: BuildState, logger: logging.Logger + ) -> None: + """Test that a hung mmdc process raises MermaidRenderError.""" + import subprocess as _subprocess + + renderer = self._make_renderer(tmp_path, state, logger) + + with ( + patch("shutil.which", return_value="/usr/bin/mmdc"), + patch("subprocess.run", side_effect=_subprocess.TimeoutExpired("mmdc", 60)), + pytest.raises(MermaidRenderError, match="timed out"), + ): + renderer.render_all([(1, "graph TD\n A --> B")]) + + # ============================================================================= # Integration Tests # ============================================================================= @@ -383,8 +485,7 @@ class TestLogging: class TestIntegration: """Integration tests for the full build process.""" - @pytest.mark.asyncio - async def test_build_without_mermaid( + def test_build_without_mermaid( self, tmp_project: Path, logger: logging.Logger ) -> None: """Test building an EPUB without Mermaid diagrams.""" @@ -399,7 +500,7 @@ class TestIntegration: with patch("build_epub.get_chapter_order") as mock_order: mock_order.return_value = [("README.md", "Introduction")] - result = await build_epub_async(config, logger) + result = build_epub_async(config, logger) assert result.exists() assert result.suffix == ".epub"