diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..35e7237 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,106 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +# Cancel in-progress runs for the same branch +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + name: Lint & Format + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python + run: uv python install 3.11 + + - name: Install Ruff + run: uv pip install --system ruff + + - name: Ruff Format Check + run: ruff format --check scripts/ + + - name: Ruff Lint Check + run: ruff check scripts/ + + security: + name: Security Scan + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python + run: uv python install 3.11 + + - name: Install Bandit + run: uv pip install --system "bandit[toml]" + + - name: Run Bandit Security Scan + run: bandit -c pyproject.toml -r scripts/ --exclude scripts/tests/ + + test: + name: Tests + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python + run: uv python install 3.11 + + - name: Install dependencies + run: | + uv pip install --system -r requirements.txt + uv pip install --system pytest pytest-asyncio + + - name: Run Tests + run: pytest scripts/tests/ -v --tb=short + + build: + name: Build EPUB + runs-on: ubuntu-latest + needs: [lint, security, test] + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Build EPUB + run: uv run scripts/build_epub.py + + - name: Verify EPUB Created + run: | + if [ -f claude-howto-guide.epub ]; then + echo "EPUB built 
successfully" + ls -lh claude-howto-guide.epub + else + echo "EPUB file not found!" + exit 1 + fi + + - name: Upload EPUB Artifact + uses: actions/upload-artifact@v4 + with: + name: claude-howto-guide-epub + path: claude-howto-guide.epub + retention-days: 7 diff --git a/.gitignore b/.gitignore index 5f5a63c..9cfb8af 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,18 @@ yarn-error.log* blog-posts/ # EPUB files in root directory -/*.epub \ No newline at end of file +/*.epub + +# Python virtual environment +.venv/ + +# Python cache +__pycache__/ +*.py[cod] +*$py.class +*.so +.pytest_cache/ +.mypy_cache/ +*.egg-info/ +dist/ +build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7331f61 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,54 @@ +# Pre-commit hooks for claude-howto project +# Run `pre-commit install` to set up hooks +# Run `pre-commit run --all-files` to check all files + +default_language_version: + python: python3.11 + +repos: + # Ruff - Fast Python linter and formatter + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.2 + hooks: + # Ruff linter + - id: ruff + name: ruff-lint + args: [--fix, --exit-non-zero-on-fix] + types_or: [python, pyi] + files: ^scripts/ + # Ruff formatter (replaces black) + - id: ruff-format + name: ruff-format + types_or: [python, pyi] + files: ^scripts/ + + # Bandit - Security linter + - repo: https://github.com/PyCQA/bandit + rev: 1.7.10 + hooks: + - id: bandit + name: bandit-security + args: [-c, pyproject.toml] + additional_dependencies: ["bandit[toml]"] + types: [python] + files: ^scripts/ + exclude: ^scripts/tests/ + + # Standard pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + name: check-yaml + args: [--allow-multiple-documents] + - id: check-toml + name: check-toml + - id: end-of-file-fixer + name: fix-end-of-file + - id: trailing-whitespace + name: fix-trailing-whitespace + - 
id: check-added-large-files + name: check-large-files + args: [--maxkb=1000] + - id: check-merge-conflict + name: check-merge-conflict diff --git a/03-skills/code-review/scripts/analyze-metrics.py b/03-skills/code-review/scripts/analyze-metrics.py index dc0fc0d..e294973 100644 --- a/03-skills/code-review/scripts/analyze-metrics.py +++ b/03-skills/code-review/scripts/analyze-metrics.py @@ -2,31 +2,33 @@ import re import sys + def analyze_code_metrics(code): """Analyze code for common metrics.""" # Count functions - functions = len(re.findall(r'^def\s+\w+', code, re.MULTILINE)) + functions = len(re.findall(r"^def\s+\w+", code, re.MULTILINE)) # Count classes - classes = len(re.findall(r'^class\s+\w+', code, re.MULTILINE)) + classes = len(re.findall(r"^class\s+\w+", code, re.MULTILINE)) # Average line length - lines = code.split('\n') + lines = code.split("\n") avg_length = sum(len(l) for l in lines) / len(lines) if lines else 0 # Estimate complexity - complexity = len(re.findall(r'\b(if|elif|else|for|while|and|or)\b', code)) + complexity = len(re.findall(r"\b(if|elif|else|for|while|and|or)\b", code)) return { - 'functions': functions, - 'classes': classes, - 'avg_line_length': avg_length, - 'complexity_score': complexity + "functions": functions, + "classes": classes, + "avg_line_length": avg_length, + "complexity_score": complexity, } -if __name__ == '__main__': - with open(sys.argv[1], 'r') as f: + +if __name__ == "__main__": + with open(sys.argv[1]) as f: code = f.read() metrics = analyze_code_metrics(code) for key, value in metrics.items(): diff --git a/03-skills/code-review/scripts/compare-complexity.py b/03-skills/code-review/scripts/compare-complexity.py index a93182b..d4aba23 100644 --- a/03-skills/code-review/scripts/compare-complexity.py +++ b/03-skills/code-review/scripts/compare-complexity.py @@ -6,14 +6,14 @@ Helps identify if refactoring actually simplifies code structure. 
import re import sys -from typing import Dict, Tuple + class ComplexityAnalyzer: """Analyze code complexity metrics.""" def __init__(self, code: str): self.code = code - self.lines = code.split('\n') + self.lines = code.split("\n") def calculate_cyclomatic_complexity(self) -> int: """ @@ -24,13 +24,13 @@ class ComplexityAnalyzer: # Count decision points decision_patterns = [ - r'\bif\b', - r'\belif\b', - r'\bfor\b', - r'\bwhile\b', - r'\bexcept\b', - r'\band\b(?!$)', - r'\bor\b(?!$)' + r"\bif\b", + r"\belif\b", + r"\bfor\b", + r"\bwhile\b", + r"\bexcept\b", + r"\band\b(?!$)", + r"\bor\b(?!$)", ] for pattern in decision_patterns: @@ -49,10 +49,10 @@ class ComplexityAnalyzer: for line in self.lines: # Track nesting depth - if re.search(r'^\s*(if|for|while|def|class|try)\b', line): + if re.search(r"^\s*(if|for|while|def|class|try)\b", line): nesting_depth += 1 cognitive += nesting_depth - elif re.search(r'^\s*(elif|else|except|finally)\b', line): + elif re.search(r"^\s*(elif|else|except|finally)\b", line): cognitive += nesting_depth # Reduce nesting when unindenting @@ -74,28 +74,37 @@ class ComplexityAnalyzer: cognitive = self.calculate_cognitive_complexity() # Simplified MI calculation - mi = 171 - 5.2 * (cyclomatic / lines) - 0.23 * (cognitive) - 16.2 * (lines / 1000) + mi = ( + 171 + - 5.2 * (cyclomatic / lines) + - 0.23 * (cognitive) + - 16.2 * (lines / 1000) + ) return max(0, min(100, mi)) - def get_complexity_report(self) -> Dict: + def get_complexity_report(self) -> dict: """Generate comprehensive complexity report.""" return { - 'cyclomatic_complexity': self.calculate_cyclomatic_complexity(), - 'cognitive_complexity': self.calculate_cognitive_complexity(), - 'maintainability_index': round(self.calculate_maintainability_index(), 2), - 'lines_of_code': len(self.lines), - 'avg_line_length': round(sum(len(l) for l in self.lines) / len(self.lines), 2) if self.lines else 0 + "cyclomatic_complexity": self.calculate_cyclomatic_complexity(), + "cognitive_complexity": 
self.calculate_cognitive_complexity(), + "maintainability_index": round(self.calculate_maintainability_index(), 2), + "lines_of_code": len(self.lines), + "avg_line_length": round( + sum(len(l) for l in self.lines) / len(self.lines), 2 + ) + if self.lines + else 0, } def compare_files(before_file: str, after_file: str) -> None: """Compare complexity metrics between two code versions.""" - with open(before_file, 'r') as f: + with open(before_file) as f: before_code = f.read() - with open(after_file, 'r') as f: + with open(after_file) as f: after_code = f.read() before_analyzer = ComplexityAnalyzer(before_code) @@ -123,10 +132,16 @@ def compare_files(before_file: str, after_file: str) -> None: print(f" Avg Line Length: {after_metrics['avg_line_length']}") print("\nCHANGES:") - cyclomatic_change = after_metrics['cyclomatic_complexity'] - before_metrics['cyclomatic_complexity'] - cognitive_change = after_metrics['cognitive_complexity'] - before_metrics['cognitive_complexity'] - mi_change = after_metrics['maintainability_index'] - before_metrics['maintainability_index'] - loc_change = after_metrics['lines_of_code'] - before_metrics['lines_of_code'] + cyclomatic_change = ( + after_metrics["cyclomatic_complexity"] - before_metrics["cyclomatic_complexity"] + ) + cognitive_change = ( + after_metrics["cognitive_complexity"] - before_metrics["cognitive_complexity"] + ) + mi_change = ( + after_metrics["maintainability_index"] - before_metrics["maintainability_index"] + ) + loc_change = after_metrics["lines_of_code"] - before_metrics["lines_of_code"] print(f" Cyclomatic Complexity: {cyclomatic_change:+d}") print(f" Cognitive Complexity: {cognitive_change:+d}") @@ -151,7 +166,7 @@ def compare_files(before_file: str, after_file: str) -> None: print("=" * 60) -if __name__ == '__main__': +if __name__ == "__main__": if len(sys.argv) != 3: print("Usage: python compare-complexity.py ") sys.exit(1) diff --git a/03-skills/doc-generator/generate-docs.py 
b/03-skills/doc-generator/generate-docs.py index 2578315..3a0f588 100644 --- a/03-skills/doc-generator/generate-docs.py +++ b/03-skills/doc-generator/generate-docs.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import ast -import json -from typing import Dict, List + class APIDocExtractor(ast.NodeVisitor): """Extract API documentation from Python source code.""" @@ -11,13 +10,13 @@ class APIDocExtractor(ast.NodeVisitor): def visit_FunctionDef(self, node): """Extract function documentation.""" - if node.name.startswith('get_') or node.name.startswith('post_'): + if node.name.startswith("get_") or node.name.startswith("post_"): doc = ast.get_docstring(node) endpoint = { - 'name': node.name, - 'docstring': doc, - 'params': [arg.arg for arg in node.args.args], - 'returns': self._extract_return_type(node) + "name": node.name, + "docstring": doc, + "params": [arg.arg for arg in node.args.args], + "returns": self._extract_return_type(node), } self.endpoints.append(endpoint) self.generic_visit(node) @@ -28,7 +27,8 @@ class APIDocExtractor(ast.NodeVisitor): return ast.unparse(node.returns) return "Any" -def generate_markdown_docs(endpoints: List[Dict]) -> str: + +def generate_markdown_docs(endpoints: list[dict]) -> str: """Generate markdown documentation from endpoints.""" docs = "# API Documentation\n\n" @@ -41,9 +41,11 @@ def generate_markdown_docs(endpoints: List[Dict]) -> str: return docs -if __name__ == '__main__': + +if __name__ == "__main__": import sys - with open(sys.argv[1], 'r') as f: + + with open(sys.argv[1]) as f: tree = ast.parse(f.read()) extractor = APIDocExtractor() diff --git a/README.md b/README.md index 264310f..e5d560d 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,14 @@ # Claude How To +## Contributors + +Thanks to everyone who has contributed to this project! 
+ +| Contributor | PRs | +|-------------|-----| +| [wjhrdy](https://github.com/wjhrdy) | [#1 - add a tool to create an epub](https://github.com/luongnv89/claude-howto/pull/1) | + Complete collection of examples for some important Claude Code features and concepts. ## Quick Navigation @@ -667,6 +675,20 @@ These examples are provided as-is for educational purposes. Adapt and use them f --- -**Last Updated**: November 2025 +## EPUB Generation + +Want to read this guide offline? Generate an EPUB ebook: + +```bash +uv run scripts/build_epub.py +``` + +This creates `claude-howto-guide.epub` with all content, including rendered Mermaid diagrams. + +See [scripts/README.md](scripts/README.md) for more options. + +--- + +**Last Updated**: December 2025 **Claude Code Version**: 1.0+ **Compatible Models**: Sonnet 4.5, Opus 4.1, Haiku 4.5 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1736e7b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,100 @@ +[project] +name = "claude-howto" +version = "1.0.0" +description = "Claude Code How-To Guide with EPUB builder" +readme = "README.md" +license = {text = "MIT"} +requires-python = ">=3.10" +dependencies = [ + "ebooklib", + "markdown", + "beautifulsoup4", + "httpx", + "pillow", + "tenacity", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0", + "pytest-asyncio>=0.21", +] + +[tool.pytest.ini_options] +testpaths = ["scripts/tests"] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +python_files = ["test_*.py"] +python_functions = ["test_*"] +addopts = "-v" + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +# ============================================================================= +# Ruff Configuration +# ============================================================================= +[tool.ruff] +target-version = "py310" +line-length = 88 +include = ["scripts/**/*.py"] +exclude = [ + ".git", + ".venv", + "__pycache__", + 
".pytest_cache", + "*.egg-info", +] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # Pyflakes + "I", # isort (import sorting) + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "UP", # pyupgrade + "SIM", # flake8-simplify + "TCH", # flake8-type-checking + "RUF", # Ruff-specific rules + "PTH", # flake8-use-pathlib + "PL", # Pylint + "PERF", # Perflint +] +ignore = [ + "E501", # Line too long (handled by formatter) + "PLR0913", # Too many arguments + "PLR2004", # Magic value comparison + "PLR0915", # Too many statements + "PERF203", # try-except in loop (acceptable for error handling) + "PERF403", # dict comprehension (readability preference) + "TC003", # Type-checking imports (not critical) + "PLC0415", # Import not at top level (acceptable for lazy imports) + "RUF005", # Collection concatenation (readability preference) +] +fixable = ["ALL"] +unfixable = [] + +[tool.ruff.lint.isort] +known-first-party = ["build_epub"] +force-single-line = false +combine-as-imports = true + +[tool.ruff.lint.per-file-ignores] +"scripts/tests/*.py" = ["S101", "PLR2004"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +docstring-code-format = true + +# ============================================================================= +# Bandit Configuration +# ============================================================================= +[tool.bandit] +targets = ["scripts"] +exclude_dirs = ["scripts/tests", ".venv", "__pycache__"] +skips = ["B101", "B113"] # B113: httpx timeout false positive (timeout is set) diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..131e1fe --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,11 @@ +# Development dependencies (includes core dependencies) +-r requirements.txt + +# Testing +pytest>=7.0 +pytest-asyncio>=0.21 + +# Code Quality +pre-commit>=3.6.0 +ruff>=0.8.0 +bandit[toml]>=1.7.7 diff --git 
a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e6f81f0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +# Core dependencies for build_epub.py +ebooklib +markdown +beautifulsoup4 +httpx +pillow +tenacity diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..7eac754 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,115 @@ +# EPUB Builder Script + +Build an EPUB ebook from the Claude How-To markdown files. + +## Features + +- Organizes chapters by folder structure (01-slash-commands, 02-memory, etc.) +- Renders Mermaid diagrams as PNG images via Kroki.io API +- Async concurrent fetching - renders all diagrams in parallel +- Generates a cover image from the project logo +- Converts internal markdown links to EPUB chapter references +- Strict error mode - fails if any diagram cannot be rendered + +## Requirements + +- Python 3.10+ +- [uv](https://github.com/astral-sh/uv) +- Internet connection for Mermaid diagram rendering + +## Quick Start + +```bash +# Simplest way - uv handles everything +uv run scripts/build_epub.py +``` + +## Development Setup + +```bash +# Create virtual environment +uv venv + +# Activate and install dependencies +source .venv/bin/activate +uv pip install -r requirements-dev.txt + +# Run tests +pytest scripts/tests/ -v + +# Run the script +python scripts/build_epub.py +``` + +## Command-Line Options + +``` +usage: build_epub.py [-h] [--root ROOT] [--output OUTPUT] [--verbose] + [--timeout TIMEOUT] [--max-concurrent MAX_CONCURRENT] + +options: + -h, --help show this help message and exit + --root, -r ROOT Root directory (default: repo root) + --output, -o OUTPUT Output path (default: claude-howto-guide.epub) + --verbose, -v Enable verbose logging + --timeout TIMEOUT API timeout in seconds (default: 30) + --max-concurrent N Max concurrent requests (default: 10) +``` + +## Examples + +```bash +# Build with verbose output +uv run scripts/build_epub.py --verbose + +# Custom output location 
+uv run scripts/build_epub.py --output ~/Desktop/claude-guide.epub + +# Limit concurrent requests (if rate-limited) +uv run scripts/build_epub.py --max-concurrent 5 +``` + +## Output + +Creates `claude-howto-guide.epub` in the repository root directory. + +The EPUB includes: +- Cover image with project logo +- Table of contents with nested sections +- All markdown content converted to EPUB-compatible HTML +- Mermaid diagrams rendered as PNG images + +## Running Tests + +```bash +# With virtual environment +source .venv/bin/activate +pytest scripts/tests/ -v + +# Or with uv directly +uv run --with pytest --with pytest-asyncio \ + --with ebooklib --with markdown --with beautifulsoup4 \ + --with httpx --with pillow --with tenacity \ + pytest scripts/tests/ -v +``` + +## Dependencies + +Managed via PEP 723 inline script metadata: + +| Package | Purpose | +|---------|---------| +| `ebooklib` | EPUB generation | +| `markdown` | Markdown to HTML conversion | +| `beautifulsoup4` | HTML parsing | +| `httpx` | Async HTTP client | +| `pillow` | Cover image generation | +| `tenacity` | Retry logic | + +## Troubleshooting + +**Build fails with network error**: Check internet connectivity and Kroki.io status. Try `--timeout 60`. + +**Rate limiting**: Reduce concurrent requests with `--max-concurrent 3`. + +**Missing logo**: The script generates a text-only cover if `claude-howto-logo.png` is not found. diff --git a/scripts/build_epub.py b/scripts/build_epub.py index 49b39bd..48fcf9e 100755 --- a/scripts/build_epub.py +++ b/scripts/build_epub.py @@ -1,6 +1,6 @@ #!/usr/bin/env -S uv run --script # /// script -# dependencies = ["ebooklib", "markdown", "beautifulsoup4", "httpx", "pillow"] +# dependencies = ["ebooklib", "markdown", "beautifulsoup4", "httpx", "pillow", "tenacity"] # /// """ Build an EPUB from the Claude How-To markdown files. 
@@ -13,6 +13,13 @@ Usage: uv run scripts/build_epub.py python scripts/build_epub.py + Command-line options: + --root, -r Root directory containing markdown files (default: repo root) + --output, -o Output EPUB file path (default: ROOT/claude-howto-guide.epub) + --verbose, -v Enable verbose logging + --timeout Timeout for API requests in seconds (default: 30) + --max-concurrent Maximum concurrent API requests (default: 10) + The script uses inline script dependencies (PEP 723), so uv will automatically install required packages in an isolated environment. @@ -21,10 +28,11 @@ Output: Features: - Organizes chapters by folder structure (01-slash-commands, etc.) - - Renders Mermaid diagrams as PNG images via Kroki.io API + - Renders Mermaid diagrams as PNG images via Kroki.io API (async concurrent) - Generates a cover image from the project logo - Converts internal markdown links to EPUB chapter references - - Handles SVG images by replacing with alt text (unsupported in EPUB) + - Handles SVG images by replacing with styled placeholders + - Strict error mode: fails if any diagram cannot be rendered Requirements: - uv (recommended) or Python 3.10+ with dependencies installed @@ -32,57 +40,202 @@ Requirements: - Repository structure with markdown files and claude-howto-logo.png """ +from __future__ import annotations + +import argparse +import asyncio import base64 +import html +import logging +import os import re +import sys import zlib -from pathlib import Path -import httpx +from dataclasses import dataclass, field from io import BytesIO -from PIL import Image, ImageDraw, ImageFont -from ebooklib import epub +from pathlib import Path + +import httpx import markdown from bs4 import BeautifulSoup +from ebooklib import epub +from PIL import Image, ImageDraw, ImageFont +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +# ============================================================================= +# Custom Exceptions +#
============================================================================= -# Cache for mermaid images to avoid re-fetching (stores (image_data, filename) tuples) -_mermaid_cache: dict[str, tuple[bytes, str]] = {} -_mermaid_counter = 0 +class EPUBBuildError(Exception): + """Base exception for EPUB build errors.""" -# Track which mermaid images have been added to the book -_mermaid_added_to_book: set[str] = set() - -# Mapping from source paths to EPUB chapter filenames -_path_to_chapter: dict[str, str] = {} + pass -def mermaid_to_image(mermaid_code: str) -> tuple[bytes, str] | None: - """Convert mermaid code to PNG image using Kroki.io API.""" - global _mermaid_counter +class MermaidRenderError(EPUBBuildError): + """Error rendering Mermaid diagram.""" - # Check cache - return cached image data and filename to avoid duplicates - cache_key = mermaid_code.strip() - if cache_key in _mermaid_cache: - return _mermaid_cache[cache_key] + pass - try: - # Use Kroki.io API - accepts deflate-compressed, base64-encoded diagrams - compressed = zlib.compress(mermaid_code.encode('utf-8'), level=9) - encoded = base64.urlsafe_b64encode(compressed).decode('ascii') - url = f"https://kroki.io/mermaid/png/{encoded}" - response = httpx.get(url, timeout=30, follow_redirects=True) - if response.status_code == 200: - _mermaid_counter += 1 - img_data = response.content - img_name = f"mermaid_{_mermaid_counter}.png" - _mermaid_cache[cache_key] = (img_data, img_name) - return img_data, img_name - else: - print(f" Warning: Kroki API returned {response.status_code}") - return None - except Exception as e: - print(f" Warning: Failed to render mermaid diagram: {e}") - return None +class ValidationError(EPUBBuildError): + """Error validating input or output.""" + + pass + + +class CoverGenerationError(EPUBBuildError): + """Error generating cover image.""" + + pass + + +# ============================================================================= +# Configuration and State +# 
============================================================================= + + +@dataclass +class EPUBConfig: + """Configuration for EPUB generation.""" + + # Paths + root_path: Path + output_path: Path + logo_path: Path | None = None + + # EPUB Metadata + identifier: str = "claude-howto-guide" + title: str = "Claude Code How-To Guide" + language: str = "en" + author: str = "Claude Code Community" + + # Cover Settings + cover_width: int = 600 + cover_height: int = 900 + cover_bg_color: tuple[int, int, int] = (26, 26, 46) + cover_title_color: tuple[int, int, int] = (78, 205, 196) + cover_subtitle_color: tuple[int, int, int] = (168, 178, 209) + + # Network Settings + kroki_base_url: str = "https://kroki.io" + request_timeout: float = 30.0 + max_retries: int = 3 + max_concurrent_requests: int = 10 + + # Font paths (platform-specific) + title_font_paths: list[str] = field( + default_factory=lambda: [ + "/System/Library/Fonts/Supplemental/Arial Bold.ttf", + "/System/Library/Fonts/Helvetica.ttc", + "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", # Linux + "C:\\Windows\\Fonts\\arialbd.ttf", # Windows + ] + ) + subtitle_font_paths: list[str] = field( + default_factory=lambda: [ + "/System/Library/Fonts/Supplemental/Arial.ttf", + "/System/Library/Fonts/Helvetica.ttc", + "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", # Linux + "C:\\Windows\\Fonts\\arial.ttf", # Windows + ] + ) + + +@dataclass +class BuildState: + """Mutable state for the build process.""" + + mermaid_cache: dict[str, tuple[bytes, str]] = field(default_factory=dict) + mermaid_counter: int = 0 + mermaid_added_to_book: set[str] = field(default_factory=set) + path_to_chapter: dict[str, str] = field(default_factory=dict) + + def reset(self) -> None: + """Reset all state for a fresh build.""" + self.mermaid_cache.clear() + self.mermaid_counter = 0 + self.mermaid_added_to_book.clear() + self.path_to_chapter.clear() + + +@dataclass +class ChapterInfo: + """Information about a chapter for 
processing.""" + + file_path: Path + display_name: str + file_title: str + chapter_filename: str + is_folder_overview: bool = False + folder_name: str | None = None + + +# ============================================================================= +# Logging Setup +# ============================================================================= + + +def setup_logging(verbose: bool = False) -> logging.Logger: + """Configure logging for the build process.""" + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + level=level, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%H:%M:%S", + ) + return logging.getLogger("epub_builder") + + +# ============================================================================= +# Input Validation +# ============================================================================= + + +def validate_inputs(config: EPUBConfig, logger: logging.Logger) -> None: + """Validate all inputs before starting the build.""" + errors = [] + + # Check root path exists + if not config.root_path.exists(): + errors.append(f"Root path does not exist: {config.root_path}") + elif not config.root_path.is_dir(): + errors.append(f"Root path is not a directory: {config.root_path}") + + # Check output path is writable + output_dir = config.output_path.parent + if not output_dir.exists(): + errors.append(f"Output directory does not exist: {output_dir}") + elif not os.access(output_dir, os.W_OK): + errors.append(f"Output directory is not writable: {output_dir}") + + # Check logo if specified + logo_path = config.logo_path or (config.root_path / "claude-howto-logo.png") + if not logo_path.exists(): + logger.warning( + f"Logo file not found: {logo_path}. Cover will be generated without logo." 
+ ) + + # Verify at least some markdown files exist + md_files = list(config.root_path.glob("**/*.md")) + if not md_files: + errors.append(f"No markdown files found in {config.root_path}") + + if errors: + for error in errors: + logger.error(error) + raise ValidationError("\n".join(errors)) + + +# ============================================================================= +# Mermaid Rendering (Async with Retry) +# ============================================================================= def sanitize_mermaid(mermaid_code: str) -> str: @@ -93,151 +246,148 @@ def sanitize_mermaid(mermaid_code: str) -> str: to prevent that. """ # Escape numbered list patterns inside brackets: [1. Text] -> [1\. Text] - # Match patterns like [1. or ["1. inside node definitions - sanitized = re.sub(r'\[(["\']?)(\d+)\.(\s)', r'[\1\2\\.\3', mermaid_code) + sanitized = re.sub(r'\[(["\']?)(\d+)\.(\s)', r"[\1\2\\.\3", mermaid_code) return sanitized -def process_mermaid_blocks(md_content: str, book: epub.EpubBook) -> str: - """Find mermaid code blocks and replace with image references.""" - # Pattern to match ```mermaid ... 
``` - pattern = r'```mermaid\n(.*?)```' +class MermaidRenderer: + """Async renderer for Mermaid diagrams via Kroki.io API.""" - def replace_mermaid(match): - mermaid_code = sanitize_mermaid(match.group(1)) - result = mermaid_to_image(mermaid_code) - if result: - img_data, img_name = result - # Only add image to book if not already added - if img_name not in _mermaid_added_to_book: - img_item = epub.EpubItem( - uid=img_name.replace('.', '_'), - file_name=f"images/{img_name}", - media_type="image/png", - content=img_data + def __init__( + self, config: EPUBConfig, state: BuildState, logger: logging.Logger + ) -> None: + self.config = config + self.state = state + self.logger = logger + self._semaphore: asyncio.Semaphore | None = None + + async def _fetch_single( + self, client: httpx.AsyncClient, mermaid_code: str, index: int + ) -> tuple[str, tuple[bytes, str]]: + """Fetch a single Mermaid diagram with retry logic.""" + cache_key = mermaid_code.strip() + + # Check cache first + if cache_key in self.state.mermaid_cache: + self.logger.debug(f"Cache hit for diagram {index}") + return cache_key, self.state.mermaid_cache[cache_key] + + # Rate limit with semaphore + assert self._semaphore is not None + async with self._semaphore: + result = await self._fetch_with_retry(client, mermaid_code, index) + if result is None: + raise MermaidRenderError( + f"Failed to render Mermaid diagram {index} after {self.config.max_retries} attempts" ) - book.add_item(img_item) - _mermaid_added_to_book.add(img_name) - # Return markdown image reference - return f'\n![Diagram](images/{img_name})\n' - else: - # Fallback: show as code block with a note - return f'\n**[Diagram]**\n```\n{mermaid_code}```\n' + return cache_key, result - return re.sub(pattern, replace_mermaid, md_content, flags=re.DOTALL) + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=1, max=10), + retry=retry_if_exception_type((httpx.TimeoutException, httpx.NetworkError)), + reraise=True, + ) + 
async def _fetch_with_retry( + self, client: httpx.AsyncClient, mermaid_code: str, index: int + ) -> tuple[bytes, str] | None: + """Fetch diagram with retry logic.""" + try: + compressed = zlib.compress(mermaid_code.encode("utf-8"), level=9) + encoded = base64.urlsafe_b64encode(compressed).decode("ascii") + url = f"{self.config.kroki_base_url}/mermaid/png/{encoded}" + + self.logger.debug(f"Fetching diagram {index}...") + response = await client.get(url, timeout=self.config.request_timeout) + + if response.status_code == 200: + self.state.mermaid_counter += 1 + img_name = f"mermaid_{self.state.mermaid_counter}.png" + result = (response.content, img_name) + cache_key = mermaid_code.strip() + self.state.mermaid_cache[cache_key] = result + self.logger.info(f"Rendered diagram {index} -> {img_name}") + return result + else: + self.logger.warning( + f"Kroki API returned {response.status_code} for diagram {index}" + ) + raise MermaidRenderError( + f"Kroki API returned {response.status_code} for diagram {index}" + ) + + except httpx.TimeoutException: + self.logger.warning(f"Timeout fetching diagram {index}, will retry...") + raise + except httpx.NetworkError as e: + self.logger.warning( + f"Network error for diagram {index}: {e}, will retry..." 
+ ) + raise + + async def render_all( + self, diagrams: list[tuple[int, str]] + ) -> dict[str, tuple[bytes, str]]: + """Render all Mermaid diagrams concurrently.""" + self._semaphore = asyncio.Semaphore(self.config.max_concurrent_requests) + results: dict[str, tuple[bytes, str]] = {} + + async with httpx.AsyncClient( + follow_redirects=True, + limits=httpx.Limits(max_connections=self.config.max_concurrent_requests), + timeout=httpx.Timeout(self.config.request_timeout), + ) as client: + tasks = [ + self._fetch_single(client, sanitize_mermaid(code), idx) + for idx, code in diagrams + ] + + self.logger.info(f"Fetching {len(tasks)} Mermaid diagrams concurrently...") + + # Use gather with return_exceptions=False for strict mode + completed = await asyncio.gather(*tasks) + + for cache_key, data in completed: + results[cache_key] = data + + success_count = len(results) + self.logger.info( + f"Successfully rendered {success_count}/{len(diagrams)} diagrams" + ) + return results -def create_cover_image(logo_path: Path, title: str = "Claude Code\nHow-To Guide") -> bytes: - """Create a cover image by composing the logo with title text on top. 
+def extract_all_mermaid_blocks( + md_files: list[tuple[Path, str]], logger: logging.Logger +) -> list[tuple[int, str]]: + """Extract all unique Mermaid code blocks from markdown files.""" + pattern = r"```mermaid\n(.*?)```" + seen: set[str] = set() + diagrams: list[tuple[int, str]] = [] + counter = 0 - Args: - logo_path: Path to the PNG logo file - title: Title text to overlay on the cover + for file_path, _ in md_files: + try: + content = file_path.read_text(encoding="utf-8") + for match in re.finditer(pattern, content, flags=re.DOTALL): + code = match.group(1).strip() + if code not in seen: + seen.add(code) + counter += 1 + diagrams.append((counter, code)) + except UnicodeDecodeError as e: + logger.warning(f"Failed to read {file_path}: {e}") - Returns: - PNG image data as bytes - """ - # Target cover dimensions (standard ebook cover ratio ~1.6:1 height:width) - cover_width = 600 - cover_height = 900 - - # Background color matching the logo gradient - bg_color = (26, 26, 46) # #1a1a2e from the logo - - # Create the cover canvas - cover = Image.new('RGB', (cover_width, cover_height), bg_color) - draw = ImageDraw.Draw(cover) - - # Load and scale the logo - with Image.open(logo_path) as logo: - # Scale logo to fit cover width with some padding - target_width = cover_width - 60 # 30px padding on each side - scale_factor = target_width / logo.width - new_height = int(logo.height * scale_factor) - logo_scaled = logo.resize((target_width, new_height), Image.Resampling.LANCZOS) - - # Handle transparency - if logo_scaled.mode == 'RGBA': - # Composite onto a background matching cover color - logo_bg = Image.new('RGB', logo_scaled.size, bg_color) - logo_bg.paste(logo_scaled, mask=logo_scaled.split()[3]) - logo_scaled = logo_bg - elif logo_scaled.mode != 'RGB': - logo_scaled = logo_scaled.convert('RGB') - - # Position the logo in the lower portion of the cover - logo_x = (cover_width - logo_scaled.width) // 2 - logo_y = cover_height - logo_scaled.height - 80 # 80px from 
bottom - cover.paste(logo_scaled, (logo_x, logo_y)) - - # Add title text at the top - # Try to use a nice font, fall back to default - font_size = 72 - font = ImageFont.load_default() - try: - # Try common system fonts on macOS - for font_name in [ - '/System/Library/Fonts/Supplemental/Arial Bold.ttf', - '/System/Library/Fonts/Helvetica.ttc', - 'Arial Bold', - 'Helvetica Bold', - ]: - try: - font = ImageFont.truetype(font_name, font_size) - break - except OSError: - continue - except Exception: - pass - - # Draw title text (centered, near top) - title_color = (78, 205, 196) # #4ecdc4 - teal from the logo gradient - - # Split title into lines and draw each centered - lines = title.split('\n') - y_offset = 120 - line_spacing = 90 - - for line in lines: - bbox = draw.textbbox((0, 0), line, font=font) - text_width = bbox[2] - bbox[0] - x = (cover_width - text_width) // 2 - draw.text((x, y_offset), line, font=font, fill=title_color) - y_offset += line_spacing - - # Add a subtle subtitle - subtitle = "Complete Guide to Claude Code Features" - subtitle_font_size = 24 - subtitle_font = ImageFont.load_default() - try: - for font_name in [ - '/System/Library/Fonts/Supplemental/Arial.ttf', - '/System/Library/Fonts/Helvetica.ttc', - 'Arial', - 'Helvetica', - ]: - try: - subtitle_font = ImageFont.truetype(font_name, subtitle_font_size) - break - except OSError: - continue - except Exception: - pass - - subtitle_color = (168, 178, 209) # #a8b2d1 - light gray from logo - bbox = draw.textbbox((0, 0), subtitle, font=subtitle_font) - subtitle_width = bbox[2] - bbox[0] - subtitle_x = (cover_width - subtitle_width) // 2 - subtitle_y = y_offset + 20 - draw.text((subtitle_x, subtitle_y), subtitle, font=subtitle_font, fill=subtitle_color) - - # Save to bytes - buffer = BytesIO() - cover.save(buffer, format='PNG', optimize=True) - return buffer.getvalue() + logger.info(f"Found {len(diagrams)} unique Mermaid diagrams") + return diagrams -def get_chapter_order(): +# 
============================================================================= +# Chapter Collection (Single-Pass) +# ============================================================================= + + +def get_chapter_order() -> list[tuple[str, str]]: """Define the order of chapters based on folder structure.""" return [ ("README.md", "Introduction"), @@ -257,20 +407,345 @@ def get_chapter_order(): ] -def convert_internal_links(html: str, current_file: Path, root_path: Path) -> str: - """Convert markdown links to internal EPUB chapter links.""" - soup = BeautifulSoup(html, 'html.parser') +def collect_folder_files(folder_path: Path) -> list[tuple[Path, str]]: + """Collect all markdown files from a folder, README first.""" + files: list[tuple[Path, str]] = [] - for link in soup.find_all('a'): - href = link.get('href', '') - if not href or href.startswith(('http://', 'https://', 'mailto:', '#')): + # Get README first if it exists + readme = folder_path / "README.md" + if readme.exists(): + files.append((readme, "Overview")) + + # Get all other markdown files + for md_file in sorted(folder_path.glob("*.md")): + if md_file.name != "README.md": + title = md_file.stem.replace("-", " ").replace("_", " ").title() + files.append((md_file, title)) + + # Recursively get subfolders + for subfolder in sorted(folder_path.iterdir()): + if subfolder.is_dir() and not subfolder.name.startswith("."): + subfiles = collect_folder_files(subfolder) + for sf, st in subfiles: + rel_path = sf.relative_to(folder_path) + if len(rel_path.parts) > 1: + prefix = ( + rel_path.parts[0].replace("-", " ").replace("_", " ").title() + ) + files.append((sf, f"{prefix}: {st}")) + else: + files.append((sf, st)) + + return files + + +class ChapterCollector: + """Collects and organizes chapter information in a single pass.""" + + def __init__(self, root_path: Path, state: BuildState) -> None: + self.root_path = root_path + self.state = state + + def collect_all_chapters( + self, chapter_order: list[tuple[str, 
str]] + ) -> list[ChapterInfo]: + """Collect all chapters and build path mapping in one pass.""" + chapters: list[ChapterInfo] = [] + chapter_num = 0 + + for item, display_name in chapter_order: + item_path = self.root_path / item + + if item_path.is_file() and item_path.suffix == ".md": + chapter_num += 1 + chapter_filename = f"chap_{chapter_num:02d}.xhtml" + self.state.path_to_chapter[item] = chapter_filename + + chapters.append( + ChapterInfo( + file_path=item_path, + display_name=display_name, + file_title=display_name, + chapter_filename=chapter_filename, + ) + ) + + elif item_path.is_dir(): + folder_chapters = self._collect_folder( + item_path, item, display_name, chapter_num + ) + if folder_chapters: + chapter_num += 1 + chapters.extend(folder_chapters) + + return chapters + + def _collect_folder( + self, folder_path: Path, item: str, display_name: str, base_chapter_num: int + ) -> list[ChapterInfo]: + """Collect chapters from a folder.""" + folder_files = collect_folder_files(folder_path) + if not folder_files: + return [] + + chapter_num = base_chapter_num + 1 + chapters: list[ChapterInfo] = [] + + # Map folder itself + first_filename = f"chap_{chapter_num:02d}_00.xhtml" + self.state.path_to_chapter[item] = first_filename + self.state.path_to_chapter[item.rstrip("/")] = first_filename + + for i, (file_path, file_title) in enumerate(folder_files): + chapter_filename = f"chap_{chapter_num:02d}_{i:02d}.xhtml" + rel_path = str(file_path.relative_to(self.root_path)) + self.state.path_to_chapter[rel_path] = chapter_filename + + chapters.append( + ChapterInfo( + file_path=file_path, + display_name=display_name if i == 0 else file_title, + file_title=file_title, + chapter_filename=chapter_filename, + is_folder_overview=(i == 0), + folder_name=display_name, + ) + ) + + return chapters + + +# ============================================================================= +# Cover Image Generation +# 
============================================================================= + + +def load_font( + font_paths: list[str], size: int, logger: logging.Logger +) -> ImageFont.FreeTypeFont | ImageFont.ImageFont: + """Load a font from a list of paths, with fallback to default.""" + for font_path in font_paths: + try: + font = ImageFont.truetype(font_path, size) + logger.debug(f"Loaded font: {font_path}") + return font + except OSError: + continue + + logger.warning("No custom fonts found, using default font") + return ImageFont.load_default() + + +def _add_logo_to_cover( + cover: Image.Image, logo_path: Path, config: EPUBConfig, logger: logging.Logger +) -> None: + """Add logo to cover image.""" + with Image.open(logo_path) as logo: + target_width = config.cover_width - 60 + scale_factor = target_width / logo.width + new_height = int(logo.height * scale_factor) + logo_scaled = logo.resize((target_width, new_height), Image.Resampling.LANCZOS) + + if logo_scaled.mode == "RGBA": + logo_bg = Image.new("RGB", logo_scaled.size, config.cover_bg_color) + logo_bg.paste(logo_scaled, mask=logo_scaled.split()[3]) + logo_scaled = logo_bg + elif logo_scaled.mode != "RGB": + logo_scaled = logo_scaled.convert("RGB") + + logo_x = (config.cover_width - logo_scaled.width) // 2 + logo_y = config.cover_height - logo_scaled.height - 80 + cover.paste(logo_scaled, (logo_x, logo_y)) + logger.debug(f"Added logo from {logo_path}") + + +def _draw_text_centered( + draw: ImageDraw.ImageDraw, + text: str, + font: ImageFont.FreeTypeFont | ImageFont.ImageFont, + color: tuple[int, int, int], + canvas_width: int, + y_start: int, + line_spacing: int, +) -> int: + """Draw centered multi-line text, return final y position.""" + y_offset = y_start + for line in text.split("\n"): + bbox = draw.textbbox((0, 0), line, font=font) + text_width = bbox[2] - bbox[0] + x = (canvas_width - text_width) // 2 + draw.text((x, y_offset), line, font=font, fill=color) + y_offset += line_spacing + return y_offset + + +def 
create_cover_image( + config: EPUBConfig, + logger: logging.Logger, + title: str = "Claude Code\nHow-To Guide", + subtitle: str = "Complete Guide to Claude Code Features", +) -> bytes: + """Create a cover image with proper error handling.""" + try: + cover = Image.new( + "RGB", (config.cover_width, config.cover_height), config.cover_bg_color + ) + draw = ImageDraw.Draw(cover) + + # Load fonts once + title_font = load_font(config.title_font_paths, 72, logger) + subtitle_font = load_font(config.subtitle_font_paths, 24, logger) + + # Add logo if available + logo_path = config.logo_path or (config.root_path / "claude-howto-logo.png") + if logo_path.exists(): + _add_logo_to_cover(cover, logo_path, config, logger) + else: + logger.warning("Logo not found, creating text-only cover") + + # Draw title + y_after_title = _draw_text_centered( + draw, + title, + title_font, + config.cover_title_color, + config.cover_width, + y_start=120, + line_spacing=90, + ) + + # Draw subtitle + _draw_text_centered( + draw, + subtitle, + subtitle_font, + config.cover_subtitle_color, + config.cover_width, + y_start=y_after_title + 20, + line_spacing=30, + ) + + buffer = BytesIO() + cover.save(buffer, format="PNG", optimize=True) + logger.info("Cover image generated successfully") + return buffer.getvalue() + + except Exception as e: + logger.error(f"Failed to create cover image: {e}") + raise CoverGenerationError(f"Cover generation failed: {e}") from e + + +# ============================================================================= +# HTML Generation +# ============================================================================= + + +def create_chapter_html( + display_name: str, file_title: str, html_content: str, is_overview: bool = False +) -> str: + """Create chapter HTML with proper escaping.""" + safe_display = html.escape(display_name) + safe_title = html.escape(file_title) + + if is_overview: + return f""" + + + + {safe_display} + + +

{safe_display}

+ {html_content} + +""" + else: + return f""" + + + + {safe_title} + + +

{safe_title}

+ {html_content} + +""" + + +def handle_svg_image(src: str, alt: str, logger: logging.Logger) -> str: + """Handle SVG images with a styled placeholder.""" + placeholder = f""" +
+

[SVG Image: {html.escape(alt)}]

+

+ Original: {html.escape(src)} +

+
+ """ + logger.debug(f"Replaced SVG image: {src}") + return placeholder + + +# ============================================================================= +# Markdown Processing +# ============================================================================= + + +def process_mermaid_blocks( + md_content: str, book: epub.EpubBook, state: BuildState, logger: logging.Logger +) -> str: + """Find mermaid code blocks and replace with image references.""" + pattern = r"```mermaid\n(.*?)```" + + def replace_mermaid(match: re.Match[str]) -> str: + mermaid_code = sanitize_mermaid(match.group(1)) + cache_key = mermaid_code.strip() + + if cache_key in state.mermaid_cache: + img_data, img_name = state.mermaid_cache[cache_key] + # Only add image to book if not already added + if img_name not in state.mermaid_added_to_book: + img_item = epub.EpubItem( + uid=img_name.replace(".", "_"), + file_name=f"images/{img_name}", + media_type="image/png", + content=img_data, + ) + book.add_item(img_item) + state.mermaid_added_to_book.add(img_name) + return f"\n![Diagram](images/{img_name})\n" + else: + # This should not happen in strict mode since we pre-fetch all diagrams + logger.error("Mermaid diagram not found in cache") + raise MermaidRenderError("Mermaid diagram not found in cache") + + return re.sub(pattern, replace_mermaid, md_content, flags=re.DOTALL) + + +def convert_internal_links( + html_content: str, current_file: Path, root_path: Path, state: BuildState +) -> str: + """Convert markdown links to internal EPUB chapter links.""" + soup = BeautifulSoup(html_content, "html.parser") + + for link in soup.find_all("a"): + href = link.get("href", "") + if not href or href.startswith(("http://", "https://", "mailto:", "#")): continue # Remove anchor part for path resolution - anchor = '' - if '#' in href: - href, anchor = href.split('#', 1) - anchor = '#' + anchor + anchor = "" + if "#" in href: + href, anchor = href.split("#", 1) + anchor = "#" + anchor # Resolve relative path from 
current file's directory if href: @@ -287,108 +762,75 @@ def convert_internal_links(html: str, current_file: Path, root_path: Path) -> st # Try various path forms for matching paths_to_try = [ lookup_path, - lookup_path.rstrip('/'), - lookup_path + '/README.md' if not lookup_path.endswith('.md') else lookup_path, + lookup_path.rstrip("/"), + lookup_path + "/README.md" + if not lookup_path.endswith(".md") + else lookup_path, ] for path in paths_to_try: - if path in _path_to_chapter: - link['href'] = _path_to_chapter[path] + anchor + if path in state.path_to_chapter: + link["href"] = state.path_to_chapter[path] + anchor break return str(soup) -def md_to_html(md_content: str, current_file: Path, root_path: Path, book: epub.EpubBook) -> str: +def md_to_html( + md_content: str, + current_file: Path, + root_path: Path, + book: epub.EpubBook, + state: BuildState, + logger: logging.Logger, +) -> str: """Convert markdown to HTML with proper styling. Handles: - Mermaid diagrams (rendered as PNG images) - - SVG images (replaced with alt text) + - SVG images (replaced with styled placeholders) - Internal links (converted to EPUB chapter references) - Standard markdown features """ # Process mermaid blocks first (before markdown conversion) - md_content = process_mermaid_blocks(md_content, book) + md_content = process_mermaid_blocks(md_content, book, state, logger) # Convert markdown to HTML - html = markdown.markdown( + html_content = markdown.markdown( md_content, extensions=[ - 'tables', - 'fenced_code', - 'codehilite', - 'toc', - ] + "tables", + "fenced_code", + "codehilite", + "toc", + ], ) # Clean up any SVG references (they won't work in EPUB) - soup = BeautifulSoup(html, 'html.parser') - for img in soup.find_all('img'): - src = img.get('src', '') - if src.endswith('.svg'): - # Replace SVG with alt text - alt = img.get('alt', 'Image') - img.replace_with(f'[{alt}]') + soup = BeautifulSoup(html_content, "html.parser") + for img in soup.find_all("img"): + src = 
img.get("src", "") + if src.endswith(".svg"): + alt = img.get("alt", "Image") + placeholder = handle_svg_image(src, alt, logger) + img.replace_with(BeautifulSoup(placeholder, "html.parser")) - html = str(soup) + html_content = str(soup) # Convert internal links to EPUB chapter references - html = convert_internal_links(html, current_file, root_path) + html_content = convert_internal_links(html_content, current_file, root_path, state) - return html + return html_content -def collect_folder_files(folder_path: Path) -> list[tuple[Path, str]]: - """Collect all markdown files from a folder, README first.""" - files = [] - - # Get README first if it exists - readme = folder_path / "README.md" - if readme.exists(): - files.append((readme, "Overview")) - - # Get all other markdown files - for md_file in sorted(folder_path.glob("*.md")): - if md_file.name != "README.md": - title = md_file.stem.replace("-", " ").replace("_", " ").title() - files.append((md_file, title)) - - # Recursively get subfolders - for subfolder in sorted(folder_path.iterdir()): - if subfolder.is_dir() and not subfolder.name.startswith('.'): - subfiles = collect_folder_files(subfolder) - for sf, st in subfiles: - # Prefix with subfolder name - rel_path = sf.relative_to(folder_path) - if len(rel_path.parts) > 1: - prefix = rel_path.parts[0].replace("-", " ").replace("_", " ").title() - files.append((sf, f"{prefix}: {st}")) - else: - files.append((sf, st)) - - return files +# ============================================================================= +# EPUB Generation +# ============================================================================= -def create_epub(root_path: Path, output_path: Path): - """Create the EPUB from markdown files.""" - book = epub.EpubBook() - - # Set metadata - book.set_identifier('claude-howto-guide') - book.set_title('Claude Code How-To Guide') - book.set_language('en') - book.add_author('Claude Code Community') - - # Add cover image from PNG logo - logo_path = root_path 
/ "claude-howto-logo.png" - if logo_path.exists(): - print("Adding cover image...") - cover_data = create_cover_image(logo_path) - book.set_cover("cover.png", cover_data) - - # Add CSS - style = ''' +def create_stylesheet() -> epub.EpubItem: + """Create the EPUB stylesheet.""" + style = """ body { font-family: Georgia, serif; line-height: 1.6; padding: 1em; } h1 { color: #333; border-bottom: 2px solid #e67e22; padding-bottom: 0.3em; } h2 { color: #444; margin-top: 1.5em; } @@ -403,126 +845,125 @@ def create_epub(root_path: Path, output_path: Path): a { color: #e67e22; } img { max-width: 100%; height: auto; display: block; margin: 1em auto; } .diagram { text-align: center; margin: 1.5em 0; } - ''' - nav_css = epub.EpubItem( + .svg-placeholder { border: 1px dashed #ccc; padding: 1em; text-align: center; background: #f9f9f9; border-radius: 4px; margin: 1em 0; } + """ + return epub.EpubItem( uid="style_nav", file_name="style/nav.css", media_type="text/css", - content=style + content=style, ) + + +async def build_epub_async( + config: EPUBConfig, + logger: logging.Logger, + state: BuildState | None = None, +) -> Path: + """Build EPUB asynchronously with concurrent diagram fetching.""" + state = state or BuildState() + state.reset() # Ensure clean state + + # Validate inputs + validate_inputs(config, logger) + + # Initialize book + book = epub.EpubBook() + book.set_identifier(config.identifier) + book.set_title(config.title) + book.set_language(config.language) + book.add_author(config.author) + + # Add cover + logger.info("Generating cover image...") + cover_data = create_cover_image(config, logger) + book.set_cover("cover.png", cover_data) + + # Add CSS + nav_css = create_stylesheet() book.add_item(nav_css) - chapters = [] - toc = [] - chapter_order = get_chapter_order() + # Collect all chapters in single pass + logger.info("Collecting chapters...") + collector = ChapterCollector(config.root_path, state) + chapter_infos = 
collector.collect_all_chapters(get_chapter_order()) - # First pass: build path-to-chapter mapping - chapter_num = 0 - print("Building chapter mapping...") - for item, display_name in chapter_order: - item_path = root_path / item + # Extract and pre-fetch all Mermaid diagrams + logger.info("Extracting Mermaid diagrams...") + md_files = [(ch.file_path, ch.file_title) for ch in chapter_infos] + all_diagrams = extract_all_mermaid_blocks(md_files, logger) - if item_path.is_file() and item_path.suffix == '.md': - chapter_num += 1 - chapter_file = f'chap_{chapter_num:02d}.xhtml' - _path_to_chapter[item] = chapter_file + if all_diagrams: + renderer = MermaidRenderer(config, state, logger) + await renderer.render_all(all_diagrams) - elif item_path.is_dir(): - folder_files = collect_folder_files(item_path) - if not folder_files: - continue + # Process chapters + logger.info("Processing chapters...") + chapters: list[epub.EpubHtml] = [] + toc: list[epub.EpubHtml | tuple[epub.Section, list[epub.EpubHtml]]] = [] - chapter_num += 1 - # Map folder itself to first file (README) - _path_to_chapter[item] = f'chap_{chapter_num:02d}_00.xhtml' - _path_to_chapter[item.rstrip('/')] = f'chap_{chapter_num:02d}_00.xhtml' + current_folder: str | None = None + current_folder_chapters: list[epub.EpubHtml] = [] - for i, (file_path, _) in enumerate(folder_files): - rel_path = str(file_path.relative_to(root_path)) - _path_to_chapter[rel_path] = f'chap_{chapter_num:02d}_{i:02d}.xhtml' + for chapter_info in chapter_infos: + try: + content = chapter_info.file_path.read_text(encoding="utf-8") + except UnicodeDecodeError as e: + logger.error(f"Failed to read {chapter_info.file_path}: {e}") + raise ValidationError( + f"Failed to read {chapter_info.file_path}: {e}" + ) from e - # Second pass: generate chapters with link conversion - chapter_num = 0 - for item, display_name in chapter_order: - item_path = root_path / item + logger.debug( + f"Processing: 
{chapter_info.file_path.relative_to(config.root_path)}" + ) + html_content = md_to_html( + content, chapter_info.file_path, config.root_path, book, state, logger + ) - if item_path.is_file() and item_path.suffix == '.md': + chapter = epub.EpubHtml( + title=chapter_info.file_title, + file_name=chapter_info.chapter_filename, + lang="en", + ) + + chapter.content = create_chapter_html( + chapter_info.display_name, + chapter_info.file_title, + html_content, + is_overview=chapter_info.is_folder_overview + or chapter_info.folder_name is None, + ) + chapter.add_item(nav_css) + book.add_item(chapter) + chapters.append(chapter) + + # Build TOC structure + if chapter_info.folder_name is None: # Single file chapter - chapter_num += 1 - content = item_path.read_text(encoding='utf-8') - print(f"Processing: {item_path.name}") - html_content = md_to_html(content, item_path, root_path, book) - - chapter = epub.EpubHtml( - title=display_name, - file_name=f'chap_{chapter_num:02d}.xhtml', - lang='en' - ) - chapter.content = f''' - - {display_name} - -

{display_name}

- {html_content} - - - ''' - chapter.add_item(nav_css) - book.add_item(chapter) - chapters.append(chapter) - toc.append(chapter) - - elif item_path.is_dir(): - # Folder chapter with sub-sections - folder_files = collect_folder_files(item_path) - - if not folder_files: - continue - - chapter_num += 1 - sub_chapters = [] - - for i, (file_path, file_title) in enumerate(folder_files): - content = file_path.read_text(encoding='utf-8') - print(f"Processing: {file_path.relative_to(root_path)}") - html_content = md_to_html(content, file_path, root_path, book) - - sub_chapter = epub.EpubHtml( - title=file_title, - file_name=f'chap_{chapter_num:02d}_{i:02d}.xhtml', - lang='en' + if current_folder is not None: + # Finish previous folder + toc.append( + (epub.Section(current_folder), current_folder_chapters.copy()) ) + current_folder_chapters.clear() + current_folder = None + toc.append(chapter) + else: + # Part of a folder + if current_folder != chapter_info.folder_name: + if current_folder is not None: + # Finish previous folder + toc.append( + (epub.Section(current_folder), current_folder_chapters.copy()) + ) + current_folder_chapters.clear() + current_folder = chapter_info.folder_name + current_folder_chapters.append(chapter) - if i == 0: - # First file gets the chapter title - sub_chapter.content = f''' - - {display_name} - -

{display_name}

- {html_content} - - - ''' - else: - sub_chapter.content = f''' - - {file_title} - -

{file_title}

- {html_content} - - - ''' - - sub_chapter.add_item(nav_css) - book.add_item(sub_chapter) - chapters.append(sub_chapter) - sub_chapters.append(sub_chapter) - - # Add folder as section with sub-chapters - if sub_chapters: - toc.append((epub.Section(display_name), sub_chapters)) + # Handle last folder + if current_folder is not None and current_folder_chapters: + toc.append((epub.Section(current_folder), current_folder_chapters)) # Set table of contents book.toc = toc @@ -532,14 +973,94 @@ def create_epub(root_path: Path, output_path: Path): book.add_item(epub.EpubNav()) # Set spine - book.spine = ['nav'] + chapters + book.spine = ["nav"] + chapters - # Write the EPUB - epub.write_epub(str(output_path), book, {}) - print(f"EPUB created: {output_path}") + # Write EPUB + logger.info(f"Writing EPUB to {config.output_path}...") + epub.write_epub(str(config.output_path), book, {}) + + logger.info(f"EPUB created successfully: {config.output_path}") + return config.output_path -if __name__ == '__main__': - root = Path(__file__).parent - output = root / "claude-howto-guide.epub" - create_epub(root, output) +def create_epub(root_path: Path, output_path: Path, verbose: bool = False) -> Path: + """Synchronous wrapper for backward compatibility.""" + logger = setup_logging(verbose) + config = EPUBConfig(root_path=root_path, output_path=output_path) + return asyncio.run(build_epub_async(config, logger)) + + +# ============================================================================= +# CLI +# ============================================================================= + + +def main() -> int: + """Main entry point with CLI argument parsing.""" + parser = argparse.ArgumentParser( + description="Build an EPUB from Claude How-To markdown files." 
+ ) + parser.add_argument( + "--root", + "-r", + type=Path, + default=None, + help="Root directory containing markdown files (default: repo root)", + ) + parser.add_argument( + "--output", + "-o", + type=Path, + default=None, + help="Output EPUB file path (default: /claude-howto-guide.epub)", + ) + parser.add_argument( + "--verbose", "-v", action="store_true", help="Enable verbose logging" + ) + parser.add_argument( + "--timeout", + type=float, + default=30.0, + help="Timeout for API requests in seconds (default: 30)", + ) + parser.add_argument( + "--max-concurrent", + type=int, + default=10, + help="Maximum concurrent API requests (default: 10)", + ) + + args = parser.parse_args() + + # Determine root path + root = args.root + if root is None: + # Default to parent of scripts directory (repo root) + root = Path(__file__).parent.parent + + root = root.resolve() + output = args.output or (root / "claude-howto-guide.epub") + output = output.resolve() + + logger = setup_logging(args.verbose) + config = EPUBConfig( + root_path=root, + output_path=output, + request_timeout=args.timeout, + max_concurrent_requests=args.max_concurrent, + ) + + try: + result = asyncio.run(build_epub_async(config, logger)) + print(f"Successfully created: {result}") + return 0 + except EPUBBuildError as e: + logger.error(f"Build failed: {e}") + return 1 + except KeyboardInterrupt: + logger.warning("Build interrupted by user") + return 130 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/tests/__init__.py b/scripts/tests/__init__.py new file mode 100644 index 0000000..230b77c --- /dev/null +++ b/scripts/tests/__init__.py @@ -0,0 +1 @@ +# Tests for build_epub module diff --git a/scripts/tests/conftest.py b/scripts/tests/conftest.py new file mode 100644 index 0000000..c191541 --- /dev/null +++ b/scripts/tests/conftest.py @@ -0,0 +1,58 @@ +"""Pytest configuration and shared fixtures for EPUB builder tests.""" + +from __future__ import annotations + +import logging +import 
sys +from pathlib import Path + +import pytest + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from build_epub import BuildState, EPUBConfig, setup_logging + + +@pytest.fixture +def tmp_project(tmp_path: Path) -> Path: + """Create a minimal project structure for testing.""" + # Create root markdown file + readme = tmp_path / "README.md" + readme.write_text("# Test Project\n\nThis is a test.") + + # Create a chapter directory + chapter_dir = tmp_path / "01-test-chapter" + chapter_dir.mkdir() + (chapter_dir / "README.md").write_text("# Chapter Overview\n\nOverview content.") + (chapter_dir / "section.md").write_text("# Section\n\nSection content.") + + # Create a proper PNG logo using PIL + from PIL import Image as PILImage + + logo_path = tmp_path / "claude-howto-logo.png" + img = PILImage.new("RGB", (100, 100), color=(26, 26, 46)) + img.save(logo_path, "PNG") + + return tmp_path + + +@pytest.fixture +def config(tmp_project: Path) -> EPUBConfig: + """Create a test configuration.""" + return EPUBConfig( + root_path=tmp_project, + output_path=tmp_project / "test.epub", + ) + + +@pytest.fixture +def state() -> BuildState: + """Create a fresh build state.""" + return BuildState() + + +@pytest.fixture +def logger() -> logging.Logger: + """Create a test logger.""" + return setup_logging(verbose=False) diff --git a/scripts/tests/test_build_epub.py b/scripts/tests/test_build_epub.py new file mode 100644 index 0000000..44533ab --- /dev/null +++ b/scripts/tests/test_build_epub.py @@ -0,0 +1,414 @@ +"""Tests for the EPUB builder module.""" + +from __future__ import annotations + +import logging +from pathlib import Path +from unittest.mock import patch + +import pytest + +# Fixtures are imported from conftest.py automatically by pytest +# Import from parent directory (handled by conftest.py sys.path) +from build_epub import ( + BuildState, + ChapterCollector, + EPUBConfig, + ValidationError, + create_chapter_html, + 
class TestBuildState:
    """Checks for the mutable bookkeeping fields exposed by ``BuildState``."""

    @staticmethod
    def _assert_pristine(state: BuildState) -> None:
        """Assert the counter is zero and every container is empty."""
        assert state.mermaid_counter == 0
        assert len(state.mermaid_cache) == 0
        assert len(state.mermaid_added_to_book) == 0
        assert len(state.path_to_chapter) == 0

    @staticmethod
    def _populate(state: BuildState) -> None:
        """Write one sample value into each field of ``state``."""
        state.mermaid_counter = 5
        state.mermaid_cache["key"] = (b"data", "file.png")
        state.mermaid_added_to_book.add("file.png")
        state.path_to_chapter["README.md"] = "chap_01.xhtml"

    def test_initial_state(self, state: BuildState) -> None:
        """A fresh state starts with a zero counter and empty containers."""
        self._assert_pristine(state)

    def test_state_modification(self, state: BuildState) -> None:
        """Values written to the state can be read back unchanged."""
        self._populate(state)

        assert state.mermaid_counter == 5
        assert state.mermaid_cache["key"] == (b"data", "file.png")
        assert "file.png" in state.mermaid_added_to_book
        assert state.path_to_chapter["README.md"] == "chap_01.xhtml"

    def test_reset(self, state: BuildState) -> None:
        """``reset()`` returns a populated state to its initial condition."""
        self._populate(state)

        state.reset()

        self._assert_pristine(state)
class TestEPUBConfig:
    """Checks for construction and default values of ``EPUBConfig``."""

    def test_required_fields(self, tmp_path: Path) -> None:
        """The two required paths are stored as given."""
        epub_file = tmp_path / "out.epub"
        cfg = EPUBConfig(root_path=tmp_path, output_path=epub_file)

        assert cfg.root_path == tmp_path
        assert cfg.output_path == tmp_path / "out.epub"

    def test_default_values(self, tmp_path: Path) -> None:
        """Fields that are not passed take their documented defaults."""
        cfg = EPUBConfig(root_path=tmp_path, output_path=tmp_path / "out.epub")

        expected_defaults = {
            "identifier": "claude-howto-guide",
            "title": "Claude Code How-To Guide",
            "language": "en",
            "author": "Claude Code Community",
            "request_timeout": 30.0,
            "max_concurrent_requests": 10,
            "max_retries": 3,
        }
        for field_name, default in expected_defaults.items():
            assert getattr(cfg, field_name) == default

    def test_custom_values(self, tmp_path: Path) -> None:
        """Explicit keyword arguments replace the defaults."""
        overrides = {
            "title": "Custom Title",
            "request_timeout": 60.0,
            "max_concurrent_requests": 5,
        }
        cfg = EPUBConfig(
            root_path=tmp_path,
            output_path=tmp_path / "out.epub",
            **overrides,
        )

        for field_name, wanted in overrides.items():
            assert getattr(cfg, field_name) == wanted
"file.txt" + file_path.write_text("content") + config = EPUBConfig( + root_path=file_path, + output_path=tmp_path / "out.epub", + ) + with pytest.raises(ValidationError, match="Root path is not a directory"): + validate_inputs(config, logger) + + def test_no_markdown_files(self, tmp_path: Path, logger: logging.Logger) -> None: + """Test that directory with no markdown files raises ValidationError.""" + empty_dir = tmp_path / "empty" + empty_dir.mkdir() + config = EPUBConfig( + root_path=empty_dir, + output_path=tmp_path / "out.epub", + ) + with pytest.raises(ValidationError, match="No markdown files found"): + validate_inputs(config, logger) + + def test_missing_output_directory( + self, tmp_project: Path, logger: logging.Logger + ) -> None: + """Test that missing output directory raises ValidationError.""" + config = EPUBConfig( + root_path=tmp_project, + output_path=tmp_project / "nonexistent" / "out.epub", + ) + with pytest.raises(ValidationError, match="Output directory does not exist"): + validate_inputs(config, logger) + + +# ============================================================================= +# Mermaid Processing Tests +# ============================================================================= + + +class TestMermaidProcessing: + """Tests for Mermaid diagram processing.""" + + def test_sanitize_mermaid_numbered_list(self) -> None: + """Test that numbered lists in brackets are escaped.""" + input_code = 'A["1. First item"] --> B["2. Second item"]' + expected = 'A["1\\. First item"] --> B["2\\. 
class TestMermaidProcessing:
    """Checks for Mermaid sanitising and block extraction."""

    def test_sanitize_mermaid_numbered_list(self) -> None:
        """Numbered-list prefixes inside bracket labels get an escaped dot."""
        raw = 'A["1. First item"] --> B["2. Second item"]'
        escaped = 'A["1\\. First item"] --> B["2\\. Second item"]'
        assert sanitize_mermaid(raw) == escaped

    def test_sanitize_mermaid_no_change(self) -> None:
        """Code without numbered-list labels is returned unchanged."""
        plain = "A --> B --> C"
        assert sanitize_mermaid(plain) == plain

    def test_extract_mermaid_blocks(
        self, tmp_path: Path, logger: logging.Logger
    ) -> None:
        """Two distinct fenced blocks in one file yield two numbered diagrams."""
        markdown = """# Test

```mermaid
graph TD
    A --> B
```

Some text

```mermaid
graph LR
    C --> D
```
"""
        source = tmp_path / "test.md"
        source.write_text(markdown)

        diagrams = extract_all_mermaid_blocks([(source, "Test")], logger)

        # Each entry pairs the expected 1-based index with a snippet that
        # must appear in that diagram's code.
        expectations = [(1, "A --> B"), (2, "C --> D")]
        assert len(diagrams) == 2
        for position, (index, snippet) in enumerate(expectations):
            assert diagrams[position][0] == index
            assert snippet in diagrams[position][1]

    def test_extract_mermaid_blocks_deduplication(
        self, tmp_path: Path, logger: logging.Logger
    ) -> None:
        """The same diagram appearing in two files is reported only once."""
        same_diagram = """```mermaid
graph TD
    A --> B
```"""

        sources = []
        for number in (1, 2):
            md_path = tmp_path / f"test{number}.md"
            md_path.write_text(f"# File {number}\n\n{same_diagram}")
            sources.append((md_path, f"Test{number}"))

        diagrams = extract_all_mermaid_blocks(sources, logger)

        # Identical diagram text must collapse to a single entry.
        assert len(diagrams) == 1
class TestChapterCollector:
    """Checks for ``ChapterCollector.collect_all_chapters``."""

    def test_collect_single_file(self, tmp_path: Path, state: BuildState) -> None:
        """A single markdown entry becomes one chapter, recorded in the state."""
        readme = tmp_path / "README.md"
        readme.write_text("# Test")

        chapters = ChapterCollector(tmp_path, state).collect_all_chapters(
            [("README.md", "Introduction")]
        )

        assert len(chapters) == 1
        only_chapter = chapters[0]
        assert only_chapter.file_path == readme
        assert only_chapter.display_name == "Introduction"
        assert only_chapter.chapter_filename == "chap_01.xhtml"
        assert state.path_to_chapter["README.md"] == "chap_01.xhtml"

    def test_collect_folder(self, tmp_project: Path, state: BuildState) -> None:
        """A folder entry yields its README as the overview, then its section."""
        chapters = ChapterCollector(tmp_project, state).collect_all_chapters(
            [("01-test-chapter", "Test Chapter")]
        )

        assert len(chapters) == 2  # README.md and section.md
        overview = chapters[0]
        section = chapters[1]
        assert overview.is_folder_overview is True
        assert overview.folder_name == "Test Chapter"
        assert section.is_folder_overview is False

    def test_path_mapping(self, tmp_project: Path, state: BuildState) -> None:
        """Files, folders and folder READMEs all get a path-to-chapter entry."""
        entries = [
            ("README.md", "Introduction"),
            ("01-test-chapter", "Test Chapter"),
        ]
        ChapterCollector(tmp_project, state).collect_all_chapters(entries)

        for expected_key in (
            "README.md",
            "01-test-chapter",
            "01-test-chapter/README.md",
        ):
            assert expected_key in state.path_to_chapter

Content

", + is_overview=True, + ) + + assert "" in html + assert 'Introduction" in html + assert "

Content

" in html + + def test_create_chapter_html_section(self) -> None: + """Test creating HTML for a section chapter.""" + html = create_chapter_html( + display_name="Chapter", + file_title="Section", + html_content="

Content

", + is_overview=False, + ) + + assert "

Section

" in html + assert "

" not in html + + def test_html_escaping(self) -> None: + """Test that HTML special characters are escaped.""" + html = create_chapter_html( + display_name="", + file_title="Test & Title", + html_content="

Content

", + is_overview=True, + ) + + assert "<script>" in html + # Note: Python's html.escape uses ' for single quotes + assert "