#!/usr/bin/env python3
import argparse
import re
import subprocess
from datetime import datetime
from pathlib import Path

from markdown import markdown
from weasyprint import HTML

# When True, validate_markdown() raises if a rendered file has no level-1
# heading; when False the headings are only collected.
VALIDATE_STRUCTURE = False

# (marker in the Markdown source, label written to the output)
_BLOCKQUOTE_LABELS = (("NOTE", "Note"), ("COMMENT", "Comment"))


def sanitize_heading(text):
    """Return *text* reduced to characters usable in an anchor name.

    Spaces are turned into underscores first; every character that is not
    an ASCII letter, digit, ``_`` or ``-`` is then removed.
    """
    return re.sub(r'[^a-zA-Z0-9_-]', '', text.replace(' ', '_'))


def _git_output(*args):
    """Run ``git`` with *args* and return its decoded, stripped stdout."""
    return subprocess.check_output(["git", *args]).decode().strip()


def get_git_info():
    """Return ``(branch, commit)`` for the current working directory.

    This is best-effort metadata: if git cannot be started, exits with an
    error, or prints undecodable output, ``("unknown", "unknown")`` is
    returned instead of raising.
    """
    try:
        commit = _git_output("rev-parse", "--short", "HEAD")
        branch = _git_output("rev-parse", "--abbrev-ref", "HEAD")
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        return "unknown", "unknown"
    return branch, commit


def transform_special_blockquotes(md_text: str) -> str:
    """Rewrite ``> NOTE:`` and ``> COMMENT:`` lines as labelled paragraphs.

    Each matching line is replaced by a blank line, ``Note: <text>`` (or
    ``Comment: <text>``), and another blank line. All other text is
    returned unchanged.
    """
    # NOTE(review): the replacement text carries no wrapper markup, so the
    # label is emitted as an ordinary paragraph. Confirm against version
    # control whether styled markup was intended here.
    for marker, label in _BLOCKQUOTE_LABELS:
        md_text = re.sub(
            # [ \t]* rather than \s*: \s also matches the newline, which let
            # an empty "> NOTE:" line swallow the line that follows it.
            rf'(?m)^> {marker}:[ \t]*(.*)',
            f'\n\n{label}: \\g<1>\n\n',
            md_text,
        )
    return md_text


def validate_markdown(file_path: Path, html: str):
    """Collect the headings of rendered *html*, optionally validating them.

    Args:
        file_path: Source file the HTML was rendered from; used only in the
            error message.
        html: HTML produced from the Markdown file.

    Returns:
        A list of ``(level, inner_html)`` tuples in document order, where
        ``level`` is the heading level as a one-character string.

    Raises:
        ValueError: If ``VALIDATE_STRUCTURE`` is true and the document
            contains no level-1 heading.
    """
    # NOTE(review): the heading pattern and the tail of the error message
    # below were reconstructed. The pattern must yield (level, text) pairs
    # because both this function and its caller unpack them. Any further
    # validation rules that followed the level-1 check could not be
    # recovered - compare with version control.
    headings = re.findall(r'<h([1-6])[^>]*>(.*?)</h\1>', html)
    if VALIDATE_STRUCTURE and not any(level == '1' for level, _ in headings):
        raise ValueError(
            f"Validation failed: No <h1> heading found in {file_path}"
        )
    return headings

# NOTE(review): parse_toc_file() and resolve_image_paths() are called by
# generate_pdf() below, but their definitions are not visible in the
# reviewed text. Confirm they are still defined in this file.


def scan_directory(input_dir: Path) -> list[Path]:
    """Scan directory for markdown files with numeric naming pattern.

    A ``*.md`` file anywhere below *input_dir* is kept when both its own
    name and the first component of its path relative to *input_dir* start
    with two digits. The result is sorted by the full path string.
    """
    print(f"\nScanning directory: {input_dir}")
    two_digit_prefix = re.compile(r'^\d{2}')
    markdown_files = sorted(
        (
            p
            for p in input_dir.rglob('*.md')
            if two_digit_prefix.match(p.name)
            and two_digit_prefix.match(p.relative_to(input_dir).parts[0])
        ),
        key=str,
    )
    print(f"Total files found: {len(markdown_files)}")
    return markdown_files


def _next_heading_number(level, counters):
    """Advance the h1/h2/h3 *counters* for *level* and return its number.

    *counters* is a three-item list mutated in place. Bumping a level
    resets every deeper level to zero. Levels other than '1'-'3' are not
    numbered and yield an empty string.
    """
    if level not in ('1', '2', '3'):
        return ""
    depth = int(level)
    counters[depth - 1] += 1
    for deeper in range(depth, len(counters)):
        counters[deeper] = 0
    return ".".join(str(count) for count in counters[:depth])


def _number_heading_tag(html, cursor, level, heading, numbered_heading, anchor):
    """Rewrite the next ``<hN>`` tag holding *heading* at or after *cursor*.

    The tag receives ``id="anchor"`` and *numbered_heading* as its content.
    Returns ``(new_html, new_cursor)``; both are unchanged if no matching
    tag is found. Searching forward from *cursor* keeps repeated heading
    texts from being rewritten twice.
    """
    tag = re.compile(
        rf'<h{level}(?P<attrs>[^>]*)>{re.escape(heading)}</h{level}>'
    )
    match = tag.search(html, cursor)
    if match is None:
        return html, cursor
    # The generated id is written before any attributes already present.
    new_tag = (
        f'<h{level} id="{anchor}"{match.group("attrs")}>'
        f'{numbered_heading}</h{level}>'
    )
    html = html[:match.start()] + new_tag + html[match.end():]
    return html, match.start() + len(new_tag)


def generate_pdf(input_path: Path, output_file: Path):
    """Render a set of Markdown files into one PDF with a numbered TOC.

    Args:
        input_path: Either a ToC file (handed to ``parse_toc_file``) or a
            directory (handed to ``scan_directory``).
        output_file: Destination PDF; missing parent directories are
            created.

    Raises:
        ValueError: If *input_path* is neither a file nor a directory, or
            if no Markdown files are found.
    """
    from datetime import timezone  # local: only needed for the timestamp

    # Determine input mode: ToC file or directory
    if input_path.is_file():
        print("Mode: ToC file")
        markdown_files = parse_toc_file(input_path)
    elif input_path.is_dir():
        print("Mode: Directory scan")
        markdown_files = scan_directory(input_path)
    else:
        raise ValueError(f"Input path does not exist: {input_path}")

    if not markdown_files:
        raise ValueError("No markdown files found to process")

    toc_entries = []
    content_blocks = []
    # Running h1/h2/h3 counters; numbering continues across files.
    counters = [0, 0, 0]

    print(f"\nProcessing {len(markdown_files)} files...")
    for file in markdown_files:
        print(f" Processing: {file.name}")
        raw_md = transform_special_blockquotes(file.read_text(encoding='utf-8'))
        html = markdown(
            raw_md, extensions=['extra', 'nl2br', 'sane_lists', 'attr_list']
        )
        html = resolve_image_paths(html, file.parent)
        headings = validate_markdown(file, html)

        # NOTE(review): the anchor/link markup below is reconstructed. The
        # reviewed text computed `anchor` without using it and replaced the
        # bare heading text everywhere in the page. Assumes
        # validate_markdown() returns (level, inner_html) pairs in document
        # order - confirm against version control.
        cursor = 0
        for level, heading in headings:
            number = _next_heading_number(level, counters)
            numbered_heading = f"{number} {heading}" if number else heading
            anchor = sanitize_heading(f"{number}_{heading}")
            toc_entries.append(
                f'\n<p><a href="#{anchor}">{numbered_heading}</a></p>\n'
            )
            html, cursor = _number_heading_tag(
                html, cursor, level, heading, numbered_heading, anchor
            )
        content_blocks.append(html)

    # datetime.utcnow() is deprecated; an aware UTC "now" formats the same.
    current_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    branch, commit = get_git_info()
    toc_html = ''.join(toc_entries)
    body_html = ''.join(content_blocks)

    # NOTE(review): the static text below contains no HTML tags or CSS.
    # It is kept exactly as found; confirm against version control whether
    # wrapper markup and styling belong here.
    combined_html = f"""
DRAFT

AI Testing Guide

PDF Preview

Generated on: {current_date}
From Branch: {branch}
From Commit: {commit}

Draft Disclaimer

This PDF is an automatically generated draft intended for internal review and development purposes. It is not final, not publication-ready, and may contain formatting or structural inconsistencies.

Content, structure, and layout are subject to change. For questions or feedback, please refer to the project repository or connect via Slack.

Table of Contents

{toc_html}

{body_html}
"""

    output_file.parent.mkdir(parents=True, exist_ok=True)
    print(f"\nGenerating PDF: {output_file}")
    HTML(string=combined_html).write_pdf(str(output_file))
    print("✓ PDF generated successfully")


def main():
    """Parse the command line and generate the PDF."""
    parser = argparse.ArgumentParser(
        description="Generate PDF from markdown files. Supports both directory scanning and ToC file parsing."
    )
    parser.add_argument(
        "input_path",
        type=Path,
        help="Path to ToC markdown file OR root directory containing markdown files",
    )
    parser.add_argument("output_file", type=Path, help="Path to output PDF file")
    args = parser.parse_args()
    generate_pdf(args.input_path, args.output_file)


# The guard must compare __name__ with "__main__"; a bare `name` is
# undefined and raises NameError.
if __name__ == "__main__":
    main()