diff --git a/PDFGenerator/generate_pdf_preview.py b/PDFGenerator/generate_pdf_preview.py
new file mode 100755
index 0000000..ef0fa76
--- /dev/null
+++ b/PDFGenerator/generate_pdf_preview.py
@@ -0,0 +1,399 @@
+#!/usr/bin/env python3
+import argparse
+import re
+import subprocess
+from datetime import datetime
+from markdown import markdown
+from pathlib import Path
+from weasyprint import HTML
+
# Opt-in structural check: when true, validate_markdown() raises ValueError
# for any file whose rendered HTML contains no level-1 heading.
VALIDATE_STRUCTURE = False
+
def sanitize_heading(text: str) -> str:
    """Turn heading text into a token usable as an HTML anchor.

    Spaces become underscores first; every remaining character outside
    ``[a-zA-Z0-9_-]`` is then dropped (not replaced), so punctuation and
    non-ASCII characters simply disappear from the result.
    """
    return re.sub(r'[^a-zA-Z0-9_-]', '', text.replace(' ', '_'))
+
def get_git_info() -> tuple[str, str]:
    """Return ``(branch, commit)`` for the current git checkout.

    Both values come from ``git rev-parse ... HEAD`` run in the current
    working directory. This is best-effort metadata for the title page: if
    git is missing, the directory is not a repository, or the output cannot
    be decoded, ``("unknown", "unknown")`` is returned instead of raising.
    """
    def rev_parse(flag: str) -> str:
        # stderr is discarded so git's own "fatal: ..." message does not leak
        # into the console output on the fallback path.
        return subprocess.check_output(
            ["git", "rev-parse", flag, "HEAD"],
            stderr=subprocess.DEVNULL,
            encoding="utf-8",
        ).strip()

    try:
        commit = rev_parse("--short")
        branch = rev_parse("--abbrev-ref")
    except (subprocess.CalledProcessError, OSError, UnicodeDecodeError):
        # CalledProcessError: git exited non-zero; OSError: git not runnable.
        return "unknown", "unknown"
    return branch, commit
+
# Rewrite "> NOTE:" and "> COMMENT:" blockquote lines in raw Markdown text
# and return the modified text.
#
# NOTE(review): both replacement literals look damaged in this copy of the
# file (the NOTE replacement is split over three lines with no markup around
# the text, and the COMMENT replacement is empty). Presumably HTML wrappers
# were stripped from them; confirm against the upstream file.
+def transform_special_blockquotes(md_text: str) -> str:
# (?m) lets ^ match at every line start, so each "> NOTE: ..." line is
# rewritten on its own; the text after the marker is kept as "Note: <text>".
+ md_text = re.sub(
+ r'(?m)^> NOTE:\s*(.*)',
+ r'
Note: \1
',
+ md_text
+ )
# As written, a "> COMMENT: ..." line is replaced with an empty string, so
# its text is removed; the line break itself is not part of the match and stays.
+ md_text = re.sub(
+ r'(?m)^> COMMENT:\s*(.*)',
+ r'',
+ md_text
+ )
+ return md_text
+
def validate_markdown(file_path: Path, html: str) -> list[tuple[str, str]]:
    """Collect the headings found in a rendered Markdown document.

    Args:
        file_path: Source file the HTML came from; used only in the error
            message.
        html: Rendered HTML to scan.

    Returns:
        ``(level, inner_html)`` pairs in document order, where ``level`` is
        the heading digit as a string (``'1'`` .. ``'6'``).

    Raises:
        ValueError: If ``VALIDATE_STRUCTURE`` is true and no level-1 heading
            is present.
    """
    # NOTE(review): the pattern literal had lost its tag markup in the
    # reviewed copy (only the capture group survived). It is rebuilt here so
    # that findall yields the (level, text) pairs this function and its
    # caller unpack; only attribute-free heading tags match. Confirm upstream.
    headings = re.findall(r'<h([1-6])>(.*?)</h\1>', html)

    if VALIDATE_STRUCTURE and not any(level == '1' for level, _ in headings):
        raise ValueError(f"Validation failed: No heading found in {file_path}")

    return headings
+
def resolve_image_paths(html: str, base_path: Path) -> str:
    """Point every local ``<img src="...">`` at an absolute ``file://`` URL.

    Args:
        html: Rendered HTML fragment.
        base_path: Directory that relative image paths are resolved against.

    Returns:
        The HTML with each local ``src`` value replaced by the URL of the
        resolved path. Other attributes of the tag are left as they were.
        A missing image only produces a warning; its ``src`` is still
        rewritten. Sources that already carry an ``http(s):``, ``data:`` or
        ``file:`` prefix are left untouched.
    """
    # NOTE(review): both the pattern and the replacement markup were
    # unreadable in the reviewed copy. The pattern is rebuilt from its
    # surviving tail; the replacement is a reconstruction that swaps only the
    # src value. Confirm against the upstream file.
    already_absolute = ("http://", "https://", "data:", "file:")

    def repl(match):
        src = match.group(2)
        if src.lower().startswith(already_absolute):
            # Joining a URL onto base_path would produce a bogus local path.
            return match.group(0)
        abs_path = (base_path / src).resolve()
        if not abs_path.exists():
            print(f"Warning: Image file not found: {abs_path}")
        else:
            print(f"Embedding image: {src} → {abs_path}")
        # as_uri() percent-encodes spaces and other unsafe characters.
        return f'{match.group(1)}{abs_path.as_uri()}{match.group(3)}'

    return re.sub(r'(<img[^>]*src=")([^"]+)("[^>]*>)', repl, html)
+
def parse_toc_file(toc_path: Path) -> list[Path]:
    """Parse a markdown ToC file and extract file paths in order.

    Every inline link of the form ``[text](something.md)`` is treated as one
    entry. Link targets are resolved relative to the directory containing the
    ToC file, which also collapses ``..`` segments. Targets that do not
    resolve to an existing file are reported and skipped.

    Args:
        toc_path: Path to the ToC markdown file (read as UTF-8).

    Returns:
        Absolute paths of the linked files that exist, in link order.
    """
    print(f"\nParsing ToC file: {toc_path}")
    toc_content = toc_path.read_text(encoding='utf-8')

    # Extract all markdown links: [text](path)
    link_pattern = r'\[([^\]]+)\]\(([^)]+\.md)\)'
    toc_dir = toc_path.parent

    file_paths = []
    for _title, relative_path in re.findall(link_pattern, toc_content):
        abs_path = (toc_dir / relative_path).resolve()

        # is_file() rather than exists(): a directory that happens to be
        # named *.md cannot be opened by the caller.
        if abs_path.is_file():
            file_paths.append(abs_path)
            print(f" ✓ Found: {relative_path}")
        else:
            print(f" ⚠ Warning: File not found: {relative_path} → {abs_path}")

    print(f"\nTotal files found: {len(file_paths)}")
    return file_paths
+
def scan_directory(input_dir: Path) -> list[Path]:
    """Scan directory for markdown files with numeric naming pattern.

    A ``*.md`` file anywhere below ``input_dir`` is selected when its own
    name starts with two digits and the first path component below
    ``input_dir`` also starts with two digits (for a file directly inside
    ``input_dir`` that component is the file name itself).

    Args:
        input_dir: Root directory to search recursively.

    Returns:
        The selected paths, sorted by their string form.
    """
    print(f"\nScanning directory: {input_dir}")
    two_digit_prefix = re.compile(r'\d{2}')

    markdown_files = sorted(
        (
            p for p in input_dir.rglob('*.md')
            # Directories can match the glob too; only files can be read.
            if p.is_file()
            and two_digit_prefix.match(p.name)
            and two_digit_prefix.match(p.relative_to(input_dir).parts[0])
        ),
        key=str,
    )
    print(f"Total files found: {len(markdown_files)}")
    return markdown_files
+
# Build a single PDF from a set of Markdown files.
#
# input_path:  either a ToC markdown file (its links choose the files, via
#              parse_toc_file) or a directory (scanned via scan_directory).
# output_file: destination PDF path; missing parent directories are created.
#
# Raises ValueError when input_path is neither a file nor a directory, or
# when no markdown files were found.
#
# NOTE(review): several string literals in this function (the ToC entry, the
# heading replacement and the page template) appear to have lost their HTML
# markup in this copy of the file; confirm against the upstream file before
# changing any of them.
+def generate_pdf(input_path: Path, output_file: Path):
+ # Determine input mode: ToC file or directory
+ if input_path.is_file():
+ print(f"Mode: ToC file")
+ markdown_files = parse_toc_file(input_path)
+ elif input_path.is_dir():
+ print(f"Mode: Directory scan")
+ markdown_files = scan_directory(input_path)
+ else:
+ raise ValueError(f"Input path does not exist: {input_path}")
+
+ if not markdown_files:
+ raise ValueError("No markdown files found to process")
+
+ toc_entries = []
+ content_blocks = []
+
# The counters are initialised once, outside the file loop, so heading
# numbers continue across files instead of restarting in each one.
+ h1_counter = 0
+ h2_counter = 0
+ h3_counter = 0
+
+ print(f"\nProcessing {len(markdown_files)} files...")
+ for file in markdown_files:
+ print(f" Processing: {file.name}")
+ with open(file, encoding='utf-8') as f:
+ raw_md = f.read()
# Per-file pipeline: rewrite special blockquotes, render to HTML, resolve
# image paths relative to the file's own directory, then collect headings.
+ raw_md = transform_special_blockquotes(raw_md)
+ html = markdown(raw_md, extensions=['extra', 'nl2br', 'sane_lists', 'attr_list'])
+ html = resolve_image_paths(html, file.parent)
+ headings = validate_markdown(file, html)
+
# Levels are compared as strings. A level-1 heading resets the level-2 and
# level-3 counters; a level-2 heading resets the level-3 counter.
+ for level, heading in headings:
+ if level == '1':
+ h1_counter += 1
+ h2_counter = 0
+ h3_counter = 0
+ number = f"{h1_counter}"
+ elif level == '2':
+ h2_counter += 1
+ h3_counter = 0
+ number = f"{h1_counter}.{h2_counter}"
+ elif level == '3':
+ h3_counter += 1
+ number = f"{h1_counter}.{h2_counter}.{h3_counter}"
+ else:
+ number = ""
+
# With an empty number (any level other than 1-3) numbered_heading starts
# with a space and the text passed to sanitize_heading starts with "_".
+ numbered_heading = f"{number} {heading}"
+ anchor = sanitize_heading(f"{number}_{heading}")
# NOTE(review): anchor is computed but not referenced by the literals below
# as they appear here; presumably it was used in the stripped markup.
+ toc_entries.append(
+ f"
{numbered_heading}"
+ )
# NOTE(review): as written this replaces every occurrence of the bare heading
# text in the document, not just the heading element; verify upstream.
+ html = html.replace(
+ f"{heading}",
+ f"{numbered_heading}"
+ )
+
+ content_blocks.append(html)
+
# NOTE(review): datetime.utcnow() returns a naive datetime and is deprecated
# since Python 3.12; datetime.now(timezone.utc) is the aware equivalent.
+ current_date = datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')
+ branch, commit = get_git_info()
+
# Page template: title block with generation date, branch and commit, a draft
# disclaimer, the table of contents, then the rendered content of every file.
+ combined_html = f"""
+
+
+
+
+
+
+ DRAFT
+
+
AI Testing Guide
+
PDF Preview
+
Generated on: {current_date}
+ From Branch: {branch}
+ From Commit: {commit}
+
+
+
Draft Disclaimer
+
This PDF is an automatically generated draft intended for internal review and development purposes. It is not final, not publication-ready, and may contain formatting or structural inconsistencies.
+
Content, structure, and layout are subject to change. For questions or feedback, please refer to the project repository or connect via Slack.
+
+
+
Table of Contents
+
+ {''.join(toc_entries)}
+
+
+ {''.join(content_blocks)}
+
+
+"""
+
# Create the destination directory if needed, then hand the combined HTML
# string to WeasyPrint's HTML(...).write_pdf.
+ output_file.parent.mkdir(parents=True, exist_ok=True)
+ print(f"\nGenerating PDF: {output_file}")
+ HTML(string=combined_html).write_pdf(str(output_file))
+ print(f"✓ PDF generated successfully")
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: two positional paths, input then output."""
    parser = argparse.ArgumentParser(
        description="Generate PDF from markdown files. Supports both directory scanning and ToC file parsing."
    )
    parser.add_argument(
        "input_path",
        type=Path,
        help="Path to ToC markdown file OR root directory containing markdown files",
    )
    parser.add_argument("output_file", type=Path, help="Path to output PDF file")
    return parser


def main(argv=None) -> None:
    """Parse the command line and generate the PDF.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``, which is what the script entry point uses.
    """
    args = _build_parser().parse_args(argv)
    generate_pdf(args.input_path, args.output_file)


if __name__ == "__main__":
    main()
\ No newline at end of file