mirror of
https://github.com/Shiva108/ai-llm-red-team-handbook.git
synced 2026-02-12 14:42:46 +00:00
- Significantly expanded Chapter 7 with detailed guides and code examples for AI red teaming lab setup. - Introduced comprehensive sections on local LLM deployment, API-based testing, and network isolation. - Added critical safety controls including kill switches, watchdog timers, rate limiting, and cost management. - Included advanced topics such as testing RAG, agent systems, and multi-modal models. - Provided pre-engagement and daily operational checklists, risk management, and incident response procedures.
128 lines
4.4 KiB
Python
128 lines
4.4 KiB
Python
|
|
import re
|
|
import sys
|
|
import json
|
|
import time
|
|
import argparse
|
|
import requests
|
|
import concurrent.futures
|
|
from urllib.parse import urlparse
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any
|
|
|
|
def extract_links(markdown_content: str) -> List[str]:
|
|
"""Extract all markdown links [text](url) and <url>."""
|
|
# Match [text](url)
|
|
md_links = re.findall(r'\[.*?\]\((https?://[^)]+)\)', markdown_content)
|
|
# Match <url>
|
|
angle_links = re.findall(r'<(https?://[^>]+)>', markdown_content)
|
|
# Match bare URLs (ignoring those already captured in () or <>)
|
|
# This regex looks for http/https, but tries to avoid trailing punctuation often found in text
|
|
plain_links = re.findall(r'https?://[^\s<>")]+', markdown_content)
|
|
|
|
all_links = sorted(list(set(md_links + angle_links + plain_links)))
|
|
|
|
# Remove any trailing punctuation that might have slipped in (like periods or commas)
|
|
clean_links = []
|
|
for link in all_links:
|
|
link = link.rstrip('.,;:')
|
|
clean_links.append(link)
|
|
|
|
return sorted(list(set(clean_links)))
|
|
|
|
def check_url(url: str, timeout: int = 10) -> Dict[str, Any]:
|
|
"""Check a single URL for reachability."""
|
|
try:
|
|
headers = {
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
}
|
|
response = requests.head(url, headers=headers, timeout=timeout, allow_redirects=True)
|
|
if response.status_code == 405: # Method Not Allowed, try GET
|
|
response = requests.get(url, headers=headers, timeout=timeout, stream=True)
|
|
response.close()
|
|
|
|
return {
|
|
"url": url,
|
|
"status": response.status_code,
|
|
"alive": 200 <= response.status_code < 400
|
|
}
|
|
except requests.RequestException as e:
|
|
return {
|
|
"url": url,
|
|
"status": -1,
|
|
"alive": False,
|
|
"error": str(e)
|
|
}
|
|
|
|
def audit_chapter(file_path: str, output_file: str = None) -> Dict[str, Any]:
|
|
"""Audit a markdown chapter file."""
|
|
start_time = time.time()
|
|
path = Path(file_path)
|
|
if not path.exists():
|
|
print(f"Error: File {file_path} not found.")
|
|
sys.exit(1)
|
|
|
|
content = path.read_text(encoding='utf-8')
|
|
|
|
# 1. Verification: Link Checking
|
|
links = extract_links(content)
|
|
print(f"Found {len(links)} unique remote URLs. Verifying...")
|
|
|
|
url_results = []
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
|
future_to_url = {executor.submit(check_url, url): url for url in links}
|
|
for future in concurrent.futures.as_completed(future_to_url):
|
|
url_results.append(future.result())
|
|
|
|
broken_links = [r for r in url_results if not r['alive']]
|
|
|
|
# 2. Fact/Formatting Check (Basic Regex)
|
|
issues = []
|
|
|
|
# Check for empty links
|
|
if re.search(r'\[.*?\]\(\s*\)', content):
|
|
issues.append({"type": "Formatting", "issue": "Found empty markdown link ()"})
|
|
|
|
# Check for duplicate headers
|
|
headers = re.findall(r'^#+\s+(.+)$', content, re.MULTILINE)
|
|
if len(headers) != len(set(headers)):
|
|
issues.append({"type": "Structure", "issue": "Duplicate headers found"})
|
|
|
|
total_duration = time.time() - start_time
|
|
|
|
report = {
|
|
"metadata": {
|
|
"chapter": path.name,
|
|
"audit_date": time.strftime("%Y-%m-%d %H:%M:%S"),
|
|
"duration_seconds": round(total_duration, 2),
|
|
"version": "2.0"
|
|
},
|
|
"summary": {
|
|
"total_links": len(links),
|
|
"broken_links": len(broken_links)
|
|
},
|
|
"url_verification": convert_results_to_json_friendly(url_results),
|
|
"structure_issues": issues
|
|
}
|
|
|
|
if output_file:
|
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
json.dump(report, f, indent=2)
|
|
print(f"Report saved to {output_file}")
|
|
else:
|
|
print(json.dumps(report, indent=2))
|
|
|
|
return report
|
|
|
|
def convert_results_to_json_friendly(results):
|
|
# Ensure all data in results is JSON serializable if needed
|
|
return results
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(description="Audit MD Chapter")
|
|
parser.add_argument("filename", help="Markdown file to audit")
|
|
parser.add_argument("--output", help="JSON output file", default="audit_report.json")
|
|
args = parser.parse_args()
|
|
|
|
audit_chapter(args.filename, args.output)
|