Files
ai-llm-red-team-handbook/scripts/compliance/tooling_compliance_validator.py
T
shiva108 b3d3bac51f Add practical scripts directory with 400+ tools
- Extracted all code examples from handbook chapters
- Organized into 15 attack categories
- Created shared utilities (api_client, validators, logging, constants)
- Added workflow orchestration scripts
- Implemented install.sh for easy setup
- Renamed all scripts to descriptive functional names
- Added comprehensive README and documentation
- Included pytest test suite and configuration
2026-01-07 11:39:46 +01:00

85 lines
3.3 KiB
Python

#!/usr/bin/env python3
"""
40.3.2 Tooling: The `Compliance_Validator`
Source: Chapter_40_Compliance_and_Standards
Category: compliance
"""
import json
import logging
from typing import Dict, List, Any
import argparse
import sys
# Configure the root logger at import time: show INFO and above, rendered as
# "LEVEL: message" (no timestamp or logger name).
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
class ComplianceValidator:
    """Map failed red-team probe results onto compliance controls.

    Reads a JSONL scan report (one JSON object per line, in the simplified
    Garak-style shape used by this script), keeps the entries whose
    ``status`` is ``"fail"``, and renders a Markdown audit report listing
    the control identifiers associated with each finding.
    """

    # Ordered (category, keywords) rules. The first rule with a keyword that
    # occurs in the lower-cased probe name wins, so the order below is the
    # precedence order. Every category here has an entry in ``control_map``.
    # NOTE: this is plain substring matching, so a short keyword such as
    # "dan" can also match unrelated probe names that merely contain it.
    _CATEGORY_KEYWORDS = (
        ("jailbreak", ("dan", "jailbreak")),
        ("prompt_injection", ("injection",)),
        ("leak_pii", ("pii", "privacy")),
        ("encoding", ("encoding",)),
        ("hallucination", ("hallucination",)),
    )

    def __init__(self):
        # Mapping: attack category -> compliance control identifiers.
        self.control_map = {
            "jailbreak": ["ISO_42001_A.7.2", "NIST_RMF_Manage_2.4"],
            "prompt_injection": ["ISO_42001_A.7.2", "EU_AI_Act_Art_15"],
            "leak_pii": ["ISO_42001_A.9.3", "GDPR_Art_33", "NIST_RMF_Measure_2.6"],
            "encoding": ["ISO_42001_A.7.2", "NIST_RMF_Measure_2.5"],
            "hallucination": ["ISO_42001_A.8.4", "EU_AI_Act_Art_15"],
        }

    def parse_garak_report(self, report_path: str) -> List[Dict[str, Any]]:
        """Return the failed entries of a JSONL report.

        Blank lines are ignored. Lines that are not valid JSON are skipped
        with a logged warning, and lines that parse to something other than
        a JSON object are ignored, so one damaged line (for example a
        truncated final line) does not discard the rest of the report.

        Args:
            report_path: Path to a file containing one JSON object per line.

        Returns:
            The parsed entries whose ``status`` equals ``"fail"``, in file
            order. An empty list if the file does not exist (an error is
            logged in that case).
        """
        violations: List[Dict[str, Any]] = []
        try:
            with open(report_path, "r", encoding="utf-8") as f:
                for line_no, raw_line in enumerate(f, start=1):
                    line = raw_line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError as exc:
                        logging.warning(
                            "Skipping malformed JSON on line %d of %s: %s",
                            line_no, report_path, exc,
                        )
                        continue
                    # Simplified entry shape: {'probe_class': ..., 'status': 'fail', ...}
                    if isinstance(entry, dict) and entry.get("status") == "fail":
                        violations.append(entry)
        except FileNotFoundError:
            logging.error("Report file %s not found.", report_path)
        return violations

    def _categorize(self, probe_type: str) -> str:
        """Return the attack category for a lower-cased probe name, or ``"unknown"``."""
        for category, keywords in self._CATEGORY_KEYWORDS:
            if any(keyword in probe_type for keyword in keywords):
                return category
        return "unknown"

    def generate_audit_artifact(self, violations: List[Dict[str, Any]]) -> str:
        """Generate a text-based (Markdown) compliance artifact.

        Args:
            violations: Failed report entries. Each entry's probe name is
                read from ``probe_class``, falling back to ``probe``; the
                optional ``notes`` value is shown as the impact description.

        Returns:
            A Markdown document: a title line followed by one section per
            violation listing the probe name, impact note, mapped controls
            and a remediation hint. Probes that match no known category are
            mapped to ``Manual_Review_Required``.
        """
        report_lines = ["# Compliance Audit Report (ISO 42001 / NIST AI RMF)\n"]
        for v in violations:
            # Accept either key and tolerate None / non-string values so a
            # single odd entry cannot abort the whole report.
            raw_probe = v.get("probe_class") or v.get("probe") or "unknown"
            probe_type = str(raw_probe).lower()
            category = self._categorize(probe_type)
            controls = self.control_map.get(category, ["Manual_Review_Required"])
            report_lines.append(f"## Finding: {probe_type}")
            report_lines.append(f"- **Impact Check:** {v.get('notes', 'Adversarial success')}")
            report_lines.append(f"- **Violated Controls:** {', '.join(controls)}")
            report_lines.append(f"- **Remediation:** Implement output filtering for {category}.\n")
        return "\n".join(report_lines)
# Example usage: build a throwaway two-entry report and print its audit artifact.
if __name__ == "__main__":
    import os
    import tempfile

    demo_entries = [
        {"probe_class": "probes.dan.Dan_11.0", "status": "fail", "notes": "Model responded to harmful prompt"},
        {"probe_class": "probes.encoding.Base64", "status": "fail", "notes": "Model decoded malicious base64"},
    ]
    # Write the demo report to a private temp file instead of ./garak.jsonl so
    # a real report in the working directory is never overwritten, and the
    # demo leaves nothing behind.
    fd, dummy_report = tempfile.mkstemp(prefix="garak_", suffix=".jsonl")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            for entry in demo_entries:
                f.write(json.dumps(entry) + "\n")
        validator = ComplianceValidator()
        findings = validator.parse_garak_report(dummy_report)
        print(validator.generate_audit_artifact(findings))
    finally:
        os.remove(dummy_report)