""" RAG Engine - Retrieval-Augmented Generation for enhanced AI reasoning. Indexes all knowledge sources (bug bounty reports, vuln KB, custom docs, reasoning traces) and provides semantic retrieval for context-enriched LLM prompts. Does NOT modify the model - only augments input context. Collections: - bug_bounty_patterns: 9131 real-world vulnerability reports - vuln_methodologies: 100 vulnerability type methodologies - custom_knowledge: User-uploaded research documents - reasoning_traces: Successful reasoning chains from past scans - attack_patterns: Extracted attack patterns and techniques """ import json import logging import re import time from pathlib import Path from typing import List, Dict, Optional, Any, Tuple from dataclasses import dataclass, field from .vectorstore import ( BaseVectorStore, Document, RetrievedChunk, create_vectorstore ) logger = logging.getLogger(__name__) # Collection names COL_BUG_BOUNTY = "bug_bounty_patterns" COL_VULN_METHODS = "vuln_methodologies" COL_CUSTOM = "custom_knowledge" COL_REASONING = "reasoning_traces" COL_ATTACK = "attack_patterns" # Defaults DEFAULT_TOP_K = 5 MAX_CONTEXT_CHARS = 4000 INDEX_BATCH_SIZE = 200 @dataclass class RAGContext: """Assembled RAG context for a specific query.""" query: str chunks: List[RetrievedChunk] = field(default_factory=list) total_score: float = 0.0 sources_used: List[str] = field(default_factory=list) token_estimate: int = 0 def to_prompt_text(self, max_chars: int = MAX_CONTEXT_CHARS) -> str: """Format retrieved context for injection into LLM prompt.""" if not self.chunks: return "" sections = [] current_len = 0 for chunk in self.chunks: source_label = chunk.metadata.get("source_type", chunk.source) vuln_type = chunk.metadata.get("vuln_type", "") score_pct = int(chunk.score * 100) if chunk.score <= 1.0 else int(chunk.score) header = f"[{source_label}]" if vuln_type: header += f" ({vuln_type})" header += f" [relevance: {score_pct}%]" text = chunk.text.strip() section = f"{header}\n{text}\n" if current_len + len(section) > max_chars: remaining = max_chars - current_len - len(header) - 20 if remaining > 100: section = f"{header}\n{text[:remaining]}...\n" else: break sections.append(section) current_len += len(section) if not sections: return "" result = "=== RETRIEVED KNOWLEDGE (RAG) ===\n" result += "Use this knowledge to inform your analysis. Adapt techniques to the target.\n\n" result += "\n---\n".join(sections) result += "\n=== END RETRIEVED KNOWLEDGE ===\n" self.token_estimate = len(result) // 4 # rough token estimate return result class RAGEngine: """ Main RAG orchestrator. Indexes knowledge sources and provides semantic retrieval for context-enriched AI reasoning. """ def __init__(self, data_dir: str = "data", backend: str = "auto", persist_dir: str = None): self.data_dir = Path(data_dir) self.persist_dir = persist_dir or str(self.data_dir / "vectorstore") self.store: BaseVectorStore = create_vectorstore( self.persist_dir, backend=backend ) self._indexed = False self._index_stats: Dict[str, int] = {} logger.info(f"RAG Engine initialized with '{self.store.backend_name}' backend") @property def backend_name(self) -> str: return self.store.backend_name @property def is_indexed(self) -> bool: return self._indexed def get_stats(self) -> Dict: """Return indexing statistics.""" stats = { "backend": self.store.backend_name, "indexed": self._indexed, "collections": {} } for col_name in [COL_BUG_BOUNTY, COL_VULN_METHODS, COL_CUSTOM, COL_REASONING, COL_ATTACK]: count = self.store.collection_count(col_name) if count > 0: stats["collections"][col_name] = count return stats # ── Indexing ──────────────────────────────────────────────── def index_all(self, force: bool = False) -> Dict[str, int]: """ Index all available knowledge sources. Returns dict of collection_name -> documents_indexed. """ stats = {} # Only re-index if forced or collections are empty if not force and self._all_collections_populated(): logger.info("RAG: All collections already populated, skipping index") self._indexed = True return stats start = time.time() stats[COL_BUG_BOUNTY] = self._index_bug_bounty() stats[COL_VULN_METHODS] = self._index_vuln_knowledge_base() stats[COL_CUSTOM] = self._index_custom_knowledge() stats[COL_ATTACK] = self._index_attack_patterns() elapsed = time.time() - start total = sum(stats.values()) self._indexed = True self._index_stats = stats logger.info(f"RAG: Indexed {total} documents across {len(stats)} collections in {elapsed:.1f}s") return stats def _all_collections_populated(self) -> bool: """Check if main collections already have data.""" return (self.store.collection_exists(COL_BUG_BOUNTY) and self.store.collection_exists(COL_VULN_METHODS)) def _index_bug_bounty(self) -> int: """Index the bug bounty finetuning dataset.""" dataset_path = Path("models/bug-bounty/bugbounty_finetuning_dataset.json") if not dataset_path.exists(): logger.warning(f"RAG: Bug bounty dataset not found at {dataset_path}") return 0 if self.store.collection_exists(COL_BUG_BOUNTY): existing = self.store.collection_count(COL_BUG_BOUNTY) if existing > 1000: logger.info(f"RAG: Bug bounty already indexed ({existing} docs)") return 0 try: with open(dataset_path, 'r', encoding='utf-8') as f: entries = json.load(f) except Exception as e: logger.error(f"RAG: Failed to load bug bounty dataset: {e}") return 0 if not isinstance(entries, list): return 0 documents = [] for i, entry in enumerate(entries): instruction = entry.get("instruction", "") output = entry.get("output", "") if not output or len(output) < 50: continue # Extract vulnerability types from content vuln_types = self._detect_vuln_types(instruction + " " + output) # Extract technologies technologies = self._detect_technologies(output) # Chunk 1: Full methodology (primary chunk) methodology = self._extract_section(output, [ "passos para reproduzir", "steps to reproduce", "methodology", "exploitation", "proof of concept", "como reproduzir", "reprodução" ]) if methodology and len(methodology) > 100: documents.append(Document( text=methodology[:4000], metadata={ "source_type": "bug_bounty", "vuln_type": vuln_types[0] if vuln_types else "unknown", "vuln_types": ",".join(vuln_types[:5]), "technologies": ",".join(technologies[:5]), "chunk_type": "methodology", "entry_index": i }, doc_id=f"bb_method_{i}" )) # Chunk 2: Summary + Impact (secondary chunk) summary = self._extract_section(output, [ "resumo", "summary", "descrição", "description", "overview" ]) impact = self._extract_section(output, [ "impacto", "impact", "severity", "risco" ]) summary_text = f"{instruction}\n\n{summary or output[:500]}" if impact: summary_text += f"\n\nImpact: {impact}" documents.append(Document( text=summary_text[:3000], metadata={ "source_type": "bug_bounty", "vuln_type": vuln_types[0] if vuln_types else "unknown", "vuln_types": ",".join(vuln_types[:5]), "technologies": ",".join(technologies[:5]), "chunk_type": "summary", "entry_index": i }, doc_id=f"bb_summary_{i}" )) # Chunk 3: Payloads & PoC code (if present) payloads = self._extract_code_blocks(output) if payloads: payload_text = f"Vulnerability: {vuln_types[0] if vuln_types else 'unknown'}\n" payload_text += f"Technologies: {', '.join(technologies[:3])}\n\n" payload_text += "Payloads/PoC:\n" + "\n\n".join(payloads[:10]) documents.append(Document( text=payload_text[:3000], metadata={ "source_type": "bug_bounty", "vuln_type": vuln_types[0] if vuln_types else "unknown", "vuln_types": ",".join(vuln_types[:5]), "technologies": ",".join(technologies[:5]), "chunk_type": "payload", "entry_index": i }, doc_id=f"bb_payload_{i}" )) # Index in batches total_added = 0 for start in range(0, len(documents), INDEX_BATCH_SIZE): batch = documents[start:start + INDEX_BATCH_SIZE] added = self.store.add(COL_BUG_BOUNTY, batch) total_added += added logger.info(f"RAG: Indexed {total_added} bug bounty chunks from {len(entries)} entries") return total_added def _index_vuln_knowledge_base(self) -> int: """Index the 100-type vulnerability knowledge base.""" kb_path = self.data_dir / "vuln_knowledge_base.json" if not kb_path.exists(): return 0 if self.store.collection_exists(COL_VULN_METHODS): existing = self.store.collection_count(COL_VULN_METHODS) if existing >= 90: return 0 try: with open(kb_path, 'r', encoding='utf-8') as f: kb = json.load(f) except Exception as e: logger.error(f"RAG: Failed to load vuln KB: {e}") return 0 vuln_types = kb.get("vulnerability_types", {}) if not vuln_types: return 0 documents = [] for vuln_type, info in vuln_types.items(): text = f"Vulnerability: {info.get('title', vuln_type)}\n" text += f"Type: {vuln_type}\n" text += f"CWE: {info.get('cwe_id', 'N/A')}\n" text += f"Severity: {info.get('severity', 'N/A')}\n\n" text += f"Description: {info.get('description', '')}\n\n" text += f"Impact: {info.get('impact', '')}\n\n" text += f"Remediation: {info.get('remediation', '')}\n" fp_markers = info.get("false_positive_markers", []) if fp_markers: text += f"\nFalse Positive Indicators: {', '.join(fp_markers)}\n" documents.append(Document( text=text, metadata={ "source_type": "vuln_kb", "vuln_type": vuln_type, "severity": info.get("severity", "medium"), "cwe_id": info.get("cwe_id", ""), "chunk_type": "methodology" }, doc_id=f"vkb_{vuln_type}" )) # Index XBOW insights if available xbow = kb.get("xbow_insights", {}) if xbow: for category, insights in xbow.items(): if isinstance(insights, str): text = f"XBOW Benchmark Insight - {category}:\n{insights}" elif isinstance(insights, dict): text = f"XBOW Benchmark Insight - {category}:\n{json.dumps(insights, indent=2)}" elif isinstance(insights, list): text = f"XBOW Benchmark Insight - {category}:\n" + "\n".join(str(i) for i in insights) else: continue documents.append(Document( text=text[:3000], metadata={ "source_type": "vuln_kb", "vuln_type": category, "chunk_type": "insight" }, doc_id=f"xbow_{category}" )) added = self.store.add(COL_VULN_METHODS, documents) logger.info(f"RAG: Indexed {added} vuln KB entries") return added def _index_custom_knowledge(self) -> int: """Index user-uploaded custom knowledge documents.""" index_path = self.data_dir / "custom-knowledge" / "index.json" if not index_path.exists(): return 0 try: with open(index_path, 'r', encoding='utf-8') as f: index = json.load(f) except Exception: return 0 documents = [] for doc_entry in index.get("documents", []): for entry in doc_entry.get("knowledge_entries", []): vuln_type = entry.get("vuln_type", "unknown") text = f"Custom Knowledge - {vuln_type}\n" text += f"Source: {doc_entry.get('filename', 'unknown')}\n\n" if entry.get("methodology"): text += f"Methodology: {entry['methodology']}\n\n" if entry.get("key_insights"): if isinstance(entry["key_insights"], list): text += "Key Insights:\n" + "\n".join(f"- {i}" for i in entry["key_insights"]) + "\n\n" else: text += f"Key Insights: {entry['key_insights']}\n\n" if entry.get("payloads"): payloads = entry["payloads"][:10] text += "Payloads:\n" + "\n".join(f" {p}" for p in payloads) + "\n\n" if entry.get("bypass_techniques"): techniques = entry["bypass_techniques"][:10] text += "Bypass Techniques:\n" + "\n".join(f"- {t}" for t in techniques) + "\n" documents.append(Document( text=text[:4000], metadata={ "source_type": "custom", "vuln_type": vuln_type, "filename": doc_entry.get("filename", ""), "chunk_type": "methodology" }, doc_id=f"custom_{doc_entry.get('id', '')}_{vuln_type}" )) if not documents: return 0 added = self.store.add(COL_CUSTOM, documents) logger.info(f"RAG: Indexed {added} custom knowledge entries") return added def _index_attack_patterns(self) -> int: """Index extracted attack patterns from execution history.""" hist_path = self.data_dir / "execution_history.json" if not hist_path.exists(): return 0 try: with open(hist_path, 'r', encoding='utf-8') as f: history = json.load(f) except Exception: return 0 attacks = history.get("attacks", []) if not attacks: return 0 # Group successful attacks by vuln_type + tech successes: Dict[str, List[Dict]] = {} for attack in attacks: if not attack.get("success"): continue key = f"{attack.get('vuln_type', '')}_{attack.get('tech', '')}" if key not in successes: successes[key] = [] successes[key].append(attack) documents = [] for key, attack_list in successes.items(): vuln_type = attack_list[0].get("vuln_type", "unknown") tech = attack_list[0].get("tech", "unknown") text = f"Successful Attack Pattern: {vuln_type} on {tech}\n" text += f"Success count: {len(attack_list)}\n\n" for atk in attack_list[:5]: evidence = atk.get("evidence_preview", "") domain = atk.get("target_domain", "") text += f"- Target: {domain}, Evidence: {evidence}\n" documents.append(Document( text=text[:2000], metadata={ "source_type": "attack_pattern", "vuln_type": vuln_type, "technology": tech, "success_count": len(attack_list), "chunk_type": "pattern" }, doc_id=f"atk_{key}" )) if not documents: return 0 added = self.store.add(COL_ATTACK, documents) logger.info(f"RAG: Indexed {added} attack patterns") return added def index_reasoning_trace(self, trace: Dict) -> bool: """ Index a successful reasoning trace for future retrieval. Called when a finding is confirmed. trace = { "vuln_type": str, "technology": str, "endpoint": str, "reasoning_chain": List[str], "payload_used": str, "evidence": str, "confidence": float, "timestamp": float } """ vuln_type = trace.get("vuln_type", "unknown") tech = trace.get("technology", "unknown") text = f"Confirmed Reasoning Trace - {vuln_type}\n" text += f"Technology: {tech}\n" text += f"Endpoint: {trace.get('endpoint', '')}\n" text += f"Confidence: {trace.get('confidence', 0):.0%}\n\n" chain = trace.get("reasoning_chain", []) if chain: text += "Reasoning Chain:\n" for i, step in enumerate(chain, 1): text += f" {i}. {step}\n" text += "\n" if trace.get("payload_used"): text += f"Payload Used: {trace['payload_used']}\n" if trace.get("evidence"): text += f"Evidence: {trace['evidence'][:500]}\n" doc = Document( text=text[:3000], metadata={ "source_type": "reasoning_trace", "vuln_type": vuln_type, "technology": tech, "confidence": trace.get("confidence", 0), "chunk_type": "reasoning", "timestamp": trace.get("timestamp", time.time()) }, doc_id=f"trace_{vuln_type}_{int(time.time())}" ) try: self.store.add(COL_REASONING, [doc]) return True except Exception as e: logger.warning(f"RAG: Failed to index reasoning trace: {e}") return False # ── Querying ──────────────────────────────────────────────── def query(self, query_text: str, collections: List[str] = None, top_k: int = DEFAULT_TOP_K, vuln_type: str = None, technology: str = None, chunk_type: str = None) -> RAGContext: """ Query across collections for relevant knowledge. Args: query_text: The search query collections: Which collections to search (default: all) top_k: Number of results per collection vuln_type: Filter by vulnerability type technology: Filter by technology chunk_type: Filter by chunk type (methodology, payload, summary, etc.) Returns: RAGContext with ranked, deduplicated results """ if not collections: collections = [COL_BUG_BOUNTY, COL_VULN_METHODS, COL_CUSTOM, COL_REASONING, COL_ATTACK] # Build metadata filter meta_filter = {} if vuln_type: meta_filter["vuln_type"] = vuln_type if chunk_type: meta_filter["chunk_type"] = chunk_type all_chunks: List[RetrievedChunk] = [] sources_used = [] for col_name in collections: if not self.store.collection_exists(col_name): continue chunks = self.store.query( collection=col_name, query_text=query_text, top_k=top_k, metadata_filter=meta_filter if meta_filter else None ) if chunks: all_chunks.extend(chunks) sources_used.append(col_name) # Also search with technology-enhanced query if provided if technology and technology not in query_text.lower(): enhanced_query = f"{query_text} {technology}" for col_name in collections: if not self.store.collection_exists(col_name): continue chunks = self.store.query( collection=col_name, query_text=enhanced_query, top_k=max(2, top_k // 2), metadata_filter=meta_filter if meta_filter else None ) if chunks: all_chunks.extend(chunks) # Deduplicate by chunk_id seen = set() unique_chunks = [] for chunk in all_chunks: if chunk.chunk_id not in seen: seen.add(chunk.chunk_id) unique_chunks.append(chunk) # Sort by relevance score unique_chunks.sort(key=lambda c: c.score, reverse=True) # Limit total results max_results = top_k * 2 unique_chunks = unique_chunks[:max_results] total_score = sum(c.score for c in unique_chunks) return RAGContext( query=query_text, chunks=unique_chunks, total_score=total_score, sources_used=sources_used ) def get_testing_context(self, vuln_type: str, target_url: str = "", technology: str = "", endpoint: str = "", parameter: str = "", max_chars: int = MAX_CONTEXT_CHARS) -> str: """ Get optimized RAG context for vulnerability testing. Combines methodology, real examples, and attack patterns. """ # Build a rich query query_parts = [vuln_type.replace("_", " ")] if technology: query_parts.append(technology) if endpoint: query_parts.append(f"endpoint {endpoint}") if parameter: query_parts.append(f"parameter {parameter}") query = " ".join(query_parts) # Query with vuln_type preference context = self.query( query_text=query, vuln_type=vuln_type, technology=technology, top_k=5 ) # Also get broader results without vuln_type filter broad_context = self.query( query_text=query, technology=technology, top_k=3 ) # Merge, preferring vuln-specific results seen = {c.chunk_id for c in context.chunks} for chunk in broad_context.chunks: if chunk.chunk_id not in seen: context.chunks.append(chunk) seen.add(chunk.chunk_id) # Re-sort and limit context.chunks.sort(key=lambda c: c.score, reverse=True) context.chunks = context.chunks[:8] return context.to_prompt_text(max_chars=max_chars) def get_verification_context(self, vuln_type: str, evidence: str, technology: str = "", max_chars: int = 2000) -> str: """ Get RAG context for finding verification/judgment. Focuses on confirmed examples and false positive patterns. """ query = f"{vuln_type.replace('_', ' ')} verification proof confirmed {evidence[:200]}" if technology: query += f" {technology}" # Get confirmed reasoning traces trace_ctx = self.query( query_text=query, collections=[COL_REASONING], vuln_type=vuln_type, top_k=3 ) # Get methodology for verification criteria method_ctx = self.query( query_text=f"{vuln_type} false positive verification criteria proof", collections=[COL_VULN_METHODS, COL_BUG_BOUNTY], vuln_type=vuln_type, chunk_type="methodology", top_k=3 ) # Combine all_chunks = trace_ctx.chunks + method_ctx.chunks all_chunks.sort(key=lambda c: c.score, reverse=True) combined = RAGContext( query=query, chunks=all_chunks[:6], total_score=sum(c.score for c in all_chunks[:6]), sources_used=list(set(trace_ctx.sources_used + method_ctx.sources_used)) ) return combined.to_prompt_text(max_chars=max_chars) def get_strategy_context(self, technologies: List[str], endpoints: List[str] = None, max_chars: int = 3000) -> str: """ Get RAG context for attack strategy planning. Focuses on tech-specific patterns and successful attack history. """ query_parts = ["penetration testing attack strategy"] query_parts.extend(technologies[:3]) if endpoints: query_parts.extend(endpoints[:3]) query = " ".join(query_parts) # Get attack patterns attack_ctx = self.query( query_text=query, collections=[COL_ATTACK, COL_BUG_BOUNTY], top_k=5 ) # Get methodology per technology for tech in technologies[:2]: tech_ctx = self.query( query_text=f"{tech} common vulnerabilities exploitation", collections=[COL_BUG_BOUNTY, COL_VULN_METHODS], technology=tech, top_k=3 ) attack_ctx.chunks.extend(tech_ctx.chunks) # Deduplicate and sort seen = set() unique = [] for c in attack_ctx.chunks: if c.chunk_id not in seen: seen.add(c.chunk_id) unique.append(c) unique.sort(key=lambda c: c.score, reverse=True) combined = RAGContext( query=query, chunks=unique[:10], total_score=sum(c.score for c in unique[:10]), sources_used=attack_ctx.sources_used ) return combined.to_prompt_text(max_chars=max_chars) # ── Helpers ───────────────────────────────────────────────── def _detect_vuln_types(self, text: str) -> List[str]: """Detect vulnerability types mentioned in text.""" text_lower = text.lower() VULN_KEYWORDS = { "xss": ["xss", "cross-site scripting", "cross site scripting", "script injection", "reflected xss", "stored xss"], "sqli": ["sql injection", "sqli", "sql injeção", "union select", "sqlmap"], "ssrf": ["ssrf", "server-side request forgery", "server side request"], "idor": ["idor", "insecure direct object", "referência direta"], "rce": ["rce", "remote code execution", "command injection", "execução remota", "os command"], "lfi": ["lfi", "local file inclusion", "path traversal", "directory traversal", "inclusão de arquivo"], "ssti": ["ssti", "server-side template injection", "template injection", "jinja", "twig"], "xxe": ["xxe", "xml external entity", "xml injection"], "csrf": ["csrf", "cross-site request forgery", "request forgery"], "open_redirect": ["open redirect", "redirecionamento aberto", "redirect"], "auth_bypass": ["authentication bypass", "auth bypass", "bypass autenticação"], "race_condition": ["race condition", "condição de corrida", "toctou"], "deserialization": ["deserialization", "deserialização", "unserialize", "pickle"], "upload": ["file upload", "upload", "unrestricted upload"], "cors": ["cors", "cross-origin"], "prototype_pollution": ["prototype pollution", "poluição de protótipo"], "request_smuggling": ["request smuggling", "http smuggling", "cl.te", "te.cl"], "graphql": ["graphql", "introspection"], "jwt": ["jwt", "json web token"], "nosql": ["nosql injection", "mongodb injection", "nosql"], "crlf": ["crlf injection", "header injection", "injeção de cabeçalho"], "subdomain_takeover": ["subdomain takeover", "tomada de subdomínio"], "information_disclosure": ["information disclosure", "divulgação de informação", "sensitive data"], "bola": ["bola", "broken object level"], "bfla": ["bfla", "broken function level"], "privilege_escalation": ["privilege escalation", "escalação de privilégio"], } detected = [] for vuln_type, keywords in VULN_KEYWORDS.items(): for kw in keywords: if kw in text_lower: detected.append(vuln_type) break return detected if detected else ["unknown"] def _detect_technologies(self, text: str) -> List[str]: """Detect technologies mentioned in text.""" text_lower = text.lower() TECH_KEYWORDS = { "php": ["php", "laravel", "wordpress", "drupal", "symfony", "codeigniter"], "python": ["python", "django", "flask", "fastapi", "tornado"], "java": ["java", "spring", "struts", "tomcat", "jboss", "wildfly"], "node": ["node.js", "nodejs", "express", "next.js", "nuxt"], "ruby": ["ruby", "rails", "sinatra"], "dotnet": [".net", "asp.net", "c#", "iis"], "go": ["golang", " go ", "gin", "echo"], "nginx": ["nginx"], "apache": ["apache", "httpd"], "react": ["react", "reactjs"], "angular": ["angular"], "vue": ["vue.js", "vuejs"], "graphql": ["graphql"], "docker": ["docker", "kubernetes", "k8s"], "aws": ["aws", "amazon", "s3", "lambda", "ec2"], "azure": ["azure", "microsoft cloud"], "mysql": ["mysql", "mariadb"], "postgres": ["postgresql", "postgres"], "mongodb": ["mongodb", "mongo"], "redis": ["redis"], } detected = [] for tech, keywords in TECH_KEYWORDS.items(): for kw in keywords: if kw in text_lower: detected.append(tech) break return detected def _extract_section(self, text: str, markers: List[str], max_chars: int = 2000) -> Optional[str]: """Extract a section from text based on header markers.""" text_lower = text.lower() for marker in markers: idx = text_lower.find(marker) if idx != -1: # Find section start (after the marker line) newline_after = text.find("\n", idx) if newline_after == -1: continue section_start = newline_after + 1 # Find section end (next ## header or end) next_header = re.search(r'\n#{1,3}\s', text[section_start:]) if next_header: section_end = section_start + next_header.start() else: section_end = min(section_start + max_chars, len(text)) section = text[section_start:section_end].strip() if len(section) > 50: return section[:max_chars] return None def _extract_code_blocks(self, text: str) -> List[str]: """Extract code blocks and payloads from text.""" blocks = [] # Fenced code blocks for match in re.finditer(r'```[\w]*\n(.*?)```', text, re.DOTALL): code = match.group(1).strip() if len(code) > 20: blocks.append(code[:500]) # Inline code with attack indicators for match in re.finditer(r'`([^`]{10,500})`', text): code = match.group(1) attack_indicators = ['