NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform

116 modules | 100 vuln types | 18 API routes | 18 frontend pages Major features: - VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts - Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop - CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers - Validation Pipeline: negative controls, proof of execution, confidence scoring, judge - AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon - Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents - RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates - Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh - Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install - Full IA Testing: methodology-driven comprehensive pentest sessions - Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts - Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
2026-05-12 19:12:20 +02:00 · 2026-02-22 17:58:12 -03:00
commit e0935793c5
271 changed files with 132462 additions and 0 deletions
@@ -0,0 +1,877 @@
+"""
+RAG Engine - Retrieval-Augmented Generation for enhanced AI reasoning.
+
+Indexes all knowledge sources (bug bounty reports, vuln KB, custom docs,
+reasoning traces) and provides semantic retrieval for context-enriched
+LLM prompts. Does NOT modify the model - only augments input context.
+
+Collections:
+- bug_bounty_patterns: 9131 real-world vulnerability reports
+- vuln_methodologies: 100 vulnerability type methodologies
+- custom_knowledge: User-uploaded research documents
+- reasoning_traces: Successful reasoning chains from past scans
+- attack_patterns: Extracted attack patterns and techniques
+"""
+
+import json
+import logging
+import re
+import time
+from pathlib import Path
+from typing import List, Dict, Optional, Any, Tuple
+from dataclasses import dataclass, field
+
+from .vectorstore import (
+    BaseVectorStore, Document, RetrievedChunk,
+    create_vectorstore
+)
+
+logger = logging.getLogger(__name__)
+
+# Names of the vector-store collections the engine reads from and writes to.
+COL_BUG_BOUNTY = "bug_bounty_patterns"    # chunks derived from the bug bounty dataset
+COL_VULN_METHODS = "vuln_methodologies"   # vulnerability knowledge base entries
+COL_CUSTOM = "custom_knowledge"           # user-supplied knowledge entries
+COL_REASONING = "reasoning_traces"        # traces added via index_reasoning_trace()
+COL_ATTACK = "attack_patterns"            # successful attacks grouped from execution history
+
+# Defaults
+DEFAULT_TOP_K = 5          # default per-collection result count for RAGEngine.query()
+MAX_CONTEXT_CHARS = 4000   # default character budget for rendered prompt context
+INDEX_BATCH_SIZE = 200     # documents per store.add() call when indexing the bug bounty set
+
+
+@dataclass
+class RAGContext:
+    """Assembled RAG context for a specific query."""
+    query: str
+    chunks: List[RetrievedChunk] = field(default_factory=list)
+    total_score: float = 0.0
+    sources_used: List[str] = field(default_factory=list)
+    token_estimate: int = 0
+
+    def to_prompt_text(self, max_chars: int = MAX_CONTEXT_CHARS) -> str:
+        """Format retrieved context for injection into LLM prompt."""
+        if not self.chunks:
+            return ""
+
+        sections = []
+        current_len = 0
+
+        for chunk in self.chunks:
+            source_label = chunk.metadata.get("source_type", chunk.source)
+            vuln_type = chunk.metadata.get("vuln_type", "")
+            score_pct = int(chunk.score * 100) if chunk.score <= 1.0 else int(chunk.score)
+
+            header = f"[{source_label}]"
+            if vuln_type:
+                header += f" ({vuln_type})"
+            header += f" [relevance: {score_pct}%]"
+
+            text = chunk.text.strip()
+            section = f"{header}\n{text}\n"
+
+            if current_len + len(section) > max_chars:
+                remaining = max_chars - current_len - len(header) - 20
+                if remaining > 100:
+                    section = f"{header}\n{text[:remaining]}...\n"
+                else:
+                    break
+
+            sections.append(section)
+            current_len += len(section)
+
+        if not sections:
+            return ""
+
+        result = "=== RETRIEVED KNOWLEDGE (RAG) ===\n"
+        result += "Use this knowledge to inform your analysis. Adapt techniques to the target.\n\n"
+        result += "\n---\n".join(sections)
+        result += "\n=== END RETRIEVED KNOWLEDGE ===\n"
+
+        self.token_estimate = len(result) // 4  # rough token estimate
+        return result
+
+
+class RAGEngine:
+    """
+    Main RAG orchestrator. Indexes knowledge sources and provides
+    semantic retrieval for context-enriched AI reasoning.
+    """
+
+    def __init__(self, data_dir: str = "data", backend: str = "auto",
+                 persist_dir: str = None):
+        self.data_dir = Path(data_dir)
+        self.persist_dir = persist_dir or str(self.data_dir / "vectorstore")
+
+        self.store: BaseVectorStore = create_vectorstore(
+            self.persist_dir, backend=backend
+        )
+
+        self._indexed = False
+        self._index_stats: Dict[str, int] = {}
+
+        logger.info(f"RAG Engine initialized with '{self.store.backend_name}' backend")
+
+    @property
+    def backend_name(self) -> str:
+        """Name of the vector store backend, as reported by the store itself."""
+        return self.store.backend_name
+
+    @property
+    def is_indexed(self) -> bool:
+        """Whether ``index_all()`` has run (or skipped as already populated) on this instance."""
+        return self._indexed
+
+    def get_stats(self) -> Dict:
+        """Return indexing statistics."""
+        stats = {
+            "backend": self.store.backend_name,
+            "indexed": self._indexed,
+            "collections": {}
+        }
+        for col_name in [COL_BUG_BOUNTY, COL_VULN_METHODS, COL_CUSTOM,
+                         COL_REASONING, COL_ATTACK]:
+            count = self.store.collection_count(col_name)
+            if count > 0:
+                stats["collections"][col_name] = count
+        return stats
+
+    # ── Indexing ────────────────────────────────────────────────
+
+    def index_all(self, force: bool = False) -> Dict[str, int]:
+        """
+        Index all available knowledge sources.
+        Returns dict of collection_name -> documents_indexed.
+        """
+        stats = {}
+
+        # Only re-index if forced or collections are empty
+        if not force and self._all_collections_populated():
+            logger.info("RAG: All collections already populated, skipping index")
+            self._indexed = True
+            return stats
+
+        start = time.time()
+
+        stats[COL_BUG_BOUNTY] = self._index_bug_bounty()
+        stats[COL_VULN_METHODS] = self._index_vuln_knowledge_base()
+        stats[COL_CUSTOM] = self._index_custom_knowledge()
+        stats[COL_ATTACK] = self._index_attack_patterns()
+
+        elapsed = time.time() - start
+        total = sum(stats.values())
+        self._indexed = True
+        self._index_stats = stats
+
+        logger.info(f"RAG: Indexed {total} documents across {len(stats)} collections in {elapsed:.1f}s")
+        return stats
+
+    def _all_collections_populated(self) -> bool:
+        """Check if main collections already have data."""
+        return (self.store.collection_exists(COL_BUG_BOUNTY) and
+                self.store.collection_exists(COL_VULN_METHODS))
+
+    def _index_bug_bounty(self) -> int:
+        """Index the bug bounty finetuning dataset."""
+        dataset_path = Path("models/bug-bounty/bugbounty_finetuning_dataset.json")
+        if not dataset_path.exists():
+            logger.warning(f"RAG: Bug bounty dataset not found at {dataset_path}")
+            return 0
+
+        if self.store.collection_exists(COL_BUG_BOUNTY):
+            existing = self.store.collection_count(COL_BUG_BOUNTY)
+            if existing > 1000:
+                logger.info(f"RAG: Bug bounty already indexed ({existing} docs)")
+                return 0
+
+        try:
+            with open(dataset_path, 'r', encoding='utf-8') as f:
+                entries = json.load(f)
+        except Exception as e:
+            logger.error(f"RAG: Failed to load bug bounty dataset: {e}")
+            return 0
+
+        if not isinstance(entries, list):
+            return 0
+
+        documents = []
+        for i, entry in enumerate(entries):
+            instruction = entry.get("instruction", "")
+            output = entry.get("output", "")
+
+            if not output or len(output) < 50:
+                continue
+
+            # Extract vulnerability types from content
+            vuln_types = self._detect_vuln_types(instruction + " " + output)
+
+            # Extract technologies
+            technologies = self._detect_technologies(output)
+
+            # Chunk 1: Full methodology (primary chunk)
+            methodology = self._extract_section(output, [
+                "passos para reproduzir", "steps to reproduce",
+                "methodology", "exploitation", "proof of concept",
+                "como reproduzir", "reprodução"
+            ])
+
+            if methodology and len(methodology) > 100:
+                documents.append(Document(
+                    text=methodology[:4000],
+                    metadata={
+                        "source_type": "bug_bounty",
+                        "vuln_type": vuln_types[0] if vuln_types else "unknown",
+                        "vuln_types": ",".join(vuln_types[:5]),
+                        "technologies": ",".join(technologies[:5]),
+                        "chunk_type": "methodology",
+                        "entry_index": i
+                    },
+                    doc_id=f"bb_method_{i}"
+                ))
+
+            # Chunk 2: Summary + Impact (secondary chunk)
+            summary = self._extract_section(output, [
+                "resumo", "summary", "descrição", "description",
+                "overview"
+            ])
+            impact = self._extract_section(output, [
+                "impacto", "impact", "severity", "risco"
+            ])
+
+            summary_text = f"{instruction}\n\n{summary or output[:500]}"
+            if impact:
+                summary_text += f"\n\nImpact: {impact}"
+
+            documents.append(Document(
+                text=summary_text[:3000],
+                metadata={
+                    "source_type": "bug_bounty",
+                    "vuln_type": vuln_types[0] if vuln_types else "unknown",
+                    "vuln_types": ",".join(vuln_types[:5]),
+                    "technologies": ",".join(technologies[:5]),
+                    "chunk_type": "summary",
+                    "entry_index": i
+                },
+                doc_id=f"bb_summary_{i}"
+            ))
+
+            # Chunk 3: Payloads & PoC code (if present)
+            payloads = self._extract_code_blocks(output)
+            if payloads:
+                payload_text = f"Vulnerability: {vuln_types[0] if vuln_types else 'unknown'}\n"
+                payload_text += f"Technologies: {', '.join(technologies[:3])}\n\n"
+                payload_text += "Payloads/PoC:\n" + "\n\n".join(payloads[:10])
+
+                documents.append(Document(
+                    text=payload_text[:3000],
+                    metadata={
+                        "source_type": "bug_bounty",
+                        "vuln_type": vuln_types[0] if vuln_types else "unknown",
+                        "vuln_types": ",".join(vuln_types[:5]),
+                        "technologies": ",".join(technologies[:5]),
+                        "chunk_type": "payload",
+                        "entry_index": i
+                    },
+                    doc_id=f"bb_payload_{i}"
+                ))
+
+        # Index in batches
+        total_added = 0
+        for start in range(0, len(documents), INDEX_BATCH_SIZE):
+            batch = documents[start:start + INDEX_BATCH_SIZE]
+            added = self.store.add(COL_BUG_BOUNTY, batch)
+            total_added += added
+
+        logger.info(f"RAG: Indexed {total_added} bug bounty chunks from {len(entries)} entries")
+        return total_added
+
+    def _index_vuln_knowledge_base(self) -> int:
+        """Index the 100-type vulnerability knowledge base."""
+        kb_path = self.data_dir / "vuln_knowledge_base.json"
+        if not kb_path.exists():
+            return 0
+
+        if self.store.collection_exists(COL_VULN_METHODS):
+            existing = self.store.collection_count(COL_VULN_METHODS)
+            if existing >= 90:
+                return 0
+
+        try:
+            with open(kb_path, 'r', encoding='utf-8') as f:
+                kb = json.load(f)
+        except Exception as e:
+            logger.error(f"RAG: Failed to load vuln KB: {e}")
+            return 0
+
+        vuln_types = kb.get("vulnerability_types", {})
+        if not vuln_types:
+            return 0
+
+        documents = []
+        for vuln_type, info in vuln_types.items():
+            text = f"Vulnerability: {info.get('title', vuln_type)}\n"
+            text += f"Type: {vuln_type}\n"
+            text += f"CWE: {info.get('cwe_id', 'N/A')}\n"
+            text += f"Severity: {info.get('severity', 'N/A')}\n\n"
+            text += f"Description: {info.get('description', '')}\n\n"
+            text += f"Impact: {info.get('impact', '')}\n\n"
+            text += f"Remediation: {info.get('remediation', '')}\n"
+
+            fp_markers = info.get("false_positive_markers", [])
+            if fp_markers:
+                text += f"\nFalse Positive Indicators: {', '.join(fp_markers)}\n"
+
+            documents.append(Document(
+                text=text,
+                metadata={
+                    "source_type": "vuln_kb",
+                    "vuln_type": vuln_type,
+                    "severity": info.get("severity", "medium"),
+                    "cwe_id": info.get("cwe_id", ""),
+                    "chunk_type": "methodology"
+                },
+                doc_id=f"vkb_{vuln_type}"
+            ))
+
+        # Index XBOW insights if available
+        xbow = kb.get("xbow_insights", {})
+        if xbow:
+            for category, insights in xbow.items():
+                if isinstance(insights, str):
+                    text = f"XBOW Benchmark Insight - {category}:\n{insights}"
+                elif isinstance(insights, dict):
+                    text = f"XBOW Benchmark Insight - {category}:\n{json.dumps(insights, indent=2)}"
+                elif isinstance(insights, list):
+                    text = f"XBOW Benchmark Insight - {category}:\n" + "\n".join(str(i) for i in insights)
+                else:
+                    continue
+
+                documents.append(Document(
+                    text=text[:3000],
+                    metadata={
+                        "source_type": "vuln_kb",
+                        "vuln_type": category,
+                        "chunk_type": "insight"
+                    },
+                    doc_id=f"xbow_{category}"
+                ))
+
+        added = self.store.add(COL_VULN_METHODS, documents)
+        logger.info(f"RAG: Indexed {added} vuln KB entries")
+        return added
+
+    def _index_custom_knowledge(self) -> int:
+        """Index user-uploaded custom knowledge documents."""
+        index_path = self.data_dir / "custom-knowledge" / "index.json"
+        if not index_path.exists():
+            return 0
+
+        try:
+            with open(index_path, 'r', encoding='utf-8') as f:
+                index = json.load(f)
+        except Exception:
+            return 0
+
+        documents = []
+        for doc_entry in index.get("documents", []):
+            for entry in doc_entry.get("knowledge_entries", []):
+                vuln_type = entry.get("vuln_type", "unknown")
+                text = f"Custom Knowledge - {vuln_type}\n"
+                text += f"Source: {doc_entry.get('filename', 'unknown')}\n\n"
+
+                if entry.get("methodology"):
+                    text += f"Methodology: {entry['methodology']}\n\n"
+                if entry.get("key_insights"):
+                    if isinstance(entry["key_insights"], list):
+                        text += "Key Insights:\n" + "\n".join(f"- {i}" for i in entry["key_insights"]) + "\n\n"
+                    else:
+                        text += f"Key Insights: {entry['key_insights']}\n\n"
+                if entry.get("payloads"):
+                    payloads = entry["payloads"][:10]
+                    text += "Payloads:\n" + "\n".join(f"  {p}" for p in payloads) + "\n\n"
+                if entry.get("bypass_techniques"):
+                    techniques = entry["bypass_techniques"][:10]
+                    text += "Bypass Techniques:\n" + "\n".join(f"- {t}" for t in techniques) + "\n"
+
+                documents.append(Document(
+                    text=text[:4000],
+                    metadata={
+                        "source_type": "custom",
+                        "vuln_type": vuln_type,
+                        "filename": doc_entry.get("filename", ""),
+                        "chunk_type": "methodology"
+                    },
+                    doc_id=f"custom_{doc_entry.get('id', '')}_{vuln_type}"
+                ))
+
+        if not documents:
+            return 0
+
+        added = self.store.add(COL_CUSTOM, documents)
+        logger.info(f"RAG: Indexed {added} custom knowledge entries")
+        return added
+
+    def _index_attack_patterns(self) -> int:
+        """Index extracted attack patterns from execution history."""
+        hist_path = self.data_dir / "execution_history.json"
+        if not hist_path.exists():
+            return 0
+
+        try:
+            with open(hist_path, 'r', encoding='utf-8') as f:
+                history = json.load(f)
+        except Exception:
+            return 0
+
+        attacks = history.get("attacks", [])
+        if not attacks:
+            return 0
+
+        # Group successful attacks by vuln_type + tech
+        successes: Dict[str, List[Dict]] = {}
+        for attack in attacks:
+            if not attack.get("success"):
+                continue
+            key = f"{attack.get('vuln_type', '')}_{attack.get('tech', '')}"
+            if key not in successes:
+                successes[key] = []
+            successes[key].append(attack)
+
+        documents = []
+        for key, attack_list in successes.items():
+            vuln_type = attack_list[0].get("vuln_type", "unknown")
+            tech = attack_list[0].get("tech", "unknown")
+
+            text = f"Successful Attack Pattern: {vuln_type} on {tech}\n"
+            text += f"Success count: {len(attack_list)}\n\n"
+
+            for atk in attack_list[:5]:
+                evidence = atk.get("evidence_preview", "")
+                domain = atk.get("target_domain", "")
+                text += f"- Target: {domain}, Evidence: {evidence}\n"
+
+            documents.append(Document(
+                text=text[:2000],
+                metadata={
+                    "source_type": "attack_pattern",
+                    "vuln_type": vuln_type,
+                    "technology": tech,
+                    "success_count": len(attack_list),
+                    "chunk_type": "pattern"
+                },
+                doc_id=f"atk_{key}"
+            ))
+
+        if not documents:
+            return 0
+
+        added = self.store.add(COL_ATTACK, documents)
+        logger.info(f"RAG: Indexed {added} attack patterns")
+        return added
+
+    def index_reasoning_trace(self, trace: Dict) -> bool:
+        """
+        Index a successful reasoning trace for future retrieval.
+        Called when a finding is confirmed.
+
+        trace = {
+            "vuln_type": str,
+            "technology": str,
+            "endpoint": str,
+            "reasoning_chain": List[str],
+            "payload_used": str,
+            "evidence": str,
+            "confidence": float,
+            "timestamp": float
+        }
+        """
+        vuln_type = trace.get("vuln_type", "unknown")
+        tech = trace.get("technology", "unknown")
+
+        text = f"Confirmed Reasoning Trace - {vuln_type}\n"
+        text += f"Technology: {tech}\n"
+        text += f"Endpoint: {trace.get('endpoint', '')}\n"
+        text += f"Confidence: {trace.get('confidence', 0):.0%}\n\n"
+
+        chain = trace.get("reasoning_chain", [])
+        if chain:
+            text += "Reasoning Chain:\n"
+            for i, step in enumerate(chain, 1):
+                text += f"  {i}. {step}\n"
+            text += "\n"
+
+        if trace.get("payload_used"):
+            text += f"Payload Used: {trace['payload_used']}\n"
+        if trace.get("evidence"):
+            text += f"Evidence: {trace['evidence'][:500]}\n"
+
+        doc = Document(
+            text=text[:3000],
+            metadata={
+                "source_type": "reasoning_trace",
+                "vuln_type": vuln_type,
+                "technology": tech,
+                "confidence": trace.get("confidence", 0),
+                "chunk_type": "reasoning",
+                "timestamp": trace.get("timestamp", time.time())
+            },
+            doc_id=f"trace_{vuln_type}_{int(time.time())}"
+        )
+
+        try:
+            self.store.add(COL_REASONING, [doc])
+            return True
+        except Exception as e:
+            logger.warning(f"RAG: Failed to index reasoning trace: {e}")
+            return False
+
+    # ── Querying ────────────────────────────────────────────────
+
+    def query(self, query_text: str, collections: List[str] = None,
+              top_k: int = DEFAULT_TOP_K,
+              vuln_type: str = None,
+              technology: str = None,
+              chunk_type: str = None) -> RAGContext:
+        """
+        Query across collections for relevant knowledge.
+
+        Args:
+            query_text: The search query
+            collections: Which collections to search (default: all)
+            top_k: Number of results per collection
+            vuln_type: Filter by vulnerability type
+            technology: Filter by technology
+            chunk_type: Filter by chunk type (methodology, payload, summary, etc.)
+
+        Returns:
+            RAGContext with ranked, deduplicated results
+        """
+        if not collections:
+            collections = [COL_BUG_BOUNTY, COL_VULN_METHODS, COL_CUSTOM,
+                          COL_REASONING, COL_ATTACK]
+
+        # Build metadata filter
+        meta_filter = {}
+        if vuln_type:
+            meta_filter["vuln_type"] = vuln_type
+        if chunk_type:
+            meta_filter["chunk_type"] = chunk_type
+
+        all_chunks: List[RetrievedChunk] = []
+        sources_used = []
+
+        for col_name in collections:
+            if not self.store.collection_exists(col_name):
+                continue
+
+            chunks = self.store.query(
+                collection=col_name,
+                query_text=query_text,
+                top_k=top_k,
+                metadata_filter=meta_filter if meta_filter else None
+            )
+
+            if chunks:
+                all_chunks.extend(chunks)
+                sources_used.append(col_name)
+
+        # Also search with technology-enhanced query if provided
+        if technology and technology not in query_text.lower():
+            enhanced_query = f"{query_text} {technology}"
+            for col_name in collections:
+                if not self.store.collection_exists(col_name):
+                    continue
+                chunks = self.store.query(
+                    collection=col_name,
+                    query_text=enhanced_query,
+                    top_k=max(2, top_k // 2),
+                    metadata_filter=meta_filter if meta_filter else None
+                )
+                if chunks:
+                    all_chunks.extend(chunks)
+
+        # Deduplicate by chunk_id
+        seen = set()
+        unique_chunks = []
+        for chunk in all_chunks:
+            if chunk.chunk_id not in seen:
+                seen.add(chunk.chunk_id)
+                unique_chunks.append(chunk)
+
+        # Sort by relevance score
+        unique_chunks.sort(key=lambda c: c.score, reverse=True)
+
+        # Limit total results
+        max_results = top_k * 2
+        unique_chunks = unique_chunks[:max_results]
+
+        total_score = sum(c.score for c in unique_chunks)
+
+        return RAGContext(
+            query=query_text,
+            chunks=unique_chunks,
+            total_score=total_score,
+            sources_used=sources_used
+        )
+
+    def get_testing_context(self, vuln_type: str, target_url: str = "",
+                            technology: str = "", endpoint: str = "",
+                            parameter: str = "",
+                            max_chars: int = MAX_CONTEXT_CHARS) -> str:
+        """
+        Get optimized RAG context for vulnerability testing.
+        Combines methodology, real examples, and attack patterns.
+        """
+        # Build a rich query
+        query_parts = [vuln_type.replace("_", " ")]
+        if technology:
+            query_parts.append(technology)
+        if endpoint:
+            query_parts.append(f"endpoint {endpoint}")
+        if parameter:
+            query_parts.append(f"parameter {parameter}")
+
+        query = " ".join(query_parts)
+
+        # Query with vuln_type preference
+        context = self.query(
+            query_text=query,
+            vuln_type=vuln_type,
+            technology=technology,
+            top_k=5
+        )
+
+        # Also get broader results without vuln_type filter
+        broad_context = self.query(
+            query_text=query,
+            technology=technology,
+            top_k=3
+        )
+
+        # Merge, preferring vuln-specific results
+        seen = {c.chunk_id for c in context.chunks}
+        for chunk in broad_context.chunks:
+            if chunk.chunk_id not in seen:
+                context.chunks.append(chunk)
+                seen.add(chunk.chunk_id)
+
+        # Re-sort and limit
+        context.chunks.sort(key=lambda c: c.score, reverse=True)
+        context.chunks = context.chunks[:8]
+
+        return context.to_prompt_text(max_chars=max_chars)
+
+    def get_verification_context(self, vuln_type: str, evidence: str,
+                                  technology: str = "",
+                                  max_chars: int = 2000) -> str:
+        """
+        Get RAG context for finding verification/judgment.
+        Focuses on confirmed examples and false positive patterns.
+        """
+        query = f"{vuln_type.replace('_', ' ')} verification proof confirmed {evidence[:200]}"
+        if technology:
+            query += f" {technology}"
+
+        # Get confirmed reasoning traces
+        trace_ctx = self.query(
+            query_text=query,
+            collections=[COL_REASONING],
+            vuln_type=vuln_type,
+            top_k=3
+        )
+
+        # Get methodology for verification criteria
+        method_ctx = self.query(
+            query_text=f"{vuln_type} false positive verification criteria proof",
+            collections=[COL_VULN_METHODS, COL_BUG_BOUNTY],
+            vuln_type=vuln_type,
+            chunk_type="methodology",
+            top_k=3
+        )
+
+        # Combine
+        all_chunks = trace_ctx.chunks + method_ctx.chunks
+        all_chunks.sort(key=lambda c: c.score, reverse=True)
+
+        combined = RAGContext(
+            query=query,
+            chunks=all_chunks[:6],
+            total_score=sum(c.score for c in all_chunks[:6]),
+            sources_used=list(set(trace_ctx.sources_used + method_ctx.sources_used))
+        )
+
+        return combined.to_prompt_text(max_chars=max_chars)
+
+    def get_strategy_context(self, technologies: List[str],
+                              endpoints: List[str] = None,
+                              max_chars: int = 3000) -> str:
+        """
+        Get RAG context for attack strategy planning.
+        Focuses on tech-specific patterns and successful attack history.
+        """
+        query_parts = ["penetration testing attack strategy"]
+        query_parts.extend(technologies[:3])
+        if endpoints:
+            query_parts.extend(endpoints[:3])
+
+        query = " ".join(query_parts)
+
+        # Get attack patterns
+        attack_ctx = self.query(
+            query_text=query,
+            collections=[COL_ATTACK, COL_BUG_BOUNTY],
+            top_k=5
+        )
+
+        # Get methodology per technology
+        for tech in technologies[:2]:
+            tech_ctx = self.query(
+                query_text=f"{tech} common vulnerabilities exploitation",
+                collections=[COL_BUG_BOUNTY, COL_VULN_METHODS],
+                technology=tech,
+                top_k=3
+            )
+            attack_ctx.chunks.extend(tech_ctx.chunks)
+
+        # Deduplicate and sort
+        seen = set()
+        unique = []
+        for c in attack_ctx.chunks:
+            if c.chunk_id not in seen:
+                seen.add(c.chunk_id)
+                unique.append(c)
+        unique.sort(key=lambda c: c.score, reverse=True)
+
+        combined = RAGContext(
+            query=query,
+            chunks=unique[:10],
+            total_score=sum(c.score for c in unique[:10]),
+            sources_used=attack_ctx.sources_used
+        )
+
+        return combined.to_prompt_text(max_chars=max_chars)
+
+    # ── Helpers ─────────────────────────────────────────────────
+
+    def _detect_vuln_types(self, text: str) -> List[str]:
+        """Detect vulnerability types mentioned in text."""
+        text_lower = text.lower()
+        VULN_KEYWORDS = {
+            "xss": ["xss", "cross-site scripting", "cross site scripting", "script injection", "reflected xss", "stored xss"],
+            "sqli": ["sql injection", "sqli", "sql injeção", "union select", "sqlmap"],
+            "ssrf": ["ssrf", "server-side request forgery", "server side request"],
+            "idor": ["idor", "insecure direct object", "referência direta"],
+            "rce": ["rce", "remote code execution", "command injection", "execução remota", "os command"],
+            "lfi": ["lfi", "local file inclusion", "path traversal", "directory traversal", "inclusão de arquivo"],
+            "ssti": ["ssti", "server-side template injection", "template injection", "jinja", "twig"],
+            "xxe": ["xxe", "xml external entity", "xml injection"],
+            "csrf": ["csrf", "cross-site request forgery", "request forgery"],
+            "open_redirect": ["open redirect", "redirecionamento aberto", "redirect"],
+            "auth_bypass": ["authentication bypass", "auth bypass", "bypass autenticação"],
+            "race_condition": ["race condition", "condição de corrida", "toctou"],
+            "deserialization": ["deserialization", "deserialização", "unserialize", "pickle"],
+            "upload": ["file upload", "upload", "unrestricted upload"],
+            "cors": ["cors", "cross-origin"],
+            "prototype_pollution": ["prototype pollution", "poluição de protótipo"],
+            "request_smuggling": ["request smuggling", "http smuggling", "cl.te", "te.cl"],
+            "graphql": ["graphql", "introspection"],
+            "jwt": ["jwt", "json web token"],
+            "nosql": ["nosql injection", "mongodb injection", "nosql"],
+            "crlf": ["crlf injection", "header injection", "injeção de cabeçalho"],
+            "subdomain_takeover": ["subdomain takeover", "tomada de subdomínio"],
+            "information_disclosure": ["information disclosure", "divulgação de informação", "sensitive data"],
+            "bola": ["bola", "broken object level"],
+            "bfla": ["bfla", "broken function level"],
+            "privilege_escalation": ["privilege escalation", "escalação de privilégio"],
+        }
+
+        detected = []
+        for vuln_type, keywords in VULN_KEYWORDS.items():
+            for kw in keywords:
+                if kw in text_lower:
+                    detected.append(vuln_type)
+                    break
+
+        return detected if detected else ["unknown"]
+
+    def _detect_technologies(self, text: str) -> List[str]:
+        """Detect technologies mentioned in text."""
+        text_lower = text.lower()
+        TECH_KEYWORDS = {
+            "php": ["php", "laravel", "wordpress", "drupal", "symfony", "codeigniter"],
+            "python": ["python", "django", "flask", "fastapi", "tornado"],
+            "java": ["java", "spring", "struts", "tomcat", "jboss", "wildfly"],
+            "node": ["node.js", "nodejs", "express", "next.js", "nuxt"],
+            "ruby": ["ruby", "rails", "sinatra"],
+            "dotnet": [".net", "asp.net", "c#", "iis"],
+            "go": ["golang", " go ", "gin", "echo"],
+            "nginx": ["nginx"],
+            "apache": ["apache", "httpd"],
+            "react": ["react", "reactjs"],
+            "angular": ["angular"],
+            "vue": ["vue.js", "vuejs"],
+            "graphql": ["graphql"],
+            "docker": ["docker", "kubernetes", "k8s"],
+            "aws": ["aws", "amazon", "s3", "lambda", "ec2"],
+            "azure": ["azure", "microsoft cloud"],
+            "mysql": ["mysql", "mariadb"],
+            "postgres": ["postgresql", "postgres"],
+            "mongodb": ["mongodb", "mongo"],
+            "redis": ["redis"],
+        }
+
+        detected = []
+        for tech, keywords in TECH_KEYWORDS.items():
+            for kw in keywords:
+                if kw in text_lower:
+                    detected.append(tech)
+                    break
+
+        return detected
+
+    def _extract_section(self, text: str, markers: List[str],
+                          max_chars: int = 2000) -> Optional[str]:
+        """Extract a section from text based on header markers."""
+        text_lower = text.lower()
+
+        for marker in markers:
+            idx = text_lower.find(marker)
+            if idx != -1:
+                # Find section start (after the marker line)
+                newline_after = text.find("\n", idx)
+                if newline_after == -1:
+                    continue
+                section_start = newline_after + 1
+
+                # Find section end (next ## header or end)
+                next_header = re.search(r'\n#{1,3}\s', text[section_start:])
+                if next_header:
+                    section_end = section_start + next_header.start()
+                else:
+                    section_end = min(section_start + max_chars, len(text))
+
+                section = text[section_start:section_end].strip()
+                if len(section) > 50:
+                    return section[:max_chars]
+
+        return None
+
+    def _extract_code_blocks(self, text: str) -> List[str]:
+        """Extract code blocks and payloads from text."""
+        blocks = []
+
+        # Fenced code blocks
+        for match in re.finditer(r'```[\w]*\n(.*?)```', text, re.DOTALL):
+            code = match.group(1).strip()
+            if len(code) > 20:
+                blocks.append(code[:500])
+
+        # Inline code with attack indicators
+        for match in re.finditer(r'`([^`]{10,500})`', text):
+            code = match.group(1)
+            attack_indicators = ['<script', 'alert(', 'SELECT', 'UNION',
+                                '../', 'curl ', 'wget ', '{{', '${',
+                                'eval(', 'exec(', 'system(']
+            if any(ind in code for ind in attack_indicators):
+                blocks.append(code)
+
+        return blocks[:20]