mirror of
https://github.com/CyberSecurityUP/NeuroSploit.git
synced 2026-03-02 15:53:22 +00:00
116 modules | 100 vuln types | 18 API routes | 18 frontend pages Major features: - VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts - Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop - CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers - Validation Pipeline: negative controls, proof of execution, confidence scoring, judge - AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon - Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents - RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates - Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh - Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install - Full IA Testing: methodology-driven comprehensive pentest sessions - Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts - Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
282 lines
11 KiB
Python
282 lines
11 KiB
Python
"""
|
||
NeuroSploit v3 - Reasoning Engine
|
||
|
||
ReACT-inspired reasoning loop: Think → Plan → Act → Observe → Reflect.
|
||
Provides dedicated reasoning capability for strategic decision-making
|
||
throughout the pentest lifecycle, not just at verification time.
|
||
|
||
Inspired by CAI framework's dedicated Reasoner agent pattern.
|
||
"""
|
||
|
||
import json
|
||
import re
|
||
from dataclasses import dataclass, field
|
||
from typing import Dict, List, Optional, Any
|
||
|
||
from backend.core.token_budget import TokenBudget
|
||
|
||
# Prefer the full vuln_engine system prompt when that package is importable;
# otherwise fall back to a minimal generic prompt so this module still works
# standalone (e.g. in tests or trimmed deployments).
try:
    from backend.core.vuln_engine.system_prompts import get_system_prompt
except ImportError:
    def get_system_prompt(ctx):
        """Fallback system-prompt provider used when vuln_engine is unavailable."""
        return "You are an expert penetration tester."
|
||
|
||
|
||
@dataclass
class ReasoningResult:
    """Result of a reasoning step."""
    analysis: str               # free-text analysis of the situation from the LLM
    recommended_action: str     # chosen action (expected to be one of the offered options)
    confidence: float  # 0.0 - 1.0
    reasoning_chain: List[str] = field(default_factory=list)  # step-by-step chain of thought
    skip_reason: str = ""  # Non-empty if reasoning was skipped (budget)
|
||
|
||
|
||
@dataclass
class AttackPlan:
    """Prioritized attack plan from strategic reasoning."""
    priority_vulns: List[str] = field(default_factory=list)       # vuln types to test first
    endpoint_rankings: List[Dict] = field(default_factory=list)  # [{url, risk, types}]
    parameter_focus: List[str] = field(default_factory=list)      # parameter names worth fuzzing
    attack_vectors: List[str] = field(default_factory=list)       # concrete testing directives
    chain_targets: List[Dict] = field(default_factory=list)       # vuln-chaining candidates
    checkpoint_pct: float = 0.0  # scan progress (%) at which this plan was produced
    token_cost: int = 0          # estimated tokens spent producing the plan
|
||
|
||
|
||
@dataclass
class Reflection:
    """Post-action reflection result."""
    should_continue: bool = True   # whether to keep pursuing the current line of testing
    next_suggestion: str = ""      # brief suggestion for the next action
    learned_pattern: str = ""      # reusable observation about the target's behavior
    confidence_adjustment: float = 0.0  # -1.0 to +1.0
|
||
|
||
|
||
REASONING_SYSTEM_PROMPT = """You are an expert penetration testing strategist. Your role is PURELY analytical — you think deeply about attack strategy, NOT execute attacks.
|
||
|
||
CRITICAL RULES:
|
||
1. Base ALL recommendations on CONCRETE evidence from reconnaissance data
|
||
2. Never hallucinate vulnerabilities — only suggest TESTING, not claim findings
|
||
3. Prioritize by REAL risk (attack surface × likelihood × impact)
|
||
4. Consider WAF/defense evasion when recommending approaches
|
||
5. Account for token budget — suggest efficient testing strategies
|
||
6. Learn from findings so far — adapt strategy based on what worked/failed
|
||
|
||
OUTPUT FORMAT: Always respond in valid JSON."""
|
||
|
||
|
||
class ReasoningEngine:
    """Dedicated reasoning module — thinks before major decisions.

    Implements the CAI Reasoner pattern: a separate agent that only thinks,
    never acts. Called at strategic checkpoints throughout the scan.

    Every public method is budget-gated: when a TokenBudget is present, the
    call is skipped (returning a cheap default) if reasoning should be
    degraded or the estimated token cost cannot be afforded.
    """

    def __init__(self, llm, token_budget: TokenBudget):
        """Store the LLM client and optional token budget tracker.

        Args:
            llm: Async client exposing ``generate(prompt, system_prompt)``.
            token_budget: Budget tracker; may be None to disable all gating.
        """
        self.llm = llm
        self.token_budget = token_budget
        self._reasoning_history: List[Dict] = []  # rolling log of past decisions

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_json(response: str) -> Optional[Dict]:
        """Parse the first ``{...}`` JSON object embedded in an LLM response.

        Returns None when no brace-delimited span is found. Malformed JSON
        raises (json.JSONDecodeError); callers treat any exception from the
        LLM round-trip as a soft failure and fall back to defaults.
        """
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if not match:
            return None
        return json.loads(match.group())

    @staticmethod
    def _default_result(available_actions: List[str], skip_reason: str = "",
                        analysis: str = "") -> ReasoningResult:
        """Cheap fallback: recommend the first available action at 0.5 confidence."""
        return ReasoningResult(
            analysis=analysis,
            recommended_action=available_actions[0] if available_actions else "",
            confidence=0.5,
            skip_reason=skip_reason,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def reason(self, context: str, question: str,
                     available_actions: List[str]) -> ReasoningResult:
        """Think step: analyze situation and recommend next action.

        Args:
            context: Current state (recon data, findings, progress)
            question: Specific decision to make
            available_actions: List of possible actions to choose from

        Returns:
            ReasoningResult with analysis and recommendation
        """
        if self.token_budget and self.token_budget.should_skip("reasoning"):
            return self._default_result(
                available_actions,
                skip_reason=f"Budget level: {self.token_budget.get_degradation_level()}",
            )

        # Rough cost estimate: context tokens plus ~500 for the response.
        est_tokens = (self.token_budget.estimate_tokens(context) + 500) if self.token_budget else 1500
        if self.token_budget and not self.token_budget.can_spend("reasoning", est_tokens):
            return self._default_result(available_actions,
                                        skip_reason="Insufficient reasoning budget")

        prompt = f"""Analyze this situation and recommend the BEST next action.

**Current Context:**
{context[:3000]}

**Question:** {question}

**Available Actions:**
{json.dumps(available_actions[:20])}

**Previous Reasoning (last 3):**
{json.dumps(self._reasoning_history[-3:], default=str) if self._reasoning_history else "None yet"}

Respond in JSON:
{{
  "analysis": "Brief analysis of the situation (2-3 sentences)",
  "recommended_action": "The best action from available_actions",
  "confidence": 0.85,
  "reasoning_chain": ["step1: observation", "step2: inference", "step3: conclusion"]
}}"""

        try:
            response = await self.llm.generate(prompt, REASONING_SYSTEM_PROMPT)
            if self.token_budget:
                # Record the estimate (not actual usage) against the budget.
                self.token_budget.record("reasoning", est_tokens, "reason()")

            data = self._extract_json(response)
            if data is not None:
                result = ReasoningResult(
                    analysis=data.get("analysis", ""),
                    recommended_action=data.get("recommended_action", ""),
                    confidence=min(1.0, max(0.0, float(data.get("confidence", 0.7)))),
                    reasoning_chain=data.get("reasoning_chain", []),
                )
                # Keep a compact trail so later calls can see past decisions.
                self._reasoning_history.append({
                    "question": question[:100],
                    "action": result.recommended_action,
                    "confidence": result.confidence,
                })
                return result
        except Exception:
            # Best-effort by design: any LLM or parse failure falls through
            # to the safe default below instead of aborting the scan.
            pass

        return self._default_result(available_actions,
                                    analysis="Reasoning failed — using default")

    async def plan_attack(self, recon_summary: Dict, findings_so_far: List,
                          tested_types: set, progress_pct: float) -> AttackPlan:
        """Strategic planning: what to test next based on everything known.

        Called at 30%, 60%, 90% checkpoints to refine the attack plan.
        Replaces the single _ai_analyze_attack_surface() call at 50%.
        """
        if self.token_budget and self.token_budget.should_skip("reasoning"):
            return AttackPlan(checkpoint_pct=progress_pct)

        est_tokens = 2000  # flat estimate: the prompt below is bounded in size
        if self.token_budget and not self.token_budget.can_spend("reasoning", est_tokens):
            return AttackPlan(checkpoint_pct=progress_pct)

        # Summarize at most 10 findings; findings may be arbitrary objects,
        # so read attributes defensively with getattr defaults.
        findings_summary = []
        for f in findings_so_far[:10]:
            findings_summary.append({
                "type": getattr(f, "vulnerability_type", "unknown"),
                "endpoint": getattr(f, "affected_endpoint", ""),
                "severity": getattr(f, "severity", "medium"),
                "confidence": getattr(f, "confidence_score", 0),
            })

        prompt = f"""You are replanning the attack strategy at {progress_pct:.0f}% scan progress.

**Reconnaissance:**
- Endpoints: {len(recon_summary.get('endpoints', []))}
- Technologies: {', '.join(recon_summary.get('technologies', [])[:10])}
- Forms: {len(recon_summary.get('forms', []))}
- Parameters: {len(recon_summary.get('parameters', {}))}

**Findings So Far ({len(findings_so_far)}):**
{json.dumps(findings_summary, indent=2)}

**Already Tested Types ({len(tested_types)}):**
{', '.join(sorted(tested_types)[:30])}

**Checkpoint Strategy:**
- At 30%: Widen scope if 0 findings, narrow if 3+ findings
- At 60%: Skip exhausted endpoints, propagate finding patterns
- At 90%: Focus only on high-confidence remaining targets

Respond in JSON:
{{
  "priority_vulns": ["vuln_type_1", "vuln_type_2"],
  "endpoint_rankings": [{{"url": "/api/users", "risk": "high", "types": ["idor", "bola"]}}],
  "parameter_focus": ["id", "file"],
  "attack_vectors": ["Test IDOR on all data endpoints", "Escalate SQLi to UNION"],
  "chain_targets": [{{"from": "ssrf", "to": "lfi", "endpoint": "/proxy"}}]
}}"""

        try:
            response = await self.llm.generate(prompt, REASONING_SYSTEM_PROMPT)
            if self.token_budget:
                self.token_budget.record("reasoning", est_tokens,
                                         f"plan_attack({progress_pct:.0f}%)")

            data = self._extract_json(response)
            if data is not None:
                return AttackPlan(
                    priority_vulns=data.get("priority_vulns", []),
                    endpoint_rankings=data.get("endpoint_rankings", []),
                    parameter_focus=data.get("parameter_focus", []),
                    attack_vectors=data.get("attack_vectors", []),
                    chain_targets=data.get("chain_targets", []),
                    checkpoint_pct=progress_pct,
                    token_cost=est_tokens,
                )
        except Exception:
            # Best-effort: fall through to the empty plan below.
            pass

        return AttackPlan(checkpoint_pct=progress_pct)

    async def reflect(self, action_taken: str, result_observed: str,
                      success: bool) -> Reflection:
        """Post-action reflection: lightweight learning from results.

        Called after significant testing actions to adapt strategy.
        Uses minimal tokens — just a short reasoning step.
        """
        if self.token_budget and self.token_budget.should_skip("reasoning"):
            return Reflection(should_continue=True)

        est_tokens = 300  # reflection is deliberately tiny
        if self.token_budget and not self.token_budget.can_spend("reasoning", est_tokens):
            return Reflection(should_continue=True)

        prompt = f"""Quick reflection on test result:

Action: {action_taken[:200]}
Result: {result_observed[:300]}
Success: {success}

Respond in JSON (be brief):
{{
  "should_continue": true,
  "next_suggestion": "Try X next",
  "learned_pattern": "This type of endpoint responds to Y"
}}"""

        try:
            response = await self.llm.generate(prompt, REASONING_SYSTEM_PROMPT)
            if self.token_budget:
                self.token_budget.record("reasoning", est_tokens, "reflect()")

            data = self._extract_json(response)
            if data is not None:
                # Fix: propagate the model's confidence adjustment (was always
                # left at the 0.0 default), clamped to the documented [-1, 1].
                try:
                    adj = min(1.0, max(-1.0, float(data.get("confidence_adjustment", 0.0))))
                except (TypeError, ValueError):
                    adj = 0.0
                return Reflection(
                    should_continue=data.get("should_continue", True),
                    next_suggestion=data.get("next_suggestion", ""),
                    learned_pattern=data.get("learned_pattern", ""),
                    confidence_adjustment=adj,
                )
        except Exception:
            # Best-effort: fall through to the neutral reflection below.
            pass

        return Reflection(should_continue=True)

    def get_history(self) -> List[Dict]:
        """Return the last 10 reasoning-history entries for prompt context."""
        return self._reasoning_history[-10:]
|