mirror of
https://github.com/CyberSecurityUP/NeuroSploit.git
synced 2026-03-21 01:33:23 +00:00
116 modules | 100 vuln types | 18 API routes | 18 frontend pages Major features: - VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts - Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop - CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers - Validation Pipeline: negative controls, proof of execution, confidence scoring, judge - AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon - Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents - RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates - Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh - Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install - Full IA Testing: methodology-driven comprehensive pentest sessions - Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts - Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
378 lines
14 KiB
Python
378 lines
14 KiB
Python
"""
|
|
Advanced reconnaissance module for NeuroSploitv2.
|
|
|
|
Performs deep JS analysis, sitemap/robots parsing, API enumeration,
|
|
and technology fingerprinting using async HTTP requests.
|
|
"""
|
|
|
|
import re
|
|
import json
|
|
import asyncio
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Optional
|
|
from urllib.parse import urljoin, urlparse
|
|
|
|
try:
|
|
import aiohttp
|
|
HAS_AIOHTTP = True
|
|
except ImportError:
|
|
HAS_AIOHTTP = False
|
|
|
|
try:
|
|
from xml.etree import ElementTree as ET
|
|
except ImportError:
|
|
ET = None
|
|
|
|
# Shared client timeout for every recon HTTP request (only built when aiohttp
# is importable; otherwise None so the module still loads).
REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=10) if HAS_AIOHTTP else None
# Scan at most this many JavaScript files per crawl.
MAX_JS_FILES = 10
# Read at most this many bytes of each JavaScript file.
MAX_JS_SIZE = 500 * 1024  # 500 KB
# Cap on the number of URLs collected from sitemap files.
MAX_SITEMAP_URLS = 200

# --- Regex patterns for JS analysis ---

# Versioned REST paths embedded in JS, e.g. /api/v1/users.
RE_API_ENDPOINT = re.compile(r'/api/v[0-9]+/[a-z_/]+')
# First string argument of fetch("...").
RE_FETCH_URL = re.compile(r'fetch\(\s*["\']([^"\']+)["\']')
# First string argument of axios.get/post/put/patch/delete("...").
RE_AXIOS_URL = re.compile(r'axios\.(?:get|post|put|patch|delete)\(\s*["\']([^"\']+)["\']')
# url: "..." inside a $.ajax({...}) options object (DOTALL: object may span lines).
RE_AJAX_URL = re.compile(r'\$\.ajax\(\s*\{[^}]*url\s*:\s*["\']([^"\']+)["\']', re.DOTALL)
# Second argument of XMLHttpRequest .open("METHOD", "...").
RE_XHR_URL = re.compile(r'\.open\(\s*["\'][A-Z]+["\']\s*,\s*["\']([^"\']+)["\']')

# Credential-shaped tokens: sk-... / pk_live|test_... secret keys, AWS AKIA
# access-key IDs, GitHub ghp_ tokens, GitLab glpat- tokens, and JWT-looking
# eyJ<header>.<payload> strings.
RE_API_KEY = re.compile(
    r'(?:sk-[a-zA-Z0-9]{20,}|pk_(?:live|test)_[a-zA-Z0-9]{20,}'
    r'|AKIA[0-9A-Z]{16}'
    r'|ghp_[a-zA-Z0-9]{36}'
    r'|glpat-[a-zA-Z0-9\-]{20,}'
    r'|eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,})'
)

# Absolute URLs that point at loopback or private address space
# (localhost/127.0.0.1, 10/8, 192.168/16, 172.16/12).
RE_INTERNAL_URL = re.compile(
    r'https?://(?:localhost|127\.0\.0\.1|10\.\d+\.\d+\.\d+|192\.168\.\d+\.\d+|172\.(?:1[6-9]|2\d|3[01])\.\d+\.\d+)[^\s"\']*'
)

# SPA route declarations: React Router (path= or path:), Angular and
# Vue Router (path: "..."). The Angular pattern is intentionally broader
# (no leading-slash requirement).
RE_REACT_ROUTE = re.compile(r'path\s*[:=]\s*["\'](/[^"\']*)["\']')
RE_ANGULAR_ROUTE = re.compile(r'path\s*:\s*["\']([^"\']+)["\']')
RE_VUE_ROUTE = re.compile(r'path\s*:\s*["\'](/[^"\']*)["\']')
|
|
|
|
|
|
@dataclass
class JSAnalysisResult:
    """Results from JavaScript file analysis."""

    # API/route endpoints found in JS, resolved against the crawled base URL.
    endpoints: List[str] = field(default_factory=list)
    # Credential-looking tokens matched by RE_API_KEY (deduplicated, in
    # discovery order).
    api_keys: List[str] = field(default_factory=list)
    # URLs pointing at loopback/private address space (RE_INTERNAL_URL hits).
    internal_urls: List[str] = field(default_factory=list)
    # Aggregate of sensitive strings; currently mirrors api_keys entry-for-entry.
    secrets: List[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class APISchema:
    """Parsed API schema from Swagger/OpenAPI or GraphQL introspection."""

    # One dict per operation: {"url": ..., "method": ..., "params": [...]}.
    endpoints: List[Dict] = field(default_factory=list)
    # Spec/version marker (OpenAPI version string, or "graphql").
    version: str = ""
    # Path where the schema was discovered (e.g. "/swagger.json", "/graphql").
    source: str = ""
|
|
|
|
|
|
class DeepRecon:
    """Advanced reconnaissance: JS analysis, sitemap, robots, API enum, fingerprinting.

    All HTTP traffic goes through one aiohttp session, either supplied by the
    caller (never closed here) or created lazily and owned by this instance.
    Every probe is best-effort: network and parse failures are swallowed and
    simply yield empty results.
    """

    def __init__(self, session: Optional["aiohttp.ClientSession"] = None):
        # Remember whether the caller owns the session so close() never tears
        # down a session this instance did not create.
        self._external_session = session is not None
        self._session = session

    async def _get_session(self) -> "aiohttp.ClientSession":
        """Return the active session, lazily creating an owned one when needed."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(timeout=REQUEST_TIMEOUT)
            self._external_session = False
        return self._session

    async def close(self):
        """Close the HTTP session if (and only if) this instance created it."""
        if not self._external_session and self._session and not self._session.closed:
            await self._session.close()
            # Drop the closed session so a later call builds a fresh one.
            self._session = None

    async def _fetch(self, url: str, max_size: int = 0) -> Optional[str]:
        """Fetch URL text with optional size limit. Returns None on any error.

        Args:
            url: Absolute URL to GET.
            max_size: When non-zero, read at most this many bytes and decode
                as UTF-8 with replacement instead of reading the full body.

        Returns:
            Body text on HTTP 200, otherwise None. All exceptions are
            swallowed deliberately — recon probes are best-effort.
        """
        try:
            session = await self._get_session()
            # ssl=False: targets frequently present self-signed certificates.
            async with session.get(url, ssl=False, allow_redirects=True) as resp:
                if resp.status != 200:
                    return None
                if max_size:
                    chunk = await resp.content.read(max_size)
                    return chunk.decode("utf-8", errors="replace")
                return await resp.text()
        except Exception:
            return None

    # ------------------------------------------------------------------
    # JS file analysis
    # ------------------------------------------------------------------

    async def crawl_js_files(self, base_url: str, js_urls: List[str]) -> JSAnalysisResult:
        """Fetch and analyse JavaScript files for endpoints, keys, and secrets.

        Args:
            base_url: Base used to resolve relative JS paths and endpoints.
            js_urls: Candidate JS URLs; only the first MAX_JS_FILES are scanned,
                each truncated to MAX_JS_SIZE bytes.

        Returns:
            JSAnalysisResult with deduplicated endpoints (sorted before
            resolution), API keys, and internal/private URLs.
        """
        result = JSAnalysisResult()
        urls_to_scan = js_urls[:MAX_JS_FILES]

        tasks = [self._fetch(urljoin(base_url, u), max_size=MAX_JS_SIZE) for u in urls_to_scan]
        bodies = await asyncio.gather(*tasks, return_exceptions=True)

        seen_endpoints: set = set()
        for body in bodies:
            # _fetch returns None on failure; gather may also surface exceptions.
            if not isinstance(body, str):
                continue

            # API endpoint patterns
            for m in RE_API_ENDPOINT.finditer(body):
                seen_endpoints.add(m.group(0))
            for regex in (RE_FETCH_URL, RE_AXIOS_URL, RE_AJAX_URL, RE_XHR_URL):
                for m in regex.finditer(body):
                    seen_endpoints.add(m.group(1))

            # Route definitions (React Router, Angular, Vue Router)
            for regex in (RE_REACT_ROUTE, RE_ANGULAR_ROUTE, RE_VUE_ROUTE):
                for m in regex.finditer(body):
                    seen_endpoints.add(m.group(1))

            # API keys / tokens — secrets mirrors api_keys entry-for-entry.
            for m in RE_API_KEY.finditer(body):
                val = m.group(0)
                if val not in result.api_keys:
                    result.api_keys.append(val)
                    result.secrets.append(val)

            # Internal / private URLs
            for m in RE_INTERNAL_URL.finditer(body):
                val = m.group(0)
                if val not in result.internal_urls:
                    result.internal_urls.append(val)

        # Resolve relative endpoints against base_url; absolute ones pass through.
        for ep in sorted(seen_endpoints):
            resolved = urljoin(base_url, ep) if not ep.startswith("http") else ep
            if resolved not in result.endpoints:
                result.endpoints.append(resolved)

        return result

    # ------------------------------------------------------------------
    # Sitemap parsing
    # ------------------------------------------------------------------

    async def parse_sitemap(self, target: str) -> List[str]:
        """Fetch and parse sitemap XML files for URLs.

        Tries the common sitemap locations under *target* and collects every
        <loc> element, handling both <urlset> and <sitemapindex> documents
        regardless of XML namespace.

        Returns:
            Sorted list of discovered URLs, capped at MAX_SITEMAP_URLS.
        """
        target = target.rstrip("/")
        candidates = [
            f"{target}/sitemap.xml",
            f"{target}/sitemap_index.xml",
            f"{target}/sitemap1.xml",
        ]
        urls: set = set()

        for sitemap_url in candidates:
            body = await self._fetch(sitemap_url)
            if not body or ET is None:
                continue
            try:
                root = ET.fromstring(body)
            except ET.ParseError:
                continue
            # Handle both sitemapindex and urlset; strip namespace from tags.
            for elem in root.iter():
                tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
                if tag == "loc" and elem.text:
                    urls.add(elem.text.strip())
                    if len(urls) >= MAX_SITEMAP_URLS:
                        return sorted(urls)[:MAX_SITEMAP_URLS]

        return sorted(urls)[:MAX_SITEMAP_URLS]

    # ------------------------------------------------------------------
    # Robots.txt parsing
    # ------------------------------------------------------------------

    async def parse_robots(self, target: str) -> List[str]:
        """Parse robots.txt and return resolved paths (Disallow + Allow).

        Returns:
            Sorted list of absolute URLs built from Allow/Disallow values,
            or [] when robots.txt is missing or unreadable.
        """
        target = target.rstrip("/")
        body = await self._fetch(f"{target}/robots.txt")
        if not body:
            return []

        paths: set = set()
        for line in body.splitlines():
            line = line.strip()
            if line.startswith("#") or ":" not in line:
                continue
            directive, _, value = line.partition(":")
            directive = directive.strip().lower()
            value = value.strip()
            if directive in ("disallow", "allow") and value:
                resolved = urljoin(target + "/", value)
                paths.add(resolved)

        return sorted(paths)

    # ------------------------------------------------------------------
    # API enumeration (Swagger / OpenAPI / GraphQL)
    # ------------------------------------------------------------------

    _API_DOC_PATHS = [
        "/swagger.json",
        "/openapi.json",
        "/api-docs",
        "/v2/api-docs",
        "/swagger/v1/swagger.json",
        "/.well-known/openapi",
        "/api/swagger.json",
    ]

    _HTTP_VERBS = ("get", "post", "put", "patch", "delete", "options", "head")

    async def enumerate_api(self, target: str, technologies: List[str]) -> APISchema:
        """Discover and parse API documentation (OpenAPI/Swagger, GraphQL).

        Args:
            target: Base URL of the target (trailing slash stripped).
            technologies: Detected technology names; "graphql" forces an
                introspection attempt even if a REST schema was found empty.

        Returns:
            APISchema with one entry per operation; empty schema when nothing
            was discovered.
        """
        target = target.rstrip("/")
        schema = APISchema()

        # Try OpenAPI / Swagger endpoints
        for path in self._API_DOC_PATHS:
            body = await self._fetch(f"{target}{path}")
            if not body:
                continue
            try:
                doc = json.loads(body)
            except (json.JSONDecodeError, ValueError):
                continue
            # Fix: valid JSON that is not an object (e.g. a bare list) used to
            # crash on doc.get below.
            if not isinstance(doc, dict):
                continue

            # Looks like a valid Swagger/OpenAPI doc
            if "paths" in doc or "openapi" in doc or "swagger" in doc:
                # Fix: honour the swagger-2.0 version key before falling back
                # to the API's own info.version.
                info = doc.get("info")
                info_version = info.get("version", "") if isinstance(info, dict) else ""
                schema.version = doc.get("openapi") or doc.get("swagger") or info_version
                schema.source = path
                for route, methods in doc.get("paths", {}).items():
                    # Fix: path items may be "$ref" strings — skip non-dicts
                    # instead of raising AttributeError on .items().
                    if not isinstance(methods, dict):
                        continue
                    for method, detail in methods.items():
                        if method.lower() not in self._HTTP_VERBS:
                            # Skips path-level keys like "parameters"/"summary".
                            continue
                        # Fix: operation value can be malformed (non-dict).
                        if not isinstance(detail, dict):
                            continue
                        params = [
                            p.get("name", "")
                            for p in detail.get("parameters", []) or []
                            if isinstance(p, dict)
                        ]
                        schema.endpoints.append({
                            "url": route,
                            "method": method.upper(),
                            "params": params,
                        })
                return schema

        # GraphQL introspection — attempted when GraphQL was fingerprinted or
        # when no REST endpoints were found.
        if "graphql" in [t.lower() for t in technologies] or not schema.endpoints:
            introspection = await self._graphql_introspect(target)
            if introspection:
                return introspection

        return schema

    async def _graphql_introspect(self, target: str) -> Optional[APISchema]:
        """Attempt GraphQL introspection query.

        Returns:
            APISchema built from non-meta types and their fields, or None when
            the endpoint is absent, errors, or exposes no usable schema.
        """
        query = '{"query":"{ __schema { queryType { name } types { name fields { name args { name } } } } }"}'
        try:
            session = await self._get_session()
            headers = {"Content-Type": "application/json"}
            async with session.post(
                f"{target}/graphql", data=query, headers=headers, ssl=False
            ) as resp:
                if resp.status != 200:
                    return None
                data = await resp.json()
        except Exception:
            return None

        if "data" not in data or "__schema" not in data.get("data", {}):
            return None

        schema = APISchema(version="graphql", source="/graphql")
        for type_info in data["data"]["__schema"].get("types", []) or []:
            if not isinstance(type_info, dict):
                continue
            type_name = type_info.get("name", "") or ""
            # Skip introspection meta-types (__Type, __Schema, ...).
            if not type_name or type_name.startswith("__"):
                continue
            for fld in type_info.get("fields", []) or []:
                if not isinstance(fld, dict):
                    continue
                # Fix: was fld['name'] — a field without a name raised KeyError.
                fld_name = fld.get("name")
                if not fld_name:
                    continue
                # Fix: "args" may be explicitly null in the response.
                params = [
                    a["name"]
                    for a in (fld.get("args") or [])
                    if isinstance(a, dict) and "name" in a
                ]
                schema.endpoints.append({
                    "url": f"/{type_name}/{fld_name}",
                    "method": "QUERY",
                    "params": params,
                })
        return schema if schema.endpoints else None

    # ------------------------------------------------------------------
    # Deep technology fingerprinting
    # ------------------------------------------------------------------

    _FINGERPRINT_FILES = [
        "/readme.txt", "/README.md", "/CHANGELOG.md", "/CHANGES.txt",
        "/package.json", "/composer.json",
    ]

    _WP_PROBES = [
        "/wp-links-opml.php",
        "/wp-includes/js/wp-embed.min.js",
    ]

    _DRUPAL_PROBES = [
        "/CHANGELOG.txt",
        "/core/CHANGELOG.txt",
    ]

    # Generic version: "...version: 1.2.3..." in text or JSON-ish content.
    RE_VERSION = re.compile(r'["\']?version["\']?\s*[:=]\s*["\']?(\d+\.\d+[\w.\-]*)')
    # WordPress asset version query string, e.g. ...?ver=6.4.2
    RE_WP_VER = re.compile(r'ver=(\d+\.\d+[\w.\-]*)')
    # Drupal changelog heading, e.g. "Drupal 9.5.11, ..."
    RE_DRUPAL_VER = re.compile(r'Drupal\s+(\d+\.\d+[\w.\-]*)')

    async def deep_fingerprint(
        self, target: str, headers: Dict, body: str
    ) -> List[Dict]:
        """Detect software and versions from well-known files and probes.

        Args:
            target: Base URL (trailing slash stripped).
            headers: Response headers of the main page — accepted for
                interface parity; currently unused.
            body: Main page body — currently unused.

        Returns:
            List of {"software", "version", "source"} dicts, deduplicated by
            (software lowercased, version).
        """
        target = target.rstrip("/")
        results: List[Dict] = []
        seen: set = set()

        def _add(software: str, version: str, source: str):
            # Dedup key is case-insensitive on the software name.
            key = (software.lower(), version)
            if key not in seen:
                seen.add(key)
                results.append({"software": software, "version": version, "source": source})

        # Generic version files — fetched concurrently.
        tasks = {path: self._fetch(f"{target}{path}") for path in self._FINGERPRINT_FILES}
        bodies = dict(zip(tasks.keys(), await asyncio.gather(*tasks.values(), return_exceptions=True)))

        for path, content in bodies.items():
            if not isinstance(content, str):
                continue
            if path.endswith(".json"):
                try:
                    doc = json.loads(content)
                except (json.JSONDecodeError, ValueError):
                    continue
                # Fix: package.json/composer.json may parse to a non-object;
                # doc.get on a list used to raise AttributeError.
                if not isinstance(doc, dict):
                    continue
                name = doc.get("name", "unknown")
                ver = doc.get("version", "")
                if ver:
                    _add(name, ver, path)
            else:
                m = self.RE_VERSION.search(content)
                if m:
                    _add("unknown", m.group(1), path)

        # WordPress probes
        for wp_path in self._WP_PROBES:
            content = await self._fetch(f"{target}{wp_path}")
            if not content:
                continue
            m = self.RE_WP_VER.search(content)
            if m:
                _add("WordPress", m.group(1), wp_path)
            elif "WordPress" in content or "wp-" in content:
                _add("WordPress", "unknown", wp_path)

        # Drupal probes
        for dp_path in self._DRUPAL_PROBES:
            content = await self._fetch(f"{target}{dp_path}")
            if not content:
                continue
            m = self.RE_DRUPAL_VER.search(content)
            if m:
                _add("Drupal", m.group(1), dp_path)

        return results
|