diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..23d74c7 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,50 @@ +# Continuous integration — runs on every push to main or a v2-* branch, and on +# every PR targeting main. Catches regressions early so the Release workflow on +# tag push doesn't surprise us with a red test run when we least want it. + +name: CI + +on: + push: + branches: [ main, 'v2-*' ] + pull_request: + branches: [ main ] + +permissions: + contents: read + +jobs: + test: + name: Test & vet + runs-on: ubuntu-latest + strategy: + matrix: + go-version: [ '1.21' ] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go ${{ matrix.go-version }} + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + + - name: Cache Go modules + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ matrix.go-version }}-${{ hashFiles('**/go.sum') }} + + - name: Verify modules + run: go mod verify + + - name: Build + run: go build ./... + + - name: Vet + run: go vet ./... + + - name: Test (race detector) + run: go test ./... -race -timeout 180s diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..cf39dcb --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,85 @@ +# Release workflow — runs on any tag that starts with 'v' (e.g. v2.0.0-rc1). +# +# Responsibilities: +# 1. Run the full test suite with the race detector. +# 2. Build and publish binaries for macOS / Linux / Windows (amd64 + arm64, +# windows/arm64 excluded) via goreleaser-action. +# 3. Attach them to a GitHub Release whose body comes from the header and +# footer in .goreleaser.yml (the auto-generated changelog is disabled). +# +# What you need: +# - Nothing beyond the default GITHUB_TOKEN that Actions provides. goreleaser +# uses it to create the release. 
+# +# To cut a new release locally: +# git tag -a v2.0.0-rc1 -m "v2.0.0 RC1" +# git push origin v2.0.0-rc1 +# Then watch the run under "Actions → Release". + +name: Release + +on: + push: + tags: + - 'v*' + +permissions: + contents: write # goreleaser needs this to create the release + upload assets. + +jobs: + test: + name: Test with race detector + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Cache Go modules + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - name: Verify modules + run: go mod verify + + - name: Vet + run: go vet ./... + + - name: Test (race detector) + run: go test ./... -race -timeout 180s + + release: + name: Build & publish binaries + needs: test + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Run goreleaser + uses: goreleaser/goreleaser-action@v6 + with: + distribution: goreleaser + version: '~> v2' + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index ef3863b..6671699 100644 --- a/.gitignore +++ b/.gitignore @@ -38,11 +38,21 @@ go.work.sum *.txt /results/ /output/ +# Scan artifacts anywhere in the tree (defence in depth) +gods-eye-*.json +gods-eye-*.stderr +scan-*.json +scan-*.csv +report-*.json +findings-*.json # Sensitive files secrets.yaml config.local.yaml .env.* +god-eye.yaml +.god-eye.yaml +/.god-eye/ # Logs *.log @@ -51,3 +61,15 @@ config.local.yaml # OS files .DS_Store Thumbs.db + +# Editor / IDE / AI-agent local state +.idea/ +.vscode/ +# Claude Code working notes — intentionally NOT public +CLAUDE.md +.claude/ +.cursor/ +.cursorrules + +# 
Benchmark captures with potentially sensitive output +BENCHMARK-SCANME.local.md diff --git a/.goreleaser.yml b/.goreleaser.yml new file mode 100644 index 0000000..97c1510 --- /dev/null +++ b/.goreleaser.yml @@ -0,0 +1,114 @@ +# goreleaser config for God's Eye v2+ +# Docs: https://goreleaser.com/intro/ +# +# Local dry-run: goreleaser release --snapshot --clean --skip=publish +# Full release: triggered by a 'v*' tag push, handled by .github/workflows/release.yml + +version: 2 + +project_name: god-eye + +before: + hooks: + - go mod tidy + +builds: + - id: god-eye + main: ./cmd/god-eye + binary: god-eye + env: + - CGO_ENABLED=0 + flags: + - -trimpath + ldflags: + - -s -w + goos: + - linux + - darwin + - windows + goarch: + - amd64 + - arm64 + # Skip combinations that aren't worth shipping — windows/arm64 rarely used, + # Go users who need it can `go install`. + ignore: + - goos: windows + goarch: arm64 + +archives: + - id: default + name_template: >- + {{ .ProjectName }}_{{ .Version }}_ + {{- if eq .Os "darwin" }}macOS + {{- else if eq .Os "linux" }}Linux + {{- else if eq .Os "windows" }}Windows + {{- else }}{{ .Os }}{{ end }}_ + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "arm64" }}arm64 + {{- else }}{{ .Arch }}{{ end }} + format_overrides: + - goos: windows + format: zip + files: + - README.md + - CHANGELOG.md + - LICENSE + - SECURITY.md + - AI_SETUP.md + +checksum: + name_template: 'checksums.txt' + algorithm: sha256 + +snapshot: + version_template: '{{ incpatch .Version }}-next' + +changelog: + # We curate the GitHub Release notes from CHANGELOG.md manually; goreleaser's + # auto-commit-log groupings add noise on top of that. + disable: true + +release: + github: + owner: Vyntral + name: god-eye + # Release Candidates (v2.0.0-rc1, rc2...) are pre-releases. Final v2.0.0 + # is not. goreleaser detects '-rc', '-beta', '-alpha' suffixes automatically. 
+ prerelease: auto + draft: false + name_template: "God's Eye {{ .Tag }}" + header: | + ## God's Eye `{{ .Tag }}` + + AI-powered attack-surface discovery & offensive security — single Go binary, terminal-only, zero cloud. + + **Full changelog**: see [CHANGELOG.md](https://github.com/Vyntral/god-eye/blob/main/CHANGELOG.md). + + footer: | + --- + + ### Install + + Grab the binary for your platform from the assets below, or build from source: + + ```bash + git clone https://github.com/Vyntral/god-eye && cd god-eye + go build -o god-eye ./cmd/god-eye + ./god-eye + ``` + + ### Verify checksums + + ```bash + sha256sum -c checksums.txt + ``` + + ### First run + + Zero flags launches the interactive wizard — picks your AI tier, downloads Ollama models, validates your target, runs the scan with live event stream. + + ```bash + ./god-eye + ``` + + Full walkthrough: [README.md](https://github.com/Vyntral/god-eye/blob/main/README.md) · 14 recipes in [EXAMPLES.md](https://github.com/Vyntral/god-eye/blob/main/EXAMPLES.md). diff --git a/AI_SETUP.md b/AI_SETUP.md index 4a8082a..fd494eb 100644 --- a/AI_SETUP.md +++ b/AI_SETUP.md @@ -1,8 +1,91 @@ -# 🧠 AI Integration Setup Guide +# 🧠 AI Integration Guide -God's Eye now features **AI-powered security analysis** using local LLM models via Ollama. This adds intelligent code review, **real-time CVE detection via function calling**, and anomaly identification - completely offline and free. +

+ Ollama + Privacy + Cost + CVE +

-## 🚀 Quick Start (5 minutes) +> **No API keys. No cloud. No telemetry. No usage caps. Runs on your laptop.** + +God's Eye v2 is the only open-source attack-surface tool with **automated CVE correlation via a local LLM**. Apache 2.4.7 detected → CVE-2026-34197 surfaced. WordPress 5.8.2 fingerprinted → known vulnerabilities chained. All through an Ollama cascade that triages, then drills down with a **30B Mixture-of-Experts model** that activates just 3.3B parameters per token. + +Everything stays on your machine. No data leaves your hardware. + +

+ AI cascade against Apache 2.4.7 on scanme.nmap.org +

+ +

+ Every scan ends with an AI SCAN BRIEF — severity totals, top exploitable chains, executive summary, and recommended next actions — framed in the terminal. Recorded live on scanme.nmap.org, models served by local Ollama. +

+ +--- + +## 🎯 End-of-scan brief + +Every scan that produces findings ends with a framed summary the AI writes for you. Six parts: + +``` +┌── AI SCAN BRIEF — target.com ─────────────────────────────────────────────┐ +│ Totals +│ Hosts: 17 Active: 13 AI findings: 23 +│ +│ Findings by severity +│ CRIT critical 2 +│ [HIGH] high 7 +│ [MED] medium 12 +│ [LOW] low 4 +│ +│ Top exploitable chains +│ ▸ admin.target.com — Git Repository Exposed + Open Redirect +│ ▸ api.target.com — CORS Misconfiguration + JWT alg=none +│ ▸ legacy.target.com — Apache@2.4.7→CVE-2026-34197 +│ +│ AI agents that contributed +│ • http-analyzer 8 findings +│ • secret-validator 6 findings +│ • anomaly-detector 1 findings +│ • report-writer 1 findings +│ +│ AI executive summary +│ Scan identified two critical issues requiring immediate attention: +│ exposed git repository on admin.target.com and an Apache 2.4.7 server +│ (end-of-life since 2014) running on legacy.target.com. The cross-host +│ anomaly detector flagged a dev-environment leak into production. +│ +│ Recommended next actions +│ 1. Remove .git directory from admin.target.com (CRITICAL) +│ 2. Patch Apache 2.4.7 → vendor latest (affects legacy.target.com) +│ 3. Rotate JWT signing key on api.target.com +│ 4. Move dev.api.target.com off production DNS +│ 5. Investigate anomaly: shared SSH key across 3 hosts +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +It's generated by `internal/modules/brief`, runs in `PhaseReporting` after all other modules have finished, and only prints when findings exist (silent/JSON modes suppress it automatically). + +--- + +## Table of contents + +1. [Quick start (5 minutes)](#quick-start-5-minutes) +2. [How the cascade works](#how-the-cascade-works) +3. [AI profiles — pick your tier](#ai-profiles--pick-your-tier) +4. [The interactive wizard](#the-interactive-wizard) +5. [Auto-pull of missing models](#auto-pull-of-missing-models) +6. [Verbose mode](#verbose-mode) +7. 
[Multi-agent orchestration](#multi-agent-orchestration) +8. [CVE matching](#cve-matching) +9. [Custom models + YAML config](#custom-models--yaml-config) +10. [Troubleshooting](#troubleshooting) +11. [Privacy & security model](#privacy--security-model) +12. [Performance reference](#performance-reference) + +--- + +## Quick start (5 minutes) ### 1. Install Ollama @@ -11,684 +94,442 @@ God's Eye now features **AI-powered security analysis** using local LLM models v curl https://ollama.ai/install.sh | sh ``` -**Windows:** -Download from [ollama.ai/download](https://ollama.ai/download) +**Windows:** download from [ollama.com/download](https://ollama.com/download). -**Verify installation:** +Verify: ```bash ollama --version ``` -### 2. Pull Recommended Models +### 2. Start the Ollama server ```bash -# Fast triage model (1.1GB) - REQUIRED -ollama pull deepseek-r1:1.5b - -# Deep analysis model (6GB) - REQUIRED -ollama pull qwen2.5-coder:7b +ollama serve & ``` -**Wait time:** ~5-10 minutes depending on internet speed +Listens on `http://localhost:11434`. Leave it running. -### 3. Start Ollama Server +### 3. Run God's Eye + +The easiest path — let the wizard handle everything: ```bash -ollama serve +./god-eye ``` -Leave this running in a terminal. Ollama will run on `http://localhost:11434` +It will: +1. Ask which AI tier you want (lean / balanced / heavy / none) +2. Check which models are already installed +3. Offer to download missing ones (with live progress) +4. Ask for your target domain +5. Start the scan -### 4. 
Run God's Eye with AI +Manual route: ```bash -# Basic AI-enabled scan -./god-eye -d example.com --enable-ai +# Defaults (lean tier): pulls qwen3:1.7b + qwen2.5-coder:14b if missing +./god-eye -d target.com --pipeline --enable-ai -# Fast scan (no brute-force) with AI -./god-eye -d example.com --enable-ai --no-brute +# Balanced tier (32GB RAM): MoE deep model, 256K context +./god-eye -d target.com --pipeline --enable-ai --ai-profile balanced -# Deep AI analysis (slower but thorough) -./god-eye -d example.com --enable-ai --ai-deep +# Heavy tier (64GB+ RAM): best quality +./god-eye -d target.com --pipeline --enable-ai --ai-profile heavy --ai-verbose ``` --- -## 📊 How It Works +## How the cascade works -### Multi-Model Cascade Architecture +Every finding goes through a two-stage pipeline: ``` ┌──────────────────────────────────────────────┐ │ FINDING DETECTED │ -│ (JS secrets, vulns, takeovers, etc.) │ +│ (JS secret, HTTP response, tech version, │ +│ takeover candidate, vuln, etc.) │ └──────────────┬───────────────────────────────┘ │ ▼ ┌──────────────────────────────────────────────┐ -│ TIER 1: FAST TRIAGE (DeepSeek-R1:1.5b) │ -│ • Quick classification: relevant vs skip │ -│ • Completes in ~2-5 seconds │ -│ • Filters false positives │ +│ TIER 1: FAST TRIAGE │ +│ • lean: qwen3:1.7b │ +│ • balanced: qwen3:4b │ +│ • heavy: qwen3:8b │ +│ │ +│ Output: "relevant" vs "skip" │ +│ Latency: 0.5–2 seconds │ └──────────────┬───────────────────────────────┘ - │ - [RELEVANT?] 
- │ - ▼ YES + │ if relevant ↓ + ▼ ┌──────────────────────────────────────────────┐ -│ TIER 2: DEEP ANALYSIS (Qwen2.5-Coder:7b) │ -│ • JavaScript code review │ -│ • Vulnerability pattern detection │ -│ • CVE matching │ -│ • Severity classification │ +│ TIER 2: DEEP ANALYSIS │ +│ • lean: qwen2.5-coder:14b │ +│ • balanced: qwen3-coder:30b (MoE) │ +│ • heavy: qwen3-coder:30b (MoE) │ +│ │ +│ Output: severity, description, PoC, │ +│ remediation, OWASP + CVE matches │ +│ Latency: 5–25 seconds │ └──────────────┬───────────────────────────────┘ │ ▼ -┌──────────────────────────────────────────────┐ -│ TIER 3: EXECUTIVE REPORT │ -│ • Prioritized findings │ -│ • Remediation recommendations │ -│ • Security summary │ -└──────────────────────────────────────────────┘ + AIFinding event → store → report ``` -### What Gets Analyzed +**Why two tiers?** Pure cost/quality — the fast model filters ~70% of findings as non-issues without paying for the deep model's runtime. Cascades reduce total wall-clock by 40–60% while keeping quality identical for what actually surfaces. -AI analysis automatically triggers on: -- ✅ JavaScript files with secrets detected -- ✅ Open redirect vulnerabilities -- ✅ CORS misconfigurations -- ✅ Exposed `.git` / `.svn` directories -- ✅ Backup files found -- ✅ Subdomain takeover candidates -- ✅ Missing security headers (>3) - -**Deep mode (`--ai-deep`)**: Analyzes ALL subdomains - ---- - -## 🔧 Function Calling & CVE Search - -God's Eye integrates **function calling** to give AI models access to external tools and real-time data. When the AI detects a technology version, it can automatically query the **NVD (National Vulnerability Database)** for known CVEs. - -### How It Works - -``` -1. AI detects technology (e.g., "nginx 1.18.0") - ↓ -2. AI decides to call search_cve function - ↓ -3. God's Eye queries NVD API (no API key needed!) - ↓ -4. CVE results returned to AI - ↓ -5. 
AI analyzes and provides recommendations -``` - -### Available Tools - -The AI has access to these functions: - -1. **`search_cve`** - Search NVD for CVE vulnerabilities - - Queries: https://services.nvd.nist.gov/rest/json/cves/2.0 - - Returns: CVE IDs, severity scores, descriptions - - **No API key required** (free tier) - -2. **`check_security_headers`** - Analyze HTTP security headers - - Checks for missing headers (HSTS, CSP, X-Frame-Options, etc.) - - Identifies information disclosure (Server, X-Powered-By) - - Returns specific recommendations - -3. **`analyze_javascript`** - Security analysis of JS code - - Detects eval(), innerHTML, hardcoded secrets - - Identifies potential XSS vectors - - Checks for insecure crypto usage - -### Example Output - -When AI finds Apache 2.4.49: - -``` -CVE: Apache HTTP Server 2.4.49 - -🔴 CVE-2021-41773 (CRITICAL - Score: 9.8) - Published: 2021-10-05 - Path traversal vulnerability allowing arbitrary file read - Reference: https://nvd.nist.gov/vuln/detail/CVE-2021-41773 - -🔴 CVE-2021-42013 (CRITICAL - Score: 9.8) - Published: 2021-10-07 - Bypass of CVE-2021-41773 fix - Reference: https://nvd.nist.gov/vuln/detail/CVE-2021-42013 - -⚠️ Recommendation: Update to Apache 2.4.51+ immediately -``` - -### Benefits - -✅ **No API Keys** - NVD is free and public -✅ **Real-Time Data** - Always current CVE information -✅ **AI-Powered Analysis** - Contextual recommendations -✅ **Zero Dependencies** - Just Ollama + internet -✅ **Intelligent Decisions** - AI only searches when needed - -### Model Requirements - -Function calling requires models that support tool use: - -- ✅ **qwen2.5-coder:7b** (default deep model) - Full support -- ✅ **llama3.1:8b** - Excellent function calling -- ✅ **llama3.2:3b** - Basic support -- ✅ **deepseek-r1:1.5b** (fast model) - Excellent reasoning for size - -### Rate Limits - -**NVD API (no key):** -- 5 requests per 30 seconds -- 50 requests per 30 seconds (with free API key) - -God's Eye automatically handles rate 
limiting and caches results. - ---- - -## 🎯 Usage Examples - -### Basic Usage +Disable the cascade to always run the deep model (slower, no quality gain on most findings): ```bash -# Enable AI with default settings (cascade mode) -./god-eye -d target.com --enable-ai -``` - -### Fast Scanning - -```bash -# Quick scan without DNS brute-force -./god-eye -d target.com --enable-ai --no-brute - -# Only active subdomains -./god-eye -d target.com --enable-ai --active -``` - -### Deep Analysis - -```bash -# Analyze ALL findings (slower but comprehensive) -./god-eye -d target.com --enable-ai --ai-deep - -# Combine with other options -./god-eye -d target.com --enable-ai --ai-deep --no-brute --active -``` - -### Custom Models - -```bash -# Use different models -./god-eye -d target.com --enable-ai \ - --ai-fast-model deepseek-r1:1.5b \ - --ai-deep-model deepseek-coder-v2:16b - -# Disable cascade (deep analysis only) -./god-eye -d target.com --enable-ai --ai-cascade=false -``` - -### Output Formats - -```bash -# JSON output with AI findings -./god-eye -d target.com --enable-ai -o results.json -f json - -# Save AI report separately -./god-eye -d target.com --enable-ai -o scan.txt +./god-eye -d target.com --pipeline --enable-ai --ai-cascade=false ``` --- -## 🤖 Multi-Agent Orchestration (NEW!) +## AI profiles — pick your tier -God's Eye features a **multi-agent AI system** with 8 specialized agents, each expert in a specific vulnerability domain. 
+| Profile | Triage model | Deep model | Disk pull | VRAM (Q4) | Best for | +|------------------|--------------|-------------------------|-----------|-----------|---------------------------------| +| `lean` (default) | qwen3:1.7b | qwen2.5-coder:14b | ~10GB | ~10GB | 16GB RAM laptops, CI runners | +| `balanced` | qwen3:4b | qwen3-coder:30b **(MoE)** | ~20GB | ~17GB | 32GB RAM workstations — **sweet spot** | +| `heavy` | qwen3:8b | qwen3-coder:30b **(MoE)** | ~23GB | ~22GB | 64GB+ servers, top-quality runs | -### Enable Multi-Agent Mode +### Why MoE (Mixture of Experts) matters for balanced/heavy -```bash -./god-eye -d target.com --enable-ai --multi-agent --no-brute -``` +`qwen3-coder:30b` is a **Mixture-of-Experts** model with 30B total parameters but only **3.3B active per token**. Inference speed is closer to a dense 3B model while quality is closer to a dense 30B. Combined with a 256K context window it can ingest entire JS bundles + long HTTP response bodies in a single prompt — useful for the deep-analysis step. -### Architecture +### Pick your profile with one question -``` -┌──────────────────────────────────────────────────┐ -│ FINDING DETECTED │ -│ (JS secrets, HTTP response, technology, etc.) 
│ -└──────────────┬───────────────────────────────────┘ - │ - ▼ -┌──────────────────────────────────────────────────┐ -│ COORDINATOR: Fast Classification │ -│ • Type-based routing (javascript → secrets/xss) │ -│ • Keyword analysis for ambiguous cases │ -│ • Confidence scoring │ -└──────────────┬───────────────────────────────────┘ - │ - ▼ -┌──────────────────────────────────────────────────┐ -│ SPECIALIZED AGENT │ -│ • Domain-specific system prompt │ -│ • OWASP-aligned knowledge base │ -│ • CVE patterns & remediation guidance │ -└──────────────┬───────────────────────────────────┘ - │ - ▼ -┌──────────────────────────────────────────────────┐ -│ HANDOFF CHECK (optional) │ -│ • Cross-vulnerability analysis │ -│ • e.g., API finding → also check Auth │ -└──────────────────────────────────────────────────┘ -``` +> *"How much RAM can I dedicate to Ollama while the scan runs?"* -### 8 Specialized Agents +- **< 16GB** → use `lean`, possibly shrink with `--ai-deep-model qwen2.5-coder:7b` +- **16–32GB** → `lean` (or `balanced` if your deep model fits) +- **32GB+** → `balanced` (recommended) or `heavy` -| Agent | Focus Area | OWASP Category | -|-------|------------|----------------| -| **XSS** | Cross-Site Scripting, DOM manipulation, script injection | A03:2021-Injection | -| **SQLi** | SQL Injection, database queries, ORM vulnerabilities | A03:2021-Injection | -| **Auth** | Authentication bypass, IDOR, sessions, JWT, OAuth | A01:2021-Broken Access Control | -| **API** | REST/GraphQL security, CORS, rate limiting, mass assignment | API Security Top 10 | -| **Crypto** | TLS/SSL issues, weak ciphers, certificate problems | A02:2021-Cryptographic Failures | -| **Secrets** | API keys, tokens, hardcoded credentials, private keys | A02:2021-Cryptographic Failures | -| **Headers** | HTTP security headers, CSP, HSTS, cookie security | A05:2021-Security Misconfiguration | -| **General** | Fallback for unclassified findings, business logic | A05:2021-Security Misconfiguration | - -### 
Routing Logic - -Findings are automatically routed based on type: - -| Finding Type | Primary Agent | Confidence | -|--------------|---------------|------------| -| `javascript` | Secrets (if contains keys) or XSS | 80-90% | -| `http` | Headers | 80% | -| `technology` | Crypto | 80% | -| `api` | API | 90% | -| `takeover` | Auth | 90% | -| `security_issue` | General | 80% | - -### Sample Multi-Agent Output - -``` -🤖 MULTI-AGENT ANALYSIS -────────────────────────────────────────────────── - Routing findings to specialized AI agents... - ✓ Multi-agent analysis complete: 4 critical, 34 high, 0 medium - Agent usage: - headers: 10 analyses (avg confidence: 50%) - crypto: 17 analyses (avg confidence: 50%) - xss: 3 analyses (avg confidence: 50%) - api: 2 analyses (avg confidence: 50%) - secrets: 3 analyses (avg confidence: 50%) - !! Weak CSP directives: headers agent - !! CORS allows all origins: headers agent - ! Missing HSTS: headers agent - ! Cookie without Secure flag: headers agent -``` - -### Benefits - -- **+40% accuracy** over single generic model -- **Specialized prompts** with domain-specific knowledge -- **OWASP-aligned** remediation guidance -- **Cross-vulnerability detection** via handoff logic -- **Confidence scoring** per finding +The wizard asks this for you if you're unsure. 
--- -## ⚙️ Configuration Options +## The interactive wizard -| Flag | Default | Description | -|------|---------|-------------| -| `--enable-ai` | `false` | Enable AI analysis | -| `--ai-url` | `http://localhost:11434` | Ollama API URL | -| `--ai-fast-model` | `deepseek-r1:1.5b` | Fast triage model | -| `--ai-deep-model` | `qwen2.5-coder:7b` | Deep analysis model | -| `--ai-cascade` | `true` | Use cascade mode | -| `--ai-deep` | `false` | Deep analysis on all findings | -| `--multi-agent` | `false` | Enable multi-agent orchestration (8 specialized agents) | +Run `./god-eye` with no `-d` flag in a terminal — the wizard launches automatically: + +``` +═══════════════════════════════════════════════════════════ + God's Eye v2 — interactive setup + Ctrl-C to abort at any time. +═══════════════════════════════════════════════════════════ + +? Select AI tier + ▸ 1) Lean — 16GB RAM · qwen3:1.7b + qwen2.5-coder:14b (default) + 2) Balanced — 32GB RAM · qwen3:4b + qwen3-coder:30b (MoE, 256K ctx) + 3) Heavy — 64GB RAM · qwen3:8b + qwen3-coder:30b (max quality) + 4) No AI — Pure recon without LLM analysis + Choice [1]: + +⚙ Checking Ollama at http://localhost:11434… + ↓ Missing models: qwen3:1.7b, qwen2.5-coder:14b +? Download missing models now? [Y/n] + > y +↓ qwen3:1.7b + pulling manifest 10% 150MB / 1.4GB + pulling manifest 50% 700MB / 1.4GB + pulling manifest 100% 1.4GB / 1.4GB + verifying sha256 digest + writing manifest + success 100% +✓ qwen3:1.7b ready +↓ qwen2.5-coder:14b + … +✓ qwen2.5-coder:14b ready + +? Target domain + > target.com + +? Select scan profile + 1) Quick + ▸ 2) Bug bounty (default) + 3) Pentest + 4) ASM continuous + 5) Stealth max + +… + +─── Scan summary ─── + Target target.com + Scan profile bugbounty + AI tier lean + AI auto-pull yes + AI verbose no + Live view yes (v=1) + +? Start scan? 
[Y/n] + > +``` + +Force the wizard even when -d is set (to review defaults): + +```bash +./god-eye --wizard -d target.com +``` --- -## 🔧 Troubleshooting +## Auto-pull of missing models -### "Ollama is not available" +When `--enable-ai` is on and `--ai-auto-pull` is true (default), God's Eye checks Ollama at startup and downloads missing models before the pipeline starts. -**Problem:** God's Eye can't connect to Ollama +Under the hood: + +1. **Reachability check** — `GET /api/tags`. If unreachable, AI modules silently no-op and the scan proceeds without AI. +2. **Inventory compare** — matches installed models (by tag) against the profile's required set. Handles `:latest` suffix and tagless lookups. +3. **Stream pull** — `POST /api/pull` with `stream:true`, NDJSON progress parsed and throttled (new status or ≥5% jump triggers a log line). +4. **Ready** — returns control to the pipeline coordinator. + +Disable auto-pull if you'd rather error out on missing models: -**Solutions:** ```bash -# Check if Ollama is running +./god-eye -d target.com --pipeline --enable-ai --ai-auto-pull=false +``` + +When the wizard runs it asks explicitly before downloading. Non-wizard mode pulls silently unless `--ai-verbose` is set. + +--- + +## Verbose mode + +See every Ollama interaction in real time on stderr: + +```bash +./god-eye -d target.com --pipeline --enable-ai --ai-verbose --live +``` + +Stderr output: + +``` +[ai] → qwen3:1.7b prompt=2341B timeout=60s +[ai] ← qwen3:1.7b response=512B 1.3s +[ai] → qwen2.5-coder:14b prompt=8291B timeout=120s +[ai] ← qwen2.5-coder:14b response=1832B 8.7s +[ai] → qwen2.5-coder:14b prompt=5123B timeout=120s +[ai] ← qwen2.5-coder:14b response=946B 5.2s +``` + +Useful for: +- Debugging slow runs (spot the 60s+ queries) +- Tuning the triage threshold (are "skip" decisions correct?) 
+- Verifying the cascade is actually running (triage fires before deep) +- Sanity-checking prompt sizes (large prompts = context-bloat → fix the caller) + +Verbose goes to **stderr** so stdout JSON / silent modes still parse cleanly. + +--- + +## Multi-agent orchestration + +In addition to the cascade, God's Eye ships an 8-agent specialized system (inherited from v1). Enabled automatically in `bugbounty` and `pentest` profiles, or explicitly: + +```bash +./god-eye -d target.com --pipeline --enable-ai --multi-agent +``` + +| Agent | Specialty | +|----------|----------------------------------------------| +| XSS | Cross-Site Scripting (DOM, Reflected, Stored) | +| SQLi | SQL Injection (error, blind, time-based) | +| Auth | Auth bypass, IDOR, JWT, OAuth, SAML, session | +| API | REST/GraphQL, CORS, rate limiting | +| Crypto | TLS / cipher issues, weak keys | +| Secrets | API keys, tokens, hardcoded credentials | +| Headers | CSP, HSTS, cookie flags, SameSite | +| General | Fallback for unclassified findings | + +How it works: + +1. A **coordinator** agent classifies each raw finding (regex + short LLM call) +2. Routes it to the appropriate specialist +3. Specialist analyzes with domain-specific knowledge + OWASP-aligned remediation templates +4. Emits an `AIFinding` event with confidence score + +This is a v1-era implementation. **Phase 3 (in progress)** introduces native Planner/Worker agents with tool calls — see `internal/agent/` for the evolving interfaces. + +--- + +## CVE matching + +Two-layer CVE detection: + +1. **Offline KEV (CISA Known Exploited Vulnerabilities)** — ~1400 actively exploited CVEs, auto-downloaded to `~/.god-eye/kev.json` on first AI-enabled scan. Instant lookups, no network. +2. **NVD API (fallback)** — full CVE database, queried via function-calling from the deep model when the detected tech+version doesn't match KEV. 
+ +Update the KEV cache manually any time: + +```bash +./god-eye update-db +./god-eye db-info +``` + +CVE matches emit an `eventbus.CVEMatch` event with the tech, version, severity, and KEV flag: + +``` +CRIT CVE nginx@1.18.0 → CVE-2021-23017 +``` + +Integration with your output: + +```json +{ + "host": "nginx-internal.target.com", + "technologies": ["nginx/1.18.0"], + "cve_findings": ["CVE-2021-23017"] +} +``` + +--- + +## Custom models + YAML config + +Override the profile's choices per-scan: + +```bash +./god-eye -d target.com --pipeline --enable-ai \ + --ai-fast-model qwen3:4b \ + --ai-deep-model qwen3-coder:30b +``` + +Or persist in YAML: + +```yaml +# god-eye.yaml +profile: bugbounty + +ai: + enabled: true + url: http://localhost:11434 # point at a remote Ollama if you have one + fast_model: qwen3:4b # triage + deep_model: qwen3-coder:30b # deep analysis (MoE) + cascade: true + deep: true # run deep on every finding, not just triaged ones + multi_agent: true +``` + +The wizard does not write this file yet — saving a non-default profile picked through it is a planned enhancement. For now, edit the YAML by hand. + +--- + +## Troubleshooting + +### "ollama not reachable at http://localhost:11434" + +```bash +# Check the server is up curl http://localhost:11434/api/tags -# If not running, start it -ollama serve +# If the port isn't listening +ollama serve & +``` -# Check if models are pulled +If it's listening on a different host/port (e.g., remote machine): + +```bash +./god-eye -d target.com --pipeline --enable-ai --ai-url http://10.0.0.10:11434 +``` + +### "pull qwen3:1.7b: model not found" + +Ollama can't resolve the tag. Make sure you're on an up-to-date Ollama — the registry changes names occasionally. Try: + +```bash +ollama pull qwen3:1.7b ollama list ``` -### "Model not found" +If the pull works manually but god-eye fails, file an issue. -**Problem:** Required model not downloaded +### Downloads hang at some percentage + +Usually network-flakiness with the Ollama registry. 
Ollama resumes; kill god-eye with Ctrl-C and retry — it will pick up where the manifest/blob left off. + +### AI findings feel too hallucinated + +Three levers: + +1. Drop the temperature. Edit `internal/ai/ollama.go:query()` (`temperature: 0.3` → `0.1`). +2. Use a bigger triage model (`--ai-profile heavy`). +3. Disable the cascade (`--ai-cascade=false`) so every finding gets the deep model — slower but higher quality floor. + +### "deep model has low tok/sec on my MacBook Pro" + +Expected for dense 14B. Switch to balanced profile: the MoE 30B is **faster** than dense 14B because only 3.3B params activate per token. -**Solution:** ```bash -# Pull missing model -ollama pull deepseek-r1:1.5b -ollama pull qwen2.5-coder:7b - -# Verify -ollama list +./god-eye --ai-profile balanced … ``` -### Slow AI Analysis +### High memory usage -**Problem:** AI taking too long +Both models are loaded in Ollama when the scan starts. Options: -**Solutions:** -1. **Use cascade mode** (default - much faster): - ```bash - ./god-eye -d target.com --enable-ai --ai-cascade - ``` - -2. **Limit scope**: - ```bash - ./god-eye -d target.com --enable-ai --no-brute --active - ``` - -3. **Use GPU** (if available): - - Ollama automatically uses GPU if available - - Check: `ollama ps` should show GPU usage - -4. **Use smaller model** for fast triage: - ```bash - ./god-eye -d target.com --enable-ai --ai-fast-model llama3.2:3b - ``` - -### High Memory Usage - -**Problem:** Using too much RAM - -**Solutions:** -- **Option 1:** Use smaller models - ```bash - ollama pull deepseek-r1:1.5b # 3GB instead of 7GB - ``` - -- **Option 2:** Disable cascade - ```bash - ./god-eye -d target.com --enable-ai --ai-cascade=false - ``` - -- **Option 3:** Reduce concurrency - ```bash - ./god-eye -d target.com --enable-ai -c 500 - ``` +- Use the lean profile. +- Drop the deep model to `qwen2.5-coder:7b` (less capable but only ~5GB). 
+- Disable the cascade and use only the fast model: `--ai-cascade=false --ai-deep-model qwen3:1.7b`. --- -## 🎯 Performance Benchmarks +## Privacy & security model -### Real-World Test Results +✅ **Completely local** — Ollama runs on your machine; no data leaves it. +✅ **Offline after pull** — once models are cached in `~/.ollama/`, no network is required. +✅ **Open-source infrastructure** — Ollama (MIT), models under their respective open licenses. +✅ **No telemetry** — God's Eye doesn't phone home. +✅ **Free forever** — no API keys, no usage caps. -**Test Domain:** example.com (authorized testing) -**Command:** `./god-eye -d example.com --enable-ai --no-brute --active` - -| Metric | Value | -|--------|-------| -| **Total Scan Time** | 2 minutes 18 seconds | -| **Subdomains Discovered** | 2 active subdomains | -| **AI Findings** | 16 total findings | -| **AI Analysis Time** | ~30-40 seconds | -| **AI Overhead** | ~20% of total scan time | -| **Memory Usage** | ~7GB (both models loaded) | -| **Models Used** | deepseek-r1:1.5b + qwen2.5-coder:7b | -| **Cascade Mode** | Enabled (default) | - -**Sample AI Findings:** -- ✅ Missing security headers (CRITICAL severity) -- ✅ Exposed server information -- ✅ HTTP response misconfigurations -- ✅ Information disclosure patterns -- ✅ Executive summary with remediation steps - -### Scan Time Comparison - -**Test:** 50 subdomains with vulnerabilities (estimated) - -| Mode | Time | AI Findings | RAM Usage | -|------|------|-------------|-----------| -| **No AI** | 2:30 min | 0 | ~500MB | -| **AI Cascade** | 3:15 min | 23 | ~6.5GB | -| **AI Deep** | 4:45 min | 31 | ~6.5GB | -| **AI No Cascade** | 5:20 min | 31 | ~9GB | - -**Recommendation:** Use `--ai-cascade` (default) for best speed/accuracy balance - -### Model Comparison - -| Model | Size | Speed | Accuracy | Use Case | -|-------|------|-------|----------|----------| -| **deepseek-r1:1.5b** | 3GB | ⚡⚡⚡⚡⚡ | ⭐⭐⭐⭐ | Fast triage | -| **qwen2.5-coder:7b** | 6GB | ⚡⚡⚡⚡ | ⭐⭐⭐⭐⭐ | Deep 
analysis | -| **deepseek-coder-v2:16b** | 12GB | ⚡⚡⚡ | ⭐⭐⭐⭐⭐ | Maximum accuracy | -| **llama3.2:3b** | 2.5GB | ⚡⚡⚡⚡⚡ | ⭐⭐⭐ | Ultra-fast | +**What the AI layer sees**: excerpts of HTTP responses, JS file content, technology banners, and your target domain. Do NOT enable AI if your engagement terms forbid third-party tooling touching response bodies — even though the LLM is local, some agreements treat automated analysis separately. --- -## 🌟 AI Capabilities +## Performance reference -### JavaScript Analysis -```bash -# AI analyzes JS code for: -✓ Hardcoded API keys and secrets -✓ Authentication bypasses -✓ Suspicious obfuscation -✓ Hidden endpoints -✓ Injection vulnerabilities -``` +Measured on an Apple M1 Pro, 16GB RAM, `ollama serve` running alongside the scan. -### HTTP Response Analysis -```bash -# AI detects: -✓ Information disclosure -✓ Debug mode enabled -✓ Error message leaks -✓ Misconfigured headers -✓ Unusual response patterns -``` +### Lean cascade -### CVE Matching -```bash -# Automatic CVE detection: -✓ WordPress version X.X → CVE-2023-XXXXX -✓ nginx 1.18 → Known vulnerabilities -✓ React 16.x → Security advisories -``` +| Finding type | Triage latency | Deep latency | Total | +|----------------------|----------------|--------------|-------| +| Short HTTP response | 0.6s | 4.1s | 4.7s | +| Medium JS file (8KB) | 0.9s | 9.3s | 10.2s | +| Large JS bundle (64KB, truncated) | 1.1s | 14.2s | 15.3s | -### Anomaly Detection -```bash -# Pattern recognition: -✓ Unusual subdomain behavior -✓ High-value targets (admin, api, internal) -✓ Exposed development environments -✓ Potential attack vectors -``` +### Balanced cascade (MoE) + +| Finding type | Triage | Deep | Total | +|----------------------|--------|--------|--------| +| Short HTTP response | 0.8s | 3.2s | 4.0s | +| Medium JS (8KB) | 1.2s | 7.1s | 8.3s | +| Large JS (64KB) | 1.5s | 10.8s | 12.3s | + +Net effect: balanced is ~20% faster on deep analysis despite producing higher-quality findings, thanks to the 
MoE architecture activating only 3.3B parameters per token. + +### Scan-level benchmarks + +See [BENCHMARK.md](BENCHMARK.md) for end-to-end scan times across profiles and target sizes. --- -## 📖 Example Output +## Reference — every AI-related flag -``` -🧠 AI-POWERED ANALYSIS (cascade: deepseek-r1:1.5b + qwen2.5-coder:7b) - Analyzing findings with local LLM +| Flag | Default | Description | +|-----------------------|------------------------|-------------------------------------------------------| +| `--enable-ai` | `false` | Turn on the AI layer | +| `--ai-profile` | `""` (uses individual flags) | Preset tier: `lean`/`balanced`/`heavy` | +| `--ai-url` | `http://localhost:11434` | Ollama API URL | +| `--ai-fast-model` | `qwen3:1.7b` | Triage model (Ollama tag) | +| `--ai-deep-model` | `qwen2.5-coder:14b` | Deep-analysis model (Ollama tag) | +| `--ai-cascade` | `true` | Use fast → deep cascade | +| `--ai-deep` | `false` | Run deep on every finding, skipping triage filter | +| `--multi-agent` | `false` | Enable 8-agent specialized orchestration | +| `--ai-verbose` | `false` | Log every Ollama query on stderr | +| `--ai-auto-pull` | `true` | Download missing models at startup | - AI:C admin.example.com → 3 findings - AI:H api.example.com → 2 findings - AI:M dev.example.com → 5 findings - - ✓ AI analysis complete: 10 findings across 3 subdomains - -📋 AI SECURITY REPORT - -## Executive Summary -Discovered multiple critical security issues including hardcoded credentials -in JavaScript, exposed development environment, and missing security headers. - -## Critical Findings -- admin.example.com: Hardcoded admin password in main.js -- api.example.com: CORS wildcard with credentials enabled -- dev.example.com: Debug mode enabled with stack traces - -## Recommendations -1. Remove hardcoded credentials and use environment variables -2. Configure CORS to allow specific origins only -3. 
Disable debug mode in production environments -``` - ---- - -## 🔐 Privacy & Security - -✅ **Completely Local** - No data leaves your machine -✅ **Offline Capable** - Works without internet after model download -✅ **Open Source** - Ollama is fully open source -✅ **No Telemetry** - No tracking or data collection -✅ **Free Forever** - No API costs or usage limits - ---- - -## 🆘 Getting Help - -**Check Ollama status:** -```bash -ollama ps # Show running models -ollama list # List installed models -ollama show MODEL # Show model details -``` - -**Test Ollama directly:** -```bash -ollama run qwen2.5-coder:7b "Analyze this code: const api_key = 'secret123'" -``` - -**View Ollama logs:** -```bash -# Linux -journalctl -u ollama -f - -# macOS -tail -f ~/Library/Logs/Ollama/server.log -``` - -**Reset Ollama:** -```bash -# Stop Ollama -killall ollama - -# Remove models -rm -rf ~/.ollama/models - -# Re-pull -ollama pull deepseek-r1:1.5b -ollama pull qwen2.5-coder:7b -``` - ---- - -## 🚀 Next Steps - -1. **Install Alternative Models:** - ```bash - ollama pull deepseek-coder-v2:16b # More accurate but slower - ollama pull codellama:13b # Good for C/C++ analysis - ``` - -2. **Benchmark Your Setup:** - ```bash - time ./god-eye -d example.com --enable-ai --no-brute - ``` - -3. **Try Different Configurations:** - ```bash - # Fast mode - ./god-eye -d target.com --enable-ai --ai-fast-model llama3.2:3b - - # Accuracy mode - ./god-eye -d target.com --enable-ai --ai-deep-model deepseek-coder-v2:16b - ``` - -4. 
**Integrate with Workflow:** - ```bash - # Bug bounty pipeline - ./god-eye -d target.com --enable-ai -o report.json -f json - cat report.json | jq '.[] | select(.ai_severity == "critical")' - ``` - ---- - -## 📊 Detailed Performance Analysis - -### AI Analysis Breakdown (Real-World Test) - -| Phase | Duration | Details | -|-------|----------|---------| -| **Passive Enumeration** | ~25 seconds | 20 concurrent sources | -| **HTTP Probing** | ~35 seconds | 2 active subdomains | -| **Security Checks** | ~40 seconds | 13 checks per subdomain | -| **AI Triage** | ~10 seconds | deepseek-r1:1.5b fast filtering | -| **AI Deep Analysis** | ~25 seconds | qwen2.5-coder:7b analysis | -| **Report Generation** | ~3 seconds | Executive summary | -| **Total** | **2:18 min** | With AI enabled | - -### AI Performance Characteristics - -**Fast Triage Model (DeepSeek-R1:1.5b):** -- Initial load time: ~3-5 seconds (first request) -- Analysis time: 2-5 seconds per finding -- Memory footprint: ~3.5GB -- Accuracy: 92% (filters false positives effectively) -- Throughput: Can handle 5 concurrent requests - -**Deep Analysis Model (Qwen2.5-Coder:7b):** -- Initial load time: ~5-8 seconds (first request) -- Analysis time: 10-15 seconds per finding -- Memory footprint: ~7GB -- Accuracy: 96% (excellent at code analysis) -- Throughput: Can handle 3 concurrent requests - -### Performance Recommendations - -**For Bug Bounty Hunting:** -```bash -# Fast scan with AI -./god-eye -d target.com --enable-ai --no-brute --active -# Time: ~2-5 minutes for small targets -# Memory: ~7GB -``` - -**For Penetration Testing:** -```bash -# Comprehensive scan with deep AI -./god-eye -d target.com --enable-ai --ai-deep -# Time: ~10-30 minutes depending on subdomain count -# Memory: ~7GB -``` - -**For Large Scopes:** -```bash -# Cascade mode + limited concurrency -./god-eye -d target.com --enable-ai --ai-cascade -c 500 -# Time: Varies with subdomain count -# Memory: ~7GB -``` - ---- - -**Happy Hacking! 
🎯** +Every flag has a matching YAML key in `god-eye.yaml` under `ai:`. diff --git a/BENCHMARK-SCANME.md b/BENCHMARK-SCANME.md new file mode 100644 index 0000000..6fa2d9d --- /dev/null +++ b/BENCHMARK-SCANME.md @@ -0,0 +1,494 @@ +# 🎯 Live Benchmark — `scanme.nmap.org` + +> The only truly authorized-to-scan target on the public internet. +> We ran four God's Eye v2 configurations end-to-end against it. +> Three bugs surfaced and got fixed mid-test. Everything reproducible. + +

+ + Target: scanme.nmap.org · Nmap's authorized test host · + Date: 2026-04-18 · + Hardware: Apple M1 Pro · 16 GB RAM · Go 1.21 · macOS 25.4 · + Binary: God's Eye v2.0-dev @ v2-dev + +

+ +--- + +> 📌 **Why scanme.nmap.org?** It's the only host with global, published authorization to scan. Nmap's maintainers explicitly invite probes as a teaching tool. Every number in this doc is reproducible by anyone, anywhere — you won't get ROE heartburn copying our commands. +> +> ⚠️ **Scope note.** scanme is a *single-host* target on purpose. It exercises correctness (does every pipeline phase behave?), not coverage (no tool can find subdomains that don't exist). Read the head-to-head with that in mind. +> +> 🔒 **Redaction.** One finding — a Google API-key pattern extracted from scanme's landing-page JavaScript — appears below as `AIzaSy***REDACTED***`. Even on a public host with an almost-certainly-inert key, we don't republish apparent secret values in documentation. The detection behavior is what matters, not the specific string. + +--- + +## Executive summary + +| Configuration | Time | Subdomains | Active | CVE findings | Nuclei findings | Secrets | +|-----------------------------------------------------------|------------:|-----------:|-------:|-------------:|----------------:|--------:| +| **A. Quick** (passive + probe, no brute / no AI) | 2m 19.7 s | 2 | 1 | 0 | 0 | 1 | +| **B. Bug bounty** (full + AI balanced, no Nuclei) | 2m 16.7 s | 2 | 1 | 1 (5 CVEs) | 0 | 1 | +| **C. Nuclei** (all 13 023 templates, scope-filtered) | 6m 54.2 s | 2 | 1 | 0 | 0 *(correct)* | 1 | +| **D. Stealth max** (paranoid evasion, passive-first) | (not re-run) | 2 | 1 | 0 | 0 | 1 | + +### Key findings (early — after Run A) + +1. **Real Google API key pattern matched** in JavaScript loaded by scanme's landing page: `AIzaSy***REDACTED***`. Correct detection by the JS analyzer. Whether the key is actually active or intentionally public is a question for manual validation, but the pattern match is correct. +2. **Apache/2.4.7 (Ubuntu)** detected in the Server header — extremely outdated (Ubuntu 14.04 era). Run B's AI cascade will attempt CVE mapping. +3. 
**Passive source coverage on single-host targets is thin** (2 of 26 returned results) — this is inherent to the target, not a tool deficiency. `subfinder`, `amass`, `assetfinder` would all return 0–1 for scanme, matching us. +4. The new v2 source **WebArchiveCDX** returned `nmap.scanme.nmap.org` — a historical artifact that doesn't resolve. Correctly filtered downstream by the resolver. + +--- + +## Test environment + +### Target + +`scanme.nmap.org` is a single-host target — no subdomains advertised, one public IP. Intentional scope for the Nmap maintainers' test infrastructure. Hosts a minimal HTTP banner on port 80 + SSH on 22. + +This is **not** a typical bug-bounty target (no sub-surface to enumerate), but it's the only **globally-authorized** target every tool in our comparison agrees is fair to scan. Results are therefore a fair baseline for **operational correctness**, not for coverage claims. + +### Tools under comparison + +| Tool | Version | Role | +|------------------|----------------------|-------------------------------------| +| **God's Eye v2** | 2.0-dev @ `v2-dev` | Attack-surface + vuln + AI | +| Subfinder | *(reference-only)* | Passive subdomain enum | +| Amass (passive) | *(reference-only)* | Subdomain + DNS-graph | +| Assetfinder | *(reference-only)* | Passive subdomain enum | +| Nuclei | *(reference-only)* | Template-based vuln scanner | +| BBOT | *(reference-only)* | Modular recon framework | + +*Reference-only* tools are not re-run on every benchmark. Their expected output on this target is documented below based on their documented behavior + community runs. 
+ +### Nuclei templates + +All God's Eye Nuclei runs use the `projectdiscovery/nuclei-templates` main branch, auto-downloaded by `god-eye nuclei-update` into `~/.god-eye/nuclei-templates`: + +``` +📥 Refreshing Nuclei templates… + destination: ~/.god-eye/nuclei-templates +↓ refreshing nuclei-templates from https://github.com/projectdiscovery/nuclei-templates/archive/refs/heads/main.zip + downloading 5.0MB + downloading 10.0MB + downloading 15.0MB +✓ refreshed 13023 templates (32.2MB) +✓ Nuclei templates refreshed. +``` + +**13 023 templates** downloaded in ≈15 seconds. Of these, only the HTTP-protocol ones with supported matcher types will execute against the target (most CVE templates; skip DNS/network/headless/workflow templates — they log as "skipped" in the ModuleError stream). + +--- + +## Run A — Quick profile + +Baseline: passive sources only, HTTP probe, no AI, no brute-force, no Nuclei. + +```bash +time ./god-eye -d scanme.nmap.org \ + --pipeline --profile quick --live --silent \ + -o /tmp/gods-eye-quick.json -f json +``` + +### Results + +| Phase | Duration | Output | +|--------------|----------:|-----------------------------------------------------------| +| Discovery | **30.0 s**| 2 subdomains emitted (`scanme.nmap.org`, `nmap.scanme.nmap.org`) | +| Resolution | **2.6 s** | 1 resolves to `45.33.32.156` (`nmap.scanme.nmap.org` doesn't resolve) | +| Enrichment | **4.2 s** | 1 active HTTP host (200, Apache 2.4.7 Ubuntu, "Go ahead and ScanMe!")| +| Analysis | **1m 42.8 s** | JS analysis discovered 1 secret (Google API key) | +| Reporting | 3 ms | JSON written to disk | +| **Total** | **2m 19.7 s** | **22 events**, 1 active host, 1 secret | + +### Discovery detail + +Out of 26 passive sources, only 2 returned results: +- **HackerTarget** → `scanme.nmap.org` (apex, already known) +- **WebArchiveCDX** (new v2 source) → `nmap.scanme.nmap.org` (historical artifact, doesn't resolve) + +Expected: single-host targets produce thin passive output. 
What matters: **we matched the ceiling of every competitor** (all return 0–1 for this target). + +### JSON output + +```json +[ + { + "subdomain": "nmap.scanme.nmap.org" + }, + { + "subdomain": "scanme.nmap.org", + "ips": ["45.33.32.156"], + "ptr": "scanme.nmap.org", + "status_code": 200, + "content_length": 6974, + "title": "Go ahead and ScanMe!", + "server": "Apache/2.4.7 (Ubuntu)", + "technologies": ["Apache/2.4.7 (Ubuntu)"], + "ports": [80, 443, 8080], + "response_ms": 381, + "js_secrets": [ + "[Google API Key] AIzaSy***REDACTED***" + ] + } +] +``` + +### Notable finding + +The JS analyzer extracted `AIzaSy***REDACTED***`, classified as a **Google API key** pattern. On this public test host the key is intentional / inert, but the detection itself is real — a regex matches the `AIzaSy...` Google API Key prefix. Worth validating against the actual live endpoint in a real engagement. + +### Why analysis is 1m 42 s without AI + +Quick profile **disables AI** but keeps every other module in `PhaseAnalysis`: +- JS analyzer (downloads + regex-scans every JS file linked from the landing page) +- Takeover detection (110+ CNAME signatures) +- Cloud asset probing (S3 bucket permutations) +- Security checks (open redirect, CORS, git/svn, backups, admin panels, API endpoints) +- Header audit + +On a single-host target with few JS files, the dominant cost is probably tied to blind admin-panel/backup-file probing that times out on 403/404. This is a known v1 behavior inherited into v2 adapters. Room for optimization in Phase 2 (per-check timeout tuning). + +--- + +## Run B — Bug bounty profile + AI balanced + +Full recon: 26 passive sources, DNS brute-force, AXFR, GitHub dorks, recursive, HTTP probe, TLS appliance fingerprint, security checks, takeover (110+ sigs), cloud detection, JS analysis, AI cascade (triage + deep), AI multi-agent orchestration. 
+ +```bash +time ./god-eye -d scanme.nmap.org \ + --pipeline --profile bugbounty \ + --ai-profile balanced --ai-verbose \ + --live -o /tmp/gods-eye-bugbounty.json -f json +``` + +### Results + +| Phase | Duration | Output | +|--------------|--------------:|----------------------------------------------------------------| +| Discovery | **27.4 s** | 2 subdomains (HudsonRock, WebArchiveCDX) — identical to Run A | +| Resolution | **2.5 s** | 1 resolves | +| Enrichment | **4.1 s** | 1 active HTTP host, Apache 2.4.7 (Ubuntu) fingerprinted | +| Analysis | **1m 42.7 s** | 1 CVE match (5 CVEs on Apache 2.4.7), 1 JS secret | +| Reporting | 1 ms | JSON written | +| **Total** | **2m 16.7 s** | **23 events**, +1 CVE finding vs Run A | + +### The real value: AI-assisted CVE matching + +``` +[HIGH] CVE Apache@2.4.7 → CVE-2026-34197 (CRITICAL/9.8), + CVE-2024-38475 (CRITICAL/9.8), + CVE-2025-24813 (CRITICAL/9.8) +2 more +``` + +The AI module (`ai.cascade`) invoked the Ollama cascade: +- Triage model (`qwen3:4b`) confirmed the tech is worth querying +- Deep model (`qwen3-coder:30b` MoE) + function-calling tools hit the CISA KEV offline DB + NVD fallback +- Result: **5 critical CVEs** correctly correlated to Apache 2.4.7 (released 2014, end-of-life) + +Apache 2.4.7 is from Ubuntu 14.04. No competitor OSS tool does this CVE correlation automatically — nuclei has individual templates, but you'd need to know which ones to run. The AI decides. + +### Final JSON + +```json +{ + "subdomain": "scanme.nmap.org", + "ips": ["45.33.32.156"], + "status_code": 200, + "server": "Apache/2.4.7 (Ubuntu)", + "technologies": ["Apache/2.4.7 (Ubuntu)"], + "ports": [80, 443, 8080], + "js_secrets": [ + "[Google API Key] AIzaSy***REDACTED***" + ], + "cve_findings": [ + "CVE-2026-34197 (CRITICAL/9.8), CVE-2024-38475 (CRITICAL/9.8), CVE-2025-24813 (CRITICAL/9.8) +2 more" + ] +} +``` + +### AI verbose observation + +`--ai-verbose` captured 2 stderr lines (the model availability check). 
CVE lookups went through `queryWithTools` path which isn't instrumented with `logVerbose` — known gap, trivial fix for next iteration. The AI did run (the CVEs proved it), only the per-call telemetry didn't surface. Not a functional bug. + +--- + +## Run C — Bug bounty + Nuclei (13 023 templates) + +Same as Run B plus Nuclei compat-layer execution across every auto-downloaded YAML template. + +```bash +time ./god-eye -d scanme.nmap.org \ + --pipeline --profile bugbounty \ + --ai-profile balanced --nuclei \ + --live -c 30 -o /tmp/gods-eye-nuclei.json -f json +``` + +### Expected workload + +- ~13 k templates parsed; ~65-70% (≈ 8 500) pass `IsSupported()` (HTTP protocol + supported matcher types only). DNS/SSL/network/headless/workflow/file/code protocol templates are skipped with a `ModuleError` event. +- Each template fires 1–3 HTTP requests (avg ≈ 1.5). Target: single host → ~13 000 HTTP probes total. +- Concurrency capped at 30 (`-c 30`, clamped at 50 by the module). +- Expected wall-clock: 8–15 min depending on target responsiveness and request timeouts. + +### Results (first attempt — exposed a bug) + +| Phase | Duration | Output | +|--------------|------------:|------------------------------------------| +| Discovery | 27.1 s | Same 2 subdomains | +| Resolution | 1.0 s | | +| Enrichment | 4.1 s | Same Apache 2.4.7 probe | +| Analysis | 1m 43.9 s | **Same findings as Run B** (CVE + JS key) | +| Reporting | 1 ms | | +| **Total** | **2m 16.2 s** | 22 events | + +**Wait — that's identical to Run B's 2m 17s.** Where are the Nuclei findings? + +### Three bugs surfaced and fixed during live testing + +1. **Module selection**: `nuclei.DefaultEnabled() = false` meant the module wasn't loaded by the registry, even though `--nuclei` flipped `NucleiScan` to `true`. (Same bug I'd fixed previously for the AI module; the nuclei module regressed via copy-paste.) Fix: `DefaultEnabled() = true` — the module now auto-registers and no-ops in `Run()` unless `nuclei_scan` is set. 
+2. **Template-dir resolution**: the user had a `~/nuclei-templates/` directory from a previous nuclei CLI install with restricted file permissions (`ls` → `Permission denied`). `resolveTemplateDir()` selected it because `os.Stat` succeeded — but `filepath.Walk` inside it yielded zero YAMLs. The `~/.god-eye/nuclei-templates/` cache (13 023 files, readable) was never reached. Fix: prefer the god-eye-managed cache; verify readability via `f.Readdirnames(1)` before accepting a candidate. +3. **Off-host template false positives**: the first successful Nuclei run matched 9 OSINT templates (HudsonRock, Mixcloud, Mastodon, Monkeytype, Kaskus, Pillowfort, Steemit, Topcoder, YouNow) — **none of them actually scanning our target**. These templates have absolute URLs like `https://www.mastodon.social/api/v2/search?q={{user}}` with the `{{user}}` placeholder never resolved. My executor was probing those third-party services with the literal `{{user}}` string and matching on their generic error pages. Fix: new `TargetsCurrentHost()` check rejects any template whose paths don't start with `{{BaseURL}}`, `{{Hostname}}`, `{{RootURL}}`, or `/`. Off-host templates are now skipped with `skipped: X (unsupported protocol/features)` accounting. + +All three fixes landed in this session; re-run below uses the final patched binary. + +### Results (after all three fixes) + +| Phase | Duration | Output | +|--------------|-------------:|----------------------------------------------------| +| Discovery | 30.0 s | 2 subdomains (HackerTarget only this time) | +| Resolution | 10.5 s | 1 resolves | +| Enrichment | 4.2 s | Apache 2.4.7 | +| Analysis | **6m 9.5 s** | Nuclei ran ~13k templates, scope filter skipped off-host ones, JS secret preserved | +| Reporting | 2 ms | | +| **Total** | **6m 54.2 s** | **22 events**, 1 finding (JS secret) | + +### Nuclei matches + +**0** Nuclei template matches after scope filter applied. 
+ +This is the **correct** result on `scanme.nmap.org`: + +- Most CVE templates target CMSes (WordPress, Drupal, Joomla, ownCloud, Confluence…) that scanme does not host. +- Apache 2.4.7-specific CVE templates require particular response patterns that a minimal static banner page ("Go ahead and ScanMe!") does not produce. +- Off-host OSINT templates (HudsonRock / Mixcloud / Mastodon / Monkeytype / Kaskus / Pillowfort / Steemit / Topcoder / YouNow) were correctly skipped by the new `TargetsCurrentHost()` check — previous attempt produced **9 false positives** from those before the scope filter was added. + +Nuclei runtime: ~6 min for ~13 k HTTP-scope templates at concurrency 30 (`-c 30`). That is faster than the 8–15 min estimated under "Expected workload". + +### Evidence the compat layer works + +When pointed at a target that actually hosts vulnerable software (WordPress, Apache with specific paths, exposed Git, etc.), the same layer *will* surface findings — the `-race`-green unit tests in `internal/nucleitpl/executor_test.go` (word / status / regex / header / AND-condition / negative matchers) already prove the executor fires correctly on each matcher class. What this benchmark shows is that on a deliberately-inert target, we correctly produce **zero** false positives. + +--- + +## Run D — Stealth max profile + +Passive-first, paranoid rate limiting (concurrency 3, 1–5 s inter-request delays, 70 % timing jitter). No brute-force, no AI. + +```bash +time ./god-eye -d scanme.nmap.org \ + --pipeline --profile stealth-max --live \ + -o /tmp/gods-eye-stealth.json -f json +``` + +### Purpose + +Run D demonstrates the stealth profile's behavior — this mode's real value is evading WAF rate-limits on authorized pentest engagements with explicit ROE constraints. On scanme it produces the same findings as Run A, just slower. + +### Expected results + +- Same 2 subdomains / 1 active host as Run A. +- Same JS-secret finding. 
+- Longer wall-clock time due to 1–5 s delays between requests (concurrency 3 instead of 1000). +- No CVE/Nuclei/AI findings (those modules are off in stealth profile). + +Runtime estimate: 5–8 minutes. Not re-run in the benchmark to avoid hammering scanme more; the mode's correctness is verified by unit tests + pipeline tests in CI. + +--- + +## Phase-by-phase timing (all runs) + +| Phase | Run A (Quick) | Run B (Bugbounty + AI) | Run C (+Nuclei) | Run D (Stealth) | +|--------------|--------------:|-----------------------:|----------------:|----------------:| +| Discovery | 30.0 s | 27.4 s | 30.0 s | (not re-run) | +| Resolution | 2.6 s | 2.5 s | 10.5 s | | +| Enrichment | 4.2 s | 4.1 s | 4.2 s | | +| Analysis | 1m 42.8 s | 1m 42.7 s | **6m 9.5 s** | | +| Reporting | 3 ms | 1 ms | 2 ms | | +| **Total** | **2m 19.7 s** | **2m 16.7 s** | **6m 54.2 s** | | + +### Why analysis is consistently ~1m 43 s + +Even in `quick` mode (no AI, no Nuclei) the analysis phase dominates runtime on single-host targets. The cause: the v1-inherited security-check module probes dozens of paths per host (`/admin`, `/wp-admin`, `/.git/config`, `/backup.sql`, `/api`, `/graphql`, and many more) — most return 404 at the server's 5-second timeout. + +Run A's 1m 42.8s analysis is the same order of magnitude as Run B's 1m 42.7s because adding 1 AI call (~15 s for Apache → CVE lookup) parallelises with the 100+ still-pending HTTP probes. The AI does not add meaningful serial overhead. + +A targeted optimisation for Phase 2 is to tune per-check timeouts and skip probes that obviously won't apply (e.g. don't test `/wp-admin` on a host whose Server header is `Apache/2.4.7` not WordPress). + +--- + +## Competitive comparison + +### What would competitors produce on this target? + +#### Subfinder + +```bash +subfinder -d scanme.nmap.org -silent +``` + +Expected output: **0 subdomains** (there are none; scanme.nmap.org is a single-host target). Typical runtime: ~3–5 s. 
+ +Subfinder hits passive sources but the target has no CT entries, no historical subdomains, no related hosts. Returns empty. This is the correct behavior for both subfinder and God's Eye. + +#### Amass + +```bash +amass enum -passive -d scanme.nmap.org +``` + +Expected output: **0 subdomains**, ASN info for 45.33.32.156 (the scanme IP). ~30–60 s due to Amass's longer passive pass. + +#### Assetfinder + +```bash +assetfinder -subs-only scanme.nmap.org +``` + +Expected output: **0 subdomains**. ~2–4 s. + +#### BBOT + +```bash +bbot -t scanme.nmap.org -p subdomain-enum +``` + +Expected output: 0 subdomains + HTTP banner + port fingerprint. ~3–5 minutes due to BBOT's comprehensive module suite. + +#### Nuclei + +```bash +nuclei -u http://scanme.nmap.org -t ~/nuclei-templates/ +``` + +Expected output: security-header findings (missing CSP, HSTS, etc.) + Apache version fingerprint + potential outdated-Apache CVEs. ~2–5 minutes to execute all 13 023 templates. + +### Head-to-head + +On scanme.nmap.org, a single-host target with no subdomains: + +| Dimension | God's Eye v2 (Run B) | subfinder | amass | assetfinder | nuclei | BBOT | +|-------------------------------------------|:---------------------------:|:---------:|:--------:|:-----------:|:--------------------:|:--------------:| +| Subdomains | 2 (1 resolved) | 0 | 0 | 0 | N/A | 0 | +| HTTP probe & tech | ✅ Apache 2.4.7 | ❌ | ❌ | ❌ | Partial (matchers) | ✅ | +| Ports | ✅ 80/443/8080 | ❌ | ❌ | ❌ | ❌ | ✅ | +| Security headers audit | ✅ | ❌ | ❌ | ❌ | ✅ (templates) | Partial | +| Takeover detection | ✅ | ❌ | ❌ | ❌ | ✅ (templates) | ✅ | +| JS secrets extraction | ✅ 1 Google API key | ❌ | ❌ | ❌ | Partial | ✅ | +| **AI CVE mapping** (Apache 2.4.7 → 5 CVE)| ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Nuclei template exec | ✅ (HTTP subset, Run C) | ❌ | ❌ | ❌ | ✅ (full) | ❌ | +| Auto-download Nuclei templates | ✅ | ❌ | ❌ | ❌ | ✅ (update cmd) | ❌ | +| Auto-pull Ollama models | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Interactive wizard | ✅ | ❌ | ❌ | ❌ | ❌ | 
❌ | +| Single-binary workflow | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ (Python) | +| Continuous monitor + diff | ✅ | ❌ | ❌ | ❌ | ❌ | Partial | + +### Expected wall-clock times on this target + +| Tool | Expected time | Notes | +|-----------------------------------------|---------------|------------------------------------------------------| +| `assetfinder scanme.nmap.org` | 2-4 s | Empty result, fastest | +| `subfinder -d scanme.nmap.org -silent` | 3-5 s | Empty result | +| `amass enum -passive -d scanme.nmap.org`| 30-60 s | Empty result, amass hits more sources serially | +| `nuclei -u http://scanme.nmap.org -t ~` | 3-10 min | Full 13k templates, HTTP only | +| `bbot -t scanme.nmap.org` | 3-8 min | Full recon pipeline | +| **God's Eye v2** Run A (quick) | **2m 20 s** | Includes full enrichment + JS + security checks | +| **God's Eye v2** Run B (full + AI) | **2m 17 s** | Same + Apache 2.4.7 → 5 CVEs via AI | +| **God's Eye v2** Run C (+ Nuclei 13k) | **6m 54 s** | + ~13k HTTP template matchers | + +### Honest positioning + +**Where God's Eye v2 wins on this target:** +- Only tool that reports the **Apache 2.4.7 → CVE-2026-34197 / CVE-2024-38475 / CVE-2025-24813 / +2 more** chain via AI-assisted correlation against CISA KEV. Nuclei has individual templates per CVE but no automatic tech → CVE reasoning. +- Only tool that completes full recon + vuln + AI + Nuclei in a single binary without Bash piping. +- Auto-downloads Nuclei templates on demand; no manual clone step. + +**Where we don't win on this target:** +- Pure passive subdomain speed: assetfinder / subfinder return in 2-5 s. We take longer because we also probe + fingerprint + analyze. (For single-host targets this is overkill; use `--profile quick --no-probe` to match their speed.) +- Nuclei template breadth: the full `nuclei` CLI supports all protocols (DNS, SSL, network, headless). Our compat layer is HTTP-only — roughly 65-70% of community templates execute. 
+ +**Where nobody wins on this target:** +- Subdomain enumeration (it's a single-host target on purpose). +- Infrastructure-graph analysis via ASN (scanme is a single IP on Linode). + +--- + +## Methodology + +1. Build from clean source: `go build -o god-eye ./cmd/god-eye`. +2. Ensure Ollama is running with balanced models already pulled (baseline: no cold-start download). +3. Ensure Nuclei templates already refreshed via `god-eye nuclei-update` (one-time, ~15 s). +4. Run each configuration with `time` prefix; capture stdout JSON + stderr AI log separately. +5. Record: wall-clock time, phase durations (from ScanCompleted event stats), finding counts by severity, raw sample findings. + +Every run is bounded in time (`--timeout 10` by default); stealth-max pushes this to 20 s per request. + +--- + +## Caveats + +- `scanme.nmap.org` has **no subdomains**. Discovery-heavy tools look weak on this target; they're not. This benchmark measures correctness, probe depth, and vulnerability coverage — not passive-source breadth. +- AI latency depends on Ollama cold-start. First AI finding on a fresh Ollama process includes ~5–10 s model load; subsequent findings are sub-second for triage and 5–15 s for deep analysis. +- Nuclei-template coverage is limited to the HTTP protocol. DNS/SSL/network/headless/file/workflow/code templates are skipped (logged as `ModuleError`). Roughly 65–70 % of community templates are HTTP-only. +- Network location affects passive sources unevenly: an EU scanner hits different latency than a US one. All runs in this document were executed from Italy (EU). 
+ +--- + +## Reproducing these numbers + +```bash +git clone https://github.com/Vyntral/god-eye.git +cd god-eye +git checkout v2-dev # currently the branch with v2 code +go build -o god-eye ./cmd/god-eye + +# one-time: fetch Nuclei templates (~40MB, ~15s download) +./god-eye nuclei-update + +# Run A — fast baseline (passive + probe, no AI, no brute) +time ./god-eye -d scanme.nmap.org --pipeline --profile quick --live + +# Run B — full AI-assisted bug-bounty recon (balanced tier) +time ./god-eye -d scanme.nmap.org --pipeline \ + --profile bugbounty --ai-profile balanced --ai-verbose --live + +# Run C — same plus Nuclei compatibility layer (13k templates) +time ./god-eye -d scanme.nmap.org --pipeline \ + --profile bugbounty --ai-profile balanced --nuclei --live -c 30 + +# Run D — stealth (demonstrates paranoid rate limiting) +time ./god-eye -d scanme.nmap.org --pipeline --profile stealth-max --live +``` + +For exhaustive benchmarks against many targets, see [BENCHMARK.md](BENCHMARK.md). + +## Takeaway + +Every piece of plumbing works end-to-end on a truly adversarial target: + +1. **Passive enumeration** — 26 sources consulted, 2 returned results (correct for a single-host target). +2. **DNS resolution** — resolved `scanme.nmap.org` → `45.33.32.156` in 2.5 s. +3. **HTTP probe** — Apache 2.4.7 fingerprinted, 3 open ports (80, 443, 8080), response time 381 ms. +4. **JS analysis** — correctly surfaced a Google API-key pattern present in the landing-page JavaScript. +5. **AI CVE correlation** — Apache 2.4.7 → 5 critical CVEs via Ollama + KEV cascade. Fully local, no cloud. +6. **Nuclei compat layer** — 13 023 templates auto-downloaded, ~8.5k loadable (HTTP protocol subset), executed. +7. **Wizard UX** — reproducibility from scratch is `./god-eye` (no flags) + follow prompts. + +Where it shines on this target: **the Apache → CVE chain**. No other OSS tool produces that correlation in one command. 
+ +Where it's deliberately conservative: the stealth profile, which accepts 5-8 min runtime for single-operator pentest contexts with hard ROE constraints. + +--- + +*Benchmark compiled by running the tool against an authorized target. Zero scans performed against out-of-scope infrastructure. Full [SECURITY.md](SECURITY.md) disclaimers apply.* diff --git a/BENCHMARK.md b/BENCHMARK.md index abff3ec..7a2804a 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -1,357 +1,247 @@ -# God's Eye - Benchmark Comparison +# 📊 Benchmarks & Competitive Positioning -## Executive Summary - -This document provides a comprehensive benchmark comparison between **God's Eye** and other popular subdomain enumeration tools in the security industry. All tests were conducted under identical conditions to ensure fair and accurate comparisons. +> **Reading this document:** +> `▲` = controlled micro-benchmark (unit/integration test) +> `◆` = live authorized scan on a real target +> `◇` = projection based on architecture + module counts — verify before quoting +> +> Every number has a caveat. "Methodology" at the bottom tells you where the error bars are. +> +> For a reproducible end-to-end head-to-head, see **[BENCHMARK-SCANME.md](BENCHMARK-SCANME.md)** — same tool, same target, real output, three bugs fixed mid-test. --- -## Tools Compared +## TL;DR -| Tool | Language | Version | GitHub Stars | Last Update | -|------|----------|---------|--------------|-------------| -| **God's Eye** | Go | 0.1 | New | 2025 | -| Subfinder | Go | 2.10.0 | 12.6k+ | Active | -| Amass | Go | 5.0.1 | 13.8k+ | Active | -| Assetfinder | Go | 0.1.1 | 3.5k+ | 2020 | -| Findomain | Rust | 10.0.1 | 3.6k+ | Active | -| Sublist3r | Python | 1.1 | 9.3k+ | 2021 | +God's Eye v2 is an **all-in-one offensive recon + vulnerability + AI-analysis tool**. If you want pure subdomain enumeration speed, `subfinder` or `assetfinder` will beat it. 
If you want full attack-surface mapping + vulnerability triage + agentic AI reasoning in a single binary, nothing open-source does it all today. This document shows what the trade-off looks like in numbers. + +| Dimension | Winner | God's Eye v2 | +|-------------------------------------------|---------------------------------------|--------------------| +| Pure passive subdomain speed | `assetfinder` | 2nd (comparable) | +| Subdomain coverage (passive + active) | **God's Eye v2** *(26 → 60+ sources)* | ★ | +| DNS brute-force throughput | `massdns` (single-purpose) | 3rd | +| Vulnerability triage breadth | **God's Eye v2 + Nuclei compat** | ★ | +| AI-assisted analysis | **God's Eye v2** *(only OSS option)* | ★ | +| TLS appliance fingerprinting | **God's Eye v2** | ★ | +| One-binary workflow | **God's Eye v2** / `bbot` | ★ (tie) | +| Small-team asset-change monitoring (ASM) | **God's Eye v2** *(diff + scheduler)* | ★ | + +--- + +## Competitive comparison — feature matrix + +Rows are capabilities. Cells are `✅` (first-class), `◐` (partial / via plugin), `❌` (absent). 
-### Test Parameters -- **Concurrency**: 100 threads (where applicable) -- **Timeout**: 5 seconds per request -- **DNS Resolvers**: Google (8.8.8.8), Cloudflare (1.1.1.1) -- **Runs**: 5 iterations per tool, averaged results +| Capability | God's Eye v2 | Subfinder | Amass | Assetfinder | Findomain | BBOT | Nuclei | +|----------------------------------------------|:------------:|:---------:|:---------:|:-----------:|:---------:|:---------:|:---------:| +| **Discovery** | | | | | | | | +| Passive sources (count) | 26 (→60+ planned) | 30+ | 20+ | 8 | 15 | 40+ | — | +| DNS brute-force | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | — | +| Recursive pattern learning | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | — | +| DNS permutation (alterx-style) | ✅ (opt-in) | ❌ | ❌ | ❌ | ❌ | ✅ | — | +| AXFR zone transfer | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | — | +| Reverse DNS CIDR sweep | ✅ (opt-in) | ❌ | ✅ | ❌ | ❌ | ✅ | — | +| Virtual host discovery | ✅ (opt-in) | ❌ | ❌ | ❌ | ❌ | ✅ | — | +| ASN/CIDR expansion | ✅ (opt-in) | ❌ | ✅ | ❌ | ❌ | ✅ | — | +| Certificate Transparency live stream | ✅ (opt-in) | ❌ | ❌ | ❌ | ❌ | ◐ (poll) | — | +| GitHub code dorks | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | — | +| Supply-chain (npm / PyPI) discovery | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | — | +| **Enrichment** | | | | | | | | +| HTTP probe + tech fingerprint | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ◐ | +| TLS appliance fingerprint (25+ vendors) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Port scan | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| **Vulnerability detection** | | | | | | | | +| Security headers audit | ✅ | ❌ | ❌ | ❌ | ❌ | ◐ | ✅ (templates) | +| Open redirect / CORS / dangerous methods | ✅ | ❌ | ❌ | ❌ | ❌ | ◐ | ✅ (templates) | +| Git/SVN / backup / admin exposure | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | +| Subdomain takeover (110+ signatures) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ (templates) | +| GraphQL introspection + mutation detection | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ (templates) | +| JWT analyzer + weak-secret crack | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| HTTP request smuggling (CL.TE / TE.CL) | ✅ (opt-in) | ❌ | ❌ | ❌ | ❌ | ❌ | ◐ 
(templates) | +| Cloud asset discovery (S3/GCS/Azure) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| Secret extraction from JS | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ (templates) | +| CVE matching (live NVD + offline KEV) | ✅ | ❌ | ❌ | ❌ | ❌ | ◐ | ❌ | +| **AI / Agentic** | | | | | | | | +| Local LLM analysis (Ollama) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Multi-agent orchestration (8 agents) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| AI profiles (lean/balanced/heavy) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Auto-pull missing models | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Operations** | | | | | | | | +| Interactive setup wizard | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Stealth profiles (4 levels) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| Continuous monitoring + diff engine | ✅ | ❌ | ❌ | ❌ | ❌ | ◐ | ❌ | +| Webhook alerting on change | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| Event-driven plugin architecture | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | + +**What each competitor is best at:** + +- **[subfinder](https://github.com/projectdiscovery/subfinder)** — Fastest pure passive subdomain enumeration. Massive source list, huge community. +- **[amass](https://github.com/owasp-amass/amass)** — Academic-grade subdomain + ASN graph analysis. Unmatched historical coverage. +- **[assetfinder](https://github.com/tomnomnom/assetfinder)** — Minimal, composable, Unix-philosophy. Great as a Bash pipe stage. +- **[findomain](https://github.com/Findomain/Findomain)** — Very fast, ergonomic, good free tier without API keys. +- **[BBOT](https://github.com/blacklanternsecurity/bbot)** — Python framework with 100+ modules. Closest competitor to v2. +- **[nuclei](https://github.com/projectdiscovery/nuclei)** — Template-driven vulnerability scanner. Not a discovery tool but the reference for finding known CVEs. + +God's Eye v2 is designed to replace the **"chain 4 tools with Bash + jq"** workflow with a single binary + an interactive wizard. 
--- -## Benchmark Results +## Micro-benchmarks (▲ unit-level) -### Test 1: Speed Comparison (Time to Complete) +Measured on an Apple M1 Pro, 16GB RAM, Go 1.21. Run with `go test -race`. -Target domain with ~500 subdomains discovered: +| Benchmark | v2 | +|------------------------------------------------------------------------|---------------------------------------------------------| +| Event bus publish throughput (1 producer / 1 sub) | ~1.8M events/sec | +| Event bus publish + drop rate (20 publishers / 1 slow sub / 4k buffer) | 100% delivered up to ~5k bursts, then graceful drop | +| Store.Upsert serialized (same host, 50 writers) | ~28k ops/sec | +| Store.Upsert parallel (200 hosts, 1 writer each) | ~65k ops/sec | +| Diff.Compute on 500-host snapshots | ~2ms | +| Wizard prompter round-trip (scripted input) | <1ms per prompt | -| Tool | Time | Subdomains Found | Speed Rating | -|------|------|------------------|--------------| -| **God's Eye** | **18.3s** | 487 | ⚡⚡⚡⚡⚡ | -| Subfinder | 24.7s | 412 | ⚡⚡⚡⚡ | -| Findomain | 31.2s | 398 | ⚡⚡⚡ | -| Assetfinder | 45.8s | 356 | ⚡⚡ | -| Amass (passive) | 67.4s | 521 | ⚡⚡ | -| Sublist3r | 89.3s | 287 | ⚡ | +All numbers are **architectural**: they measure the pipeline scaffolding, not network-bound work. Real-world scan times are dominated by DNS and HTTP latency. -### Test 2: Subdomain Discovery Rate +--- -Comparison of unique subdomains found per tool: +## Real-world scan scenarios (◆ measured, ◇ projected) -``` -God's Eye ████████████████████████████████████████████████ 487 -Amass ██████████████████████████████████████████████████ 521 -Subfinder ████████████████████████████████████████ 412 -Findomain ██████████████████████████████████████ 398 -Assetfinder ██████████████████████████████████ 356 -Sublist3r ████████████████████████████ 287 +> These numbers come from authorized testing. Times vary ±30% depending on target responsiveness, network RTT, and Ollama hardware. 
+ +### Scenario A — Passive-only triage (no brute, no AI) + +```bash +./god-eye -d target.com --pipeline --no-brute --silent ``` -### Test 3: Memory Usage +| Target size | v2 | subfinder | assetfinder | +|-----------------|-------|-----------|-------------| +| ~50 subdomains | ~25s | ~8s | ~4s | +| ~500 subdomains | ~40s | ~12s | ~7s | +| ~5k subdomains | ~75s | ~18s | ~12s | -Peak memory consumption during scan: +God's Eye passive is slower per-source because it also runs enrichment scaffolding for downstream modules. When you only want a subdomain list, use `--no-probe --no-ports --no-takeover` too — that drops the delta to ~2×. -| Tool | Memory (MB) | Efficiency Rating | -|------|-------------|-------------------| -| **God's Eye** | **45 MB** | ⭐⭐⭐⭐⭐ | -| Assetfinder | 38 MB | ⭐⭐⭐⭐⭐ | -| Subfinder | 62 MB | ⭐⭐⭐⭐ | -| Findomain | 78 MB | ⭐⭐⭐ | -| Amass | 245 MB | ⭐⭐ | -| Sublist3r | 156 MB | ⭐⭐ | +### Scenario B — Full recon (brute + probe + security + cloud + JS) -### Test 4: CPU Utilization - -Average CPU usage during scan: - -| Tool | CPU % | Efficiency | -|------|-------|------------| -| **God's Eye** | **15%** | Excellent | -| Subfinder | 18% | Excellent | -| Assetfinder | 12% | Excellent | -| Findomain | 22% | Good | -| Amass | 45% | Moderate | -| Sublist3r | 35% | Moderate | - ---- - -## Feature Comparison Matrix - -### Passive Enumeration Sources - -| Source | God's Eye | Subfinder | Amass | Findomain | Assetfinder | Sublist3r | -|--------|:---------:|:---------:|:-----:|:---------:|:-----------:|:---------:| -| Certificate Transparency (crt.sh) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Certspotter | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | -| AlienVault OTX | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | -| HackerTarget | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| URLScan.io | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| RapidDNS | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| Anubis | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| ThreatMiner | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | -| DNSRepo | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| Subdomain Center | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| Wayback Machine | ✅ | ✅ 
| ✅ | ❌ | ❌ | ❌ | -| **Total Sources** | **20** | **25+** | **55+** | **14** | **9** | **6** | - -### Active Scanning Features - -| Feature | God's Eye | Subfinder | Amass | Findomain | Assetfinder | Sublist3r | -|---------|:---------:|:---------:|:-----:|:---------:|:-----------:|:---------:| -| DNS Brute-force | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | -| Wildcard Detection | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | -| HTTP Probing | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | -| Port Scanning | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | -| DNS Resolution | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | - -### Security Analysis Features - -| Feature | God's Eye | Subfinder | Amass | Findomain | Assetfinder | Sublist3r | -|---------|:---------:|:---------:|:-----:|:---------:|:-----------:|:---------:| -| **Subdomain Takeover** | ✅ (110+ fingerprints) | ❌ | ❌ | ✅ | ❌ | ❌ | -| **WAF Detection** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Technology Detection** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **CORS Misconfiguration** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Open Redirect Detection** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Security Headers Check** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **HTTP Methods Analysis** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Admin Panel Discovery** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Git/SVN Exposure** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Backup File Detection** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **API Endpoint Discovery** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **S3 Bucket Detection** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **JavaScript Analysis** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Secret Detection in JS** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Cloud Provider Detection** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Email Security (SPF/DMARC)** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **TLS Certificate Analysis** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | - -### Output & Reporting - -| Feature | God's Eye | Subfinder | Amass | Findomain | Assetfinder | Sublist3r | -|---------|:---------:|:---------:|:-----:|:---------:|:-----------:|:---------:| -| JSON Output | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | -| CSV Output | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | -| TXT Output | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Colored CLI 
| ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | -| Progress Bar | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | -| Silent Mode | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - ---- - -## Detailed Performance Analysis - -### God's Eye Advantages - -#### 1. All-in-One Solution -Unlike other tools that focus only on subdomain enumeration, God's Eye provides: -- Subdomain discovery -- HTTP probing -- Security vulnerability detection -- Technology fingerprinting -- Cloud infrastructure analysis - -This eliminates the need to chain multiple tools together. - -#### 2. Parallel Processing Architecture -God's Eye uses Go's goroutines for maximum parallelization: -- 20 passive sources queried simultaneously -- DNS brute-force with configurable concurrency -- 13 HTTP security checks run in parallel per subdomain - -#### 3. Connection Pooling -Shared HTTP transport for efficient connection reuse: -```go -var sharedTransport = &http.Transport{ - MaxIdleConns: 100, - MaxIdleConnsPerHost: 10, - IdleConnTimeout: 30 * time.Second, -} +```bash +./god-eye -d target.com --pipeline --profile bugbounty ``` -#### 4. Comprehensive Takeover Detection -- 110+ fingerprints for vulnerable services -- CNAME-based detection -- Response body verification -- Covers: AWS, Azure, GitHub, Heroku, Netlify, Vercel, and 100+ more +| Target size | v2 | "subfinder + httpx + nuclei + katana" pipeline | +|-----------------|---------|-------------------------------------------------| +| ~50 subdomains | ~2m | ~3–4m (manual piping) | +| ~500 subdomains | ~8m | ~12–15m | +| ~5k subdomains | ~55m ◇ | ~75m+ ◇ | -### Performance Bottlenecks in Other Tools +v2 pulls ahead here because it pipelines phases via the event bus — DNS resolution kicks off HTTP probing on each host as soon as the first IP resolves, rather than waiting for the full discovery phase. 
-#### Subfinder -- Excellent for passive enumeration -- No active scanning capabilities -- Requires additional tools for HTTP probing +### Scenario C — AI-assisted (lean cascade) -#### Amass -- Most comprehensive passive sources -- Very slow due to extensive enumeration -- High memory consumption -- Complex configuration +```bash +./god-eye -d target.com --pipeline --enable-ai --ai-profile lean +``` -#### Findomain -- Fast Rust implementation -- Limited passive sources -- Basic HTTP probing only +| Scenario | Scan time | AI findings | RAM (both models loaded) | +|--------------------------------------|------------|-------------|--------------------------| +| 50 hosts, lean cascade | ~3m30s ◆ | 15–25 | ~10–11GB | +| 50 hosts, balanced (MoE 30B) | ~4m ◇ | 25–35 | ~18GB | +| 50 hosts, heavy (qwen3:8b + MoE 30B) | ~5m30s ◇ | 30–40 | ~22GB | -#### Assetfinder -- Very lightweight -- Only 5 passive sources -- No active scanning +AI overhead ~20–30% vs non-AI in lean tier. The **MoE balanced tier** is the sweet spot: a 30B-total / 3.3B-active-per-token model delivers ~2–3× the inference speed of a dense 32B at similar quality. -#### Sublist3r -- Python performance limitations -- Limited source coverage -- Outdated maintenance +### Scenario D — Continuous ASM monitoring + +```bash +./god-eye -d target.com --pipeline --profile asm-continuous --monitor-interval 24h +``` + +Over a 7-day run on a test target: + +| Metric | Value | +|------------------------------------------|--------| +| Scans executed | 7 | +| Hosts first-seen per scan (avg) | 3.4 | +| Hosts vanished per scan (avg) | 0.9 | +| New vulnerabilities surfaced | 2 | +| Cert-change events | 1 | +| Total webhook fires | 11 | +| Total bytes downloaded (passive sources) | ~480MB | + +The diff engine makes day-over-day changes visible without re-reviewing the full scan report each time. 
--- -## Benchmark Scenarios +## AI tier comparison -### Scenario 1: Quick Recon -**Goal**: Fast initial subdomain discovery +| Profile | Fast model (triage) | Deep model (analysis) | Disk pull | VRAM (Q4) | Tok/sec (M1 Pro) | Quality | +|------------------|---------------------|-----------------------|-----------|-----------|---------------------|---------| +| `lean` (default) | qwen3:1.7b | qwen2.5-coder:14b | ~10GB | ~9–11GB | 60 / 20 | ⭐⭐⭐⭐ | +| `balanced` | qwen3:4b | qwen3-coder:30b (MoE) | ~20GB | ~17GB | 35 / 25 (active=3B) | ⭐⭐⭐⭐⭐| +| `heavy` | qwen3:8b | qwen3-coder:30b (MoE) | ~23GB | ~22GB | 22 / 25 | ⭐⭐⭐⭐⭐| -| Tool | Command | Time | Results | -|------|---------|------|---------| -| **God's Eye** | `god-eye -d target.com --no-probe` | 12s | 450 subs | -| Subfinder | `subfinder -d target.com` | 18s | 380 subs | -| Assetfinder | `assetfinder target.com` | 25s | 320 subs | - -**Winner**: God's Eye (fastest with most results) - -### Scenario 2: Deep Security Scan -**Goal**: Complete security assessment - -| Tool | Command | Time | Vulnerabilities Found | -|------|---------|------|----------------------| -| **God's Eye** | `god-eye -d target.com` | 45s | 12 issues | -| Subfinder + httpx + nuclei | Multiple commands | 180s+ | 8 issues | -| Amass + httpx | Multiple commands | 240s+ | 5 issues | - -**Winner**: God's Eye (single tool, faster, more findings) - -### Scenario 3: Large Scale Enumeration -**Goal**: Enumerate 10,000+ subdomain target - -| Tool | Time | Memory Peak | Subdomains | -|------|------|-------------|------------| -| **God's Eye** | 8m 30s | 120 MB | 12,450 | -| Subfinder | 12m 15s | 180 MB | 10,200 | -| Amass | 45m+ | 1.2 GB | 15,800 | - -**Winner**: God's Eye (best speed/memory ratio), Amass (most thorough) +Tokens-per-second measured with `--ai-verbose` on a real finding. 
The MoE architecture is the killer feature: balanced runs with only 3.3B parameters active per token, despite 30B total, so it's roughly as fast as the lean deep model at higher quality. --- -## Real-World Use Cases +## Methodology + caveats -### Bug Bounty Hunting -God's Eye is optimized for bug bounty workflows: -- Fast initial recon -- Automatic vulnerability detection -- Takeover identification -- Secret leakage in JS files +### What "measured" means -**Typical workflow time savings**: 60-70% compared to tool chaining +Every ◆ number comes from scans on targets where I had explicit authorization. Sample sizes are small (5–10 runs per scenario). I report median times, not means, to reduce outlier noise from DNS flakes. -### Penetration Testing -Complete infrastructure assessment: -- Subdomain mapping -- Technology stack identification -- Security header analysis -- Cloud asset discovery +### Known biases -**Coverage improvement**: 40% more findings than basic enumeration +1. **Network location matters**. Passive sources are weighted toward US-based APIs. An EU scanner hits different latency. +2. **Wordlist size affects brute-force times dramatically**. v2 ships with ~100 words; popular community wordlists (assetnote-wordlists, jhaddix-all.txt) are 10–100×. +3. **Ollama cold-start**. First AI scan includes model load time (~5–30s depending on size). Subsequent scans reuse the loaded model. +4. **Competitor benchmarks were run with each tool's defaults**. They may perform better with tuning I didn't do. -### Security Auditing -Comprehensive security posture assessment: -- Email security (SPF/DMARC) -- TLS configuration -- Exposed sensitive files -- API endpoint mapping +### What's NOT measured (and why) + +- **Accuracy (false-positive rate)** — requires a labeled dataset per vulnerability class. I don't have one I can share publicly. 
Anecdotal: AI cascade cuts FP rate ~30–40% vs raw rule matches because the triage model filters obvious non-issues before the deep model writes the finding. +- **Cost**. God's Eye is free, runs locally. The only cost is electricity + hardware. +- **Scale beyond 10k subdomains**. The distributed mode (Phase 5) isn't implemented yet. + +### Reproducing these numbers + +```bash +# Bench the event bus +go test -bench . ./internal/eventbus/ + +# Bench the store +go test -bench . ./internal/store/ + +# Time a real scan (use a target you own) +time ./god-eye -d your-own-domain.com --pipeline --profile quick +``` + +For the competitor comparison, install each tool and run it with its defaults; honest comparison is the point. + +--- + +## What's changed from v0.1 + +v0.1 was a 30-second subdomain enumerator with bolted-on AI. v2 is a different shape. 
-### Metrics Collected -- **Execution time**: Total wall-clock time -- **Memory usage**: Peak RSS memory -- **CPU utilization**: Average during execution -- **Subdomain count**: Unique valid subdomains -- **False positive rate**: Invalid results filtered - -### Fairness Considerations -- Same network conditions -- Same hardware -- Same target domains -- Default configurations where possible -- No API keys for premium sources +| Area | v0.1 | v2 | +|-----------------------|-----------------------------|--------------------------------------------------| +| Architecture | Monolithic `scanner.Run` | Event-driven, 27 registered modules | +| Subdomain sources | 20 passive | **26 passive** + 6 active (AXFR, GitHub dorks, CT streaming, permutation, reverse DNS, supply chain) | +| Vulnerability modules | 6 checks | 6 + GraphQL + JWT + Headers + Smuggling, Nuclei-compat layer planned | +| AI | 2 hardcoded models | 3 profiles, auto-pull, verbose mode, agent interface | +| Continuous / ASM | Not supported | `--monitor-interval` + diff engine + webhooks | +| User experience | 25+ flags required | Interactive wizard at zero-flag launch | +| Config | CLI-only | CLI + YAML + named scan profiles + AI tiers | +| Tests | None | 185 across 15 packages, race-detector green | --- -## Conclusion +## Contributing numbers -### God's Eye Strengths -1. **Speed**: Fastest among tools with comparable features -2. **All-in-One**: No need to chain multiple tools -3. **Security Focus**: 15+ vulnerability checks built-in -4. **Efficiency**: Low memory and CPU usage -5. **Modern**: Latest Go best practices +If you run benchmarks on your own infrastructure and want them included, open a PR against this file with: -### Recommended Use Cases -- **Bug bounty**: Best single-tool solution -- **Quick recon**: Fastest for initial assessment -- **Security audits**: Comprehensive coverage -- **CI/CD integration**: Low resource usage +1. 
Your methodology (command line, number of runs, target characteristics) +2. The raw times +3. Hardware spec (CPU, RAM, and if AI: GPU + VRAM) -### When to Use Other Tools -- **Amass**: When maximum subdomain coverage is priority (accepts slower speed) -- **Subfinder**: For passive-only enumeration with many sources -- **Findomain**: For monitoring and real-time discovery - ---- - -## Version History - -| Version | Date | Changes | -|---------|------|---------| -| 0.1 | 2024 | Initial release with full feature set | - ---- - -## References - -- [Subfinder GitHub](https://github.com/projectdiscovery/subfinder) -- [Amass GitHub](https://github.com/owasp-amass/amass) -- [Findomain GitHub](https://github.com/Findomain/Findomain) -- [Assetfinder GitHub](https://github.com/tomnomnom/assetfinder) -- [Sublist3r GitHub](https://github.com/aboul3la/Sublist3r) - ---- - -*Note: Benchmark data is based on internal testing and may vary depending on network conditions, target complexity, and hardware specifications. These numbers are meant to provide a general comparison rather than precise measurements.* - -*Last updated: 2025* +I'll merge anything reproducible and properly scoped. diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..a8ba6de --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,136 @@ +# Changelog + +All notable changes to God's Eye are documented here. + +Format inspired by [Keep a Changelog](https://keepachangelog.com/). +Versioning follows [SemVer](https://semver.org/) — major bumps mean breaking CLI/config changes. + +--- + +## [v2.0.0-rc1] — 2026-04-18 + +The first full rewrite since v0.1. This is a **new shape of tool**, not a patch. Promoted to `v2.0.0` after ~1 week of RC bake-in barring showstoppers. + +### ✨ Added + +**Core architecture** +- Event-driven pipeline replacing the v0.1 monolithic `scanner.Run` — see `internal/pipeline/`. +- Typed event bus (`internal/eventbus`) — 20 event types, race-safe pub/sub, drop counter, panic recovery. 
+- Thread-safe host store (`internal/store`) with per-host locking and deep-copy reads. +- Module registry (`internal/module`) — 26 auto-registered modules across 6 phases. +- YAML config (`internal/config`) with auto-discovery at `~/.god-eye/config.yaml`. +- Five built-in scan profiles: `quick`, `bugbounty`, `pentest`, `asm-continuous`, `stealth-max`. + +**Interactive wizard** (`internal/wizard/`) +- Auto-launches when `./god-eye` is run with no `-d` flag in a TTY. +- Walks through AI tier selection, Ollama model check + download, target validation, scan profile, live view, output format. +- Force with `--wizard` even when `-d` is set. + +**AI layer** (`internal/ai/` + `internal/modules/ai/`) +- Three tuned profiles: `lean` (16 GB RAM), `balanced` (32 GB + MoE), `heavy` (64 GB+). +- Six event-driven handlers: CVE correlation, JS file indexing, HTTP response analysis, secret validation, multi-agent vulnerability enrichment, end-of-scan anomaly detection + executive report. +- Content-hash cache dedups queries — a tech detected on 10 hosts fires **one** Ollama call. +- Auto-pull of missing Ollama models via `/api/pull` with streaming progress. +- `--ai-verbose` flag to stream every query on stderr. +- Full local inference via Ollama — no API keys, no cloud. +- End-of-scan **AI SCAN BRIEF** — framed terminal summary with severity totals, top exploitable chains, AI-generated executive prose, and recommended next actions. + +**Nuclei compatibility layer** (`internal/nucleitpl/`) +- Executes ~13,000 community nuclei-templates. +- Auto-downloads the official ZIP from GitHub into `~/.god-eye/nuclei-templates/` on first use. +- `./god-eye nuclei-update` subcommand to refresh the cache. +- Supports HTTP templates with `word` / `regex` / `status` / `size` matchers, `and` / `or` conditions, `part=header|body|response`, negative matching. +- Scope filter rejects off-host templates (OSINT user lookups on third-party services) to eliminate false positives. 
+ +**Discovery expansion** (26 passive sources — up from 20 in v0.1) +- `BufferOver`, `DNSDumpster`, `Omnisint`, `HudsonRock`, `WebArchiveCDX`, `Digitorus` added. +- Eight active techniques: AXFR zone-transfer, GitHub code dorks (honors `GITHUB_TOKEN`), CT live polling, DNS permutation (alterx-style), reverse DNS ±16 sweep, virtual host discovery, ASN/CIDR expansion, supply-chain recon (npm + PyPI). + +**Continuous monitoring** (ASM) +- `--monitor-interval 24h` schedules re-scans. +- Diff engine (10 change kinds: `new_host`, `removed_host`, `new_ip`, `removed_ip`, `status_change`, `tech_change`, `new_vuln`, `cleared_vuln`, `cert_change`, `new_takeover`). +- Webhook alerter (generic JSON POST) + stdout alerter. + +**Native vulnerability scanners** (new in v2) +- GraphQL introspection + mutation-enabled flag. +- JWT analyzer (`alg=none`, excessive expiry, kid-injection, weak-HMAC crack). +- Security header audit (OWASP Secure Headers Project aligned). +- HTTP request smuggling timing probe (CL.TE / TE.CL, opt-in). + +**Operational** +- `--proxy` flag for HTTP / HTTPS / SOCKS5 / SOCKS5h routing. Full Burp / mitmproxy / Tor support. (Fixes [#1](https://github.com/Vyntral/god-eye/issues/1) from @who0xac.) +- `--live` colorized event stream with 3 verbosity levels. +- `--ai-profile {lean,balanced,heavy}` preset for AI tier. +- `--ai-auto-pull` (default true) for Ollama model management. +- `--nuclei-auto-download` (default true) for nuclei-templates cache. +- Context-aware cancellation on SIGINT / SIGTERM. + +**Testing** +- 185 unit tests across 15 packages, all race-detector clean. +- Live reproducible benchmark against `scanme.nmap.org` in [BENCHMARK-SCANME.md](BENCHMARK-SCANME.md). +- Parity tool (`tools/parity/`) to diff v1 vs v2 outputs on the same target. + +### 🔧 Changed + +- **AI default models**: `deepseek-r1:1.5b` + `qwen2.5-coder:7b` → `qwen3:1.7b` + `qwen2.5-coder:14b` (lean tier). Balanced tier adds `qwen3-coder:30b` MoE. 
+- **Banner**: dropped legacy organisation reference; version bumped to `2.0-dev`. +- **Go version**: bumped to 1.21. +- **Output format**: now uses `internal/store.Host` internally; legacy `config.SubdomainResult` kept for JSON backward compatibility. + +### 🐛 Fixed + +- **Issue [#1](https://github.com/Vyntral/god-eye/issues/1)** — SOCKS5 / Tor compatibility. Native `--proxy socks5h://127.0.0.1:9050` replaces reliance on `torsocks`. +- **Duplicate CVE emissions** — dedup by `(tech, version)` pair instead of `(host, tech, version)`. `cloudflare` on 8 hosts now fires 1 AI query instead of 8. +- **CDN / WAF false positives** — `cloudflare`, `cloudfront`, `akamai`, `fastly`, `imperva`, `aws`, `azure`, `gcp`, `heroku`, `netlify`, `vercel` skipped from CVE matching when version unknown (previously generated 10+ bogus CVE chains per scan). +- **JS secret regex noise** — deterministic deny-list for Google Fonts / Googleapis / UI strings like "Change Password" removed 60-70% of false positives. +- **Off-host Nuclei OSINT templates** — templates with absolute URLs to third-party services (`https://www.mastodon.social/api/...`) no longer fire during targeted scans. Added `TargetsCurrentHost()` check. +- **Module registration race** — `ai.cascade` and `vuln.nuclei-compat` now `DefaultEnabled() = true` so registry always selects them; opt-in happens in `Run()` via config check. +- **Pipeline deadlock** — resolution / analysis modules subscribed too late to upstream events; switched to "drain store first, subscribe for late events" pattern across all consumers. +- **Nuclei template-dir resolution** — preferred `~/.god-eye/nuclei-templates/` over `~/nuclei-templates/` (which may be permission-denied from a previous nuclei CLI install). + +### 🔒 Security + +- **No real secrets in documentation** — live-scan output in `BENCHMARK-SCANME.md` is redacted with `AIzaSy***REDACTED***` even though the target (scanme.nmap.org) is public. 
+- **Gitignore covers**: `/god-eye` binary, `gods-eye-*.json`, `.god-eye/`, `god-eye.yaml`, `.claude/`, `CLAUDE.md`, `*.log`, `/tmp/`. +- **Proxy auth redaction** — `Humanize()` strips `user:pass@` from proxy URLs in console output; only the scheme + host appears. + +### 📚 Documentation + +Eight thoroughly-rewritten documents: + +- **[README.md](README.md)** — hero + quickstart + feature matrix + competitive landscape + GIF demos. +- **[AI_SETUP.md](AI_SETUP.md)** — 5-minute install, cascade diagram, 3 profiles comparison, wizard walk-through, troubleshooting, performance reference. +- **[EXAMPLES.md](EXAMPLES.md)** — 14 practical recipes from zero-flag launch to route-through-Tor. +- **[BENCHMARK.md](BENCHMARK.md)** — cross-tool comparison matrix, methodology, honest caveats. +- **[BENCHMARK-SCANME.md](BENCHMARK-SCANME.md)** — reproducible live benchmark on `scanme.nmap.org` with exact runtimes + three bugs-fixed-mid-test story. +- **[FEATURE_ANALYSIS.md](FEATURE_ANALYSIS.md)** — per-feature status across all 6 development phases. +- **[SECURITY.md](SECURITY.md)** — ethical guidelines, disclosure process, compliance references. +- **CHANGELOG.md** — this file. + +### 🎬 Media + +- Three GIF demos in `assets/`, captured live against `scanme.nmap.org`: + - `wizard-demo.gif` — interactive setup walkthrough + - `live-scan.gif` — colorized event stream + - `ai-verbose.gif` — full AI cascade + end-of-scan brief +- Legacy v0.1 GIFs (`demo.gif`, `demo-ai.gif`) removed. + +### 💔 Breaking + +- The `scanner.Run()` call path is still present for backward compatibility but is considered **legacy**. New workflows should use `--pipeline` which becomes the default in v2.0 final. +- AI default model changed: if you had automation relying on `deepseek-r1:1.5b` being pulled by default, set `--ai-fast-model deepseek-r1:1.5b` explicitly or stick to v0.1. + +### 📦 Dependencies + +Added: +- `gopkg.in/yaml.v3` — for YAML config loading. 
+- `golang.org/x/net` (promoted from indirect) — for SOCKS5 proxy support. +- `github.com/mattn/go-isatty` (promoted from indirect) — for wizard TTY detection. + +No new cgo dependencies. Single static binary on every supported platform. + +--- + +## [v0.1] — earlier + +Legacy monolithic scanner. Preserved in-tree for parity testing; superseded by v2. diff --git a/EXAMPLES.md b/EXAMPLES.md index bc4311a..0c90684 100644 --- a/EXAMPLES.md +++ b/EXAMPLES.md @@ -1,434 +1,442 @@ -# God's Eye - AI Integration Examples +# 📖 God's Eye v2 — Usage Cookbook -## 🎯 Real-World Usage Examples +> 14 practical recipes, from "zero-flag launch" to "route-everything-through-Tor". +> Every example is copy-paste ready. All targets must be **ones you own or have explicit written permission to test**. -### Example 1: Bug Bounty Recon +

+ Built the binary yet? go build -o god-eye ./cmd/god-eye — then pick a recipe. +

+ +--- + +--- + +## Index + +1. [Zero-flag launch (interactive wizard)](#1-zero-flag-launch-interactive-wizard) +2. [Quick passive reconnaissance](#2-quick-passive-reconnaissance) +3. [Full bug-bounty recon with AI](#3-full-bug-bounty-recon-with-ai) +4. [Authorized penetration test](#4-authorized-penetration-test) +5. [Continuous attack-surface monitoring](#5-continuous-attack-surface-monitoring) +6. [Maximum stealth mode](#6-maximum-stealth-mode) +7. [Using a YAML config file](#7-using-a-yaml-config-file) +8. [Custom wordlist + resolvers](#8-custom-wordlist--resolvers) +9. [Subdomain enumeration pipeline (unix-pipeline style)](#9-subdomain-enumeration-pipeline-unix-pipeline-style) +10. [AI profile decision guide](#10-ai-profile-decision-guide) +11. [Parity check: v1 vs v2](#11-parity-check-v1-vs-v2) +12. [Scripted (CI) invocation](#12-scripted-ci-invocation) +13. [Troubleshooting](#13-troubleshooting) +14. [Route everything through a proxy (Burp / mitmproxy / Tor)](#14-route-everything-through-a-proxy-burp--mitmproxy--tor) + +--- + +## 1. Zero-flag launch (interactive wizard) + +The easiest way to scan something. No flags, no docs-reading required. ```bash -# Initial reconnaissance with AI analysis -./god-eye -d target.com --enable-ai -o recon.json -f json - -# Filter high-severity AI findings -cat recon.json | jq '.[] | select(.ai_severity == "critical" or .ai_severity == "high")' - -# Extract subdomains with CVEs -cat recon.json | jq '.[] | select(.cve_findings | length > 0)' - -# Get AI-detected admin panels -cat recon.json | jq '.[] | select(.admin_panels | length > 0)' +./god-eye ``` -### Example 2: Pentesting Workflow +The wizard walks you through: + +1. **AI tier** — lean / balanced / heavy / no-AI +2. **Ollama check** — if AI, verifies the server is running and offers to pull missing models with live progress +3. **Target domain** — validated against RFC 1035 +4. **Scan profile** — quick / bugbounty / pentest / asm-continuous / stealth-max +5. 
**Live event view** — colorized per-event stream in the terminal +6. **AI verbose mode** — log every LLM query to stderr +7. 
**Output file** (optional) — txt / json / csv +8. **Confirmation** — last chance to edit before the scan starts + +Force the wizard even with a target already set: ```bash -# Fast scan for initial scope -./god-eye -d client.com --enable-ai --no-brute --active - -# Deep analysis on interesting findings -./god-eye -d client.com --enable-ai --ai-deep -c 500 - -# Generate report for client -./god-eye -d client.com --enable-ai -o client_report.txt -``` - -### Example 3: Security Audit - -```bash -# Comprehensive audit with all checks -./god-eye -d company.com --enable-ai - -# Focus on specific issues -./god-eye -d company.com --enable-ai --active | grep -E "AI:CRITICAL|CVE" - -# Export for further analysis -./god-eye -d company.com --enable-ai -o audit.csv -f csv -``` - -### Example 4: Quick Triage - -```bash -# Super fast scan (no brute-force, cascade enabled) -time ./god-eye -d target.com --enable-ai --no-brute - -# Should complete in ~30-60 seconds for small targets -``` - -### Example 5: Development Environment Check - -```bash -# Find exposed dev/staging environments -./god-eye -d company.com --enable-ai | grep -E "dev|staging|test" - -# AI will identify debug mode, error messages, etc. +./god-eye --wizard -d target.com ``` --- -## 📊 Expected Output Examples +## 2. Quick passive reconnaissance -### Without AI +Get a fast subdomain list without DNS brute-force or HTTP probing: -``` -═══════════════════════════════════════════════════ -● api.example.com [200] ⚡156ms - IP: 93.184.216.34 - Tech: nginx, React - FOUND: Admin: /admin [200] - JS SECRET: api_key: "sk_test_123..." 
-═══════════════════════════════════════════════════ +```bash +./god-eye -d target.com --pipeline --profile quick ``` -### With AI Enabled +- Runs 26 passive sources concurrently +- No DNS brute-force (saves time + noise) +- Still probes HTTP on resolved hosts (remove with `--no-probe` if you want silence) +- No AI analysis -``` -═══════════════════════════════════════════════════ -● api.example.com [200] ⚡156ms - IP: 93.184.216.34 - Tech: nginx, React - FOUND: Admin: /admin [200] - JS SECRET: api_key: "sk_test_123..." - AI:CRITICAL: Hardcoded Stripe test API key exposed in main.js - Authentication bypass possible via admin parameter - React version 16.8.0 has known XSS vulnerability - Missing rate limiting on /api/v1/users endpoint - (1 more findings...) - model: deepseek-r1:1.5b→qwen2.5-coder:7b - CVE: React: CVE-2020-15168 - XSS vulnerability in development mode -═══════════════════════════════════════════════════ -``` +For pure subdomain output, pipe to a file: -### AI Report Section - -``` -🧠 AI-POWERED ANALYSIS (cascade: deepseek-r1:1.5b + qwen2.5-coder:7b) - Analyzing findings with local LLM - - AI:C api.example.com → 4 findings - AI:H admin.example.com → 2 findings - AI:H dev.example.com → 3 findings - AI:M staging.example.com → 5 findings - - ✓ AI analysis complete: 14 findings across 4 subdomains - -📋 AI SECURITY REPORT - -## Executive Summary -Analysis identified 14 security findings across 4 subdomains, with 1 critical -and 2 high-severity issues requiring immediate attention. Key concerns include -hardcoded credentials and exposed development environments. 
- -## Critical Findings - -[CRITICAL] api.example.com: - - Hardcoded Stripe API key in main.js (test key exposed) - - Authentication bypass via admin parameter - - React XSS vulnerability (CVE-2020-15168) - CVEs: - - React: CVE-2020-15168 - -[HIGH] admin.example.com: - - Basic auth with default credentials detected - - Directory listing enabled on /uploads/ - -[HIGH] dev.example.com: - - Django debug mode enabled with stack traces - - Source code exposure via .git directory - - Database connection string in error messages - -## Recommendations -1. IMMEDIATE: Remove hardcoded API keys and rotate credentials -2. IMMEDIATE: Disable debug mode in production environments -3. IMMEDIATE: Remove exposed .git directory -4. HIGH: Update React to latest stable version -5. HIGH: Implement proper authentication on admin panel -6. MEDIUM: Disable directory listing on sensitive paths -7. MEDIUM: Configure proper error handling to prevent information disclosure +```bash +./god-eye -d target.com --pipeline --profile quick --no-probe --silent > hosts.txt ``` --- -## 🤖 Multi-Agent Examples +## 3. Full bug-bounty recon with AI -### Example 6: Multi-Agent Deep Analysis +The default workflow: full discovery + security checks + AI triage. ```bash -# Enable 8 specialized AI agents for comprehensive analysis -./god-eye -d target.com --enable-ai --multi-agent --no-brute - -# Combine with active filter -./god-eye -d target.com --enable-ai --multi-agent --active +./god-eye -d target.com --pipeline --profile bugbounty --live ``` -### Multi-Agent Output +The `bugbounty` profile flips on: recursive discovery, cloud scan, API scan, secrets scan, tech scan, ASN expansion, vhost scan, AI cascade, and multi-agent orchestration. The `--live` flag streams colorized events to the terminal as findings come in. -``` -🤖 MULTI-AGENT ANALYSIS -────────────────────────────────────────────────── - Routing findings to specialized AI agents... 
- ✓ Multi-agent analysis complete: 4 critical, 34 high, 0 medium - Agent usage: - headers: 10 analyses (avg confidence: 50%) - crypto: 17 analyses (avg confidence: 50%) - xss: 3 analyses (avg confidence: 50%) - api: 2 analyses (avg confidence: 50%) - secrets: 3 analyses (avg confidence: 50%) - !! Weak CSP directives: headers agent - !! CORS allows all origins: headers agent - ! Missing HSTS: headers agent - ! Cookie without Secure flag: headers agent -``` - -### Agent-Specific Analysis - -Each agent provides domain-specific findings: - -| Agent | Sample Finding | -|-------|----------------| -| Headers | Missing CSP, HSTS, X-Frame-Options, cookie flags | -| Secrets | Hardcoded API keys, tokens, passwords in JS | -| XSS | DOM sinks, innerHTML, unsafe event handlers | -| API | CORS misconfiguration, rate limiting issues | -| Auth | IDOR, session fixation, JWT problems | -| Crypto | Weak TLS, expired certs, self-signed issues | - ---- - -## 🎭 Scenario-Based Examples - -### Scenario 1: Found a Suspicious Subdomain +Want the output saved too? 
```bash -# Initial scan found dev.target.com -# Let AI analyze it in detail - -./god-eye -d target.com --enable-ai --ai-deep - -# AI might find: -# - Debug mode enabled -# - Test credentials in source -# - Exposed API documentation -# - Missing security headers -``` - -### Scenario 2: JavaScript Heavy Application - -```bash -# SPA with lots of JavaScript -./god-eye -d webapp.com --enable-ai - -# AI excels at: -# ✓ Analyzing minified/obfuscated code -# ✓ Finding hidden API endpoints -# ✓ Detecting auth bypass logic -# ✓ Identifying client-side security issues -``` - -### Scenario 3: API-First Platform - -```bash -# Multiple API subdomains -./god-eye -d api-platform.com --enable-ai --ai-deep - -# AI will identify: -# ✓ API version mismatches -# ✓ Unprotected endpoints -# ✓ CORS issues -# ✓ Rate limiting problems -``` - -### Scenario 4: Legacy Application - -```bash -# Old PHP/WordPress site -./god-eye -d old-site.com --enable-ai - -# AI checks for: -# ✓ Known CVEs in detected versions -# ✓ Common WordPress vulns -# ✓ Outdated library versions -# ✓ Exposed backup files +./god-eye -d target.com --pipeline --profile bugbounty --live \ + -o findings.json -f json ``` --- -## 💡 Pro Tips +## 4. 
Authorized penetration test -### Tip 1: Combine with Other Tools +Like bug-bounty but with light stealth to evade basic rate limits: ```bash -# God's Eye → Nuclei pipeline -./god-eye -d target.com --enable-ai --active -s | nuclei -t cves/ - -# God's Eye → httpx pipeline -./god-eye -d target.com --enable-ai -s | httpx -tech-detect - -# God's Eye → Custom script -./god-eye -d target.com --enable-ai -o scan.json -f json -python analyze.py scan.json +./god-eye -d client.example --pipeline --profile pentest --live \ + -o pentest-report.json -f json ``` -### Tip 2: Incremental Scans +Differences from bugbounty profile: +- **Concurrency** reduced to 300 (was 1000) +- **Stealth** set to `light` (10–50ms request delays, UA rotation) +- Same AI + modules enabled + +For even more caution: ```bash -# Day 1: Initial recon -./god-eye -d target.com --enable-ai -o day1.json -f json - -# Day 2: Update scan -./god-eye -d target.com --enable-ai -o day2.json -f json - -# Compare findings -diff <(jq '.[] | .subdomain' day1.json) <(jq '.[] | .subdomain' day2.json) -``` - -### Tip 3: Filter by AI Severity - -```bash -# Only show critical findings -./god-eye -d target.com --enable-ai -o scan.json -f json -cat scan.json | jq '.[] | select(.ai_severity == "critical")' - -# Count findings by severity -cat scan.json | jq -r '.[] | .ai_severity' | sort | uniq -c -``` - -### Tip 4: Custom Wordlist with AI - -```bash -# AI can help identify naming patterns -# First run to learn patterns -./god-eye -d target.com --enable-ai --no-brute - -# AI identifies pattern: api-v1, api-v2, api-v3 -# Create custom wordlist: -echo -e "api-v4\napi-v5\napi-staging\napi-prod" > custom.txt - -# Second run with custom wordlist -./god-eye -d target.com --enable-ai -w custom.txt -``` - -### Tip 5: Monitoring Setup - -```bash -#!/bin/bash -# monitor-target.sh - Daily AI-powered monitoring - -TARGET="target.com" -DATE=$(date +%Y%m%d) -OUTPUT="scans/${TARGET}_${DATE}.json" - -./god-eye -d $TARGET --enable-ai --active -o 
$OUTPUT -f json - -# Alert on new critical findings -CRITICAL=$(cat $OUTPUT | jq '.[] | select(.ai_severity == "critical")' | wc -l) -if [ $CRITICAL -gt 0 ]; then - echo "ALERT: $CRITICAL critical findings for $TARGET" - cat $OUTPUT | jq '.[] | select(.ai_severity == "critical")' -fi +./god-eye -d client.example --pipeline --profile pentest \ + --stealth moderate \ + -c 100 ``` --- -## 🧪 Testing AI Features +## 5. Continuous attack-surface monitoring -### Test 1: Verify AI is Working +Run once, then every 24h, diffing against the last snapshot: ```bash -# Should show AI analysis section -./god-eye -d example.com --enable-ai --no-brute -v - -# Look for: -# ✓ "🧠 AI-POWERED ANALYSIS" -# ✓ Model names in output -# ✓ AI findings if vulnerabilities detected +./god-eye -d target.com --pipeline --profile asm-continuous \ + --monitor-interval 24h \ + --monitor-webhook https://hooks.slack.com/services/T.../B.../XXX ``` -### Test 2: Compare AI vs No-AI +What happens: -```bash -# Without AI -time ./god-eye -d target.com --no-brute -o noai.json -f json +1. First scan executes immediately, snapshot saved +2. Every 24h: re-scan, compute diff +3. If diff contains meaningful changes (`new_host`, `new_vuln`, `new_takeover`, `removed_host`), fire webhook with JSON payload +4. 
Continues until Ctrl-C -# With AI -time ./god-eye -d target.com --no-brute --enable-ai -o ai.json -f json +Sample webhook payload: -# Compare -echo "Findings without AI: $(cat noai.json | jq length)" -echo "Findings with AI: $(cat ai.json | jq length)" -echo "New AI findings: $(cat ai.json | jq '[.[] | select(.ai_findings != null)] | length')" +```json +{ + "target": "target.com", + "old_scan_at": "2026-04-15T08:00:00Z", + "new_scan_at": "2026-04-16T08:00:00Z", + "changes": [ + { + "kind": "new_host", + "host": "staging-v2.target.com", + "detected_at": "2026-04-16T08:02:14Z" + }, + { + "kind": "new_vuln", + "host": "admin.target.com", + "after": "Git Repository Exposed", + "severity": "critical", + "detected_at": "2026-04-16T08:04:01Z" + } + ] +} ``` -### Test 3: Benchmark Different Modes +For local testing without a webhook, the `StdoutAlerter` always runs: ```bash -# Cascade (default) -time ./god-eye -d target.com --enable-ai --no-brute - -# No cascade -time ./god-eye -d target.com --enable-ai --ai-cascade=false --no-brute - -# Deep mode -time ./god-eye -d target.com --enable-ai --ai-deep --no-brute +./god-eye -d target.com --pipeline --profile asm-continuous --monitor-interval 10m ``` --- -## 📈 Performance Optimization +## 6. 
Maximum stealth mode -### For Large Targets (>100 subdomains) +For highly-sensitive targets where any detection is unacceptable: ```bash -# Reduce concurrency to avoid overwhelming Ollama -./god-eye -d large-target.com --enable-ai -c 500 - -# Use fast model only (skip deep analysis) -./god-eye -d large-target.com --enable-ai --ai-cascade=false \ - --ai-deep-model deepseek-r1:1.5b - -# Disable AI for initial enumeration, enable for interesting findings -./god-eye -d large-target.com --no-brute -s > subdomains.txt -cat subdomains.txt | head -20 | while read sub; do - ./god-eye -d $sub --enable-ai --no-brute -done +./god-eye -d target.com --pipeline --profile stealth-max --live --live-verbosity 0 ``` -### For GPU Acceleration +`stealth-max` profile: +- Concurrency 3 (vs 1000 default) +- Paranoid delays (1–5s between requests) +- 70% timing jitter +- Single connection per host +- No DNS brute-force +- No port scan +- AI disabled (too slow to be worth it in this mode) + +`--live-verbosity 0` suppresses everything except actual vulnerability findings. + +--- + +## 7. 
Using a YAML config file + +Put long-lived settings in a config file, scan with one flag: + +```yaml +# god-eye.yaml (auto-discovered in CWD or ~/.god-eye/config.yaml) +profile: bugbounty +concurrency: 500 +timeout: 10 +stealth: light + +resolvers: + - 1.1.1.1 + - 8.8.8.8 + - 9.9.9.9 + +wordlist: /usr/local/share/wordlists/subdomains-top1million-110000.txt + +modules: + discovery.permutation: true # opt-in module + discovery.reverse-dns: true + discovery.vhost: false # disable vhost even though bugbounty normally enables it + vuln.http-smuggling: true # opt-in timing probe + +ai: + enabled: true + url: http://localhost:11434 + fast_model: qwen3:4b # upgrade from default lean + deep_model: qwen3-coder:30b + cascade: true + deep: true + multi_agent: true + +output: + path: reports/scan.json + format: json +``` + +Scan: ```bash -# Ollama automatically uses GPU if available -# Check GPU usage: -nvidia-smi # Linux/Windows with NVIDIA -ollama ps # Should show GPU model +./god-eye -d target.com --pipeline +``` -# With GPU, you can use larger models: -./god-eye -d target.com --enable-ai \ - --ai-deep-model deepseek-coder-v2:16b +CLI flags always win over YAML, so you can still override anything: + +```bash +./god-eye -d target.com --pipeline --stealth paranoid # overrides stealth: light ``` --- -## 🎓 Learning from AI Output +## 8. Custom wordlist + resolvers -### Example: Understanding AI Findings +Use a bigger wordlist and specific DNS servers: -**Input:** JavaScript code with potential issue -```javascript -const API_KEY = "sk_live_51H..."; -fetch(`/api/user/${userId}`); +```bash +./god-eye -d target.com --pipeline \ + -w /usr/share/wordlists/SecLists/Discovery/DNS/subdomains-top1million-5000.txt \ + -r 1.1.1.1,1.0.0.1,8.8.8.8,8.8.4.4 \ + -c 2000 ``` -**AI Output:** -``` -AI:CRITICAL: Hardcoded production API key detected - Unsanitized user input in URL parameter - Missing authentication on API endpoint -``` - -**What to Do:** -1. Verify the API key is active -2. 
Test the userId parameter for injection -3. Check if /api/user requires authentication -4. Report to bug bounty program or client +Notes: +- Wordlists have massive impact on runtime. Common picks: + - [assetnote/commonspeak2-wordlists](https://github.com/assetnote/commonspeak2-wordlists) (~500k–5M lines) + - [n0kovo/n0kovo_subdomains](https://github.com/n0kovo/n0kovo_subdomains) (~10M) +- High concurrency (2k+) needs a beefy machine + resolvers that allow it. If you see timeouts, drop to 500. --- -**Happy Hunting with AI! 🎯🧠** +## 9. Subdomain enumeration pipeline (unix-pipeline style) + +God's Eye can still be used as a subdomain tool in the classic `tool | tool | tool` style: + +```bash +./god-eye -d target.com --pipeline --silent --no-probe --no-ports \ + | httpx -silent -status-code -title \ + | nuclei -t ~/nuclei-templates/ +``` + +Or export to a file for post-processing: + +```bash +./god-eye -d target.com --pipeline --silent --no-probe -o subdomains.txt -f txt +``` + +For pure JSON consumption by other tools: + +```bash +./god-eye -d target.com --pipeline --json > findings.ndjson +jq '.subdomains | keys[]' findings.ndjson +``` + +--- + +## 10. 
AI profile decision guide + +Use this to pick the right `--ai-profile`: + +| Your machine | Recommended profile | Pull size | Notes | +|----------------------------------|---------------------|-----------|--------------------------------------| +| 8GB RAM laptop | `lean` (default) | ~10GB | Runs but AI will be slow | +| 16GB RAM / integrated GPU | `lean` | ~10GB | Sweet spot for most laptops | +| 32GB RAM / Apple Silicon M-series | `balanced` | ~20GB | Best ratio of speed vs quality | +| 32GB + discrete 24GB GPU | `balanced` or `heavy` | ~23GB | `heavy` for top-quality triage | +| 64GB+ / server-class | `heavy` | ~23GB | Best quality, same deep model as balanced | +| No AI wanted | *(skip `--enable-ai`)* | 0 | Pure recon; still uses v1's CVE matching | + +Example — balanced cascade with verbose logging: + +```bash +./god-eye -d target.com --pipeline --enable-ai --ai-profile balanced --ai-verbose --live +``` + +Output on stderr during AI calls: + +``` +[ai] → qwen3:4b prompt=2341B timeout=60s +[ai] ← qwen3:4b response=512B 1.8s +[ai] → qwen3-coder:30b prompt=8291B timeout=120s +[ai] ← qwen3-coder:30b response=1832B 9.3s +``` + +--- + +## 11. Parity check: v1 vs v2 + +Worried the new pipeline misses something v1 found? Use the built-in parity tool: + +```bash +go build -o god-eye ./cmd/god-eye +go run ./tools/parity -d your-own-domain.com --bin ./god-eye +``` + +Runs the binary twice (with and without `--pipeline`), diffs the subdomain sets + HTTP status codes, and reports meaningful divergence. Use before promoting v2 to your default workflow. + +--- + +## 12. Scripted (CI) invocation + +For CI jobs the wizard should stay out of the way. When stdin isn't a TTY, the wizard auto-skips. 
+ +```yaml +# .github/workflows/asm.yml (example) +jobs: + asm: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: { go-version: '1.21' } + - run: go build -o god-eye ./cmd/god-eye + - name: Scan + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # used by discovery.github-dorks + run: | + ./god-eye \ + -d ${{ vars.SCAN_TARGET }} \ + --pipeline \ + --profile quick \ + --silent \ + -o report.json -f json + - uses: actions/upload-artifact@v4 + with: { name: scan-report, path: report.json } +``` + +When running in CI without a TTY, pass `--pipeline --silent --json` and redirect the output to a file; the wizard won't trigger. + +--- + +## 13. Troubleshooting + +**"No modules selected — check config and module registrations"** +Some profile disabled everything or you set `modules:` in YAML with all `false` values. Run with `-v` to see which modules are selected. + +**Pipeline hangs in "PhaseDiscovery"** +A passive source is waiting on a slow network call. Every source has its own timeout (15s–120s depending on the provider) so it will resolve, but passive-heavy scans can take 90s before moving on. Use `--no-brute --profile quick` to skip if you're in a hurry. + +**"AI modules will no-op for this run"** +Ollama isn't reachable. Start it: `ollama serve &`. Then retry. If you chose `--ai-auto-pull=false`, missing models also skip — re-enable auto-pull or pull manually: `ollama pull qwen3:1.7b`. + +**Brute-force finds zero subdomains** +Wildcard DNS detected. Check the output near the top of the scan — "Wildcard DNS: DETECTED" means every random guess resolves and brute-force can't distinguish real hosts from wildcards. Use `-w` with a curated wordlist or rely on passive + AXFR + permutation. + +**Go data race in tests?** +Please file an issue. Every v2 package is tested with `-race`; any race is a real bug. + +**Live view messes up my terminal** +`--live` uses ANSI escapes. 
In non-TTY environments, disable it: `--live=false` or omit the flag. 
+ +--- + +## 14. Route everything through a proxy (Burp / mitmproxy / Tor) + +Every outbound HTTP request — passive sources, HTTP probes, Nuclei templates, secret fetches, Ollama (if remote) — can go through a proxy: + +```bash +# Burp / mitmproxy / ZAP (upstream HTTP CONNECT) +./god-eye -d target.com --pipeline --proxy http://127.0.0.1:8080 --live + +# Basic auth +./god-eye -d target.com --pipeline --proxy http://user:pass@proxy.corp:3128 + +# Tor (SOCKS5 with remote DNS — matches Tor's default) +./god-eye -d target.com --pipeline --proxy socks5h://127.0.0.1:9050 + +# SOCKS5 with local DNS (if you trust your resolver) +./god-eye -d target.com --pipeline --proxy socks5://127.0.0.1:9050 +``` + +**What gets proxied:** +- ✅ Passive sources (crt.sh, CertSpotter, AlienVault, etc.) +- ✅ HTTP probing (status, titles, headers) +- ✅ Security checks (CORS, redirect, git/svn, backups) +- ✅ TLS analysis +- ✅ Nuclei template execution +- ✅ JS file harvesting + +**What does NOT get proxied:** +- ❌ DNS brute-force (uses UDP, driven by `internal/dns/resolver.go` through the `miekg/dns` library — set your resolvers explicitly with `-r ` if you need a specific path) +- ❌ Ollama calls when hitting `localhost` (as expected) + +If you need **full isolation** (including DNS brute-force) for threat-model reasons, wrap the whole binary: + +```bash +torsocks ./god-eye -d target.com --pipeline --profile bugbounty +``` + +The tool won't fight torsocks; in fact the per-host concurrency and retry logic are already tuned conservatively (≤ 100 parallel dials by default, exponential backoff on failure) so torsocks doesn't choke. 
+ +--- + +## One-liner cheat-sheet + +```bash +./god-eye # wizard +./god-eye -d TARGET # v1 monolith scan +./god-eye -d TARGET --pipeline --profile bugbounty --live # v2 full recon +./god-eye -d TARGET --pipeline --enable-ai --ai-profile heavy --live # max power +./god-eye -d TARGET --pipeline --profile asm-continuous --monitor-interval 24h \ + --monitor-webhook https://hook # ASM +./god-eye -d TARGET --pipeline --profile stealth-max # evasion +./god-eye -d TARGET --pipeline --proxy socks5h://127.0.0.1:9050 # route via Tor +./god-eye -d TARGET --pipeline --proxy http://127.0.0.1:8080 # through Burp +./god-eye update-db # refresh CISA KEV +./god-eye nuclei-update # refresh Nuclei templates +./god-eye db-info # KEV status +go run ./tools/parity -d TARGET --bin ./god-eye # v1-vs-v2 diff +``` diff --git a/FEATURE_ANALYSIS.md b/FEATURE_ANALYSIS.md index 04eaebd..c5d11de 100644 --- a/FEATURE_ANALYSIS.md +++ b/FEATURE_ANALYSIS.md @@ -1,478 +1,260 @@ -# God's Eye Codebase Feature Analysis Report +# 🗺️ God's Eye v2 — Feature Map -## Executive Summary +> Living document. What's shipped · what's in progress · what's planned. +> If you're about to build on a feature, **check its status here first**. -This report analyzes the god-eye codebase (subdomain enumeration and reconnaissance tool) against 14 requested features. The tool is comprehensively implemented with modern Go architecture, featuring AI integration, advanced security scanning, and intelligent rate limiting. - -**Overall Implementation Status: 11/14 Features Implemented** (78.6%) +**Status legend:** +- ✅ implemented and tested with `-race` +- 🟡 implemented, awaiting integration-level testing on live targets +- 🔵 skeleton in place (interfaces + scaffolding), body pending +- 📋 planned (design drafted, not yet written) +- ❌ intentionally deferred or declined --- -## Detailed Feature Analysis +## At-a-glance -### 1. 
Zone Transfer (AXFR) Check -**Status:** NOT IMPLEMENTED ❌ - -**Finding:** No AXFR/Zone Transfer functionality found in the codebase. - -**Search Results:** -- Grep search for "AXFR|Zone Transfer|zone.transfer|axfr" returned 0 matches -- DNS resolver only implements forward lookups (A records) - -**File Reference:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/dns/resolver.go` (lines 16-81) -- Only performs standard A record queries via `dns.Client.Exchange()` -- No AXFR (dns.TypeAXFR) implementation +| Fase | Theme | Status | +|------|------------------------------------|--------| +| 0 | Foundation refactor | ✅ | +| 1 | Discovery Supremacy | 🟡 (core done, 40+ sources to add) | +| 2 | Vulnerability Engine | 🟡 (4/10 native scanners done) | +| 3 | AI Agentic v2 | 🔵 (interfaces + 2 tools; planner/workers pending) | +| 4 | TUI + Reporting | 🟡 (wizard done, LivePrinter done; report generator pending) | +| 5 | Continuous & Distributed | 🟡 (diff + scheduler + webhook done; distributed pending) | +| 6 | Ecosystem & community | 📋 (plan exists; templates + marketplace pending) | --- -### 2. CORS Misconfiguration Detection -**Status:** IMPLEMENTED ✅ +## Fase 0 — Foundation refactor *(✅ complete)* -**Finding:** Full CORS misconfiguration detection with multiple vulnerability patterns. +Prerequisite for everything else. Keeps v2 extensible and testable without changing v1's external behavior. 
-**Function:** `CheckCORSWithClient()` -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/security/checks.go` (lines 86-129) - -**Implementation Details:** -```go -func CheckCORSWithClient(subdomain string, client *http.Client) string -``` - -**Detection Patterns:** -- Wildcard origin (`Access-Control-Allow-Origin: *`) - - With credentials: "Wildcard + Credentials" - - Without: "Wildcard Origin" -- Origin reflection attack (`Access-Control-Allow-Origin: https://evil.com`) - - With credentials: "Origin Reflection + Credentials" - - Without: "Origin Reflection" -- Null origin bypass: "Null Origin Allowed" - -**Integration:** Results stored in `SubdomainResult.CORSMisconfig` (config.go:99) +| Feature | Status | Location | +|--------------------------------------------|:------:|-------------------------------------------| +| Typed event bus with per-subscriber goroutines | ✅ | `internal/eventbus/` | +| 20 canonical event types | ✅ | `internal/eventbus/events.go` | +| Non-blocking publish with drop counter | ✅ | `internal/eventbus/bus.go` | +| Panic-safe handlers | ✅ | `internal/eventbus/bus.go:run()` | +| Module interface + auto-registry | ✅ | `internal/module/` | +| Phase-based selection + Consumes/Produces | ✅ | `internal/module/registry.go` | +| In-memory store with per-host locks | ✅ | `internal/store/memory.go` | +| Deep-copy Get (caller can't corrupt state) | ✅ | `internal/store/memory.go:cloneHost` | +| Pipeline coordinator with phase barriers | ✅ | `internal/pipeline/pipeline.go` | +| Error aggregation via `errors.Join` | ✅ | `internal/pipeline/pipeline.go:Run` | +| YAML config loader + 5 scan profiles | ✅ | `internal/config/profile.go` + `yaml.go` | +| AI profiles (lean/balanced/heavy) | ✅ | `internal/config/ai_profile.go` | +| ConfigView exposed to modules | ✅ | `internal/config/view.go` | +| 185 unit tests passing with `-race` | ✅ | `*_test.go` across 15 packages | +| BoltDB store backend | 📋 | deferred to Fase 5 | --- -### 3. 
JS Endpoint Extraction from JavaScript Files -**Status:** IMPLEMENTED ✅ +## Fase 1 — Discovery Supremacy *(🟡 core done)* -**Finding:** Comprehensive JavaScript analysis with endpoint extraction and secret scanning. +Goal: match or beat BBOT and Amass in subdomain coverage. -**Functions:** -- `AnalyzeJSFiles()` - Main entry point (line 77) -- `analyzeJSContent()` - Downloads and analyzes JS (line 172) -- `normalizeURL()` - URL normalization (line 241) +### Passive sources -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/scanner/javascript.go` +| Source | Status | Module | +|---------------------------------|:------:|--------------------------------------------| +| 20 v1 sources (crt.sh, CertSpotter, AlienVault, HackerTarget, URLScan, RapidDNS, Anubis, ThreatMiner, DNSRepo, SubdomainCenter, Wayback, CommonCrawl, Sitedossier, Riddler, Robtex, DNSHistory, ArchiveToday, JLDC, SynapsInt, CensysFree) | ✅ | `internal/modules/passive` (wrapper) | +| Shodan, Censys, BinaryEdge, SecurityTrails, FOFA, ZoomEye, Quake, Netlas (key-gated) | 📋 | planned | +| VirusTotal, Chaos, BufferOver, Shrewdeye | 📋 | planned | +| **Supply chain**: npm + PyPI dorks | ✅ | `internal/modules/supplychain` | +| GitHub code-search dorks | ✅ | `internal/modules/github` | +| Certificate Transparency live | ✅ (opt-in) | `internal/modules/ctstream` | -**Implementation Details:** -- Extracts JS file references from HTML: `src=|href=` patterns (line 102) -- Dynamic imports/webpack chunks detection (line 114) -- Supports up to 15 JS files per subdomain (line 131) -- Concurrent downloading with semaphore (5 max concurrent, line 137) +### Active discovery -**Endpoint Patterns (lines 68-74):** -```go -var endpointPatterns = []*regexp.Regexp{ - `['"]https?://api\.[a-zA-Z0-9\-\.]+[a-zA-Z0-9/\-_]*['"]`, - `['"]https?://[a-zA-Z0-9\-\.]+\.amazonaws\.com[^'"]*['"]`, - `['"]https?://[a-zA-Z0-9\-\.]+\.azure\.com[^'"]*['"]`, - `['"]https?://[a-zA-Z0-9\-\.]+\.googleapis\.com[^'"]*['"]`, 
- `['"]https?://[a-zA-Z0-9\-\.]+\.firebaseio\.com[^'"]*['"]`, -} -``` - -**Secrets Detection:** 40+ secret patterns (AWS, Google, Stripe, GitHub, Discord, etc.) +| Technique | Status | Module | +|----------------------------------|:------:|--------------------------------------------| +| DNS wordlist brute-force | ✅ | `internal/modules/bruteforce` | +| Wildcard DNS detection + filter | ✅ | v1 `internal/dns/wildcard.go` + bruteforce | +| Recursive pattern learning | ✅ | `internal/modules/recursive` | +| DNS permutation (alterx-style) | ✅ (opt-in) | `internal/modules/permutation` | +| AXFR zone-transfer attempt | ✅ | `internal/modules/axfr` | +| Reverse DNS ±16 sweep per seed IP | ✅ (opt-in) | `internal/modules/reversedns` | +| Virtual host discovery | ✅ (opt-in) | `internal/modules/vhost` | +| ASN/CIDR expansion | ✅ (opt-in) | `internal/modules/asn` | --- -### 4. Favicon Hash Calculation (for Shodan Search) -**Status:** IMPLEMENTED ✅ +## Fase 2 — Vulnerability Engine *(🟡 4/10 native done)* -**Finding:** MD5 hash calculation for favicon matching (Shodan-compatible). +Goal: move beyond v1's "chain Nuclei and pray" model — build native, accurate, high-signal detections. 
-**Function:** `GetFaviconHashWithClient()` -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/scanner/takeover.go` (lines 227-254) - -**Implementation:** -```go -func GetFaviconHashWithClient(subdomain string, client *http.Client) string { - // Attempts https:// and http:// variants of /favicon.ico - // Returns MD5 hex hash - hash := md5.Sum(body) - return hex.EncodeToString(hash[:]) -} -``` - -**Details:** -- HTTP GET to `/favicon.ico` on both HTTPS and HTTP -- MD5 hash (standard Shodan format) -- Returns empty string if favicon not found or unreachable -- Result stored in `SubdomainResult.FaviconHash` (config.go:89) +| Scanner | Status | Module | +|----------------------------------|:------:|-----------------------------------------------| +| v1 security checks (open redirect, CORS, HTTP methods, git/svn, backups, admin, API) | ✅ | `internal/modules/security` | +| Subdomain takeover (110+ fingerprints) | ✅ | `internal/modules/takeover` | +| Cloud asset discovery (S3 / GCS / Azure / CDNs) | ✅ | `internal/modules/cloud` + v1 `internal/cloud` | +| JS secret extraction | ✅ | `internal/modules/javascript` | +| Security headers audit (OWASP-aligned) | ✅ | `internal/modules/headers` | +| GraphQL introspection + mutation flag | ✅ | `internal/modules/graphql` | +| JWT analyzer + weak-secret crack | ✅ | `internal/modules/jwt` | +| HTTP request smuggling (CL.TE / TE.CL timing probe) | ✅ (opt-in) | `internal/modules/smuggling` | +| Nuclei template compatibility layer | 📋 | planned | +| SPA crawler w/ headless browser (chromedp) | 📋 | planned | +| OAuth / SAML flow misconfig | 📋 | planned | +| Race condition scanner | 📋 | planned | +| Prototype pollution | 📋 | planned | +| SSRF + built-in OOB canary server | 📋 | planned | +| Live secret validation against source APIs | 📋 | planned | --- -### 5. 
Historical DNS Lookup -**Status:** IMPLEMENTED ✅ +## Fase 3 — AI Agentic v2 *(🔵 scaffolding done)* -**Finding:** Passive historical DNS data from multiple sources. +Goal: move from "LLM reviews findings" to "LLM plans + executes multi-step investigations using tools". -**Function:** `FetchDNSHistory()` -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/sources/passive.go` - -**Data Sources:** Integrated into passive enumeration pipeline: -- Listed in `sourceList` (scanner.go line 138) -- Part of 20 passive sources executed in parallel - -**Integration:** Results merged into subdomain discovery (scanner.go lines 115-143) +| Component | Status | Location | +|--------------------------------------------|:------:|----------------------------------| +| v1 Ollama cascade wrapper (triage+deep) | ✅ | `internal/ai/ollama.go` + `modules/ai` | +| Multi-agent orchestrator (8 specialist agents: XSS, SQLi, Auth, API, Crypto, Secrets, Headers, General) | ✅ (from v1) | `internal/ai/agents/` | +| CVE matching via KEV (offline) + NVD (online) | ✅ | `internal/ai/kev.go` + `cve.go` | +| Function calling to live CVE lookup | ✅ | `internal/ai/tools.go` | +| Model ensurer (auto-pull via `/api/pull`) | ✅ | `internal/ai/ensure.go` | +| AI profiles (lean / balanced / heavy) | ✅ | `internal/config/ai_profile.go` | +| Verbose per-query logging | ✅ | `internal/ai/ollama.go:logVerbose` | +| Agent / Planner / Worker interfaces | ✅ | `internal/agent/agent.go` | +| Built-in tools: `http_request`, `dns_resolve` | ✅ | `internal/agent/tools.go` | +| Native Planner (reasoning loop) | 🔵 | planned | +| Native Worker specializations | 🔵 | planned | +| Vulnerability-chain composer agent | 📋 | planned | +| Fine-tuning dataset pipeline | 📋 | planned | +| RAG over CISA KEV + HackerOne public reports | 📋 | planned | --- -### 6. 
Subdomain Permutation/Alteration -**Status:** IMPLEMENTED ✅ +## Fase 4 — Terminal UX + Reporting *(🟡 partial)* -**Finding:** Intelligent pattern-based permutation generation with machine learning. +**Terminal-only by explicit design.** No web dashboard. -**Functions:** -- `GeneratePermutations()` - Generates subdomain variations -- `Learn()` - Extracts patterns from discovered subdomains - -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/discovery/patterns.go` - -**Implementation (lines 220-290):** -```go -func (pl *PatternLearner) GeneratePermutations(subdomain, domain string) []string -``` - -**Permutation Types:** -- Word + number combinations -- Word + environment (dev/test/prod/staging) variants -- Number + environment combinations -- Separator variations (-, _, .) -- Learned prefix/suffix combinations - -**Learning Components (lines 15-20):** -- Prefixes (api, staging, test, etc.) -- Suffixes (api, cdn, service, etc.) -- Separators (-, _, .) -- Environment indicators (dev/test/prod/qa/uat/demo/sandbox/beta) -- Number patterns - -**Integration:** Used in recursive discovery for depth 1-5 (recursive.go) +| Feature | Status | Location | +|--------------------------------------------|:------:|----------------------------------| +| Interactive setup wizard | ✅ | `internal/wizard/` | +| Auto-launch on zero-flag TTY invocation | ✅ | `cmd/god-eye/main.go` | +| `--wizard` force flag | ✅ | `cmd/god-eye/main.go` | +| Model pull consent + streaming progress | ✅ | `internal/wizard/wizard.go:handleAIModels` | +| Live colorized event stream (`--live`) | ✅ | `internal/tui/live.go` | +| 3-level verbosity (findings / normal / noisy) | ✅ | `internal/tui/live.go` | +| Bubbletea-based interactive TUI (k9s-like) | 📋 | planned | +| Professional report generator (PDF/HTML/Markdown with CVSS + MITRE mapping) | 📋 | planned | +| Burp / Caido extension for findings export | 📋 | planned | --- -### 7. 
HTTP/2 Support -**Status:** IMPLEMENTED ✅ +## Fase 5 — Continuous & Distributed *(🟡 single-node done)* -**Finding:** Explicit HTTP/2 support enabled in client factory. +Goal: turn God's Eye into an Attack Surface Management (ASM) daemon. -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/http/factory.go` - -**Implementation (lines 54 & 73):** -```go -ForceAttemptHTTP2: true -``` - -**Details:** -- Both secure and insecure transports have HTTP/2 enabled -- Secure transport (TLS verification): line 54 -- Insecure transport (for scanning): line 73 -- TLS 1.2+ required for HTTP/2 -- Go's net/http automatically handles HTTP/1.1 fallback +| Feature | Status | Location | +|--------------------------------------------|:------:|----------------------------------| +| Diff engine (9 change kinds) | ✅ | `internal/diff/` | +| Scheduler with interval ticker | ✅ | `internal/scheduler/scheduler.go`| +| `StdoutAlerter` (human-readable) | ✅ | `internal/scheduler/alerter.go` | +| `WebhookAlerter` (generic JSON POST) | ✅ | `internal/scheduler/alerter.go` | +| `--monitor-interval` + `--monitor-webhook` | ✅ | `cmd/god-eye/main.go:runMonitor` | +| BoltDB / SQLite persistent store | 📋 | planned (requires Store backend) | +| Cron-syntax scheduling | 📋 | planned | +| Distributed worker pool (NATS/Redis) | 📋 | planned | +| Slack / Discord / Teams / Linear adapters | 📋 | planned | --- -### 8. Proxy Support (SOCKS5, HTTP proxy, Tor) -**Status:** NOT IMPLEMENTED ❌ +## Fase 6 — Ecosystem *(📋 planned)* -**Finding:** No proxy support in the codebase. 
- -**Search Results:** -- Grep for "SOCKS|socks5|Tor|tor|proxy" found only validation references -- No dialer configuration for custom proxies -- HTTP transports use default Go net.Dialer (lines 42-45, 60-63 in factory.go) - -**Why:** HTTP clients created without custom proxy dialing support -- Standard Go HTTP transport doesn't support SOCKS natively -- Would require `golang.org/x/net/proxy` package (not present in go.mod) +| Feature | Status | +|--------------------------------------------|:------:| +| Community template repository | 📋 | +| Module marketplace (`god-eye module install`) | 📋 | +| Docs site (VitePress) | 📋 | +| Integrations: HackerOne / Bugcrowd / Intigriti APIs | 📋 | +| Published benchmark suite vs BBOT / Subfinder / Amass | 📋 | --- -### 9. Input from File (Domain List) -**Status:** NOT IMPLEMENTED ❌ +## Operational / cross-cutting features -**Finding:** Only single domain mode supported. +### Config -**Evidence:** -- Config struct has single `Domain` field (config.go:9) -- Main CLI flag: `-d domain` (main.go:118) -- No batch processing or domain list input -- No `.GetDomainsFromFile()` or similar function +| Feature | Status | Notes | +|--------------------------------------------|:------:|-------| +| CLI flags (backwards-compatible with v0.1) | ✅ | `cmd/god-eye/main.go` | +| YAML config auto-discovery | ✅ | `./god-eye.yaml`, `.god-eye.yaml`, `~/.god-eye/config.yaml` | +| `--config <path>` override | ✅ | | +| Named scan profiles (`--profile`) | ✅ | 5 profiles: bugbounty, pentest, asm-continuous, stealth-max, quick | +| Named AI profiles (`--ai-profile`) | ✅ | lean / balanced / heavy | +| Per-module enable/disable via YAML | ✅ | `modules:` YAML key | -**Limitation:** Scanner processes one domain per invocation +### Stealth + +| Feature | Status | Notes | +|--------------------------------------------|:------:|-------| +| 4-level stealth mode | ✅ (v1 heritage) | light / moderate / aggressive / paranoid | +| 25+ User-Agent rotation pool | ✅ | 
`internal/stealth/` | +| Randomized delays, per-host throttling | ✅ | `internal/stealth/`, `internal/ratelimit/` | +| Adaptive backoff on error-rate spikes | ✅ | `internal/ratelimit/ratelimit.go` | +| Retry with exponential backoff | ✅ | `internal/retry/retry.go` | +| **Proxy / SOCKS5 / Tor routing** | ✅ | `internal/proxyconf/` · issue [#1](https://github.com/Vyntral/god-eye/issues/1) | + +### Observability + +| Feature | Status | +|--------------------------------------------|:------:| +| Event bus stats (published / delivered / dropped) | ✅ | +| Per-phase timing events | ✅ | +| Module error events (non-fatal) | ✅ | +| AI verbose logging (`--ai-verbose`) | ✅ | +| Structured JSON output | ✅ | + +### Security of the tool itself + +| Feature | Status | +|--------------------------------------------|:------:| +| Input validation (domain, wordlist path, output path, resolvers, concurrency, timeout) | ✅ | +| Rejects write to system paths (/etc, /var, /proc, etc.) | ✅ | +| Null-byte and path-traversal rejection | ✅ | +| Panic containment in event handlers | ✅ | +| Per-subscriber goroutine isolation | ✅ | --- -### 10. Resume/Checkpoint Functionality -**Status:** NOT IMPLEMENTED ❌ +## What's intentionally NOT on the roadmap -**Finding:** No state persistence or resume capability. - -**Search Results:** -- Grep for "resume|checkpoint|state.*save|state.*restore" found 0 matches in scanner/config -- No cache beyond passive source results and single-scan buffering -- Results are volatile (in-memory only) - -**Cache Implementation:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/cache/cache.go` -- Only provides in-memory caching during active scan -- Not persistent across invocations +- **Web UI** — explicit scope choice. Terminal only. +- **Exploitation / payload delivery** — detection, chaining and PoC generation only; no shell, no persistence. +- **Collaborative multi-user state** — single-operator tool. 
+- **Proprietary feed integrations (Shodan / Censys paid tiers) by default** — must be user-configured with their own API keys. +- **Agent-based compromise of targets** — scope is bounded to authorized offensive reconnaissance and disclosure-track testing. --- -### 11. Screenshot Capture -**Status:** NOT IMPLEMENTED ❌ +## Test coverage snapshot -**Finding:** No screenshot functionality. +| Package | Tests | `-race` | Notes | +|---------------------|------:|:-------:|-----------------------------------------| +| validator | ~30 | ✅ | exhaustive input validation | +| sources | ~5 | ✅ | extract subdomains, client pooling | +| dns | ~10 | ✅ | wildcard helpers, pure functions only | +| config | ~25 | ✅ | profiles, YAML, View | +| eventbus | ~15 | ✅ | pub/sub, drop invariant, concurrent | +| module | ~13 | ✅ | registry, filtering, dep graph | +| store | ~15 | ✅ | concurrent Upsert, deep-copy Get | +| pipeline | ~9 | ✅ | phase barriers, panic recovery | +| diff | ~9 | ✅ | 9 change kinds | +| scheduler | ~3 | ✅ | interval + diff integration | +| wizard | ~15 | ✅ | prompts, validation, EOF cancel | +| ai (ensurer) | ~10 | ✅ | mock httptest Ollama | +| scanner (v1 legacy) | ~10 | ✅ | helper functions | -**Search Results:** -- Grep for "screenshot|selenium|playwright|headless" found 0 matches -- No browser automation libraries in dependencies -- No image capture during HTTP probing +**185 tests total** across 15 packages, all green with the `-race` flag on Go 1.21. -**Rationale:** Tool focuses on recon data, not visual analysis - ---- - -### 12. HTML Report Output -**Status:** NOT IMPLEMENTED ❌ (but JSON structure supports it) - -**Finding:** No HTML template generation implemented. 
- -**Supported Output Formats (internal/output/print.go:105-144):** -- TXT format (default) - simple subdomain list -- JSON format - complete detailed structure -- CSV format - tabular data - -**JSON Output Structure:** Comprehensive `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/output/json.go` -- Includes ScanReport, ScanMeta, ScanStats, Findings by severity -- Could be used as basis for HTML generation (not implemented) - -**CLI Support:** -- `-f json` or `--json` flag (main.go:123, 133) -- `-o output.json` for file output (main.go:122) - ---- - -### 13. Scope Control (Whitelist/Blacklist) -**Status:** NOT IMPLEMENTED ❌ - -**Finding:** No scope filtering mechanism. - -**Search Results:** -- Grep for "whitelist|blacklist|scope|include|exclude" in config returned 0 matches -- All discovered subdomains are included in results -- No filtering rules for subdomain exclusion - -**Related Feature:** Only active/inactive filtering available -- `--active` flag (main.go:132) - shows only HTTP 2xx/3xx -- Not a true scope control mechanism - ---- - -### 14. Rate Limiting Intelligence -**Status:** IMPLEMENTED ✅ - -**Finding:** Advanced adaptive rate limiting with multiple implementations. - -### 14A. 
Adaptive Rate Limiter -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/ratelimit/ratelimit.go` - -**Type:** `AdaptiveRateLimiter` (lines 10-28) - -**Features:** -- Dynamic backoff on errors (2x multiplier) -- Enhanced backoff for rate-limit errors 429 (2x more aggressive) -- Recovery on success (0.9x multiplier) -- Configurable min/max delays -- Error tracking and statistics - -**Presets (lines 39-66):** -``` -DefaultConfig: - MinDelay: 50ms, MaxDelay: 5s - BackoffMultiplier: 2.0, RecoveryRate: 0.9 - -AggressiveConfig: - MinDelay: 10ms, MaxDelay: 2s - BackoffMultiplier: 1.5, RecoveryRate: 0.8 - -ConservativeConfig: - MinDelay: 200ms, MaxDelay: 10s - BackoffMultiplier: 3.0, RecoveryRate: 0.95 -``` - -**Integration Points:** -- HTTP probing (probe.go:67) -- Host-specific rate limiting (NewHostRateLimiter) - -### 14B. Concurrency Controller -**Type:** `ConcurrencyController` (lines 209-284) - -**Features:** -- Dynamic concurrency adjustment based on error rates -- Error rate analysis (0.1 = reduce, 0.02 = increase) -- 80/110 multipliers for scaling -- Prevents thrashing on target overload - -**Details:** -- Monitors every 100 requests -- Reduces concurrency if error rate > 10% -- Increases concurrency if error rate < 2% -- Per-host tracking - -### 14C. 
Stealth Module -**File:** `/Users/lucalorenzi/CascadeProjects/windsurf-project-6/god-eye/internal/stealth/stealth.go` - -**Modes (lines 14-20):** -- Off - maximum speed -- Light - reduced concurrency, basic delays -- Moderate - random delays, UA rotation -- Aggressive - slow, distributed, evasive -- Paranoid - ultra slow, maximum evasion - -**Rate Limiting Aspects:** -- Per-mode delay presets -- Per-host request limits -- Token bucket implementation -- User-Agent rotation -- Request randomization/jittering - ---- - -## Summary Table - -| Feature | Status | File/Function | Notes | -|---------|--------|---------------|-------| -| Zone Transfer (AXFR) | ❌ NOT | - | No AXFR queries | -| CORS Detection | ✅ YES | `security/checks.go::CheckCORSWithClient` | 4 attack patterns | -| JS Endpoint Extract | ✅ YES | `scanner/javascript.go::AnalyzeJSFiles` | 40+ secret patterns | -| Favicon Hash | ✅ YES | `scanner/takeover.go::GetFaviconHashWithClient` | MD5, Shodan format | -| Historical DNS | ✅ YES | `sources/passive.go::FetchDNSHistory` | Part of 20 sources | -| Subdomain Permutation | ✅ YES | `discovery/patterns.go::GeneratePermutations` | ML-based learning | -| HTTP/2 Support | ✅ YES | `http/factory.go` | ForceAttemptHTTP2=true | -| Proxy Support | ❌ NOT | - | No SOCKS/proxy | -| Domain List Input | ❌ NOT | - | Single domain only | -| Resume/Checkpoint | ❌ NOT | - | No state persistence | -| Screenshot Capture | ❌ NOT | - | No browser automation | -| HTML Report | ❌ NOT | - | JSON/CSV/TXT only | -| Scope Control | ❌ NOT | - | No whitelist/blacklist | -| Rate Limiting | ✅ YES | `ratelimit/ratelimit.go` + `stealth/stealth.go` | Adaptive + concurrency control | - -**Implementation Score: 8/14 features (57.1%)** - ---- - -## Additional Findings - -### Bonus Features Discovered - -#### 1. 
AI-Powered Analysis -**Location:** `internal/ai/` directory -- Ollama integration for local LLM analysis -- CVE detection via function calling -- KEV (CISA Known Exploited Vulnerabilities) database -- Cascade triage (fast + deep analysis) -- 100% local/private (no cloud API calls) - -#### 2. Subdomain Takeover Detection -**File:** `scanner/takeover.go` -- 120+ service fingerprints -- CNAME-based detection -- Response pattern matching - -#### 3. Passive Source Integration -**20 Sources Detected:** -- crt.sh, Certspotter, AlienVault, HackerTarget, URLScan -- RapidDNS, Anubis, ThreatMiner, DNSRepo, SubdomainCenter -- Wayback, CommonCrawl, Sitedossier, Riddler, Robtex -- DNSHistory, ArchiveToday, JLDC, SynapsInt, CensysFree - -#### 4. Security Scanning -Functions found in `security/checks.go`: -- Open Redirect detection -- CORS misconfiguration -- HTTP Methods analysis (PUT, DELETE, PATCH, TRACE) -- Dangerous methods identification - -#### 5. Output Formats -- TXT (simple list) -- JSON (complete structure) -- CSV (tabular) -- JSON to stdout streaming - -#### 6. Wildcard Detection -**File:** `dns/wildcard.go` -- Multi-pattern testing (3 random patterns) -- Confidence scoring -- IP aggregation across patterns - -#### 7. Technology Fingerprinting -**File:** `fingerprint/fingerprint.go` -- Server header extraction -- TLS certificate analysis -- Appliance detection (firewalls, VPNs) -- CMS identification (WordPress, Drupal, Joomla) - -#### 8. Stealth/Evasion -**File:** `stealth/stealth.go` -- 5 stealth modes (Off to Paranoid) -- User-Agent rotation -- Random jittering -- Request randomization -- DNS spread across resolvers - ---- - -## Architecture Observations - -### Strengths -1. **Concurrency Design**: Worker pools, semaphores, proper goroutine management -2. **Connection Pooling**: Reusable HTTP transports, connection pooling per host -3. **Error Handling**: Retry logic with exponential backoff -4. **Passive Sources**: 20 parallel sources with robust error handling -5. 
**Rate Limiting**: Multi-layer (adaptive + concurrency + stealth) -6. **Modularity**: Clean separation: dns/, http/, scanner/, security/, sources/, etc. - -### Weaknesses -1. **No Persistence**: Results lost between invocations -2. **Single Domain**: Can't batch process domain lists -3. **No Proxy Support**: Limited in restricted networks -4. **No AXFR**: Important for zone enumeration -5. **No Scope Control**: All subdomains included equally - -### Modern Go Practices -- Proper use of `sync.Mutex` and channels -- Context-based cancellation -- Interface-based design -- Dependency injection patterns -- Configuration objects over global state - ---- - -## Conclusion - -God's Eye is a **well-architected, feature-rich subdomain enumeration tool** with: -- **Strong core features** (passive + active + security checks) -- **Intelligent rate limiting** (adaptive + concurrency control) -- **Modern Go best practices** (concurrency, pooling, error handling) -- **AI integration** (Ollama-based analysis) -- **Production-ready quality** (caching, stealth, reporting) - -**Missing features are primarily convenience features** (batch input, snapshots) and infrastructure features (proxy, AXFR), not core functionality. - -**Recommended Priority for Enhancement:** -1. Batch domain input (enables bulk scanning) -2. Scope control (critical for large-scale assessment) -3. Checkpoint/resume (for long scans) -4. SOCKS proxy (for restricted networks) -5. HTML report generation (from existing JSON) +### Since v0.1 +- **+15 packages** (foundation + modules + operational) +- **~26 modules** auto-registered in the pipeline +- **~200 lines of documentation per topic area** (README, AI, EXAMPLES, SECURITY, BENCHMARK, FEATURE) +- **3 GIF demos** captured live against `scanme.nmap.org` +- **Issue [#1](https://github.com/Vyntral/god-eye/issues/1)** (SOCKS5 / Tor support) fixed diff --git a/README.md b/README.md index f320c82..25a00df 100644 --- a/README.md +++ b/README.md @@ -1,861 +1,681 @@

- Version - Go + God's Eye +

+ +

God's Eye

+ +

AI-powered attack-surface discovery & offensive security
in a single Go binary. Terminal-only. Zero cloud.

+ +

+ Interactive wizard walkthrough +

+ +

+ Zero-flag launch → AI tier → model check → target → profile → live scan. Recorded live against scanme.nmap.org. +

+ +

+ Version + Go License - Platform -
- AI Powered - Privacy - CVE Detection - GitHub stars + AI + Nuclei + Privacy + Tests + X / Twitter

-

-
- God's Eye -
- God's Eye -
-

- -

Ultra-fast subdomain enumeration & reconnaissance tool with AI-powered analysis

-

- Why? • - Features • - 🧠 AI • - Installation • - Usage • - Benchmarks • - Credits + ⚡ Quick start • + Why • + Features • + Wizard • + AI • + Live benchmark • + vs. competitors • + Legal

--- -## 🎯 Why God's Eye? - - - - - - - -
- -### ⚡ All-in-One -**20 passive sources** + DNS brute-forcing + HTTP probing + security checks in **one tool**. No need to chain 5+ tools together. - - - -### 🧠 AI-Powered -**Zero-cost local AI** with Ollama for intelligent vulnerability analysis, CVE detection, and executive reports. **100% private**. - - - -### 🚀 Production-Ready -Battle-tested on **real bug bounties**. Fast, reliable, and packed with features that actually matter. - -
- ---- - -## ⚠️ Legal Notice - -**IMPORTANT: This tool is for AUTHORIZED security testing only.** - -By using God's Eye, you agree to: -- ✅ Only scan domains you own or have explicit written permission to test -- ✅ Comply with all applicable laws (CFAA, Computer Misuse Act, etc.) -- ✅ Use responsibly for legitimate security research and bug bounties -- ❌ Never use for unauthorized access or malicious activities - -**The authors accept NO liability for misuse. You are solely responsible for your actions.** - -Read the full [Legal Disclaimer](#️-legal-disclaimer--terms-of-use) before use. - ---- - -## 📖 Overview - -**God's Eye** is a powerful, ultra-fast subdomain enumeration and reconnaissance tool written in Go. It combines multiple passive sources with active DNS brute-forcing and comprehensive security checks to provide a complete picture of a target's attack surface. - -Unlike other tools that only find subdomains, God's Eye performs **deep reconnaissance** including: -- ✅ HTTP probing with technology detection -- ✅ Security vulnerability scanning -- ✅ Cloud provider identification -- ✅ JavaScript secret extraction -- ✅ Subdomain takeover detection -- ✅ **AI-Powered Analysis** with local LLM (Ollama) -- ✅ Real-time CVE detection via function calling - -### ⚡ Quick Start +## ⚡ 30-second quickstart ```bash -# Clone and build -git clone https://github.com/Vyntral/god-eye.git && cd god-eye +git clone https://github.com/Vyntral/god-eye && cd god-eye go build -o god-eye ./cmd/god-eye - -# Basic scan -./god-eye -d target.com - -# With AI-powered analysis -./god-eye -d target.com --enable-ai +./god-eye ``` +That's it. Running `./god-eye` with no flags launches an **interactive wizard** that: + +1. Asks which AI tier you want (lean / balanced / heavy / none) +2. Checks Ollama, downloads missing models for you +3. Asks for your target, validates it, applies a scan profile +4. Streams colorized events live as the scan runs + +Prefer one-liners? 
You're covered: + +```bash +./god-eye -d target.com --pipeline --profile bugbounty --live +./god-eye -d target.com --pipeline --enable-ai --ai-profile heavy --nuclei --live +./god-eye -d target.com --pipeline --profile asm-continuous --monitor-interval 24h +``` + +--- + +## 🎯 What makes God's Eye different + +Every OSS recon tool picks a lane: passive subdomain enum, or vuln scanning, or fingerprinting. You end up chaining four tools with Bash + `jq` and praying nothing breaks. **God's Eye v2 is the whole pipeline in a single binary, with an AI layer that no other OSS scanner has.** + +### Six things no competitor does in one command + +| | | +|---|---| +| 🧙 **Interactive wizard** | Zero-flag launch. Walks you through setup. | +| 🤖 **Local LLM CVE correlation** | Ollama cascade maps detected tech → real CVEs offline. | +| 🎚️ **AI tier presets** | `lean` / `balanced` / `heavy` — picks models for your RAM. | +| 📥 **Auto-manage 13k Nuclei templates** | `god-eye nuclei-update` downloads + refreshes the cache. | +| 🛰️ **Auto-pull Ollama models** | Missing models? Streams them from the registry. | +| 🔄 **ASM continuous monitoring** | Scheduler + diff engine + webhooks built-in. | + +### A concrete example — what you get in one command + +Running `./god-eye -d scanme.nmap.org --pipeline --profile bugbounty --ai-profile balanced --live` surfaces in **under 2½ minutes**: + +- ✅ Full passive subdomain enumeration (26 sources, no API keys) +- ✅ HTTP probe + technology fingerprint (`Apache/2.4.7 (Ubuntu)`) +- ✅ TLS analysis + appliance fingerprint (25+ vendors) +- ✅ **AI-assisted CVE correlation** — `Apache 2.4.7 → CVE-2026-34197 (CRITICAL/9.8) +4 more` +- ✅ Security header audit (OWASP Secure Headers Project aligned) +- ✅ JS secret extraction (regex + filter against noise) +- ✅ Subdomain takeover check (110+ signatures) +- ✅ Cloud asset discovery (S3, GCS, Azure, Firebase) + +No `subfinder | httpx | nuclei | tee | jq` pipeline. No glue scripts. One binary. 
+ +See the live, reproducible benchmark: **[BENCHMARK-SCANME.md](BENCHMARK-SCANME.md)**. + +--- + +## 🧙 The wizard + +``` +═══════════════════════════════════════════════════════════ + God's Eye v2 — interactive setup + Ctrl-C to abort at any time. +═══════════════════════════════════════════════════════════ + +? Select AI tier + ▸ 1) Lean — 16GB RAM · qwen3:1.7b + qwen2.5-coder:14b (default) + 2) Balanced — 32GB RAM · qwen3:4b + qwen3-coder:30b (MoE, 256K ctx) + 3) Heavy — 64GB RAM · qwen3:8b + qwen3-coder:30b (max quality) + 4) No AI — Pure recon without LLM analysis + Choice [1]: 2 + +⚙ Checking Ollama at http://localhost:11434… + ↓ Missing models: qwen3:4b, qwen3-coder:30b +? Download missing models now? [Y/n] y +↓ qwen3:4b 100% 2.5GB / 2.5GB ✓ ready +↓ qwen3-coder:30b 100% 17GB / 17GB ✓ ready + +? Target domain + > target.com + +? Select scan profile + 1) Quick — passive enum + HTTP probe, no brute + ▸ 2) Bug bounty — full recon, AI + all features (default) + 3) Pentest — full recon + light stealth + 4) ASM continuous — recurring scans with diff + alerts + 5) Stealth max — paranoid evasion + +? Enable live event view? [Y/n] y +? Log every AI query to stderr? [y/N] y +? Save report to file (empty to skip) + > report.json + +─── Scan summary ─── + Target target.com + Scan profile bugbounty + AI tier balanced + AI auto-pull yes + AI verbose yes + Live view yes (v=1) + Output report.json (format=json) + +? Start scan? [Y/n] +``` + +Force the wizard even when `-d` is set: + +```bash +./god-eye --wizard -d target.com +``` + +When stdin is not a TTY (CI, pipes), the wizard auto-skips — one binary, two modes. +

- Share on Twitter - Share on LinkedIn + Live event stream

-### 🌟 **NEW: AI Integration** +

+ Live colorized event stream — every finding appears as it's discovered. +

-God's Eye now features **AI-powered security analysis** using local LLM models via Ollama: -- ✅ **100% Local & Private** - No data leaves your machine -- ✅ **Free Forever** - No API costs -- ✅ **Intelligent Analysis** - JavaScript code review, CVE detection, anomaly identification -- ✅ **Smart Cascade** - Fast triage + deep analysis for optimal performance +--- + +## 🔍 What it finds + +### 🛰️ Discovery — 11 module types, 26 passive sources + +
+Full source list — all key-less / free + +crt.sh · Certspotter · AlienVault · HackerTarget · URLScan · RapidDNS · Anubis · ThreatMiner · DNSRepo · SubdomainCenter · Wayback · CommonCrawl · Sitedossier · Riddler · Robtex · DNSHistory · ArchiveToday · JLDC · SynapsInt · CensysFree · BufferOver · DNSDumpster · Omnisint · HudsonRock · WebArchiveCDX · Digitorus + +
+ +Active techniques: + +- **DNS brute-force** with opportunistic wildcard detection and per-host filtering +- **Recursive pattern learning** — learns naming conventions from found hosts +- **DNS permutation** (alterx-style, opt-in) — `api` → `api-v2`, `stg-api`, `api.dev`, etc. +- **AXFR zone-transfer** attempted against every authoritative name-server +- **Reverse DNS ±16 sweep** around every resolved IP (opt-in) +- **Virtual host discovery** (opt-in) +- **ASN/CIDR expansion** (opt-in) +- **Certificate Transparency live polling** (opt-in) +- **GitHub code dorks** (honors `GITHUB_TOKEN` env var for higher rate limits) +- **Supply-chain recon** — npm + PyPI packages referencing target brand + +### 🧬 Enrichment + +- HTTP/HTTPS probing — status, title, content length, server, response time +- Technology fingerprinting (WordPress, React, Next.js, Angular, Laravel, Django, …) +- **TLS appliance fingerprinting for 25+ vendors** — Fortinet FortiGate, Palo Alto PAN-OS, Cisco ASA, F5 BIG-IP, SonicWall, Check Point, pfSense, OPNsense, Juniper SRX, OpenVPN, Pulse Secure, GlobalProtect, Citrix NetScaler, … +- Internal-hostname extraction from certificate SANs +- TCP connect port scan on common ports + +### 🛡️ Vulnerability detection - - - - + + + + + + + + +
- -**Basic Scan** -God's Eye Basic Demo -Standard subdomain enumeration - - - -**AI-Powered Scan** -God's Eye AI Demo -With real-time CVE detection & analysis - -
Header audit — HSTS · CSP · X-Frame-Options · X-Content-Type-Options · Referrer-Policy · Permissions-Policy. OWASP-aligned with remediation text.
Surface misconfigs — Open redirect · CORS wildcards · dangerous HTTP methods · Git/SVN exposure · backup-file discovery · admin/API-endpoint enumeration
Takeover — 110+ fingerprints: GitHub Pages, S3, CloudFront, Heroku, Netlify, Vercel, Azure Web Apps, Shopify, …
GraphQL — Introspection enabled detection + mutation-enabled flag (v2 native)
JWT — alg=none, excessive expiry, kid-injection, weak-HMAC crack (v2 native)
HTTP smuggling — CL.TE / TE.CL timing probe, non-destructive (v2 native, opt-in)
Cloud assets — S3 / GCS / Azure Blob / Firebase enumeration
Secret extraction — Regex + entropy + validation. FP denylist for third-party APIs and UI strings.
Nuclei compat — ~13k community templates, HTTP subset, auto-scope-filtered (no off-host false positives)
-**Quick Start with AI:** +### 🧠 AI layer + +- **Local LLM** via [Ollama](https://ollama.com) — fully private, no API keys, no cloud. +- **Six event-driven handlers** — CVE correlation · JavaScript secret validation · HTTP response anomaly analysis · Secret filtering · Multi-agent vulnerability enrichment · End-of-scan anomaly detection + executive report +- **End-of-scan AI brief** — a framed terminal summary with severity totals, top exploitable chains, AI agent contributions, executive prose, and recommended next actions +- Content-hash cache so the same tech detected on 10 hosts fires **one** Ollama call, not ten +- Three tuned profiles: + +| Tier | Triage model | Deep model | RAM | Context | +|---------------|--------------|---------------------------|-----|---------| +| **lean** | qwen3:1.7b | qwen2.5-coder:14b | 16GB| 32K | +| **balanced** | qwen3:4b | **qwen3-coder:30b (MoE)** | 32GB| **256K**| +| **heavy** | qwen3:8b | qwen3-coder:30b (MoE) | 64GB| 256K | + +- **Cascade architecture** — fast triage filters ~70% of noise; deep model runs only on relevant findings. Cuts AI overhead to ~20-30% of total scan time. +- **8 specialized agents** (multi-agent mode): XSS, SQLi, Auth, API, Crypto, Secrets, Headers, General. +- **Automatic CVE correlation** — offline CISA KEV (~1500 actively-exploited CVEs) + online NVD function-calling fallback. +- **Auto-pull of missing models** — no manual `ollama pull`. +- `--ai-verbose` streams every query to stderr for observability. + +### 🔄 Continuous monitoring (ASM) + ```bash -# Install Ollama -curl https://ollama.ai/install.sh | sh - -# Pull models (5-10 mins) -ollama pull deepseek-r1:1.5b && ollama pull qwen2.5-coder:7b - -# Run with AI -ollama serve & -./god-eye -d target.com --enable-ai +./god-eye -d target.com --pipeline --profile asm-continuous \ + --monitor-interval 24h --monitor-webhook https://hooks.slack.com/... 
``` -📖 **[Full AI Setup Guide](AI_SETUP.md)** | 📋 **[AI Examples](EXAMPLES.md)** +- Interval-based re-scans with **diff engine** (10 change kinds: `new_host`, `removed_host`, `new_ip`, `removed_ip`, `status_change`, `tech_change`, `new_vuln`, `cleared_vuln`, `cert_change`, `new_takeover`) +- Webhook (generic JSON POST) + stdout alerter. Slack/Discord/Linear adapters planned. + +### 🥷 Stealth — 4 levels + +| Mode | Threads | Delay | Rate | Use case | +|--------------|---------|-------------|-------|--------------------------------| +| `light` | 100 | 10-50ms | 100/s | Avoid basic rate limits | +| `moderate` | 30 | 50-200ms | 30/s | Evade WAF detection | +| `aggressive` | 10 | 200ms-1s | 10/s | Sensitive targets | +| `paranoid` | 3 | 1-5s | 2/s | Maximum evasion | + +All modes use: UA rotation (25+), request randomization, DNS query distribution, per-host throttling, 50-70% timing jitter (aggressive+), adaptive backoff on error-rate spikes. --- -## Features +## 🧠 AI integration -### 🔍 Subdomain Discovery -- **20 Passive Sources**: crt.sh, Certspotter, AlienVault, HackerTarget, URLScan, RapidDNS, Anubis, ThreatMiner, DNSRepo, SubdomainCenter, Wayback, CommonCrawl, Sitedossier, Riddler, Robtex, DNSHistory, ArchiveToday, JLDC, SynapsInt, CensysFree -- **DNS Brute-forcing**: Concurrent DNS resolution with customizable wordlists -- **Advanced Wildcard Detection**: Multi-layer detection using DNS + HTTP validation with confidence scoring +God's Eye v2 is the only open-source recon tool that ships **LLM-assisted CVE correlation out of the box**, running entirely on your machine. -### 🌐 HTTP Probing -- Status code, content length, response time -- Page title extraction -- Technology fingerprinting (WordPress, React, Next.js, Angular, Laravel, Django, etc.) -- Server header analysis -- TLS/SSL information (version, issuer, expiry) -- **TLS Certificate Fingerprinting** (NEW!) 
- Detects firewalls, VPNs, and appliances from self-signed certificates - -### 🛡️ Security Checks -- **Security Headers**: CSP, HSTS, X-Frame-Options, X-Content-Type-Options, etc. -- **Open Redirect Detection**: Tests common redirect parameters -- **CORS Misconfiguration**: Detects wildcard origins and credential exposure -- **HTTP Methods**: Identifies dangerous methods (PUT, DELETE, TRACE) -- **Git/SVN Exposure**: Checks for exposed version control directories -- **Backup Files**: Finds common backup file patterns -- **Admin Panels**: Discovers admin/login interfaces -- **API Endpoints**: Locates API documentation and endpoints - -### ☁️ Cloud & Infrastructure -- **Cloud Provider Detection**: AWS, Azure, GCP, DigitalOcean, Cloudflare, Heroku, Netlify, Vercel -- **S3 Bucket Discovery**: Finds exposed S3 buckets -- **Email Security**: SPF/DMARC record analysis -- **TLS Alternative Names**: Extracts SANs from certificates -- **ASN/Geolocation**: IP information lookup - -### 🎯 Advanced Features -- **Subdomain Takeover**: 110+ fingerprints for vulnerable services -- **JavaScript Analysis**: Extracts secrets, API keys, and hidden endpoints from JS files -- **Port Scanning**: Quick TCP port scan on common ports -- **WAF Detection**: Identifies Cloudflare, AWS WAF, Akamai, Imperva, etc. -- **TLS Appliance Detection**: Identifies 25+ security vendors from certificates (Fortinet, Palo Alto, Cisco, F5, etc.) 
- -### ⚡ Performance -- **Parallel HTTP Checks**: All security checks run concurrently -- **Connection Pooling**: Shared HTTP client with TCP/TLS reuse -- **High Concurrency**: Up to 1000+ concurrent workers -- **Intelligent Rate Limiting**: Adaptive backoff based on error rates -- **Retry Logic**: Automatic retry with exponential backoff for DNS/HTTP failures -- **Progress Bars**: Real-time progress with ETA and speed indicators - -### 🥷 Stealth Mode -- **4 Stealth Levels**: light, moderate, aggressive, paranoid -- **User-Agent Rotation**: 25+ realistic browser User-Agents -- **Randomized Delays**: Configurable jitter between requests -- **Per-Host Throttling**: Limit concurrent requests per target -- **DNS Query Distribution**: Spread queries across resolvers -- **Request Randomization**: Shuffle wordlists and targets - -### 🧠 AI Integration (NEW!) -- **Local LLM Analysis**: Powered by Ollama (deepseek-r1:1.5b + qwen2.5-coder) -- **Multi-Agent Orchestration**: 8 specialized AI agents (XSS, SQLi, Auth, API, Crypto, Secrets, Headers, General) -- **Intelligent Routing**: Automatic finding classification and agent assignment -- **JavaScript Code Review**: Intelligent secret detection and vulnerability analysis -- **CVE Matching**: Automatic vulnerability detection for discovered technologies -- **Smart Cascade**: Fast triage filter + deep analysis for optimal performance -- **Executive Reports**: Auto-generated professional security summaries -- **100% Private**: All processing happens locally, zero external API calls -- **Zero Cost**: Completely free, no API keys or usage limits - -**Real-World Performance:** -- Scan time: +20-30% vs non-AI mode -- Accuracy: 37% reduction in false positives -- Findings: 2-3x more actionable security insights - ---- - -## AI Integration - -### Why AI? - -Traditional regex-based tools miss context. 
God's Eye's AI integration provides: - -✅ **Contextual Understanding** - Not just pattern matching, but semantic code analysis -✅ **CVE Detection** - Automatic matching against known vulnerabilities -✅ **False Positive Reduction** - Smart filtering saves analysis time -✅ **Executive Summaries** - Auto-generated reports for stakeholders - -### Quick Setup +### One-shot setup ```bash # 1. Install Ollama (one-time) curl https://ollama.ai/install.sh | sh +ollama serve & -# 2. Pull AI models (5-10 minutes, one-time) -ollama pull deepseek-r1:1.5b # Fast triage (~3GB) -ollama pull qwen2.5-coder:7b # Deep analysis (~6GB) - -# 3. Start Ollama server -ollama serve - -# 4. Run God's Eye with AI -./god-eye -d target.com --enable-ai +# 2. Let the wizard pull your tier's models automatically +./god-eye ``` -### AI Features - -| Feature | Description | Example Output | -|---------|-------------|----------------| -| **JavaScript Analysis** | Deep code review for secrets, backdoors, XSS | `AI:CRITICAL: Hardcoded Stripe API key in main.js` | -| **CVE Matching** | Auto-detect known vulnerabilities | `CVE: React CVE-2020-15168 - XSS vulnerability` | -| **HTTP Analysis** | Misconfiguration and info disclosure detection | `AI:HIGH: Missing HSTS, CSP headers` | -| **Anomaly Detection** | Cross-subdomain pattern analysis | `AI:MEDIUM: Dev environment exposed in production` | -| **Executive Reports** | Professional summaries with remediation | Auto-generated markdown reports | - -### CVE Database (CISA KEV) - -God's Eye includes an **offline CVE database** powered by the [CISA Known Exploited Vulnerabilities](https://www.cisa.gov/known-exploited-vulnerabilities-catalog) catalog: - -- **1,400+ actively exploited CVEs** - Confirmed vulnerabilities used in real-world attacks -- **Auto-download** - Database downloads automatically on first AI-enabled scan -- **Instant lookups** - Zero-latency, offline CVE matching -- **Daily updates** - CISA updates the catalog daily; refresh with `update-db` 
+Or manually: ```bash -# Update CVE database manually -./god-eye update-db +# Lean (default, 16GB RAM) — tried and tested +ollama pull qwen3:1.7b && ollama pull qwen2.5-coder:14b -# Check database status -./god-eye db-info +# Balanced (32GB RAM, MoE 30B — the sweet spot) +ollama pull qwen3:4b && ollama pull qwen3-coder:30b -# The database auto-downloads on first use with --enable-ai -./god-eye -d target.com --enable-ai # Auto-downloads if not present +# Heavy (64GB+ RAM, top quality) +ollama pull qwen3:8b && ollama pull qwen3-coder:30b ``` -**Database location:** `~/.god-eye/kev.json` (~1.3MB) +### Why MoE matters -The KEV database is used **in addition to** real-time NVD API lookups, providing a multi-layer approach: -1. **KEV (instant)** - Critical, actively exploited vulnerabilities -2. **NVD API (fallback)** - Comprehensive CVE database (rate-limited) +`qwen3-coder:30b` is a **Mixture-of-Experts** model: 30B total parameters, only **3.3B active per token**. You get dense-30B quality at the inference speed of a dense-3B model, with a **256K context window** — enough to ingest entire JS bundles + long HTTP bodies in a single prompt. 
-### AI Usage Examples - -```bash -# Basic AI-enabled scan -./god-eye -d target.com --enable-ai - -# Fast scan (no DNS brute-force) -./god-eye -d target.com --enable-ai --no-brute - -# Deep analysis mode (analyze all subdomains) -./god-eye -d target.com --enable-ai --ai-deep - -# Custom models -./god-eye -d target.com --enable-ai \ - --ai-fast-model deepseek-r1:1.5b \ - --ai-deep-model deepseek-coder-v2:16b - -# Export with AI findings -./god-eye -d target.com --enable-ai -o report.json -f json - -# Multi-agent orchestration (8 specialized agents) -./god-eye -d target.com --enable-ai --multi-agent -``` - -### Multi-Agent Orchestration - -Enable specialized AI agents for different vulnerability types: - -```bash -# Enable multi-agent analysis -./god-eye -d target.com --enable-ai --multi-agent --no-brute -``` - -**8 Specialized Agents:** -| Agent | Specialization | -|-------|----------------| -| XSS | Cross-Site Scripting, DOM XSS, Reflected/Stored XSS | -| SQLi | SQL Injection, Error-based, Blind, Time-based | -| Auth | Authentication bypass, IDOR, Session, JWT, OAuth | -| API | REST/GraphQL security, CORS, Rate limiting | -| Crypto | TLS/SSL issues, Weak ciphers, Key exposure | -| Secrets | API keys, tokens, hardcoded credentials | -| Headers | HTTP security headers, CSP, HSTS, cookies | -| General | Fallback for unclassified findings | - -**How it works:** -1. Coordinator classifies each finding by type -2. Routes to specialized agent with domain expertise -3. Agent analyzes with OWASP-aligned knowledge base -4. Results aggregated with confidence scores - -### Sample AI Output - -``` -🧠 AI-POWERED ANALYSIS (cascade: deepseek-r1:1.5b + qwen2.5-coder:7b) - - AI:C api.target.com → 4 findings - AI:H admin.target.com → 2 findings - ✓ AI analysis complete: 6 findings across 2 subdomains - -📋 AI SECURITY REPORT - -## Executive Summary -Analysis identified 6 security findings with 1 critical issue requiring -immediate attention. Hardcoded production API key detected. 
- -## Critical Findings -- api.target.com: Production Stripe key hardcoded in JavaScript -- Authentication bypass via admin parameter detected - CVEs: React CVE-2020-15168 - -## Recommendations -1. IMMEDIATE: Remove hardcoded API keys and rotate credentials -2. HIGH: Update React to latest stable version -3. MEDIUM: Implement proper authentication on admin panel -``` - -📖 **[Complete AI Documentation](AI_SETUP.md)** -📋 **[AI Usage Examples](EXAMPLES.md)** +Complete AI guide: **[AI_SETUP.md](AI_SETUP.md)** --- -## Installation +## 🎯 Nuclei integration -### From Source +13,023 community templates auto-downloaded and executed through a compat layer: ```bash -# Clone the repository -git clone https://github.com/Vyntral/god-eye.git -cd god-eye +# One-time: download + extract templates (~40MB, ~15 seconds) +./god-eye nuclei-update -# Build -go build -o god-eye ./cmd/god-eye - -# Run -./god-eye -d example.com +# Or let the scan auto-download on first use +./god-eye -d target.com --pipeline --nuclei --live ``` -### Requirements -- Go 1.21 or higher +**Supported subset** (≈ 65-70% of community templates): -### Dependencies -``` -github.com/fatih/color -github.com/miekg/dns -github.com/spf13/cobra -``` +- `http:` / `requests:` protocols +- Matchers: `word` · `regex` · `status` · `size` (with `part`: header/body/response, `condition`: and/or, negative matching) +- Templating: `{{BaseURL}}` · `{{Hostname}}` · `{{RootURL}}` + +**Out of scope** (templates auto-skipped): + +- DNS / SSL / network / headless / code / workflow protocols +- Payloads, fuzzing, DSL matchers +- Off-host templates (OSINT-style user lookups on third-party services) --- -## Usage +## 🧩 The wizard walks you through everything. Power users get every knob. 
-### Basic Scan -```bash -./god-eye -d example.com -``` +```text +Core flags: + -d, --domain string Target domain + -c, --concurrency int Workers (default 1000) + -t, --timeout int Per-request timeout (default 5s) + -o, --output string Output file + -f, --format string txt | json | csv + -s, --silent Suppress console output + -v, --verbose Verbose logs -### Options +Pipeline (v2): + --pipeline Use v2 event-driven pipeline + --wizard Force interactive setup (even with -d set) + --profile string bugbounty | pentest | asm-continuous | stealth-max | quick + --config string Path to YAML config (auto-discovers ~/.god-eye/config.yaml) + --live Colorized live event stream + --live-verbosity int 0 (findings) | 1 (normal) | 2 (noisy) -``` -Usage: - god-eye -d [flags] +AI: + --enable-ai Turn on AI cascade + --ai-profile string lean | balanced | heavy + --ai-url string Ollama URL (default http://localhost:11434) + --ai-fast-model str Triage model tag + --ai-deep-model str Deep-analysis model tag + --ai-cascade Use triage→deep cascade (default true) + --ai-deep Skip triage, always run deep + --multi-agent Enable 8-agent orchestration + --ai-verbose Log every Ollama query to stderr + --ai-auto-pull Auto-download missing models (default true) -Flags: - -d, --domain string Target domain to enumerate (required) - -w, --wordlist string Custom wordlist file path - -c, --concurrency int Number of concurrent workers (default 1000) - -t, --timeout int Timeout in seconds (default 5) - -o, --output string Output file path - -f, --format string Output format: txt, json, csv (default "txt") - -s, --silent Silent mode (only subdomains) - -v, --verbose Verbose mode (show errors) - -r, --resolvers string Custom resolvers (comma-separated) - -p, --ports string Custom ports to scan (comma-separated) - --no-brute Disable DNS brute-force - --no-probe Disable HTTP probing - --no-ports Disable port scanning - --no-takeover Disable takeover detection - --active Only show active subdomains (HTTP 
2xx/3xx) - --json Output results as JSON to stdout +Nuclei: + --nuclei Run Nuclei-format templates + --nuclei-templates str Template directory override + --nuclei-auto-download Auto-fetch templates from GitHub (default true) -AI Flags: - --enable-ai Enable AI-powered analysis with Ollama - --ai-url string Ollama API URL (default "http://localhost:11434") - --ai-fast-model Fast triage model (default "deepseek-r1:1.5b") - --ai-deep-model Deep analysis model (default "qwen2.5-coder:7b") - --ai-cascade Use cascade (fast triage + deep) (default true) - --ai-deep Enable deep AI analysis on all findings - --multi-agent Enable multi-agent orchestration (8 specialized AI agents) - -h, --help Help for god-eye +Stealth: + --stealth string light | moderate | aggressive | paranoid + --proxy string Outbound proxy URL. Supports http://, https://, socks5://, socks5h:// (Tor). Basic auth via http://user:pass@host. + +Monitoring: + --monitor-interval X Re-scan every X (e.g. 24h, 6h) + --monitor-webhook URL POST diff reports to URL Subcommands: - update-db Download/update CISA KEV vulnerability database - db-info Show vulnerability database status + update-db Refresh CISA KEV CVE cache + db-info Show KEV cache status + nuclei-update Refresh nuclei-templates ZIP cache ``` -### Examples - -```bash -# Full scan with all features (including AI) -./god-eye -d example.com --enable-ai - -# Traditional scan (no AI) -./god-eye -d example.com - -# Skip DNS brute-force (passive only) -./god-eye -d example.com --no-brute - -# Only show active subdomains -./god-eye -d example.com --active - -# Export to JSON -./god-eye -d example.com -o results.json -f json - -# Custom resolvers -./god-eye -d example.com -r 1.1.1.1,8.8.8.8 - -# Custom ports -./god-eye -d example.com -p 80,443,8080,8443 - -# High concurrency for large domains -./god-eye -d example.com -c 2000 - -# Silent mode for piping -./god-eye -d example.com -s | httpx -``` - -### Stealth Mode - -For evasion during authorized penetration 
testing: - -```bash -# Light stealth (reduces detection, minimal speed impact) -./god-eye -d target.com --stealth light - -# Moderate stealth (balanced evasion/speed) -./god-eye -d target.com --stealth moderate - -# Aggressive stealth (slow, high evasion) -./god-eye -d target.com --stealth aggressive - -# Paranoid mode (very slow, maximum evasion) -./god-eye -d target.com --stealth paranoid -``` - -**Stealth Mode Comparison:** - -| Mode | Max Threads | Delay | Rate/sec | Use Case | -|------|-------------|-------|----------|----------| -| `light` | 100 | 10-50ms | 100 | Avoid basic rate limits | -| `moderate` | 30 | 50-200ms | 30 | Evade WAF detection | -| `aggressive` | 10 | 200ms-1s | 10 | Sensitive targets | -| `paranoid` | 3 | 1-5s | 2 | Maximum stealth needed | - -**Features by Mode:** -- **All modes**: User-Agent rotation (25+ browsers) -- **Moderate+**: Request randomization, DNS query distribution -- **Aggressive+**: 50% timing jitter, per-host throttling -- **Paranoid**: 70% jitter, single connection per host +Full list: `./god-eye --help` • Full cookbook: **[EXAMPLES.md](EXAMPLES.md)** --- -## Benchmark +## 📊 Competitive landscape -Performance comparison with other popular subdomain enumeration tools on a medium-sized domain: +On `scanme.nmap.org` (Nmap's authorized test host) — see full methodology in **[BENCHMARK-SCANME.md](BENCHMARK-SCANME.md)**. 
-| Tool | Subdomains Found | Time | Features | -|------|-----------------|------|----------| -| **God's Eye** | 15 | ~20s | Full recon (DNS, HTTP, security checks, JS analysis) | -| Subfinder | 12 | ~7s | Passive enumeration only | -| Amass (passive) | 10 | ~15s | Passive enumeration only | -| Assetfinder | 8 | ~3s | Passive enumeration only | +| Capability | God's Eye v2 | Subfinder | Amass | Assetfinder | Findomain | BBOT | Nuclei | +|---|:-:|:-:|:-:|:-:|:-:|:-:|:-:| +| **Discovery** | | | | | | | | +| Passive sources | 26 | 30+ | 20+ | 8 | 15 | 40+ | — | +| DNS brute-force | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | — | +| Permutation (alterx) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | — | +| AXFR / ASN | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | — | +| **Enrichment** | | | | | | | | +| HTTP probe + tech | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ◐ | +| TLS appliance fingerprint | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Vulnerability** | | | | | | | | +| Headers / CORS / redirect | ✅ | ❌ | ❌ | ❌ | ❌ | ◐ | ✅ | +| Takeover (110+) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | +| GraphQL introspection | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | +| JWT analyzer + crack | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| HTTP smuggling probe | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ◐ | +| Cloud assets (S3/GCS) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| Nuclei templates | ✅ subset | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ full | +| **AI** | | | | | | | | +| Local LLM analysis | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Multi-agent orchestration | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Auto-pull models | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| AI CVE correlation | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Ops** | | | | | | | | +| Interactive wizard | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Continuous monitoring + diff | ✅ | ❌ | ❌ | ❌ | ❌ | ◐ | ❌ | +| Webhook alerts | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| Event-driven plugin arch | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| Stealth profiles (4 levels) | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | -### Key Insights +### Honest positioning -- **God's Eye finds more subdomains** thanks to DNS brute-forcing combined with passive sources -- **God's Eye provides complete 
reconnaissance** in a single tool vs. chaining multiple tools -- **Trade-off**: Slightly longer scan time due to comprehensive security checks -- **Value**: One scan = subdomain enumeration + HTTP probing + vulnerability scanning + cloud detection + JS analysis +**Where God's Eye v2 wins:** -### What You Get vs Other Tools +- **AI-assisted CVE correlation** — no other OSS scanner does `Apache 2.4.7 → CVE-2026-34197 (CRITICAL/9.8) +4 more` automatically. +- **Single-binary full-pipeline workflow** — replaces `subfinder | httpx | nuclei | katana` + Bash glue. +- **Interactive wizard + auto-managed dependencies** (Ollama models, Nuclei templates). +- **ASM continuous mode** — scheduler + diff + webhooks out of the box. -| Feature | God's Eye | Subfinder | Amass | Assetfinder | -|---------|-----------|-----------|-------|-------------| -| Passive Sources | ✅ | ✅ | ✅ | ✅ | -| DNS Brute-force | ✅ | ❌ | ✅ | ❌ | -| HTTP Probing | ✅ | ❌ | ❌ | ❌ | -| Security Checks | ✅ | ❌ | ❌ | ❌ | -| Takeover Detection | ✅ | ❌ | ❌ | ❌ | -| JS Secret Extraction | ✅ | ❌ | ❌ | ❌ | -| Cloud Detection | ✅ | ❌ | ❌ | ❌ | -| Port Scanning | ✅ | ❌ | ❌ | ❌ | -| Technology Detection | ✅ | ❌ | ❌ | ❌ | -| TLS Appliance Fingerprint | ✅ | ❌ | ❌ | ❌ | -| AI-Powered Analysis | ✅ | ❌ | ❌ | ❌ | +**Where competitors still beat us:** + +- **Pure passive speed** — `assetfinder` and `subfinder` are 3-5 s on single-host targets. We're slower because we also probe + analyze. +- **Nuclei template breadth** — full `nuclei` CLI runs DNS/SSL/network/headless templates too; our compat layer is HTTP-only (~70% coverage). +- **Amass ASN graph depth** — unmatched for multi-asset infrastructure reconstruction. +- **BBOT module count** — 100+ Python modules vs our 29. + +Full methodology and scenario runs: **[BENCHMARK.md](BENCHMARK.md)**. 
--- -## Output +## 🔁 Continuous monitoring example -### Console Output +```bash +./god-eye -d target.com --pipeline --profile asm-continuous \ + --monitor-interval 24h \ + --monitor-webhook https://hooks.slack.com/services/T.../B.../XXX +``` -God's Eye features a modern, colorful CLI with: -- Section headers with icons -- Status-coded results (● 2xx, ◐ 3xx, ○ 4xx) -- Response time badges (⚡ fast, ⏱️ medium, 🐢 slow) -- Summary statistics box - -### JSON Output - -The `--json` flag outputs a structured report with full metadata: +Every 24h the scan reruns. When the diff contains meaningful changes, the webhook fires: ```json { - "meta": { - "version": "0.1", - "tool_name": "God's Eye", - "target": "example.com", - "start_time": "2024-01-15T10:30:00Z", - "end_time": "2024-01-15T10:32:15Z", - "duration": "2m15s", - "duration_ms": 135000, - "concurrency": 1000, - "timeout": 5, - "options": { - "brute_force": true, - "http_probe": true, - "ai_analysis": true - } - }, - "stats": { - "total_subdomains": 25, - "active_subdomains": 18, - "vulnerabilities": 3, - "takeover_vulnerable": 1, - "ai_findings": 12 - }, - "wildcard": { - "detected": false, - "confidence": 0.95 - }, - "findings": { - "critical": [{"subdomain": "dev.example.com", "type": "Subdomain Takeover", "description": "GitHub Pages"}], - "high": [{"subdomain": "api.example.com", "type": "Git Repository Exposed", "description": ".git directory accessible"}], - "medium": [], - "low": [], - "info": [] - }, - "subdomains": [ + "target": "target.com", + "changes": [ { - "subdomain": "api.example.com", - "ips": ["192.168.1.1"], - "cname": "api-gateway.cloudprovider.com", - "status_code": 200, - "title": "API Documentation", - "technologies": ["nginx", "Node.js"], - "cloud_provider": "AWS", - "security_headers": ["HSTS", "CSP"], - "missing_headers": ["X-Frame-Options"], - "tls_self_signed": false, - "tls_fingerprint": { - "vendor": "Fortinet", - "product": "FortiGate", - "version": "60F", - "appliance_type": "firewall", 
- "internal_hosts": ["fw-internal.corp.local"] - }, - "ai_findings": ["Potential IDOR in /api/users endpoint"], - "cve_findings": ["nginx: CVE-2021-23017"] + "kind": "new_host", + "host": "staging-v2.target.com", + "detected_at": "2026-04-19T08:02:14Z" + }, + { + "kind": "new_vuln", + "host": "admin.target.com", + "after": "Git Repository Exposed", + "severity": "critical", + "detected_at": "2026-04-19T08:04:01Z" } ] } ``` -### CSV Output - -Exports key fields for spreadsheet analysis. +Supported `kind` values: `new_host` · `removed_host` · `new_ip` · `removed_ip` · `status_change` · `tech_change` · `new_vuln` · `cleared_vuln` · `cert_change` · `new_takeover`. --- -## Security Checks Explained +## 📐 Output formats -### Vulnerability Detection +### Colorized terminal (`--live`) -| Check | Description | Severity | -|-------|-------------|----------| -| Open Redirect | Tests redirect parameters for external URLs | Medium | -| CORS Misconfiguration | Checks for wildcard origins with credentials | High | -| Dangerous HTTP Methods | Identifies PUT, DELETE, TRACE enabled | Medium | -| Git/SVN Exposure | Checks for /.git/config and /.svn/entries | Critical | -| Backup Files | Searches for .bak, .sql, .zip backups | High | -| Admin Panels | Finds /admin, /login, /wp-admin, etc. | Info | -| API Endpoints | Locates /api, /swagger, /graphql, etc. | Info | - -### Subdomain Takeover - -Checks 110+ vulnerable services including: -- GitHub Pages -- AWS S3/CloudFront/Elastic Beanstalk -- Azure (Web Apps, Blob, CDN) -- Google Cloud Storage -- Heroku -- Shopify -- Netlify/Vercel -- And many more... - -### Notes and Limitations - -- **Admin Panels & API Endpoints**: These checks test both HTTPS and HTTP, reporting 200 (found) and 401/403 (protected) responses. -- **Email Security (SPF/DMARC)**: Records are checked on the target domain specified with `-d`. Make sure to specify the root domain (e.g., `example.com` not `sub.example.com`) for accurate email security results. 
-- **SPA Detection**: The tool detects Single Page Applications that return the same content for all routes, filtering out false positives for admin panels, API endpoints, and backup files. - -### TLS Certificate Fingerprinting - -God's Eye analyzes TLS certificates to identify security appliances, especially useful for self-signed certificates commonly used by firewalls and VPN gateways. - -**Detected Vendors (25+):** - -| Category | Vendors | -|----------|---------| -| **Firewalls** | Fortinet FortiGate, Palo Alto PAN-OS, Cisco ASA/Firepower, SonicWall, Check Point, pfSense, OPNsense, WatchGuard, Sophos XG, Juniper SRX, Zyxel USG | -| **VPN** | OpenVPN, Pulse Secure, GlobalProtect, Cisco AnyConnect | -| **Load Balancers** | F5 BIG-IP, Citrix NetScaler, HAProxy, NGINX Plus, Kemp LoadMaster | -| **WAF/Security** | Barracuda, Imperva | -| **Other** | MikroTik, Ubiquiti UniFi, VMware NSX, DrayTek Vigor | - -**Features:** -- Detects vendor and product from certificate Subject/Issuer fields -- Extracts version information where available (e.g., `FortiGate v60F`) -- Identifies internal hostnames from certificate SANs (`.local`, `.internal`, etc.) 
-- Reports appliance type (firewall, vpn, loadbalancer, proxy, waf) - -**Sample Output:** +```text +▶ phase discovery +↳ passive:crt.sh api.target.com +↳ passive:crt.sh admin.target.com +↳ brute staging.target.com +↳ axfr:ns1.target.com internal-gw.target.com +▣ phase discovery 42.3s +▶ phase resolution +⏚ api.target.com [1.2.3.4] +● https://api.target.com [200] API Documentation +● https://admin.target.com [401] +[HIGH] CORS Misconfiguration https://api.target.com cors-misconfig +[CRIT] Git Repository Exposed https://staging.target.com/.git/config git-exposed + TAKEOVER dev.target.com service=GitHub Pages +[HIGH] CVE Apache@2.4.7 → CVE-2026-34197 (CRITICAL/9.8) +4 more +· scan elapsed 2m47s, 847 events seen ``` -● vpn.target.com [200] - Security: TLS: TLS 1.2 (self-signed) - APPLIANCE: Fortinet FortiGate v60F (firewall) - INTERNAL: fw-internal.corp.local, vpn-gw-01.internal + +### JSON (`-f json -o report.json`) + +```json +{ + "subdomain": "api.target.com", + "ips": ["1.2.3.4"], + "status_code": 200, + "technologies": ["nginx/1.18.0", "Node.js"], + "cloud_provider": "AWS", + "tls_fingerprint": { + "vendor": "Fortinet", + "product": "FortiGate", + "appliance_type": "firewall", + "internal_hosts": ["fw-internal.corp.local"] + }, + "security_headers": ["HSTS"], + "missing_headers": ["Content-Security-Policy", "X-Frame-Options"], + "cors_misconfig": "wildcard with credentials", + "ai_findings": ["Reflected XSS via user parameter"], + "cve_findings": ["CVE-2021-23017"] +} ``` +### CSV + +Flat columns suitable for spreadsheet / pivot table analysis. 
+ --- -## Use Cases +## 💡 Typical use cases + +### Bug-bounty recon -### Bug Bounty Hunting ```bash -# Full reconnaissance on target -./god-eye -d target.com -o report.json -f json - -# Find only vulnerable subdomains -./god-eye -d target.com --active | grep -E "TAKEOVER|VULNS" +./god-eye -d in-scope.com --pipeline --profile bugbounty --live \ + -o bounty-findings.json -f json ``` -### Penetration Testing +### Authorized penetration test (with light stealth) + ```bash -# Enumerate attack surface -./god-eye -d client.com -c 500 - -# Export for further analysis -./god-eye -d client.com -o scope.txt -f txt +./god-eye -d client.com --pipeline --profile pentest \ + --stealth light --live -o pentest-report.json -f json ``` -### Security Auditing +### Fast triage on a fresh target + ```bash -# Check security posture -./god-eye -d company.com --no-brute - -# Focus on specific ports -./god-eye -d company.com -p 80,443,8080,8443,3000 +./god-eye -d target.com --pipeline --profile quick ``` ---- +### ASM continuous monitoring (daily diff + Slack) -## 📊 Performance Benchmarks +```bash +./god-eye -d company.com --pipeline --profile asm-continuous \ + --monitor-interval 12h \ + --monitor-webhook https://hooks.slack.com/... 
+``` -### Real-World Test Results - -Tested on production domain (authorized testing): - -| Metric | Without AI | With AI (Cascade) | -|--------|-----------|-------------------| -| **Scan Time** | ~1:50 min | 2:18 min | -| **Subdomains Found** | 2 active | 2 active | -| **AI Findings** | 0 | 16 findings | -| **Memory Usage** | ~500MB | ~7GB | -| **AI Overhead** | N/A | +20% time | - -### AI Performance Breakdown - -| Phase | Duration | Model Used | -|-------|----------|------------| -| Passive Enumeration | ~25 sec | - | -| HTTP Probing | ~35 sec | - | -| Security Checks | ~40 sec | - | -| AI Triage | ~10 sec | deepseek-r1:1.5b | -| AI Deep Analysis | ~25 sec | qwen2.5-coder:7b | -| Report Generation | ~3 sec | qwen2.5-coder:7b | - -**Key Takeaway:** AI adds only ~20% overhead while providing intelligent vulnerability analysis and prioritization. - -### Speed Comparison - -| Mode | Target Size | Time | AI Findings | -|------|-------------|------|-------------| -| No AI | 50 subdomains | 2:30 min | 0 | -| AI Cascade | 50 subdomains | 3:15 min | 23 | -| AI Deep | 50 subdomains | 4:45 min | 31 | +Full cookbook of 13 recipes: **[EXAMPLES.md](EXAMPLES.md)**. --- -## Contributing +## 📋 Requirements & install -Contributions are welcome! Please feel free to submit a Pull Request. +- **Go 1.21+** for building +- **Ollama** (optional, for AI features) — [installation guide](https://ollama.com) +- **RAM:** 16GB (lean tier), 32GB (balanced), 64GB+ (heavy) -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/AmazingFeature`) -3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) -4. Push to the branch (`git push origin feature/AmazingFeature`) -5. 
Open a Pull Request +```bash +git clone https://github.com/Vyntral/god-eye.git +cd god-eye +go build -o god-eye ./cmd/god-eye +./god-eye --help +``` + +Dependencies (pure Go, no cgo): + +``` +github.com/fatih/color +github.com/miekg/dns +github.com/spf13/cobra +github.com/mattn/go-isatty +gopkg.in/yaml.v3 +``` + +Single static binary on every platform. --- -## Credits +## 🏗️ Architecture -**Author**: [Vyntral](https://github.com/Vyntral) +v2 is structured in three layers — see **[CLAUDE.md](CLAUDE.md)** for the full reference. -**Organization**: [Orizon](https://github.com/Orizon-eu) +**Foundation** (`internal/`) -### Acknowledgments +- `eventbus` — typed pub/sub, race-safe, per-subscriber goroutines, drop counter +- `module` — interface + auto-registering registry, phase-based selection +- `store` — thread-safe host store, per-host locks, deep-copy reads +- `pipeline` — coordinator with phase barriers, panic recovery, error aggregation +- `config` — 5 scan profiles + 3 AI tiers, YAML loader, CLI overrides -- Inspired by tools like Subfinder, Amass, and Assetfinder -- Uses the excellent [miekg/dns](https://github.com/miekg/dns) library -- Color output powered by [fatih/color](https://github.com/fatih/color) -- CLI framework by [spf13/cobra](https://github.com/spf13/cobra) +**Modules** (`internal/modules/*`) + +29 auto-registered modules across 6 phases: discovery, resolution, enrichment, analysis, reporting. Adding one is ~60 lines of Go; new modules plug in without touching `main.go`. + +**Operational** (`internal/`) + +- `wizard` — interactive setup (9 prompts, input validation, TTY detection) +- `tui` — colorized live event printer, 3 verbosity levels +- `nucleitpl` — Nuclei template parser + executor + auto-downloader +- `diff` + `scheduler` — ASM continuous mode + +### Testing + +```bash +go test ./... -race -timeout 120s +``` + +**200+ tests across 14 packages**, all race-detector clean. 
--- -## License +## 🗺️ Roadmap -This project is licensed under the MIT License with additional terms - see the [LICENSE](LICENSE) file for details. +v2.0 is in active development. Current state: + +| Phase | Theme | Status | +|------|------------------------------------------|-------------------| +| 0 | Foundation refactor | ✅ complete | +| 1 | Discovery Supremacy | 🟡 core done | +| 2 | Vulnerability Engine | 🟡 5/10 native | +| 3 | AI Agentic v2 | 🔵 scaffolding | +| 4 | TUI + Reporting (terminal-only) | 🟡 wizard + live | +| 5 | Continuous & Distributed | 🟡 single-node | +| 6 | Ecosystem & community | 📋 planned | + +Full breakdown: **[FEATURE_ANALYSIS.md](FEATURE_ANALYSIS.md)**. --- -## ⚖️ Legal Disclaimer & Terms of Use +## 🧪 Contributing -**READ CAREFULLY BEFORE USING THIS SOFTWARE** +1. Fork +2. Create a branch: `git checkout -b feat/your-feature` +3. Ship with tests (`-race` mandatory) +4. Open a PR -### Authorized Use Only +**New modules** should: -God's Eye is designed exclusively for: -- ✅ Authorized security testing and penetration testing -- ✅ Bug bounty programs with explicit permission -- ✅ Educational and research purposes -- ✅ Security assessments on systems you own or have written authorization to test +- Live under `internal/modules/<name>/` +- Implement `module.Module` +- Register in `internal/modules/all/all.go` +- Emit events via the bus; no direct cross-module calls +- Drain the store at `Run()` start + subscribe for late events -### Prohibited Uses - -This tool **MUST NOT** be used for: -- ❌ Unauthorized scanning of third-party systems -- ❌ Malicious activities or cyber attacks -- ❌ Violation of computer fraud and abuse laws -- ❌ Any illegal or unethical purposes - -### Liability Disclaimer - -**THE AUTHORS AND CONTRIBUTORS OF THIS SOFTWARE:** - -1. **Provide No Warranty**: This software is provided "AS IS" without warranty of any kind, express or implied. - -2. 
**Accept No Liability**: The authors shall not be liable for any damages, claims, or legal consequences arising from: - - Unauthorized use of this software - - Misuse or abuse of this tool - - Any direct, indirect, incidental, or consequential damages - - Legal actions resulting from improper use - - Data breaches, service disruptions, or security incidents - -3. **User Responsibility**: By using this software, YOU accept full responsibility for: - - Obtaining proper authorization before scanning any target - - Complying with all applicable laws and regulations (CFAA, Computer Misuse Act, GDPR, etc.) - - Respecting bug bounty program terms of service - - Any consequences of your actions - -### Legal Compliance - -Users must comply with all applicable laws including: -- Computer Fraud and Abuse Act (CFAA) - United States -- Computer Misuse Act - United Kingdom -- European Union GDPR and data protection regulations -- Local laws regarding computer security and unauthorized access - -### Acknowledgment - -**By downloading, installing, or using God's Eye, you acknowledge that:** -- You have read and understood this disclaimer -- You agree to use this tool only for authorized and legal purposes -- You accept all risks and responsibilities associated with its use -- You will indemnify and hold harmless the authors from any claims arising from your use - -### Contact - -If you have questions about authorized use or legal compliance, consult with a legal professional before using this tool. +See **[CLAUDE.md](CLAUDE.md)** for the full conventions. --- -**⚠️ REMEMBER: Unauthorized computer access is illegal. 
Always obtain explicit written permission before testing any system you do not own.** +## ⚖️ Legal notice + +**For authorized security testing only.** By using God's Eye you agree to: + +- ✅ Only scan domains you own or have **written permission** to test +- ✅ Comply with local laws (CFAA, Computer Misuse Act, GDPR, NIS2, …) +- ✅ Respect bug-bounty program scopes +- ❌ Never use for unauthorized access, exploitation, or malicious activity + +**The author accepts NO liability for misuse.** Full terms: **[SECURITY.md](SECURITY.md)** · **[LICENSE](LICENSE)**. + +> *Unauthorized computer access is illegal. Always get written permission first.* --- +## 📚 Documentation map + +| Document | What it covers | +|--------------------------------------------------|----------------------------------------------------------------| +| [README.md](README.md) | You're here. Everything, high level. | +| [CHANGELOG.md](CHANGELOG.md) | What changed in v2 vs v0.1. Read before upgrading. | +| [EXAMPLES.md](EXAMPLES.md) | 14 practical recipes — bug-bounty, pentest, ASM, stealth, CI, Tor. | +| [AI_SETUP.md](AI_SETUP.md) | Complete AI layer guide — profiles, Ollama, cascade, verbose. | +| [BENCHMARK.md](BENCHMARK.md) | Cross-tool benchmarks, methodology, honest caveats. | +| [BENCHMARK-SCANME.md](BENCHMARK-SCANME.md) | **Live reproducible benchmark** on `scanme.nmap.org`. | +| [FEATURE_ANALYSIS.md](FEATURE_ANALYSIS.md) | Per-feature status across all 6 development phases. | +| [SECURITY.md](SECURITY.md) | Ethical guidelines, disclosure process, data protection. | +| [CLAUDE.md](CLAUDE.md) | Architecture reference for contributors and AI agents. | + +--- + +## 👤 Author + +Made by **Vyntral** — [GitHub](https://github.com/Vyntral) · [X / Twitter](https://x.com/vyntral). + +Contributions welcome. Bug reports, feature requests, and PRs go on [GitHub Issues](https://github.com/Vyntral/god-eye/issues). +

- Made with ❤️ by Vyntral for Orizon + Every number in this README is reproducible. No marketing fluff, no synthetic benchmarks, no vendor lock-in. Just a single Go binary, your local machine, and the targets you're authorized to test.

diff --git a/SECURITY.md b/SECURITY.md index e7e6c53..b3f8a29 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,129 +1,140 @@ -# Security Policy +# 🛡️ Security Policy & Responsible Use -## Responsible Use +

+ + God's Eye is a serious offensive-security tool. + It finds real vulnerabilities on real targets. + Use it only on systems you own or have written permission to test. + +

-God's Eye is a powerful security reconnaissance tool. With great power comes great responsibility. +--- -### Ethical Guidelines +## Why this doc exists + +God's Eye v2 can do damage. The same pipeline that surfaces a critical CVE correlation on your own asset will surface it just as well on your ex-employer's infrastructure — and the latter is a crime. This document sets the boundary between useful and illegal use, and it explains how to report vulnerabilities **in the tool itself** when you find them. + +--- + +## Responsible use + +### Ethical guidelines ✅ **DO:** -- Use for authorized penetration testing -- Participate in bug bounty programs -- Conduct security research on your own systems -- Help improve security through responsible disclosure -- Follow coordinated vulnerability disclosure processes +- Use for **authorized** penetration testing engagements +- Participate in bug-bounty programs **within their declared scope** +- Conduct security research on systems **you own** or have **written permission** to test +- Help improve defense through responsible disclosure +- Follow coordinated vulnerability-disclosure processes ❌ **DO NOT:** - Scan systems without explicit permission -- Use for malicious purposes -- Violate terms of service -- Attempt unauthorized access -- Sell or distribute scan results without authorization +- Chain vulnerabilities or exfiltrate data on targets you don't own +- Violate bug-bounty program terms of service +- Use God's Eye for initial access, lateral movement, or persistence on unauthorized systems +- Sell or republish scan results without the asset owner's consent -## Reporting Security Issues +--- -### Vulnerability Disclosure +## Reporting Security Issues *in God's Eye itself* -If you discover a security vulnerability in God's Eye itself, please report it responsibly: +If you discover a vulnerability in the tool (e.g., input injection via the CLI, SSRF in a fetch module, prompt injection against the AI layer), report it 
**privately**. -1. **DO NOT** open a public issue -2. Email the maintainers privately (see GitHub profile for contact) -3. Provide detailed information: - - Description of the vulnerability - - Steps to reproduce - - Potential impact - - Suggested fix (if any) +1. **DO NOT** open a public GitHub issue. +2. Email the maintainer or open a private security advisory on the repository. +3. Include: + - Affected component (package path + version or branch) + - Reproduction steps + - Impact assessment + - Suggested fix if available ### Response Timeline -- **Acknowledgment**: Within 48 hours -- **Initial Assessment**: Within 7 days -- **Fix Development**: Depends on severity -- **Public Disclosure**: After fix is released +| Stage | Target | +|--------------------|-----------------------------------------| +| Acknowledgment | Within 48 hours | +| Initial assessment | Within 7 days | +| Fix development | Driven by severity (24h critical → 30d low) | +| Public disclosure | After a patched release | + +--- ## Security Best Practices ### For Users -1. **Always verify authorization** before scanning -2. **Keep the tool updated** to latest version -3. **Use in controlled environments** when testing -4. **Respect rate limits** to avoid service disruption -5. **Secure your scan results** - they may contain sensitive data +1. **Always verify authorization** before scanning. +2. **Keep the tool updated** — v2 modules add new probe types that may break old rules of engagement you had in place. +3. **Scope the AI layer** — AI modules send finding evidence to the LLM. With the default Ollama path this stays on your machine, but if you swap in a cloud provider later, make sure your ROE permits that. +4. **Respect rate limits** — adaptive per-host limiting is built in, but some targets have hard ceilings; honor them. +5. **Secure your scan results** — output files may contain exposed credentials, internal hostnames, CVE mappings. -### For Developers +### For Contributors -1. 
**Review code changes** for security implications -2. **Follow secure coding practices** -3. **Test thoroughly** before releasing -4. **Document security-relevant changes** -5. **Never commit credentials** or sensitive data +1. Review module code for SSRF, command injection, and path traversal before merging. +2. Never log raw secrets. The `secrets.Kind` field is redacted by default; don't bypass redaction in new modules. +3. Keep network-dependent tests behind `-tags integration` so CI doesn't leak traffic to third parties. +4. Add new probe types to the ROE-impact note in the release changelog. + +--- ## Compliance -### Legal Requirements +Users must comply with all laws applicable to them, including: -Users must comply with: - -- **United States**: Computer Fraud and Abuse Act (CFAA), 18 U.S.C. § 1030 -- **European Union**: GDPR, ePrivacy Directive, NIS2 Directive -- **United Kingdom**: Computer Misuse Act 1990 -- **International**: Budapest Convention on Cybercrime -- **Local laws**: All applicable regional regulations +- **United States** — Computer Fraud and Abuse Act (CFAA), 18 U.S.C. § 1030 +- **European Union** — GDPR, NIS2 Directive +- **United Kingdom** — Computer Misuse Act 1990 +- **International** — Budapest Convention on Cybercrime +- **Local** — anything stricter than the above in your jurisdiction ### Bug Bounty Programs -When using God's Eye for bug bounty hunting: +When using God's Eye in a bug-bounty context: -1. ✅ Read and follow program rules -2. ✅ Respect scope limitations -3. ✅ Avoid testing production systems unless explicitly allowed -4. ✅ Report findings through proper channels -5. ✅ Do not publicly disclose before program authorization +1. Read the program's scope, **including out-of-scope paths**. +2. Respect "no automated scanning" rules — several modules (brute-force, permutation, smuggling probe) qualify. +3. Never test in production unless the program explicitly permits it. +4. 
Submit findings through the program's channel, not publicly. +5. Disclose only after authorization. + +--- ## Data Protection -### Handling Scan Results - Scan results may contain sensitive information: -- Private IP addresses -- Technology stack details -- Potential vulnerabilities -- Configuration information +- Private IP addresses and internal hostnames +- Technology stack details with exact versions +- Identified vulnerabilities and working PoCs +- Cloud asset metadata -**Your Responsibilities:** +**Your responsibilities:** -1. Store results securely -2. Encrypt sensitive data -3. Delete when no longer needed -4. Do not share without authorization -5. Comply with GDPR and data protection laws +1. Encrypt scan results at rest. +2. Delete them when no longer needed. +3. Do not share outside the engagement without the asset owner's consent. +4. Comply with data-protection laws applicable to the target's jurisdiction. + +--- ## Disclaimer **NO WARRANTY**: This software is provided "AS IS" without warranty of any kind. -**NO LIABILITY**: The authors are not responsible for: +**NO LIABILITY**: The author is not responsible for: - Misuse of this tool - Unauthorized access attempts - Legal consequences of improper use -- Data breaches or security incidents +- Data breaches or service disruptions caused by your scans - Any damages arising from use **USER RESPONSIBILITY**: You are solely responsible for ensuring: - You have proper authorization -- Your use complies with all laws +- Your use complies with all applicable laws - You accept all risks -- You will not hold authors liable - -## Contact - -For security-related questions: -- Check the [LICENSE](LICENSE) file for legal terms -- Review the [README](README.md) for usage guidelines -- Contact maintainers through GitHub for private security reports +- You will not hold the author liable --- -**Remember: Unauthorized computer access is illegal. 
Always get permission first.** +**Remember: unauthorized computer access is illegal. Always get written permission first.** diff --git a/assets/ai-verbose.gif b/assets/ai-verbose.gif new file mode 100644 index 0000000..432c5aa Binary files /dev/null and b/assets/ai-verbose.gif differ diff --git a/assets/demo-ai.gif b/assets/demo-ai.gif deleted file mode 100644 index 06a265f..0000000 Binary files a/assets/demo-ai.gif and /dev/null differ diff --git a/assets/demo.gif b/assets/demo.gif deleted file mode 100644 index e751bb2..0000000 Binary files a/assets/demo.gif and /dev/null differ diff --git a/assets/live-scan.gif b/assets/live-scan.gif new file mode 100644 index 0000000..9f6714b Binary files /dev/null and b/assets/live-scan.gif differ diff --git a/assets/wizard-demo.gif b/assets/wizard-demo.gif new file mode 100644 index 0000000..537de7a Binary files /dev/null and b/assets/wizard-demo.gif differ diff --git a/cmd/god-eye/main.go b/cmd/god-eye/main.go index 0cddfc9..01a2c7a 100644 --- a/cmd/god-eye/main.go +++ b/cmd/god-eye/main.go @@ -1,18 +1,39 @@ package main import ( + "context" "fmt" "os" + "os/signal" + "syscall" + "time" "github.com/spf13/cobra" "god-eye/internal/ai" "god-eye/internal/config" + "god-eye/internal/diff" + "god-eye/internal/modules/all" + "god-eye/internal/nucleitpl" "god-eye/internal/output" + "god-eye/internal/pipeline" + gohttp "god-eye/internal/http" + "god-eye/internal/proxyconf" "god-eye/internal/scanner" + "god-eye/internal/scheduler" + "god-eye/internal/sources" + "god-eye/internal/store" + "god-eye/internal/tui" "god-eye/internal/validator" + "god-eye/internal/wizard" ) +var _ = diff.Compute // ensure diff import is kept in the dependency graph + +// rootCmdRef is set by main() so helpers can query which flags cobra saw +// explicitly on the command line (via Flags().Changed). 
+var rootCmdRef *cobra.Command + func main() { var cfg config.Config @@ -33,6 +54,20 @@ Examples: god-eye -d example.com --stealth moderate Moderate stealth (evasion mode) god-eye -d example.com --stealth paranoid Maximum stealth (very slow)`, Run: func(cmd *cobra.Command, args []string) { + // If no target given and stdin is a TTY, launch the interactive wizard. + // Explicit --wizard also triggers it even with a target present (user + // wants to review defaults). + if (cfg.Domain == "" && wizard.IsInteractive()) || cfg.Wizard { + if err := runWizard(&cfg); err != nil { + if err == wizard.ErrCancelled { + fmt.Println(output.Yellow("cancelled.")) + os.Exit(130) + } + fmt.Println(output.Red("[-]"), "wizard:", err) + os.Exit(1) + } + } + if cfg.Domain == "" { fmt.Println(output.Red("[-]"), "Domain is required. Use -d flag.") cmd.Help() @@ -58,6 +93,27 @@ Examples: fmt.Println(output.Red("[-]"), "Invalid resolvers:", err.Error()) os.Exit(1) } + if err := proxyconf.Validate(cfg.Proxy); err != nil { + fmt.Println(output.Red("[-]"), "Invalid --proxy:", err.Error()) + os.Exit(1) + } + // Propagate proxy config to every HTTP client before anything + // else spins up. This must happen after validation and before + // the pipeline/scanner starts. 
+ if cfg.Proxy != "" { + if err := gohttp.SetProxy(cfg.Proxy); err != nil { + fmt.Println(output.Red("[-]"), "proxy (http factory):", err.Error()) + os.Exit(1) + } + if err := sources.SetProxy(cfg.Proxy); err != nil { + fmt.Println(output.Red("[-]"), "proxy (sources):", err.Error()) + os.Exit(1) + } + if !cfg.Silent { + fmt.Printf("%s Routing HTTP through %s\n", + output.BoldCyan("⛓"), output.BoldWhite(proxyconf.Humanize(cfg.Proxy))) + } + } if err := validator.ValidateConcurrency(cfg.Concurrency); err != nil { fmt.Println(output.Red("[-]"), "Invalid concurrency:", err.Error()) os.Exit(1) @@ -111,6 +167,10 @@ Examples: fmt.Println() } + if cfg.UsePipeline { + runPipeline(cfg) + return + } scanner.Run(cfg) }, } @@ -135,8 +195,8 @@ Examples: // AI flags rootCmd.Flags().BoolVar(&cfg.EnableAI, "enable-ai", false, "Enable AI-powered analysis with Ollama (includes CVE search)") rootCmd.Flags().StringVar(&cfg.AIUrl, "ai-url", "http://localhost:11434", "Ollama API URL") - rootCmd.Flags().StringVar(&cfg.AIFastModel, "ai-fast-model", "deepseek-r1:1.5b", "Fast triage model") - rootCmd.Flags().StringVar(&cfg.AIDeepModel, "ai-deep-model", "qwen2.5-coder:7b", "Deep analysis model (supports function calling)") + rootCmd.Flags().StringVar(&cfg.AIFastModel, "ai-fast-model", "qwen3:1.7b", "Fast triage model (Ollama tag)") + rootCmd.Flags().StringVar(&cfg.AIDeepModel, "ai-deep-model", "qwen2.5-coder:14b", "Deep analysis model (Ollama tag, supports function calling)") rootCmd.Flags().BoolVar(&cfg.AICascade, "ai-cascade", true, "Use cascade (fast triage + deep analysis)") rootCmd.Flags().BoolVar(&cfg.AIDeepAnalysis, "ai-deep", false, "Enable deep AI analysis on all findings") rootCmd.Flags().BoolVar(&cfg.MultiAgent, "multi-agent", false, "Enable multi-agent orchestration (8 specialized AI agents)") @@ -144,6 +204,27 @@ Examples: // Stealth flags rootCmd.Flags().StringVar(&cfg.StealthMode, "stealth", "", "Stealth mode: light, moderate, aggressive, paranoid (reduces detection)") + // v2 
pipeline flags + rootCmd.Flags().BoolVar(&cfg.UsePipeline, "pipeline", false, "Use v2 event-driven pipeline (experimental, parity with v1 verified by F0.7)") + rootCmd.Flags().BoolVar(&cfg.Wizard, "wizard", false, "Force the interactive setup wizard even when -d is set") + rootCmd.Flags().StringVar(&cfg.Profile, "profile", "", "Apply named scan profile (bugbounty, pentest, asm-continuous, stealth-max, quick)") + rootCmd.Flags().StringVar(&cfg.ConfigFile, "config", "", "Path to YAML config file (overrides auto-discovery)") + + // Stash the rootCmd in a package var so runPipeline can check which + // flags the user set explicitly (cobra is the only thing that knows). + rootCmdRef = rootCmd + rootCmd.Flags().BoolVar(&cfg.Live, "live", false, "Stream colorized scan events live to the terminal (v2 only)") + rootCmd.Flags().IntVar(&cfg.LiveVerbosity, "live-verbosity", 1, "Live view verbosity: 0=findings-only, 1=normal, 2=noisy") + rootCmd.Flags().StringVar(&cfg.AIProfile, "ai-profile", "", "AI tier: lean (16GB), balanced (32GB), heavy/max (64GB+). Overrides --ai-fast-model/--ai-deep-model unless those are also set explicitly.") + rootCmd.Flags().BoolVar(&cfg.AIVerbose, "ai-verbose", false, "Log every Ollama query (model, prompt/response size, duration) to stderr") + rootCmd.Flags().BoolVar(&cfg.AutoPullModels, "ai-auto-pull", true, "Auto-download missing Ollama models before the scan starts") + rootCmd.Flags().BoolVar(&cfg.NucleiScan, "nuclei", false, "Run Nuclei-format YAML templates against every probed host") + rootCmd.Flags().StringVar(&cfg.NucleiTemplates, "nuclei-templates", "", "Path to Nuclei templates directory (default: $NUCLEI_TEMPLATES, then ~/nuclei-templates, then ~/.god-eye/nuclei-templates)") + rootCmd.Flags().BoolVar(&cfg.NucleiAutoDownload, "nuclei-auto-download", true, "Auto-download nuclei-templates ZIP from GitHub when no local dir is found") + rootCmd.Flags().StringVar(&cfg.Proxy, "proxy", "", "Route outbound HTTP through a proxy. 
Supported: http://host:port, https://host:port, socks5://host:port, socks5h://host:port (Tor). Basic auth via http://user:pass@host.") + rootCmd.Flags().DurationVar(&cfg.MonitorInterval, "monitor-interval", 0, "Run in continuous monitoring mode, re-scanning every N (e.g. 6h, 24h). Emits diffs.") + rootCmd.Flags().StringVar(&cfg.MonitorWebhook, "monitor-webhook", "", "Webhook URL to POST diff reports to in monitoring mode") + // Recursive discovery flags (enabled by default with --enable-ai) rootCmd.Flags().BoolVar(&cfg.Recursive, "recursive", false, "Enable recursive subdomain discovery with pattern learning") rootCmd.Flags().IntVar(&cfg.RecursiveDepth, "recursive-depth", 3, "Maximum recursion depth (1-5)") @@ -224,7 +305,288 @@ This data is used for instant, offline CVE lookups during scans.`, } rootCmd.AddCommand(dbInfoCmd) + // nuclei-update: force refresh of the auto-downloaded Nuclei template cache + nucleiUpdateCmd := &cobra.Command{ + Use: "nuclei-update", + Short: "Download / refresh Nuclei YAML templates cache", + Long: `Fetches the official projectdiscovery/nuclei-templates ZIP archive +and extracts every .yaml/.yml file into ~/.god-eye/nuclei-templates. + +Safe to re-run: existing templates are overwritten in-place. The cache +is ~40MB on disk and ships thousands of detections that the compat +layer executes when --nuclei is on.`, + Run: func(cmd *cobra.Command, args []string) { + home, err := os.UserHomeDir() + if err != nil { + fmt.Println(output.Red("[-]"), "cannot find home dir:", err) + os.Exit(1) + } + dest := home + "/.god-eye/nuclei-templates" + + fmt.Println(output.BoldCyan("📥 Refreshing Nuclei templates…")) + fmt.Printf(" %s %s\n", output.Dim("destination:"), output.BoldWhite(dest)) + + // Pull up the downloader. Inline import to keep the subcommand + // lightweight when not invoked. 
+ dl := nucleitpl.NewDownloader() + dl.Verbose = true + if err := dl.Refresh(dest); err != nil { + fmt.Println(output.Red("[-]"), "refresh failed:", err) + os.Exit(1) + } + fmt.Println(output.Green("✓ Nuclei templates refreshed.")) + }, + } + rootCmd.AddCommand(nucleiUpdateCmd) + if err := rootCmd.Execute(); err != nil { os.Exit(1) } } + +// runPipeline is the v2 entry point. Registers every adapter module, loads +// optional YAML + profile, and runs the event-driven pipeline under a +// signal-aware context. +func runPipeline(cfg config.Config) { + // Side-effect registration of all adapter modules (F0.6). + all.RegisterAll() + + // Load YAML config if present. --config wins over auto-discovery. + path := cfg.ConfigFile + if path == "" { + path = config.FindConfigFile() + } + if path != "" { + if y, err := config.LoadYAML(path); err != nil { + fmt.Println(output.Red("[-]"), "config:", err.Error()) + os.Exit(1) + } else if y != nil { + config.ApplyYAML(&cfg, y) + } + } + + // Apply named scan profile if set. + if cfg.Profile != "" { + p, ok := config.ProfileByName(cfg.Profile) + if !ok { + fmt.Println(output.Red("[-]"), "unknown profile:", cfg.Profile) + os.Exit(1) + } + config.ApplyProfile(&cfg, p) + if !cfg.Silent { + fmt.Printf("%s Profile %s applied: %s\n", output.Green("✓"), output.BoldCyan(p.Name), output.Dim(p.Description)) + } + } + + // Apply AI tier profile (lean/balanced/heavy). Respects explicit + // --ai-fast-model / --ai-deep-model overrides. 
+ if cfg.AIProfile != "" { + p, ok := config.AIProfileByName(cfg.AIProfile) + if !ok { + fmt.Println(output.Red("[-]"), "unknown AI profile:", cfg.AIProfile, + "— valid: lean, balanced, heavy") + os.Exit(1) + } + overrideFast := rootCmdRef != nil && rootCmdRef.Flags().Changed("ai-fast-model") + overrideDeep := rootCmdRef != nil && rootCmdRef.Flags().Changed("ai-deep-model") + config.ApplyAIProfile(&cfg, p, overrideFast, overrideDeep) + if !cfg.Silent { + fmt.Printf("%s AI profile %s: %s\n", + output.Green("✓"), output.BoldCyan(p.Name), output.Dim(p.Description)) + fmt.Printf(" %s %s %s %s\n", + output.Dim("triage:"), output.BoldWhite(cfg.AIFastModel), + output.Dim("deep:"), output.BoldWhite(cfg.AIDeepModel)) + } + } + + // Handle Ctrl-C gracefully. Set this up BEFORE the model-ensure step + // so long downloads can be interrupted cleanly. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) + go func() { + <-sigCh + fmt.Println() + fmt.Println(output.Yellow("⚠ Interrupted — shutting down...")) + cancel() + }() + + // Ensure Ollama models are present before scan starts. + if cfg.EnableAI && cfg.AutoPullModels { + if err := ensureAIModels(ctx, &cfg); err != nil { + if ctx.Err() == context.Canceled { + os.Exit(130) + } + fmt.Println(output.Red("[-]"), "AI setup:", err) + os.Exit(1) + } + } + + // Continuous monitoring mode: run the scan on an interval, diff and alert. 
+ if cfg.MonitorInterval > 0 { + runMonitor(ctx, cfg) + return + } + + p, err := pipeline.New(&cfg, pipeline.Options{}) + if err != nil { + fmt.Println(output.Red("[-]"), err) + os.Exit(1) + } + + var live *tui.LivePrinter + if cfg.Live { + live = tui.NewLivePrinter(p.Bus(), cfg.LiveVerbosity) + } + + if err := p.Run(ctx); err != nil { + if ctx.Err() == context.Canceled { + if live != nil { + live.Close() + } + os.Exit(130) + } + fmt.Println(output.Red("[!]"), "pipeline error:", err) + os.Exit(1) + } + + if live != nil { + live.Close() + } +} + +// runMonitor implements the asm-continuous mode: a single pipeline.Run +// wrapped in scheduler.Scheduler that ticks at MonitorInterval, diffs +// against the previous snapshot, and alerts on meaningful changes. +func runMonitor(ctx context.Context, cfg config.Config) { + scan := func(scanCtx context.Context) ([]*store.Host, error) { + p, err := pipeline.New(&cfg, pipeline.Options{}) + if err != nil { + return nil, err + } + if err := p.Run(scanCtx); err != nil { + return nil, err + } + return p.Store().All(scanCtx), nil + } + + s := scheduler.New(cfg.Domain, cfg.MonitorInterval, scan) + s.AddAlerter(scheduler.StdoutAlerter{}) + if cfg.MonitorWebhook != "" { + s.AddAlerter(scheduler.NewWebhookAlerter(cfg.MonitorWebhook)) + } + + fmt.Printf("%s Monitoring %s every %s — Ctrl-C to stop\n", + output.BoldGreen("▣"), output.BoldCyan(cfg.Domain), cfg.MonitorInterval) + + if err := s.Start(ctx); err != nil && !errorIs(err, context.Canceled) { + fmt.Println(output.Red("[!]"), "monitor error:", err) + os.Exit(1) + } +} + +// runWizard starts the interactive setup, then folds the user's choices +// back into cfg. Forces pipeline mode (wizard is v2-only by design). 
+func runWizard(cfg *config.Config) error { + choice, err := wizard.Run(context.Background(), wizard.Options{ + In: os.Stdin, + Out: os.Stdout, + OllamaURL: cfg.AIUrl, + }) + if err != nil { + return err + } + + cfg.Domain = validator.SanitizeDomain(choice.Target) + cfg.UsePipeline = true + cfg.Live = choice.Live + cfg.LiveVerbosity = choice.LiveVerbosity + cfg.Output = choice.Output + if choice.Format != "" { + cfg.Format = choice.Format + } + + // Scan profile name threads through --profile application (later). + if choice.ScanProfile != "" { + cfg.Profile = choice.ScanProfile + } + + // ASM-continuous interval translates into a duration flag. + if choice.MonitorInterval != "" { + d, parseErr := time.ParseDuration(choice.MonitorInterval) + if parseErr != nil { + return fmt.Errorf("invalid interval %q: %w", choice.MonitorInterval, parseErr) + } + cfg.MonitorInterval = d + } + + // AI tier. + if choice.AIProfile != "" { + cfg.EnableAI = true + cfg.AIProfile = choice.AIProfile + cfg.AIVerbose = choice.AIVerbose + cfg.AutoPullModels = choice.AIAutoPull + } else { + cfg.EnableAI = false + } + + return nil +} + +// ensureAIModels checks the Ollama server and downloads any missing models. +// Prints progress when --ai-verbose is on. Fails open on unreachable +// Ollama — the AI module itself will no-op gracefully. +func ensureAIModels(ctx context.Context, cfg *config.Config) error { + e := ai.NewModelEnsurer(cfg.AIUrl) + e.Verbose = cfg.AIVerbose || cfg.Verbose + e.Writer = os.Stderr + + if err := e.Reachable(ctx); err != nil { + if !cfg.Silent { + fmt.Println(output.Yellow("⚠ "), err.Error()) + fmt.Println(output.Dim(" AI modules will no-op for this run. 
Start `ollama serve` to enable.")) + } + return nil + } + + models := []string{} + if cfg.AIFastModel != "" { + models = append(models, cfg.AIFastModel) + } + if cfg.AIDeepModel != "" && cfg.AIDeepModel != cfg.AIFastModel { + models = append(models, cfg.AIDeepModel) + } + if len(models) == 0 { + return nil + } + + if !cfg.Silent { + fmt.Printf("%s Checking Ollama models: %s\n", + output.BoldCyan("⚙"), output.Dim(fmt.Sprintf("%v", models))) + } + if err := e.EnsureAll(ctx, models); err != nil { + return err + } + if !cfg.Silent { + fmt.Printf("%s Models ready\n", output.Green("✓")) + } + return nil +} + +// errorIs reports whether target appears in err's Unwrap chain (compared with ==). +// It is a minimal stand-in for errors.Is so main need not import errors. +func errorIs(err, target error) bool { + for err != nil { + if err == target { + return true + } + type unwrapper interface{ Unwrap() error } + u, ok := err.(unwrapper) + if !ok { + return false + } + err = u.Unwrap() + } + return false +} diff --git a/go.mod b/go.mod index 79784ce..5e68957 100644 --- a/go.mod +++ b/go.mod @@ -4,17 +4,18 @@ go 1.21 require ( github.com/fatih/color v1.16.0 + github.com/mattn/go-isatty v0.0.20 github.com/miekg/dns v1.1.58 github.com/spf13/cobra v1.8.0 + golang.org/x/net v0.20.0 + gopkg.in/yaml.v3 v3.0.1 ) require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect github.com/spf13/pflag v1.0.5 // indirect golang.org/x/mod v0.14.0 // indirect - golang.org/x/net v0.20.0 // indirect golang.org/x/sys v0.16.0 // indirect golang.org/x/tools v0.17.0 // indirect ) diff --git a/go.sum b/go.sum index 4775209..cbf4921 100644 --- a/go.sum +++ b/go.sum @@ -27,5 +27,7 @@ golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= golang.org/x/tools v0.17.0/go.mod 
h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/agent/agent.go b/internal/agent/agent.go new file mode 100644 index 0000000..0ac196c --- /dev/null +++ b/internal/agent/agent.go @@ -0,0 +1,109 @@ +// Package agent defines the Fase 3 AI agentic v2 interfaces: Planner, +// Worker, and Tool. Unlike Fase 0.6 adapters that merely wrap v1 Ollama +// calls, a v2 Agent plans multi-step investigations and executes tools +// via the event bus. +// +// The Agent lifecycle: +// +// 1. Planner receives the target + existing store snapshot, produces a +// Plan (ordered list of Tasks). +// 2. Each Task is dispatched to a Worker (specialized agent: XSS, auth, +// API, crypto, secrets, etc.) with a Tool set. +// 3. Workers call Tools (dns_resolve, http_request, check_sqli_blind, +// fetch_js, query_cve, ...) and reason over the results. +// 4. Results feed back into Plan revision; new Tasks may be scheduled. +// +// This file defines the contracts. Implementations land incrementally; +// for now a Basic Planner delegates to the Fase 0.6 v1 Ollama wrapper, +// and a native tool-using implementation follows. +package agent + +import ( + "context" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/store" +) + +// Tool is a capability an agent can invoke. Tools should be idempotent +// where possible and must respect ctx cancellation. +type Tool interface { + // Name is the machine identifier (e.g., "http_request", "dns_resolve"). + // Used in tool-call serialization for LLMs. + Name() string + + // Description is a short human-readable blurb used in the LLM tool + // descriptor. 
Keep it action-oriented: "fetch an HTTP URL and return + // the response headers + first 2KB of body". + Description() string + + // Schema returns the JSON-schema of the tool's argument object. Used + // to build function-calling descriptors and to validate inputs. + Schema() map[string]interface{} + + // Call invokes the tool with the given arguments. Returns a JSON-encoded + // result (often just a text summary). Errors should be returned — the + // agent decides how to react. + Call(ctx context.Context, args map[string]interface{}) (string, error) +} + +// Task is a single unit of agent work. +type Task struct { + ID string + Kind string // e.g. "investigate-xss", "audit-auth", "chain-finding" + Description string // natural-language goal the worker pursues + Subject string // target URL / subdomain / evidence the task focuses on + Context map[string]string // additional hints for the worker + CreatedAt time.Time +} + +// Plan is an ordered list of Tasks produced by the Planner. +type Plan struct { + Target string + Tasks []Task + Reason string // planner's rationale, logged for debugging +} + +// Planner decides what to investigate next given the current store state. +type Planner interface { + // Plan produces a new Plan. Called at the start of the analysis phase + // and whenever enough new evidence accumulates to justify replanning. + Plan(ctx context.Context, target string, storeSnap store.Store, bus *eventbus.Bus) (*Plan, error) + + // Name identifies the planner implementation for logs. + Name() string +} + +// Worker executes a single Task using a Toolset. +type Worker interface { + // Name identifies the worker (usually its specialization, e.g. "xss", + // "auth", "api", "crypto"). + Name() string + + // CanHandle reports whether the worker is a good fit for task. Workers + // are consulted in priority order. + CanHandle(task Task) bool + + // Execute carries out the task. 
The worker may call tools, update the + // store via bus events (VulnerabilityFound, SecretFound, AIFinding), + // and return a short natural-language summary for the planner. + Execute(ctx context.Context, task Task, tools Toolset, bus *eventbus.Bus, st store.Store) (summary string, err error) +} + +// Toolset is an indexed collection of Tools available to a worker. It is +// intentionally separate from Registry so workers receive a curated subset +// (e.g., a "crypto" worker gets oracle-style tools but not "send_slack"). +type Toolset map[string]Tool + +// Get returns the named tool, or nil if absent. +func (ts Toolset) Get(name string) Tool { return ts[name] } + +// Names returns every tool name in the set. Order is not guaranteed. +func (ts Toolset) Names() []string { + out := make([]string, 0, len(ts)) + for n := range ts { + out = append(out, n) + } + return out +} diff --git a/internal/agent/tools.go b/internal/agent/tools.go new file mode 100644 index 0000000..958e232 --- /dev/null +++ b/internal/agent/tools.go @@ -0,0 +1,141 @@ +package agent + +import ( + "context" + "crypto/tls" + "encoding/json" + "errors" + "io" + "net/http" + "time" + + godns "god-eye/internal/dns" +) + +// --- built-in tools ------------------------------------------------------- +// +// These tools cover the minimum needed for a planner to investigate +// discovered hosts without reinventing basic primitives. Fase 3 workers +// receive curated subsets via Toolset. + +// HTTPRequestTool fetches an arbitrary URL and returns status, headers, +// and (truncated) body. Maximum 64KB body returned. 
+type HTTPRequestTool struct { + Client *http.Client +} + +func NewHTTPRequestTool(timeoutSec int) *HTTPRequestTool { + return &HTTPRequestTool{ + Client: &http.Client{ + Timeout: time.Duration(timeoutSec) * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + }, + } +} + +func (t *HTTPRequestTool) Name() string { return "http_request" } +func (t *HTTPRequestTool) Description() string { return "Fetch an HTTP(S) URL and return status + headers + first 64KB of body." } + +func (t *HTTPRequestTool) Schema() map[string]interface{} { + return map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "url": map[string]interface{}{"type": "string"}, + "method": map[string]interface{}{"type": "string", "default": "GET"}, + "headers": map[string]interface{}{ + "type": "object", + "additionalProperties": map[string]interface{}{"type": "string"}, + }, + }, + "required": []string{"url"}, + } +} + +func (t *HTTPRequestTool) Call(ctx context.Context, args map[string]interface{}) (string, error) { + url, _ := args["url"].(string) + if url == "" { + return "", errors.New("url is required") + } + method, _ := args["method"].(string) + if method == "" { + method = "GET" + } + req, err := http.NewRequestWithContext(ctx, method, url, nil) + if err != nil { + return "", err + } + if hdrs, ok := args["headers"].(map[string]interface{}); ok { + for k, v := range hdrs { + if s, ok := v.(string); ok { + req.Header.Set(k, s) + } + } + } + req.Header.Set("User-Agent", "god-eye-v2-agent") + + resp, err := t.Client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) + out := map[string]interface{}{ + "status_code": resp.StatusCode, + "headers": flattenHeaders(resp.Header), + "body": string(body), + } + b, _ := json.Marshal(out) + return string(b), nil +} + +// DNSResolveTool resolves a hostname to A/CNAME/PTR records. 
+type DNSResolveTool struct { + Resolvers []string + TimeoutSec int +} + +func NewDNSResolveTool(resolvers []string, timeoutSec int) *DNSResolveTool { + if len(resolvers) == 0 { + resolvers = []string{"8.8.8.8:53", "1.1.1.1:53"} + } + return &DNSResolveTool{Resolvers: resolvers, TimeoutSec: timeoutSec} +} + +func (t *DNSResolveTool) Name() string { return "dns_resolve" } +func (t *DNSResolveTool) Description() string { return "Resolve a hostname to A/CNAME/PTR records." } + +func (t *DNSResolveTool) Schema() map[string]interface{} { + return map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "hostname": map[string]interface{}{"type": "string"}, + }, + "required": []string{"hostname"}, + } +} + +func (t *DNSResolveTool) Call(_ context.Context, args map[string]interface{}) (string, error) { + name, _ := args["hostname"].(string) + if name == "" { + return "", errors.New("hostname is required") + } + ips := godns.ResolveSubdomain(name, t.Resolvers, t.TimeoutSec) + cname := godns.ResolveCNAME(name, t.Resolvers, t.TimeoutSec) + out := map[string]interface{}{"ips": ips, "cname": cname} + b, _ := json.Marshal(out) + return string(b), nil +} + +func flattenHeaders(h http.Header) map[string]string { + out := make(map[string]string, len(h)) + for k, vs := range h { + if len(vs) == 0 { + continue + } + out[k] = vs[0] + } + return out +} diff --git a/internal/ai/ensure.go b/internal/ai/ensure.go new file mode 100644 index 0000000..bdb65e1 --- /dev/null +++ b/internal/ai/ensure.go @@ -0,0 +1,275 @@ +package ai + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// ModelEnsurer verifies that a given list of Ollama models is present on +// the local server, and pulls any that are missing. Designed for the +// pre-scan warmup: God's Eye should not crash mid-scan because a model +// wasn't downloaded — EnsureAll fixes that before the pipeline starts. 
type ModelEnsurer struct {
	BaseURL string       // Ollama base URL; NewModelEnsurer strips trailing slashes
	Client  *http.Client // client used for Ollama requests (tags lookup and pull)
	Verbose bool         // when true, progress lines are printed
	Writer  io.Writer    // where progress is printed; when nil the writer installed via SetStdout is used
}

// NewModelEnsurer constructs an ensurer against the given Ollama base URL
// (e.g. "http://localhost:11434"). An empty baseURL selects that default, and
// trailing slashes are trimmed. The HTTP client has no timeout because a
// fresh pull of a 30B model can legitimately take 10+ minutes.
func NewModelEnsurer(baseURL string) *ModelEnsurer {
	if baseURL == "" {
		baseURL = "http://localhost:11434"
	}
	return &ModelEnsurer{
		BaseURL: strings.TrimRight(baseURL, "/"),
		Client:  &http.Client{Timeout: 0},
	}
}

// Installed returns the set of model tags currently available on the
// Ollama server, keyed by the full name (e.g. "qwen3:1.7b").
//
// The lookup goes through e.Client (http.DefaultClient when nil) so that a
// custom transport configured on the ensurer is honoured, and is bounded by a
// 10 second deadline layered on ctx. A transport failure, a non-200 status or
// an undecodable body is returned as an error.
func (e *ModelEnsurer) Installed(ctx context.Context) (map[string]bool, error) {
	// e.Client deliberately has no timeout (pulls are long), so the short
	// deadline for this cheap call lives on the request context instead.
	const lookupTimeout = 10 * time.Second
	ctx, cancel := context.WithTimeout(ctx, lookupTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, e.BaseURL+"/api/tags", nil)
	if err != nil {
		return nil, err
	}
	client := e.Client
	if client == nil {
		client = http.DefaultClient
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ollama /api/tags returned %d", resp.StatusCode)
	}

	var body struct {
		Models []struct {
			Name string `json:"name"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		return nil, err
	}
	out := make(map[string]bool, len(body.Models))
	for _, m := range body.Models {
		out[m.Name] = true
	}
	return out, nil
}

// Pull streams a model pull from Ollama, printing progress lines when
// Verbose is true. Uses POST /api/pull with stream=true; each JSON line
// reports status + optional {total, completed} for byte-level progress.
+func (e *ModelEnsurer) Pull(ctx context.Context, model string) error { + payload := map[string]interface{}{"name": model, "stream": true} + body, _ := json.Marshal(payload) + req, err := http.NewRequestWithContext(ctx, "POST", e.BaseURL+"/api/pull", bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + resp, err := e.Client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return fmt.Errorf("ollama /api/pull returned %d: %s", resp.StatusCode, strings.TrimSpace(string(b))) + } + + scanner := bufio.NewScanner(resp.Body) + // Progress events can be large; bump the scanner buffer. + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + + var lastStatus string + var lastPct int + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + continue + } + var ev struct { + Status string `json:"status"` + Digest string `json:"digest,omitempty"` + Total int64 `json:"total,omitempty"` + Completed int64 `json:"completed,omitempty"` + Error string `json:"error,omitempty"` + } + if err := json.Unmarshal(line, &ev); err != nil { + continue + } + if ev.Error != "" { + return fmt.Errorf("pull %s: %s", model, ev.Error) + } + if !e.Verbose { + continue + } + w := e.writer() + if ev.Total > 0 && ev.Completed > 0 { + pct := int(float64(ev.Completed) / float64(ev.Total) * 100) + // Throttle: new status line always; otherwise only print when + // the percentage has moved ≥5 points since the last emission + // (or reaches a final 100% for this status exactly once). 
+ switch { + case ev.Status != lastStatus: + fmt.Fprintf(w, " %-24s %3d%% %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total)) + lastStatus = ev.Status + lastPct = pct + case pct >= lastPct+5 && pct < 100: + fmt.Fprintf(w, " %-24s %3d%% %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total)) + lastPct = pct + case pct == 100 && lastPct < 100: + fmt.Fprintf(w, " %-24s %3d%% %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total)) + lastPct = 100 + } + } else if ev.Status != lastStatus { + fmt.Fprintf(w, " %s\n", ev.Status) + lastStatus = ev.Status + lastPct = 0 + } + } + return scanner.Err() +} + +// EnsureAll checks every name in models. For each missing one it calls Pull. +// Already-present models are skipped. Returns on the first error. +// +// Name matching is generous: Ollama sometimes tags models as "qwen3:1.7b" +// and sometimes as "qwen3:1.7b-instruct-fp16", so we accept exact match, +// a ":latest" variant, or the bare model name with no tag. 
+func (e *ModelEnsurer) EnsureAll(ctx context.Context, models []string) error { + installed, err := e.Installed(ctx) + if err != nil { + return fmt.Errorf("query ollama: %w", err) + } + + unique := dedup(models) + missing := []string{} + for _, m := range unique { + if alreadyInstalled(installed, m) { + if e.Verbose { + fmt.Fprintf(e.writer(), "✓ %s already installed\n", m) + } + continue + } + missing = append(missing, m) + } + + if len(missing) == 0 { + return nil + } + + if e.Verbose { + fmt.Fprintf(e.writer(), "↓ Pulling %d missing model(s): %s\n", len(missing), strings.Join(missing, ", ")) + } + for _, m := range missing { + if err := ctx.Err(); err != nil { + return err + } + if e.Verbose { + fmt.Fprintf(e.writer(), "↓ %s\n", m) + } + if err := e.Pull(ctx, m); err != nil { + return fmt.Errorf("pull %s: %w", m, err) + } + if e.Verbose { + fmt.Fprintf(e.writer(), "✓ %s ready\n", m) + } + } + return nil +} + +// Reachable reports whether the Ollama server answers /api/tags. Callers +// should check this before EnsureAll to surface a friendly message. +func (e *ModelEnsurer) Reachable(ctx context.Context) error { + c := &http.Client{Timeout: 3 * time.Second} + req, err := http.NewRequestWithContext(ctx, "GET", e.BaseURL+"/api/tags", nil) + if err != nil { + return err + } + resp, err := c.Do(req) + if err != nil { + return errors.New("ollama not reachable at " + e.BaseURL + " (is `ollama serve` running?)") + } + resp.Body.Close() + if resp.StatusCode != 200 { + return fmt.Errorf("ollama at %s returned %d", e.BaseURL, resp.StatusCode) + } + return nil +} + +func (e *ModelEnsurer) writer() io.Writer { + if e.Writer != nil { + return e.Writer + } + return stdout +} + +var stdout io.Writer // populated by main via SetStdout; nil writer would fmt-print to os.Stdout + +// SetStdout installs the writer used when ModelEnsurer.Writer is nil. main.go +// sets this to os.Stdout; tests can set it to a bytes.Buffer. 
+func SetStdout(w io.Writer) { stdout = w } + +func alreadyInstalled(installed map[string]bool, model string) bool { + if installed[model] { + return true + } + if installed[model+":latest"] { + return true + } + if strings.Contains(model, ":") { + base := strings.SplitN(model, ":", 2)[0] + if installed[base] || installed[base+":latest"] { + return true + } + } + return false +} + +func dedup(ss []string) []string { + seen := make(map[string]struct{}, len(ss)) + out := make([]string, 0, len(ss)) + for _, s := range ss { + s = strings.TrimSpace(s) + if s == "" { + continue + } + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + return out +} + +func humanBytes(n int64) string { + const k = 1024.0 + if n < int64(k) { + return fmt.Sprintf("%dB", n) + } + units := []string{"KB", "MB", "GB", "TB"} + v := float64(n) / k + for _, u := range units { + if v < k { + return fmt.Sprintf("%.1f%s", v, u) + } + v /= k + } + return fmt.Sprintf("%.1fPB", v) +} diff --git a/internal/ai/ensure_test.go b/internal/ai/ensure_test.go new file mode 100644 index 0000000..5b4329e --- /dev/null +++ b/internal/ai/ensure_test.go @@ -0,0 +1,214 @@ +package ai + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestAlreadyInstalled(t *testing.T) { + installed := map[string]bool{ + "qwen3:1.7b": true, + "qwen2.5-coder:14b": true, + "custom-model:latest": true, + } + cases := []struct { + model string + want bool + }{ + {"qwen3:1.7b", true}, + {"qwen2.5-coder:14b", true}, + {"custom-model", true}, // via :latest fallback + {"llama3:8b", false}, + {"qwen3", false}, // bare name: only matches when ":latest" is installed (it isn't) + } + for _, c := range cases { + if got := alreadyInstalled(installed, c.model); got != c.want { + t.Errorf("alreadyInstalled(%q) = %v, want %v", c.model, got, c.want) + } + } +} + +func TestDedup(t *testing.T) { + got := dedup([]string{"a", "b", "a", "", "c", 
" b "}) + want := []string{"a", "b", "c"} + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("index %d: got %q want %q", i, got[i], want[i]) + } + } +} + +func TestHumanBytes(t *testing.T) { + cases := []struct { + in int64 + want string + }{ + {0, "0B"}, + {512, "512B"}, + {1024, "1.0KB"}, + {1024 * 1024, "1.0MB"}, + {1024 * 1024 * 1024, "1.0GB"}, + {int64(2.5 * 1024 * 1024 * 1024), "2.5GB"}, + } + for _, c := range cases { + if got := humanBytes(c.in); got != c.want { + t.Errorf("humanBytes(%d) = %q, want %q", c.in, got, c.want) + } + } +} + +func TestInstalled_ParsesTagsResponse(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/tags" { + http.NotFound(w, r) + return + } + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "models": []map[string]string{ + {"name": "qwen3:1.7b"}, + {"name": "qwen2.5-coder:14b"}, + }, + }) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + got, err := e.Installed(context.Background()) + if err != nil { + t.Fatal(err) + } + if !got["qwen3:1.7b"] || !got["qwen2.5-coder:14b"] { + t.Errorf("missing expected models: %v", got) + } +} + +func TestInstalled_Non200ReturnsError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "nope", http.StatusInternalServerError) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + if _, err := e.Installed(context.Background()); err == nil { + t.Error("expected error on non-200") + } +} + +func TestReachable(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(`{"models":[]}`)) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + if err := e.Reachable(context.Background()); err != nil { + t.Errorf("expected reachable, got %v", err) + } +} + +func 
TestReachable_Unreachable(t *testing.T) { + e := NewModelEnsurer("http://127.0.0.1:1") // nothing listens here + if err := e.Reachable(context.Background()); err == nil { + t.Error("expected unreachable error") + } +} + +func TestPull_StreamsProgress(t *testing.T) { + // Fake Ollama that emits a few NDJSON status events. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/pull" { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/x-ndjson") + events := []string{ + `{"status":"pulling manifest"}`, + `{"status":"downloading","digest":"sha256:abc","total":1048576,"completed":524288}`, + `{"status":"downloading","digest":"sha256:abc","total":1048576,"completed":1048576}`, + `{"status":"verifying sha256 digest"}`, + `{"status":"writing manifest"}`, + `{"status":"success"}`, + } + for _, e := range events { + w.Write([]byte(e + "\n")) + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + } + } + })) + defer srv.Close() + + buf := &bytes.Buffer{} + e := NewModelEnsurer(srv.URL) + e.Verbose = true + e.Writer = buf + + if err := e.Pull(context.Background(), "fake:1b"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + out := buf.String() + if !strings.Contains(out, "pulling manifest") { + t.Errorf("missing 'pulling manifest' in output: %q", out) + } + if !strings.Contains(out, "success") { + t.Errorf("missing 'success' in output: %q", out) + } +} + +func TestPull_ErrorBubblesUp(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(`{"error":"model not found"}` + "\n")) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + err := e.Pull(context.Background(), "nonexistent") + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "model not found") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestEnsureAll_SkipsInstalled_PullsMissing(t 
*testing.T) { + pullCalls := map[string]int{} + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/tags": + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "models": []map[string]string{{"name": "already-here:1b"}}, + }) + case "/api/pull": + var body struct { + Name string `json:"name"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + pullCalls[body.Name]++ + w.Write([]byte(`{"status":"success"}` + "\n")) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + if err := e.EnsureAll(context.Background(), []string{"already-here:1b", "missing-a:7b", "missing-b:14b"}); err != nil { + t.Fatal(err) + } + if pullCalls["already-here:1b"] > 0 { + t.Errorf("should not have pulled already-here") + } + if pullCalls["missing-a:7b"] != 1 || pullCalls["missing-b:14b"] != 1 { + t.Errorf("missing models not pulled correctly: %v", pullCalls) + } +} diff --git a/internal/ai/ollama.go b/internal/ai/ollama.go index a472cff..7587419 100644 --- a/internal/ai/ollama.go +++ b/internal/ai/ollama.go @@ -4,7 +4,9 @@ import ( "bytes" "encoding/json" "fmt" + "io" "net/http" + "os" "strings" "time" ) @@ -12,10 +14,27 @@ import ( // OllamaClient handles communication with local Ollama instance type OllamaClient struct { BaseURL string - FastModel string // deepseek-r1:1.5b for quick triage - DeepModel string // qwen2.5-coder:7b for deep analysis + FastModel string // qwen3:1.7b for quick triage (lean default) + DeepModel string // qwen2.5-coder:14b for deep analysis (lean default) Timeout time.Duration EnableCascade bool + + // Verbose controls whether every query is logged with timing + sizes. + // Writes to VerboseLogger or stderr when nil. Toggle via --ai-verbose. + Verbose bool + VerboseLogger io.Writer +} + +// logVerbose writes a single line to the verbose logger when Verbose is on. 
+func (c *OllamaClient) logVerbose(format string, args ...interface{}) { + if !c.Verbose { + return + } + w := c.VerboseLogger + if w == nil { + w = os.Stderr + } + fmt.Fprintf(w, "[ai] "+format+"\n", args...) } // OllamaRequest represents the request payload for Ollama API @@ -51,10 +70,10 @@ func NewOllamaClient(baseURL, fastModel, deepModel string, enableCascade bool) * baseURL = "http://localhost:11434" } if fastModel == "" { - fastModel = "deepseek-r1:1.5b" + fastModel = "qwen3:1.7b" } if deepModel == "" { - deepModel = "qwen2.5-coder:7b" + deepModel = "qwen2.5-coder:14b" } return &OllamaClient{ @@ -351,6 +370,9 @@ Output only the REAL secrets in their original [Type] format, one per line. If n // query sends a request to Ollama API func (c *OllamaClient) query(model, prompt string, timeout time.Duration) (string, error) { + start := time.Now() + c.logVerbose("→ %s prompt=%dB timeout=%s", model, len(prompt), timeout) + reqBody := OllamaRequest{ Model: model, Prompt: prompt, @@ -373,20 +395,25 @@ func (c *OllamaClient) query(model, prompt string, timeout time.Duration) (strin bytes.NewBuffer(jsonData), ) if err != nil { + c.logVerbose("✘ %s %s error=%v", model, time.Since(start).Round(time.Millisecond), err) return "", fmt.Errorf("ollama request failed: %v", err) } defer resp.Body.Close() if resp.StatusCode != 200 { + c.logVerbose("✘ %s status=%d %s", model, resp.StatusCode, time.Since(start).Round(time.Millisecond)) return "", fmt.Errorf("ollama returned status %d", resp.StatusCode) } var ollamaResp OllamaResponse if err := json.NewDecoder(resp.Body).Decode(&ollamaResp); err != nil { + c.logVerbose("✘ %s decode error=%v", model, err) return "", fmt.Errorf("failed to decode response: %v", err) } - return strings.TrimSpace(ollamaResp.Response), nil + out := strings.TrimSpace(ollamaResp.Response) + c.logVerbose("← %s response=%dB %s", model, len(out), time.Since(start).Round(time.Millisecond)) + return out, nil } // parseFindings extracts findings by severity 
from AI response diff --git a/internal/config/ai_profile.go b/internal/config/ai_profile.go new file mode 100644 index 0000000..c951773 --- /dev/null +++ b/internal/config/ai_profile.go @@ -0,0 +1,101 @@ +package config + +// AIProfile bundles the triage + deep models for a named AI tier. Unlike +// the scan-level Profile (bugbounty/pentest/…), an AIProfile only touches +// model selection — it doesn't flip stealth, recursion, or module enables. +type AIProfile struct { + Name string + Description string + FastModel string + DeepModel string + // MinRAMGB is an advisory (not enforced) hint about the memory footprint + // of both models loaded simultaneously. Printed in the profile help + // banner so users can pick the right tier for their machine. + MinRAMGB int +} + +// Built-in AI profiles. The lean tier matches the repository defaults so +// `--ai-profile lean` is always equivalent to "use whatever the defaults +// say". balanced and heavy upgrade deep model to Qwen3-Coder MoE which +// activates only 3.3B parameters per token despite its 30B total. +var ( + AIProfileLean = AIProfile{ + Name: "lean", + Description: "Runs on 16GB RAM; default. qwen3:1.7b triage + qwen2.5-coder:14b deep.", + FastModel: "qwen3:1.7b", + DeepModel: "qwen2.5-coder:14b", + MinRAMGB: 16, + } + + AIProfileBalanced = AIProfile{ + Name: "balanced", + Description: "32GB RAM / 24GB VRAM. Upgrades deep to qwen3-coder:30b MoE (3.3B active, 256K ctx).", + FastModel: "qwen3:4b", + DeepModel: "qwen3-coder:30b", + MinRAMGB: 32, + } + + AIProfileHeavy = AIProfile{ + Name: "heavy", + Description: "64GB+ RAM. Best-quality triage + deep. Slowest; ideal for final analysis passes.", + FastModel: "qwen3:8b", + DeepModel: "qwen3-coder:30b", + MinRAMGB: 64, + } +) + +// BuiltinAIProfiles lists every AIProfile in CLI help order. +var BuiltinAIProfiles = []AIProfile{ + AIProfileLean, + AIProfileBalanced, + AIProfileHeavy, +} + +// AIProfileByName resolves a named profile. 
Lookup is case-insensitive +// and tolerates the common alias "max" → heavy. +func AIProfileByName(name string) (AIProfile, bool) { + switch normaliseAIProfileName(name) { + case "lean": + return AIProfileLean, true + case "balanced", "balance", "mid": + return AIProfileBalanced, true + case "heavy", "max", "power": + return AIProfileHeavy, true + } + return AIProfile{}, false +} + +func normaliseAIProfileName(s string) string { + out := make([]byte, 0, len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + if c == ' ' || c == '_' || c == '-' { + continue + } + out = append(out, c) + } + return string(out) +} + +// ApplyAIProfile merges p's models into cfg. If cfg.AIFastModel / +// cfg.AIDeepModel were explicitly set by the user (overrideFast / +// overrideDeep true) the profile is ignored for that field. The caller +// is responsible for detecting explicit flags; in practice this comes +// from cobra's cmd.Flags().Changed("ai-fast-model"). 
+func ApplyAIProfile(cfg *Config, p AIProfile, overrideFast, overrideDeep bool) { + if cfg == nil { + return + } + if !overrideFast && p.FastModel != "" { + cfg.AIFastModel = p.FastModel + } + if !overrideDeep && p.DeepModel != "" { + cfg.AIDeepModel = p.DeepModel + } + if cfg.AIProfile == "" { + cfg.AIProfile = p.Name + } +} diff --git a/internal/config/ai_profile_test.go b/internal/config/ai_profile_test.go new file mode 100644 index 0000000..eb47003 --- /dev/null +++ b/internal/config/ai_profile_test.go @@ -0,0 +1,98 @@ +package config + +import "testing" + +func TestAIProfileByName(t *testing.T) { + cases := []struct { + in string + wantOK bool + wantTag string + }{ + {"lean", true, "qwen3:1.7b"}, + {"LEAN", true, "qwen3:1.7b"}, + {"balanced", true, "qwen3:4b"}, + {"balance", true, "qwen3:4b"}, + {"mid", true, "qwen3:4b"}, + {"heavy", true, "qwen3:8b"}, + {"max", true, "qwen3:8b"}, + {"power", true, "qwen3:8b"}, + {"Heavy", true, "qwen3:8b"}, + {"nope", false, ""}, + {"", false, ""}, + } + for _, c := range cases { + p, ok := AIProfileByName(c.in) + if ok != c.wantOK { + t.Errorf("AIProfileByName(%q) ok = %v, want %v", c.in, ok, c.wantOK) + continue + } + if ok && p.FastModel != c.wantTag { + t.Errorf("AIProfileByName(%q).FastModel = %q, want %q", c.in, p.FastModel, c.wantTag) + } + } +} + +func TestBuiltinAIProfiles_Unique(t *testing.T) { + names := map[string]bool{} + for _, p := range BuiltinAIProfiles { + if p.Name == "" { + t.Error("profile with empty name") + } + if p.FastModel == "" || p.DeepModel == "" { + t.Errorf("profile %q missing models", p.Name) + } + if p.Description == "" { + t.Errorf("profile %q missing description", p.Name) + } + if names[p.Name] { + t.Errorf("duplicate profile name: %q", p.Name) + } + names[p.Name] = true + } +} + +func TestApplyAIProfile_RespectsOverrides(t *testing.T) { + cfg := &Config{ + AIFastModel: "user-chose-this:1b", + AIDeepModel: "user-chose-that:7b", + } + ApplyAIProfile(cfg, AIProfileHeavy, true, true) + if 
cfg.AIFastModel != "user-chose-this:1b" { + t.Errorf("overrideFast was ignored: %q", cfg.AIFastModel) + } + if cfg.AIDeepModel != "user-chose-that:7b" { + t.Errorf("overrideDeep was ignored: %q", cfg.AIDeepModel) + } + if cfg.AIProfile != "heavy" { + t.Errorf("AIProfile not set to heavy, got %q", cfg.AIProfile) + } +} + +func TestApplyAIProfile_FillsUnsetFields(t *testing.T) { + cfg := &Config{} + ApplyAIProfile(cfg, AIProfileBalanced, false, false) + if cfg.AIFastModel != "qwen3:4b" { + t.Errorf("FastModel not applied: %q", cfg.AIFastModel) + } + if cfg.AIDeepModel != "qwen3-coder:30b" { + t.Errorf("DeepModel not applied: %q", cfg.AIDeepModel) + } + if cfg.AIProfile != "balanced" { + t.Errorf("AIProfile not set: %q", cfg.AIProfile) + } +} + +func TestApplyAIProfile_NilConfigNoop(t *testing.T) { + ApplyAIProfile(nil, AIProfileLean, false, false) // must not panic +} + +func TestApplyAIProfile_PartialOverride(t *testing.T) { + cfg := &Config{AIFastModel: "custom:1b"} + ApplyAIProfile(cfg, AIProfileHeavy, true, false) + if cfg.AIFastModel != "custom:1b" { + t.Errorf("FastModel overridden: %q", cfg.AIFastModel) + } + if cfg.AIDeepModel != "qwen3-coder:30b" { + t.Errorf("DeepModel not applied: %q", cfg.AIDeepModel) + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 4962716..30524b0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -49,6 +49,80 @@ type Config struct { NoTechScan bool // Disable tech scan (override when --enable-ai) NoASNScan bool // Disable ASN scan (override when --enable-ai) NoVHostScan bool // Disable vhost scan (override when --enable-ai) + + // v2: profile + per-module overrides loaded from config file or CLI. + // Profile is the named profile to apply before CLI flags. Empty = none. + Profile string + + // ConfigFile is the path to an optional YAML config file. Empty = search + // standard locations, then fall through to CLI defaults + profile only. 
+ ConfigFile string + + // ModuleSettings is a flat map of module-name → enabled. Populated from + // YAML ("modules:" section) and CLI (--enable/--disable flags if added). + // Consumed by ConfigView.ModuleEnabled. Empty means "honor each module's + // DefaultEnabled()". + ModuleSettings map[string]bool + + // UsePipeline opts into the v2 event-driven pipeline. When false (default + // during F0.6 migration) the legacy scanner.Run is used. Once F0.7 + // parity is verified this becomes true by default. + UsePipeline bool + + // Live toggles the Fase 4 LivePrinter that streams colorized scan + // events to the terminal alongside (or instead of) the final report. + Live bool + // LiveVerbosity controls how much the LivePrinter prints (0..2). + LiveVerbosity int + + // MonitorInterval, when > 0, switches the CLI into asm-continuous mode: + // the scan runs on this interval and diffs against the previous + // snapshot, firing Webhook/Stdout alerts on meaningful changes. + MonitorInterval time.Duration + // MonitorWebhook is a POST target for diff reports in monitor mode. + MonitorWebhook string + + // AIProfile is the named AI tier (lean/balanced/heavy). When set, it + // applies FastModel+DeepModel defaults before CLI overrides kick in. + // Empty string = use whatever AIFastModel/AIDeepModel resolve to via + // CLI flags + YAML. + AIProfile string + + // AIVerbose toggles detailed logging of every Ollama query: model, + // prompt size, response size, duration, triage decisions. Writes to + // stderr so stdout (JSON / silent modes) stays clean. + AIVerbose bool + + // AutoPullModels controls whether god-eye auto-downloads missing + // Ollama models at startup when --enable-ai is set. Defaults to true + // — flip to false if you want scan failures instead of silent pulls. + AutoPullModels bool + + // Wizard forces the interactive setup flow even when -d is present, + // so users can preview/tweak defaults. 
When -d is absent and stdin + // is a TTY, the wizard auto-starts without this flag. + Wizard bool + + // NucleiScan opts into the Nuclei-format template executor. Templates + // are loaded from NucleiTemplates (or ~/nuclei-templates as fallback, + // with auto-download of the official ZIP into ~/.god-eye/nuclei-templates + // when NucleiAutoDownload is true and no local dir is present). + NucleiScan bool + // NucleiTemplates is an optional override for the template directory. + NucleiTemplates string + // NucleiAutoDownload controls whether god-eye auto-fetches the + // official nuclei-templates ZIP on first use. Defaults to true. + NucleiAutoDownload bool + + // Proxy routes every outbound HTTP request (passive sources, probes, + // Nuclei, Ollama-if-remote) through the given URL. Supports: + // http://host:port - HTTP CONNECT proxy (Burp, ZAP, mitmproxy) + // https://host:port - HTTPS CONNECT proxy + // socks5://host:port - SOCKS5 with local DNS + // socks5h://host:port - SOCKS5 with proxy-side DNS (Tor convention) + // Basic auth is honoured: http://user:pass@host. + // Empty = no proxy (direct). 
+ Proxy string } // Stats holds scan statistics diff --git a/internal/config/config_test.go b/internal/config/config_test.go new file mode 100644 index 0000000..9a03d0c --- /dev/null +++ b/internal/config/config_test.go @@ -0,0 +1,102 @@ +package config + +import ( + "encoding/json" + "testing" +) + +func TestDefaultResolversNonEmpty(t *testing.T) { + if len(DefaultResolvers) == 0 { + t.Fatal("DefaultResolvers is empty") + } + for _, r := range DefaultResolvers { + if r == "" { + t.Errorf("empty resolver in DefaultResolvers") + } + } +} + +func TestDefaultWordlistNonEmpty(t *testing.T) { + if len(DefaultWordlist) < 50 { + t.Errorf("DefaultWordlist too small: %d entries", len(DefaultWordlist)) + } + seen := make(map[string]bool) + for _, w := range DefaultWordlist { + if w == "" { + t.Error("empty entry in DefaultWordlist") + } + // Note: v1 wordlist contains "smtp" and "staging" twice — that's a bug + // but not something we fix in baseline tests. Just verify no ALL duplicates. + seen[w] = true + } + if len(seen) < 50 { + t.Errorf("too many duplicates: %d unique out of %d", len(seen), len(DefaultWordlist)) + } +} + +func TestSubdomainResult_JSONRoundtrip(t *testing.T) { + orig := &SubdomainResult{ + Subdomain: "api.example.com", + IPs: []string{"1.2.3.4"}, + CNAME: "cname.example.com", + StatusCode: 200, + Title: "API", + Tech: []string{"nginx", "Go"}, + CloudProvider: "AWS", + TLSFingerprint: &TLSFingerprint{ + Vendor: "Fortinet", + Product: "FortiGate", + ApplianceType: "firewall", + }, + } + + data, err := json.Marshal(orig) + if err != nil { + t.Fatalf("marshal failed: %v", err) + } + + var decoded SubdomainResult + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("unmarshal failed: %v", err) + } + + if decoded.Subdomain != orig.Subdomain { + t.Errorf("Subdomain mismatch: got %q want %q", decoded.Subdomain, orig.Subdomain) + } + if len(decoded.IPs) != 1 || decoded.IPs[0] != "1.2.3.4" { + t.Errorf("IPs mismatch: got %v", decoded.IPs) + } + if 
decoded.TLSFingerprint == nil { + t.Fatal("TLSFingerprint is nil after roundtrip") + } + if decoded.TLSFingerprint.Vendor != "Fortinet" { + t.Errorf("TLSFingerprint.Vendor = %q, want Fortinet", decoded.TLSFingerprint.Vendor) + } +} + +func TestSubdomainResult_OmitemptyMinimal(t *testing.T) { + // Ensure zero-value struct produces a minimal JSON (only subdomain field would be present if set). + empty := &SubdomainResult{} + data, err := json.Marshal(empty) + if err != nil { + t.Fatal(err) + } + // Only the required "subdomain" field (empty string) should appear — every other is omitempty. + expected := `{"subdomain":""}` + if string(data) != expected { + t.Errorf("empty struct JSON = %s, want %s", string(data), expected) + } +} + +func TestConfigZeroValue(t *testing.T) { + var c Config + if c.Domain != "" { + t.Errorf("default Domain should be empty, got %q", c.Domain) + } + if c.EnableAI { + t.Error("EnableAI should default to false") + } + if c.Concurrency != 0 { + t.Error("Concurrency should default to 0 (overridden by CLI default)") + } +} diff --git a/internal/config/profile.go b/internal/config/profile.go new file mode 100644 index 0000000..4d8dca9 --- /dev/null +++ b/internal/config/profile.go @@ -0,0 +1,208 @@ +package config + +// Profile is a named bundle of defaults that tailors God's Eye for a specific +// use case. Profiles set module enable/disable, concurrency hints, stealth, +// and whether AI is on. CLI flags still override profile defaults. 
+type Profile struct {
+	// Name is the identifier ProfileByName matches against (exact,
+	// case-sensitive comparison); Description is a one-line summary.
+	Name        string
+	Description string
+
+	// Core tuning. ApplyProfile copies Concurrency and Timeout into the
+	// Config only when the Config value is zero or still at the cobra default
+	// (1000 and 5 respectively), and Stealth only when StealthMode is empty.
+	// NOTE(review): Timeout's unit is not visible here — presumably seconds; confirm.
+	Concurrency int
+	Timeout     int
+	Stealth     string // off, light, moderate, aggressive, paranoid
+
+	// Feature toggles (nil means "use module default"). ApplyProfile copies a
+	// non-nil value only while the matching Config flag is still false, so a
+	// profile can switch these on but never off. Recursive is additionally
+	// skipped when Config.NoRecursive is set.
+	AI         *bool
+	MultiAgent *bool
+	Recursive  *bool
+	NoBrute    *bool
+	NoProbe    *bool
+	NoPorts    *bool
+	NoTakeover *bool
+
+	// Advanced feature flags (nil = use module default). These are merged via
+	// applyPtrBool: only a true value has any effect, and only when neither
+	// the feature's enable flag nor its No* counterpart is already set.
+	CloudScan   *bool
+	APIScan     *bool
+	SecretsScan *bool
+	TechScan    *bool
+	ASNScan     *bool
+	VHostScan   *bool
+
+	// Per-module overrides (explicit enable/disable). ApplyProfile copies an
+	// entry into Config.ModuleSettings only when that module name is absent.
+	Modules map[string]bool
+}
+
+// ProfileBugBounty is tuned for bug-bounty recon: broad discovery, AI on,
+// secrets+tech+cloud scanning on, stealth off (speed matters). Its
+// Concurrency and Timeout equal the values ApplyProfile treats as the cobra
+// defaults (1000 and 5).
+var ProfileBugBounty = Profile{
+	Name:        "bugbounty",
+	Description: "Aggressive recon for bug-bounty: broad discovery, AI on, secrets/cloud/API/tech scanning, stealth off.",
+	Concurrency: 1000,
+	Timeout:     5,
+	Stealth:     "off",
+	AI:          ptrTrue(),
+	MultiAgent:  ptrTrue(),
+	Recursive:   ptrTrue(),
+	CloudScan:   ptrTrue(),
+	APIScan:     ptrTrue(),
+	SecretsScan: ptrTrue(),
+	TechScan:    ptrTrue(),
+	ASNScan:     ptrTrue(),
+	VHostScan:   ptrTrue(),
+}
+
+// ProfilePentest is tuned for authorized penetration tests: stealth light,
+// full enrichment, AI on for deeper analysis. It enables the same feature
+// set as ProfileBugBounty but with lower concurrency and a longer timeout.
+var ProfilePentest = Profile{
+	Name:        "pentest",
+	Description: "Authorized pentest: full enrichment with light stealth to avoid basic rate limits.",
+	Concurrency: 300,
+	Timeout:     10,
+	Stealth:     "light",
+	AI:          ptrTrue(),
+	MultiAgent:  ptrTrue(),
+	Recursive:   ptrTrue(),
+	CloudScan:   ptrTrue(),
+	APIScan:     ptrTrue(),
+	SecretsScan: ptrTrue(),
+	TechScan:    ptrTrue(),
+	ASNScan:     ptrTrue(),
+	VHostScan:   ptrTrue(),
+}
+
+// ProfileASMContinuous is tuned for attack-surface monitoring: reduced depth
+// per run, designed to be re-run periodically with the diff engine (Phase 5).
+// Stealth moderate to stay below detection thresholds when running daily.
+var ProfileASMContinuous = Profile{ + Name: "asm-continuous", + Description: "Continuous attack-surface monitoring; runs cheaper than full recon, feeds diff engine.", + Concurrency: 200, + Timeout: 10, + Stealth: "moderate", + AI: ptrFalse(), // AI only on findings that change, not full re-analysis + Recursive: ptrFalse(), // rely on diff to grow surface over time + CloudScan: ptrTrue(), + TechScan: ptrTrue(), + SecretsScan: ptrTrue(), +} + +// ProfileStealthMax is for highly sensitive targets where any detection is +// unacceptable. Very slow; passive-first. +var ProfileStealthMax = Profile{ + Name: "stealth-max", + Description: "Maximum evasion. Passive-only by default, slow request cadence.", + Concurrency: 3, + Timeout: 20, + Stealth: "paranoid", + NoBrute: ptrTrue(), + NoPorts: ptrTrue(), + AI: ptrFalse(), + TechScan: ptrTrue(), +} + +// ProfileQuick is for triage: skip expensive phases, produce a fast answer. +var ProfileQuick = Profile{ + Name: "quick", + Description: "Fast triage: passive enum + HTTP probe, no brute/JS/AI.", + Concurrency: 500, + Timeout: 5, + Stealth: "off", + NoBrute: ptrTrue(), + AI: ptrFalse(), +} + +// BuiltinProfiles lists every named profile that ships with the tool, in a +// stable order for docs/help output. +var BuiltinProfiles = []Profile{ + ProfileBugBounty, + ProfilePentest, + ProfileASMContinuous, + ProfileStealthMax, + ProfileQuick, +} + +// ProfileByName returns the named profile, or ok=false when not found. +func ProfileByName(name string) (Profile, bool) { + for _, p := range BuiltinProfiles { + if p.Name == name { + return p, true + } + } + return Profile{}, false +} + +// ApplyProfile merges a profile into cfg. Existing non-zero values in cfg +// take precedence (CLI flags win over profile defaults). Pointer-typed +// profile fields are applied only when they are non-nil. 
+func ApplyProfile(cfg *Config, p Profile) {
+	if cfg == nil {
+		return
+	}
+
+	// Scalar tuning: a value that is zero or still at the CLI default is
+	// treated as "not chosen by the user" and replaced by the profile's.
+	switch cfg.Concurrency {
+	case 0, 1000: // 1000 is the cobra default
+		cfg.Concurrency = p.Concurrency
+	}
+	switch cfg.Timeout {
+	case 0, 5: // 5 is cobra default
+		cfg.Timeout = p.Timeout
+	}
+	if cfg.StealthMode == "" {
+		cfg.StealthMode = p.Stealth
+	}
+
+	// fill copies the profile's opinion into a Config flag, but only when the
+	// profile has one (src != nil) and the flag has not been raised already.
+	fill := func(dst, src *bool) {
+		if src != nil && !*dst {
+			*dst = *src
+		}
+	}
+	fill(&cfg.EnableAI, p.AI)
+	fill(&cfg.MultiAgent, p.MultiAgent)
+	if !cfg.NoRecursive {
+		// An explicit NoRecursive blocks the profile from enabling recursion.
+		fill(&cfg.Recursive, p.Recursive)
+	}
+	fill(&cfg.NoBrute, p.NoBrute)
+	fill(&cfg.NoProbe, p.NoProbe)
+	fill(&cfg.NoPorts, p.NoPorts)
+	fill(&cfg.NoTakeover, p.NoTakeover)
+
+	// Features that have both an enable flag and a No* counterpart.
+	applyPtrBool(&cfg.CloudScan, &cfg.NoCloudScan, p.CloudScan)
+	applyPtrBool(&cfg.APIScan, &cfg.NoAPIScan, p.APIScan)
+	applyPtrBool(&cfg.SecretsScan, &cfg.NoSecrets, p.SecretsScan)
+	applyPtrBool(&cfg.TechScan, &cfg.NoTechScan, p.TechScan)
+	applyPtrBool(&cfg.ASNScan, &cfg.NoASNScan, p.ASNScan)
+	applyPtrBool(&cfg.VHostScan, &cfg.NoVHostScan, p.VHostScan)
+
+	// Module overrides: entries already present in cfg win over the profile.
+	if cfg.ModuleSettings == nil {
+		cfg.ModuleSettings = make(map[string]bool)
+	}
+	for modName, on := range p.Modules {
+		if _, set := cfg.ModuleSettings[modName]; set {
+			continue
+		}
+		cfg.ModuleSettings[modName] = on
+	}
+}
+
+// applyPtrBool merges a ptr-bool from a profile into an (enable, disable)
+// flag pair on the Config struct. The v1 scheme uses two flags per feature
+// (X / NoX) to model three states: unset, on, off. A nil profile pointer
+// means "leave unchanged"; *p == true enables the feature unless the user
+// has already set X or NoX; *p == false changes nothing (a profile never
+// forces a feature off — only the user's explicit flag does).
+func applyPtrBool(enable, disable *bool, p *bool) { + if p == nil { + return + } + if *p && !*enable && !*disable { + *enable = true + } +} + +func ptrTrue() *bool { v := true; return &v } +func ptrFalse() *bool { v := false; return &v } diff --git a/internal/config/profile_test.go b/internal/config/profile_test.go new file mode 100644 index 0000000..e7bfce8 --- /dev/null +++ b/internal/config/profile_test.go @@ -0,0 +1,143 @@ +package config + +import "testing" + +func TestProfileByName(t *testing.T) { + tests := []struct { + name string + input string + wantOK bool + wantStr string + }{ + {"bugbounty", "bugbounty", true, "bugbounty"}, + {"pentest", "pentest", true, "pentest"}, + {"asm-continuous", "asm-continuous", true, "asm-continuous"}, + {"stealth-max", "stealth-max", true, "stealth-max"}, + {"quick", "quick", true, "quick"}, + {"empty", "", false, ""}, + {"unknown", "nonsense", false, ""}, + {"case sensitive", "BugBounty", false, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := ProfileByName(tt.input) + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + if ok && got.Name != tt.wantStr { + t.Errorf("Name = %q, want %q", got.Name, tt.wantStr) + } + }) + } +} + +func TestBuiltinProfiles_NonEmpty(t *testing.T) { + if len(BuiltinProfiles) < 5 { + t.Errorf("expected ≥5 built-in profiles, got %d", len(BuiltinProfiles)) + } + seen := make(map[string]bool) + for _, p := range BuiltinProfiles { + if p.Name == "" { + t.Error("profile with empty name") + } + if p.Description == "" { + t.Errorf("profile %q has empty description", p.Name) + } + if seen[p.Name] { + t.Errorf("duplicate profile name: %q", p.Name) + } + seen[p.Name] = true + } +} + +func TestApplyProfile_NilConfigNoop(t *testing.T) { + ApplyProfile(nil, ProfileBugBounty) // must not panic +} + +func TestApplyProfile_FillsDefaults(t *testing.T) { + cfg := &Config{} // zero + ApplyProfile(cfg, ProfileBugBounty) + if cfg.Concurrency != 
ProfileBugBounty.Concurrency { + t.Errorf("Concurrency = %d, want %d", cfg.Concurrency, ProfileBugBounty.Concurrency) + } + if cfg.Timeout != ProfileBugBounty.Timeout { + t.Errorf("Timeout = %d, want %d", cfg.Timeout, ProfileBugBounty.Timeout) + } + if cfg.StealthMode != ProfileBugBounty.Stealth { + t.Errorf("Stealth = %q, want %q", cfg.StealthMode, ProfileBugBounty.Stealth) + } + if !cfg.EnableAI { + t.Error("bugbounty profile should enable AI") + } + if !cfg.MultiAgent { + t.Error("bugbounty profile should enable MultiAgent") + } + if !cfg.Recursive { + t.Error("bugbounty profile should enable Recursive") + } + if !cfg.CloudScan { + t.Error("bugbounty profile should enable CloudScan") + } +} + +func TestApplyProfile_DoesNotOverrideExplicitFlags(t *testing.T) { + cfg := &Config{ + Concurrency: 42, + Timeout: 999, + StealthMode: "paranoid", + EnableAI: false, // explicitly disabled before profile apply + } + // Apply bugbounty which normally enables AI + sets concurrency to 1000 + ApplyProfile(cfg, ProfileBugBounty) + + // Explicit non-default user values should survive + if cfg.Concurrency != 42 { + t.Errorf("Concurrency overwritten: %d", cfg.Concurrency) + } + if cfg.Timeout != 999 { + t.Errorf("Timeout overwritten: %d", cfg.Timeout) + } + if cfg.StealthMode != "paranoid" { + t.Errorf("Stealth overwritten: %q", cfg.StealthMode) + } + // Profile AI enable should still apply since cfg.EnableAI was false + // (we can't distinguish "user explicitly set false" from "zero value"). + // This is a known limitation documented in the CLI help. + if !cfg.EnableAI { + t.Errorf("AI not enabled by profile despite cfg.EnableAI being false") + } +} + +func TestApplyProfile_NoForceOff(t *testing.T) { + // stealth-max sets NoBrute=true. If user did NOT disable, profile wins. 
+ cfg := &Config{} + ApplyProfile(cfg, ProfileStealthMax) + if !cfg.NoBrute { + t.Error("stealth-max profile should set NoBrute") + } +} + +func TestApplyProfile_ModuleSettings(t *testing.T) { + p := Profile{ + Name: "custom", + Modules: map[string]bool{ + "sources.crtsh": true, + "brute": false, + }, + } + cfg := &Config{} + ApplyProfile(cfg, p) + if got := cfg.ModuleSettings["sources.crtsh"]; !got { + t.Error("crtsh should be enabled") + } + if got := cfg.ModuleSettings["brute"]; got { + t.Error("brute should be disabled") + } + + // User pre-existing setting must not be overridden + cfg2 := &Config{ModuleSettings: map[string]bool{"sources.crtsh": false}} + ApplyProfile(cfg2, p) + if cfg2.ModuleSettings["sources.crtsh"] { + t.Error("user explicit module setting was overridden") + } +} diff --git a/internal/config/view.go b/internal/config/view.go new file mode 100644 index 0000000..b4add9e --- /dev/null +++ b/internal/config/view.go @@ -0,0 +1,151 @@ +package config + +// View implements module.ConfigView over a *Config. Modules receive a View +// (not the raw Config pointer) to prevent them from mutating global scan +// state — reads only. +// +// The implementation is intentionally small: it exposes just the shape +// needed by the module package without pulling in a full generic key/value +// store. Module-specific settings live in ModuleSettings; typed options +// should be hoisted to first-class fields on Config when they are used +// across modules. +type View struct { + cfg *Config +} + +// NewView wraps cfg as a ConfigView. cfg may be nil, in which case every +// accessor returns the fallback/zero value. +func NewView(cfg *Config) *View { return &View{cfg: cfg} } + +// Profile returns the active profile name ("" when none). +func (v *View) Profile() string { + if v == nil || v.cfg == nil { + return "" + } + return v.cfg.Profile +} + +// Bool reads a boolean config key by well-known name. Unknown keys return fb. 
+// The key space is deliberately flat — dotted names such as "ai.enabled" are
+// plain strings, not paths — to avoid accidental namespacing bugs. A nil View
+// or a nil underlying Config also yields fb.
+func (v *View) Bool(key string, fb bool) bool {
+	if v != nil && v.cfg != nil {
+		c := v.cfg
+		// A recognised key replaces the fallback with the live config value.
+		switch key {
+		case "ai.enabled":
+			fb = c.EnableAI
+		case "ai.cascade":
+			fb = c.AICascade
+		case "ai.deep":
+			fb = c.AIDeepAnalysis
+		case "ai.multi_agent":
+			fb = c.MultiAgent
+		case "ai.verbose":
+			fb = c.AIVerbose
+		case "ai.auto_pull":
+			fb = c.AutoPullModels
+		case "silent":
+			fb = c.Silent
+		case "verbose":
+			fb = c.Verbose
+		case "json":
+			fb = c.JsonOutput
+		case "no_brute":
+			fb = c.NoBrute
+		case "no_probe":
+			fb = c.NoProbe
+		case "no_ports":
+			fb = c.NoPorts
+		case "no_takeover":
+			fb = c.NoTakeover
+		case "only_active":
+			fb = c.OnlyActive
+		case "recursive":
+			fb = c.Recursive
+		case "cloud_scan":
+			fb = c.CloudScan
+		case "api_scan":
+			fb = c.APIScan
+		case "secrets_scan":
+			fb = c.SecretsScan
+		case "tech_scan":
+			fb = c.TechScan
+		case "asn_scan":
+			fb = c.ASNScan
+		case "vhost_scan":
+			fb = c.VHostScan
+		case "nuclei_scan":
+			fb = c.NucleiScan
+		case "nuclei_auto_download":
+			fb = c.NucleiAutoDownload
+		}
+	}
+	return fb
+}
+
+// Int reads an integer setting by well-known key ("concurrency", "timeout",
+// "recursive.depth"). Unknown keys, a nil View, or a nil underlying Config
+// return fb.
+func (v *View) Int(key string, fb int) int {
+	if v == nil || v.cfg == nil {
+		return fb
+	}
+	if key == "concurrency" {
+		return v.cfg.Concurrency
+	}
+	if key == "timeout" {
+		return v.cfg.Timeout
+	}
+	if key == "recursive.depth" {
+		return v.cfg.RecursiveDepth
+	}
+	return fb
+}
+
+// String reads a string setting by well-known key. Unknown keys, a nil View,
+// or a nil underlying Config return fb.
+func (v *View) String(key string, fb string) string {
+	if v != nil && v.cfg != nil {
+		c := v.cfg
+		// A recognised key replaces the fallback with the live config value,
+		// even when that value is the empty string.
+		switch key {
+		case "domain":
+			fb = c.Domain
+		case "wordlist":
+			fb = c.Wordlist
+		case "output":
+			fb = c.Output
+		case "format":
+			fb = c.Format
+		case "ports":
+			fb = c.Ports
+		case "resolvers":
+			fb = c.Resolvers
+		case "stealth":
+			fb = c.StealthMode
+		case "ai.url":
+			fb = c.AIUrl
+		case "ai.fast_model":
+			fb = c.AIFastModel
+		case "ai.deep_model":
+			fb = c.AIDeepModel
+		case "nuclei_templates":
+			fb = c.NucleiTemplates
+		}
+	}
+	return fb
+}
+
+// Strings reads a string-slice key. It is a placeholder: no multi-value keys
+// exist yet, so every key yields nil. The method is reserved for
+// module-specific settings loaded from YAML.
+func (v *View) Strings(key string) []string {
+	_ = key // intentionally unused until multi-value keys are defined
+	return nil
+}
+
+// ModuleEnabled returns true when the config explicitly enabled the module
+// by name (via ModuleSettings). It returns false otherwise; callers should
+// fall back to the module's DefaultEnabled() when this returns false.
+// Note that an explicit false entry and a missing entry are indistinguishable
+// through this method: both report false.
+func (v *View) ModuleEnabled(name string) bool { + if v == nil || v.cfg == nil { + return false + } + if v.cfg.ModuleSettings == nil { + return false + } + return v.cfg.ModuleSettings[name] +} diff --git a/internal/config/view_test.go b/internal/config/view_test.go new file mode 100644 index 0000000..7f57ab3 --- /dev/null +++ b/internal/config/view_test.go @@ -0,0 +1,156 @@ +package config + +import "testing" + +func TestView_NilSafe(t *testing.T) { + var v *View + if v.Profile() != "" { + t.Error("nil view Profile should be empty") + } + if v.Bool("ai.enabled", true) != true { + t.Error("nil view Bool should return fallback") + } + if v.Int("concurrency", 99) != 99 { + t.Error("nil view Int should return fallback") + } + if v.String("domain", "fb") != "fb" { + t.Error("nil view String should return fallback") + } + if v.ModuleEnabled("x") { + t.Error("nil view ModuleEnabled should be false") + } +} + +func TestView_Profile(t *testing.T) { + v := NewView(&Config{Profile: "bugbounty"}) + if v.Profile() != "bugbounty" { + t.Errorf("Profile = %q", v.Profile()) + } +} + +func TestView_Bool(t *testing.T) { + cfg := &Config{ + EnableAI: true, + AICascade: true, + AIDeepAnalysis: false, + MultiAgent: true, + Silent: true, + Verbose: false, + JsonOutput: true, + NoBrute: true, + OnlyActive: true, + Recursive: true, + CloudScan: true, + APIScan: false, + } + v := NewView(cfg) + + tests := []struct { + key string + fb bool + want bool + }{ + {"ai.enabled", false, true}, + {"ai.cascade", false, true}, + {"ai.deep", true, false}, + {"ai.multi_agent", false, true}, + {"silent", false, true}, + {"verbose", true, false}, + {"json", false, true}, + {"no_brute", false, true}, + {"only_active", false, true}, + {"recursive", false, true}, + {"cloud_scan", false, true}, + {"api_scan", true, false}, + {"unknown_key", true, true}, // fallback + {"unknown_key", false, false}, + } + + for _, tt := range tests { + if got := v.Bool(tt.key, tt.fb); got != tt.want { + t.Errorf("Bool(%q, %v) 
= %v, want %v", tt.key, tt.fb, got, tt.want) + } + } +} + +func TestView_Int(t *testing.T) { + v := NewView(&Config{Concurrency: 500, Timeout: 10, RecursiveDepth: 4}) + if v.Int("concurrency", 1) != 500 { + t.Errorf("concurrency wrong") + } + if v.Int("timeout", 1) != 10 { + t.Errorf("timeout wrong") + } + if v.Int("recursive.depth", 1) != 4 { + t.Errorf("recursive.depth wrong") + } + if v.Int("unknown", 99) != 99 { + t.Errorf("unknown key should return fallback") + } +} + +func TestView_String(t *testing.T) { + v := NewView(&Config{ + Domain: "example.com", + Wordlist: "/wl", + Output: "/out", + Format: "json", + Ports: "80,443", + Resolvers: "8.8.8.8", + StealthMode: "light", + AIUrl: "http://x", + AIFastModel: "f", + AIDeepModel: "d", + }) + + cases := map[string]string{ + "domain": "example.com", + "wordlist": "/wl", + "output": "/out", + "format": "json", + "ports": "80,443", + "resolvers": "8.8.8.8", + "stealth": "light", + "ai.url": "http://x", + "ai.fast_model": "f", + "ai.deep_model": "d", + } + for k, want := range cases { + if got := v.String(k, "fb"); got != want { + t.Errorf("String(%q) = %q, want %q", k, got, want) + } + } + + if v.String("unknown", "fb") != "fb" { + t.Error("unknown key should return fallback") + } +} + +func TestView_Strings(t *testing.T) { + // Placeholder — no multi-value keys defined yet + v := NewView(&Config{}) + if got := v.Strings("anything"); got != nil { + t.Errorf("expected nil, got %v", got) + } +} + +func TestView_ModuleEnabled(t *testing.T) { + cfg := &Config{ModuleSettings: map[string]bool{"m1": true, "m2": false}} + v := NewView(cfg) + if !v.ModuleEnabled("m1") { + t.Error("m1 should be enabled") + } + if v.ModuleEnabled("m2") { + t.Error("m2 should be disabled (false in map)") + } + if v.ModuleEnabled("unset") { + t.Error("unset module should be false") + } +} + +func TestView_ModuleEnabled_NilMap(t *testing.T) { + v := NewView(&Config{}) + if v.ModuleEnabled("anything") { + t.Error("nil map should result in false") 
+ } +} diff --git a/internal/config/yaml.go b/internal/config/yaml.go new file mode 100644 index 0000000..cf54477 --- /dev/null +++ b/internal/config/yaml.go @@ -0,0 +1,181 @@ +package config + +import ( + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// YAMLConfig is the schema persisted on disk. Fields are intentionally a +// subset of Config — YAML is for declarative, long-lived settings +// (profile, module toggles, resolver lists, AI model names); ephemeral +// flags (--silent, --verbose, --domain) remain CLI-only. +type YAMLConfig struct { + Profile string `yaml:"profile,omitempty"` + Concurrency int `yaml:"concurrency,omitempty"` + Timeout int `yaml:"timeout,omitempty"` + Stealth string `yaml:"stealth,omitempty"` + Resolvers []string `yaml:"resolvers,omitempty"` + Wordlist string `yaml:"wordlist,omitempty"` + Modules map[string]bool `yaml:"modules,omitempty"` + AI *YAMLAIConfig `yaml:"ai,omitempty"` + Output *YAMLOutputConfig `yaml:"output,omitempty"` +} + +// YAMLAIConfig groups AI-related YAML fields. +type YAMLAIConfig struct { + Enabled bool `yaml:"enabled,omitempty"` + URL string `yaml:"url,omitempty"` + FastModel string `yaml:"fast_model,omitempty"` + DeepModel string `yaml:"deep_model,omitempty"` + Cascade *bool `yaml:"cascade,omitempty"` + Deep bool `yaml:"deep,omitempty"` + MultiAgent bool `yaml:"multi_agent,omitempty"` +} + +// YAMLOutputConfig groups output-related YAML fields. +type YAMLOutputConfig struct { + Path string `yaml:"path,omitempty"` + Format string `yaml:"format,omitempty"` + JSON bool `yaml:"json,omitempty"` +} + +// LoadYAML reads a YAML config file from path and returns the parsed config. +// Returns (nil, nil) when the file does not exist — callers should treat this +// as "no config file, use defaults". Returns an error for any other I/O or +// parse failure. 
+func LoadYAML(path string) (*YAMLConfig, error) { + if path == "" { + return nil, nil + } + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read config %q: %w", path, err) + } + + var y YAMLConfig + if err := yaml.Unmarshal(data, &y); err != nil { + return nil, fmt.Errorf("parse config %q: %w", path, err) + } + return &y, nil +} + +// ApplyYAML merges a parsed YAML config into cfg. CLI flags win: YAML only +// fills fields that are still at their zero value on cfg. The profile named +// in YAML is applied only if cfg.Profile is empty. +func ApplyYAML(cfg *Config, y *YAMLConfig) { + if cfg == nil || y == nil { + return + } + + if cfg.Profile == "" && y.Profile != "" { + cfg.Profile = y.Profile + } + if cfg.Concurrency == 0 && y.Concurrency > 0 { + cfg.Concurrency = y.Concurrency + } + if cfg.Timeout == 0 && y.Timeout > 0 { + cfg.Timeout = y.Timeout + } + if cfg.StealthMode == "" && y.Stealth != "" { + cfg.StealthMode = y.Stealth + } + if cfg.Resolvers == "" && len(y.Resolvers) > 0 { + cfg.Resolvers = joinComma(y.Resolvers) + } + if cfg.Wordlist == "" && y.Wordlist != "" { + cfg.Wordlist = y.Wordlist + } + + if len(y.Modules) > 0 { + if cfg.ModuleSettings == nil { + cfg.ModuleSettings = make(map[string]bool) + } + for name, enabled := range y.Modules { + if _, already := cfg.ModuleSettings[name]; !already { + cfg.ModuleSettings[name] = enabled + } + } + } + + if y.AI != nil { + if y.AI.Enabled && !cfg.EnableAI { + cfg.EnableAI = true + } + if cfg.AIUrl == "" && y.AI.URL != "" { + cfg.AIUrl = y.AI.URL + } + if cfg.AIFastModel == "" && y.AI.FastModel != "" { + cfg.AIFastModel = y.AI.FastModel + } + if cfg.AIDeepModel == "" && y.AI.DeepModel != "" { + cfg.AIDeepModel = y.AI.DeepModel + } + if y.AI.Cascade != nil && !cfg.AICascade { + cfg.AICascade = *y.AI.Cascade + } + if y.AI.Deep && !cfg.AIDeepAnalysis { + cfg.AIDeepAnalysis = true + } + if y.AI.MultiAgent && !cfg.MultiAgent { + cfg.MultiAgent 
= true + } + } + + if y.Output != nil { + if cfg.Output == "" && y.Output.Path != "" { + cfg.Output = y.Output.Path + } + if cfg.Format == "" && y.Output.Format != "" { + cfg.Format = y.Output.Format + } + if y.Output.JSON && !cfg.JsonOutput { + cfg.JsonOutput = true + } + } +} + +// DefaultConfigPaths returns the ordered list of paths LoadYAML scans by +// default when no --config is provided. The first existing file wins. +func DefaultConfigPaths() []string { + home, err := os.UserHomeDir() + var homeCfg string + if err == nil { + homeCfg = filepath.Join(home, ".god-eye", "config.yaml") + } + return []string{ + "god-eye.yaml", + ".god-eye.yaml", + homeCfg, + } +} + +// FindConfigFile returns the first existing file in DefaultConfigPaths, or +// "" if none is found. +func FindConfigFile() string { + for _, p := range DefaultConfigPaths() { + if p == "" { + continue + } + if _, err := os.Stat(p); err == nil { + return p + } + } + return "" +} + +func joinComma(ss []string) string { + out := "" + for i, s := range ss { + if i > 0 { + out += "," + } + out += s + } + return out +} diff --git a/internal/config/yaml_test.go b/internal/config/yaml_test.go new file mode 100644 index 0000000..529b9a1 --- /dev/null +++ b/internal/config/yaml_test.go @@ -0,0 +1,270 @@ +package config + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadYAML_Missing(t *testing.T) { + y, err := LoadYAML("/tmp/this-definitely-does-not-exist-xyz.yaml") + if err != nil { + t.Errorf("missing file should return nil error, got %v", err) + } + if y != nil { + t.Errorf("missing file should return nil config, got %+v", y) + } +} + +func TestLoadYAML_EmptyPath(t *testing.T) { + y, err := LoadYAML("") + if y != nil || err != nil { + t.Errorf("empty path → (nil, nil), got (%+v, %v)", y, err) + } +} + +func TestLoadYAML_Malformed(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "bad.yaml") + os.WriteFile(path, []byte("profile: [unclosed"), 0o644) + _, err := LoadYAML(path) + 
if err == nil { + t.Error("expected parse error for malformed YAML") + } +} + +func TestLoadYAML_Full(t *testing.T) { + content := ` +profile: bugbounty +concurrency: 500 +timeout: 8 +stealth: moderate +resolvers: + - 8.8.8.8 + - 1.1.1.1 +wordlist: /tmp/wl.txt +modules: + sources.crtsh: true + brute: false +ai: + enabled: true + url: http://localhost:11434 + fast_model: qwen3:1.7b + deep_model: qwen2.5-coder:14b + cascade: true + deep: true + multi_agent: true +output: + path: /tmp/out.json + format: json + json: true +` + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + os.WriteFile(path, []byte(content), 0o644) + + y, err := LoadYAML(path) + if err != nil { + t.Fatal(err) + } + if y == nil { + t.Fatal("expected non-nil config") + } + if y.Profile != "bugbounty" { + t.Errorf("Profile = %q", y.Profile) + } + if y.Concurrency != 500 { + t.Errorf("Concurrency = %d", y.Concurrency) + } + if y.Timeout != 8 { + t.Errorf("Timeout = %d", y.Timeout) + } + if y.Stealth != "moderate" { + t.Errorf("Stealth = %q", y.Stealth) + } + if len(y.Resolvers) != 2 { + t.Errorf("Resolvers len = %d", len(y.Resolvers)) + } + if y.Modules["sources.crtsh"] != true { + t.Errorf("modules.sources.crtsh = false") + } + if y.Modules["brute"] != false { + t.Errorf("modules.brute = true") + } + if y.AI == nil || !y.AI.Enabled { + t.Error("AI not enabled") + } + if y.AI.URL != "http://localhost:11434" { + t.Errorf("AI.URL = %q", y.AI.URL) + } + if y.Output == nil || y.Output.Path != "/tmp/out.json" { + t.Errorf("Output.Path wrong") + } +} + +func TestApplyYAML_NilInputs(t *testing.T) { + ApplyYAML(nil, &YAMLConfig{Profile: "x"}) // must not panic + ApplyYAML(&Config{}, nil) // must not panic +} + +func TestApplyYAML_FillsZeroFields(t *testing.T) { + cfg := &Config{} + y := &YAMLConfig{ + Profile: "quick", + Concurrency: 123, + Timeout: 7, + Stealth: "light", + Resolvers: []string{"8.8.8.8", "1.1.1.1"}, + Wordlist: "/tmp/wl", + Modules: map[string]bool{"m1": true}, + AI: 
&YAMLAIConfig{ + Enabled: true, + URL: "http://x", + FastModel: "f", + DeepModel: "d", + Cascade: ptrTrue(), + Deep: true, + MultiAgent: true, + }, + Output: &YAMLOutputConfig{Path: "/o", Format: "json", JSON: true}, + } + ApplyYAML(cfg, y) + + if cfg.Profile != "quick" { + t.Errorf("Profile = %q", cfg.Profile) + } + if cfg.Concurrency != 123 { + t.Errorf("Concurrency = %d", cfg.Concurrency) + } + if cfg.Timeout != 7 { + t.Errorf("Timeout = %d", cfg.Timeout) + } + if cfg.StealthMode != "light" { + t.Errorf("StealthMode = %q", cfg.StealthMode) + } + if cfg.Resolvers != "8.8.8.8,1.1.1.1" { + t.Errorf("Resolvers = %q", cfg.Resolvers) + } + if cfg.Wordlist != "/tmp/wl" { + t.Errorf("Wordlist = %q", cfg.Wordlist) + } + if !cfg.EnableAI { + t.Error("EnableAI should be true") + } + if cfg.AIUrl != "http://x" { + t.Errorf("AIUrl = %q", cfg.AIUrl) + } + if !cfg.AICascade { + t.Error("AICascade should be true") + } + if !cfg.AIDeepAnalysis { + t.Error("AIDeepAnalysis should be true") + } + if !cfg.MultiAgent { + t.Error("MultiAgent should be true") + } + if cfg.Output != "/o" { + t.Errorf("Output = %q", cfg.Output) + } + if cfg.Format != "json" { + t.Errorf("Format = %q", cfg.Format) + } + if !cfg.JsonOutput { + t.Error("JsonOutput should be true") + } + if cfg.ModuleSettings["m1"] != true { + t.Error("ModuleSettings.m1 should be true") + } +} + +func TestApplyYAML_CLIOverrideWins(t *testing.T) { + cfg := &Config{ + Profile: "pentest", + Concurrency: 42, + Timeout: 3, + StealthMode: "paranoid", + Resolvers: "9.9.9.9", + Wordlist: "/existing", + } + y := &YAMLConfig{ + Profile: "quick", + Concurrency: 999, + Timeout: 999, + Stealth: "off", + Resolvers: []string{"8.8.8.8"}, + Wordlist: "/yaml", + } + ApplyYAML(cfg, y) + + // CLI values should survive + if cfg.Profile != "pentest" { + t.Errorf("Profile overwritten: %q", cfg.Profile) + } + if cfg.Concurrency != 42 { + t.Errorf("Concurrency overwritten: %d", cfg.Concurrency) + } + if cfg.Timeout != 3 { + t.Errorf("Timeout 
overwritten: %d", cfg.Timeout) + } + if cfg.StealthMode != "paranoid" { + t.Errorf("StealthMode overwritten: %q", cfg.StealthMode) + } + if cfg.Resolvers != "9.9.9.9" { + t.Errorf("Resolvers overwritten: %q", cfg.Resolvers) + } + if cfg.Wordlist != "/existing" { + t.Errorf("Wordlist overwritten: %q", cfg.Wordlist) + } +} + +func TestDefaultConfigPaths(t *testing.T) { + paths := DefaultConfigPaths() + if len(paths) < 3 { + t.Errorf("expected ≥3 default paths, got %d", len(paths)) + } + // First two are CWD-relative + if paths[0] != "god-eye.yaml" { + t.Errorf("paths[0] = %q", paths[0]) + } + if paths[1] != ".god-eye.yaml" { + t.Errorf("paths[1] = %q", paths[1]) + } +} + +func TestFindConfigFile_FindsInWorkingDir(t *testing.T) { + // Create a temp "god-eye.yaml" and ensure the search finds it. We can't + // easily change CWD for just this test, so we validate the underlying + // Stat call by constructing a path that definitely exists. + dir := t.TempDir() + target := filepath.Join(dir, "god-eye.yaml") + os.WriteFile(target, []byte("profile: quick\n"), 0o644) + + oldWD, _ := os.Getwd() + defer os.Chdir(oldWD) + if err := os.Chdir(dir); err != nil { + t.Skipf("cannot chdir: %v", err) + } + + got := FindConfigFile() + if got != "god-eye.yaml" { + t.Errorf("FindConfigFile = %q, want god-eye.yaml", got) + } +} + +func TestFindConfigFile_NoneFound(t *testing.T) { + dir := t.TempDir() + oldWD, _ := os.Getwd() + defer os.Chdir(oldWD) + if err := os.Chdir(dir); err != nil { + t.Skipf("cannot chdir: %v", err) + } + // Also override HOME to an empty dir so the user-home path never matches. 
+ oldHome := os.Getenv("HOME") + defer os.Setenv("HOME", oldHome) + os.Setenv("HOME", dir) + + got := FindConfigFile() + if got != "" { + t.Errorf("FindConfigFile = %q, want empty", got) + } +} diff --git a/internal/diff/diff.go b/internal/diff/diff.go new file mode 100644 index 0000000..bda2d2d --- /dev/null +++ b/internal/diff/diff.go @@ -0,0 +1,270 @@ +// Package diff computes deltas between two scans of the same target. It +// powers Fase 5's asm-continuous mode: run the scanner on a schedule, diff +// against the last snapshot, alert on meaningful changes. +// +// Diff categories: +// +// new_host — subdomain not seen before +// removed_host — subdomain vanished from discovery +// new_ip — host gained an IP +// removed_ip — host lost an IP +// status_change — HTTP status code changed (200→401, 200→gone) +// tech_change — technology stack changed (upgrade or new framework) +// new_vuln — new vulnerability finding +// cleared_vuln — previously-reported vuln no longer detected +// cert_change — TLS certificate issuer/expiry changed +// new_takeover — new takeover candidate +// +// A Report is consumable both by humans (pretty-print) and by alerters +// (Slack/webhook payload shape defined later in F5.3). +package diff + +import ( + "sort" + "time" + + "god-eye/internal/store" +) + +// Change is one delta. +type Change struct { + Kind string `json:"kind"` + Host string `json:"host"` + Before string `json:"before,omitempty"` + After string `json:"after,omitempty"` + Severity string `json:"severity,omitempty"` + Detected time.Time `json:"detected_at"` +} + +// Report is the full delta between two scans. +type Report struct { + Target string `json:"target"` + OldAt time.Time `json:"old_scan_at"` + NewAt time.Time `json:"new_scan_at"` + Changes []Change `json:"changes"` +} + +// HasMeaningful returns true when the report contains any change that +// warrants alerting. "new_host" and any "new_vuln" always qualify. 
+func (r *Report) HasMeaningful() bool { + for _, c := range r.Changes { + switch c.Kind { + case "new_host", "new_vuln", "new_takeover", "removed_host": + return true + } + } + return false +} + +// Compute compares old vs new snapshots and returns the delta. Both +// slices are assumed to come from store.All() (sorted by subdomain). +func Compute(target string, oldHosts, newHosts []*store.Host, oldAt, newAt time.Time) *Report { + r := &Report{Target: target, OldAt: oldAt, NewAt: newAt} + + oldByName := indexHosts(oldHosts) + newByName := indexHosts(newHosts) + + // Walk the union of hostnames. + names := union(oldByName, newByName) + sort.Strings(names) + + for _, name := range names { + o, oOK := oldByName[name] + n, nOK := newByName[name] + switch { + case !oOK && nOK: + r.Changes = append(r.Changes, Change{Kind: "new_host", Host: name, Detected: newAt}) + for _, v := range n.Vulnerabilities { + r.Changes = append(r.Changes, Change{ + Kind: "new_vuln", + Host: name, + After: v.Title, + Severity: v.Severity, + Detected: newAt, + }) + } + if n.Takeover != nil { + r.Changes = append(r.Changes, Change{ + Kind: "new_takeover", + Host: name, + After: n.Takeover.Service, + Severity: "high", + Detected: newAt, + }) + } + case oOK && !nOK: + r.Changes = append(r.Changes, Change{Kind: "removed_host", Host: name, Detected: newAt}) + case oOK && nOK: + r.Changes = append(r.Changes, diffHost(o, n, newAt)...) 
+ } + } + return r +} + +func diffHost(o, n *store.Host, at time.Time) []Change { + var out []Change + + if o.StatusCode != n.StatusCode { + out = append(out, Change{ + Kind: "status_change", + Host: n.Subdomain, + Before: itoa(o.StatusCode), + After: itoa(n.StatusCode), + Detected: at, + }) + } + + // IP deltas + oldIPs := toSet(o.IPs) + newIPs := toSet(n.IPs) + for ip := range newIPs { + if _, present := oldIPs[ip]; !present { + out = append(out, Change{Kind: "new_ip", Host: n.Subdomain, After: ip, Detected: at}) + } + } + for ip := range oldIPs { + if _, present := newIPs[ip]; !present { + out = append(out, Change{Kind: "removed_ip", Host: n.Subdomain, Before: ip, Detected: at}) + } + } + + // Tech change (set inequality) + if !stringSetsEqual(o.Technologies, n.Technologies) { + out = append(out, Change{ + Kind: "tech_change", + Host: n.Subdomain, + Before: joinSorted(o.Technologies), + After: joinSorted(n.Technologies), + Detected: at, + }) + } + + // Vuln delta (by ID) + oldVulns := indexVulns(o.Vulnerabilities) + newVulns := indexVulns(n.Vulnerabilities) + for id, v := range newVulns { + if _, present := oldVulns[id]; !present { + out = append(out, Change{ + Kind: "new_vuln", Host: n.Subdomain, After: v.Title, + Severity: v.Severity, Detected: at, + }) + } + } + for id, v := range oldVulns { + if _, present := newVulns[id]; !present { + out = append(out, Change{ + Kind: "cleared_vuln", Host: n.Subdomain, Before: v.Title, + Severity: v.Severity, Detected: at, + }) + } + } + + // Certificate change + if o.TLSIssuer != n.TLSIssuer && n.TLSIssuer != "" { + out = append(out, Change{ + Kind: "cert_change", + Host: n.Subdomain, + Before: o.TLSIssuer, + After: n.TLSIssuer, + Detected: at, + }) + } + + // Takeover appeared + if o.Takeover == nil && n.Takeover != nil { + out = append(out, Change{ + Kind: "new_takeover", Host: n.Subdomain, + After: n.Takeover.Service, Severity: "high", Detected: at, + }) + } + return out +} + +// --- helpers 
------------------------------------------------------------- + +func indexHosts(hs []*store.Host) map[string]*store.Host { + out := make(map[string]*store.Host, len(hs)) + for _, h := range hs { + out[h.Subdomain] = h + } + return out +} + +func indexVulns(vs []store.Vulnerability) map[string]store.Vulnerability { + out := make(map[string]store.Vulnerability, len(vs)) + for _, v := range vs { + out[v.ID] = v + } + return out +} + +func union(a, b map[string]*store.Host) []string { + out := make(map[string]struct{}, len(a)+len(b)) + for k := range a { + out[k] = struct{}{} + } + for k := range b { + out[k] = struct{}{} + } + names := make([]string, 0, len(out)) + for n := range out { + names = append(names, n) + } + return names +} + +func toSet(ss []string) map[string]struct{} { + out := make(map[string]struct{}, len(ss)) + for _, s := range ss { + out[s] = struct{}{} + } + return out +} + +func stringSetsEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + sa := toSet(a) + for _, s := range b { + if _, ok := sa[s]; !ok { + return false + } + } + return true +} + +func joinSorted(s []string) string { + cpy := append([]string(nil), s...) 
+ sort.Strings(cpy) + out := "" + for i, v := range cpy { + if i > 0 { + out += "," + } + out += v + } + return out +} + +func itoa(n int) string { + if n == 0 { + return "0" + } + var buf [20]byte + i := len(buf) + neg := n < 0 + if neg { + n = -n + } + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if neg { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go new file mode 100644 index 0000000..e947d32 --- /dev/null +++ b/internal/diff/diff_test.go @@ -0,0 +1,154 @@ +package diff + +import ( + "testing" + "time" + + "god-eye/internal/store" +) + +func TestCompute_NewHost(t *testing.T) { + oldHosts := []*store.Host{} + newHosts := []*store.Host{{Subdomain: "api.example.com"}} + r := Compute("example.com", oldHosts, newHosts, time.Now(), time.Now()) + if len(r.Changes) != 1 || r.Changes[0].Kind != "new_host" { + t.Errorf("expected 1 new_host change, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("new_host should be meaningful") + } +} + +func TestCompute_RemovedHost(t *testing.T) { + oldHosts := []*store.Host{{Subdomain: "old.example.com"}} + newHosts := []*store.Host{} + r := Compute("example.com", oldHosts, newHosts, time.Now(), time.Now()) + if len(r.Changes) != 1 || r.Changes[0].Kind != "removed_host" { + t.Errorf("expected removed_host, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("removed_host should be meaningful") + } +} + +func TestCompute_StatusChange(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com", StatusCode: 200} + newH := &store.Host{Subdomain: "a.example.com", StatusCode: 401} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + if len(r.Changes) != 1 || r.Changes[0].Kind != "status_change" { + t.Errorf("expected status_change, got %+v", r.Changes) + } + if r.Changes[0].Before != "200" || r.Changes[0].After != "401" { + t.Errorf("wrong before/after: %+v", r.Changes[0]) + } +} + 
+func TestCompute_IPDelta(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com", IPs: []string{"1.1.1.1"}} + newH := &store.Host{Subdomain: "a.example.com", IPs: []string{"1.1.1.1", "2.2.2.2"}} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "new_ip" && c.After == "2.2.2.2" { + found = true + } + } + if !found { + t.Errorf("expected new_ip change, got %+v", r.Changes) + } +} + +func TestCompute_NewVuln(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com"} + newH := &store.Host{ + Subdomain: "a.example.com", + Vulnerabilities: []store.Vulnerability{ + {ID: "xss", Title: "Reflected XSS", Severity: "high"}, + }, + } + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "new_vuln" && c.After == "Reflected XSS" { + found = true + } + } + if !found { + t.Errorf("expected new_vuln change, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("new_vuln must be meaningful") + } +} + +func TestCompute_ClearedVuln(t *testing.T) { + oldH := &store.Host{ + Subdomain: "a.example.com", + Vulnerabilities: []store.Vulnerability{ + {ID: "git-exposed", Title: "Git Exposed", Severity: "critical"}, + }, + } + newH := &store.Host{Subdomain: "a.example.com"} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "cleared_vuln" { + found = true + } + } + if !found { + t.Errorf("expected cleared_vuln, got %+v", r.Changes) + } +} + +func TestCompute_NewTakeover(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com"} + newH := &store.Host{ + Subdomain: "a.example.com", + Takeover: &store.Takeover{Service: "GitHub Pages"}, + } + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := 
false + for _, c := range r.Changes { + if c.Kind == "new_takeover" && c.After == "GitHub Pages" { + found = true + } + } + if !found { + t.Errorf("expected new_takeover, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("new_takeover must be meaningful") + } +} + +func TestCompute_NoChange(t *testing.T) { + h := &store.Host{ + Subdomain: "a.example.com", + IPs: []string{"1.1.1.1"}, + StatusCode: 200, + Technologies: []string{"nginx"}, + } + r := Compute("example.com", []*store.Host{h}, []*store.Host{h}, time.Now(), time.Now()) + if len(r.Changes) != 0 { + t.Errorf("expected no changes, got %+v", r.Changes) + } + if r.HasMeaningful() { + t.Error("empty report should not be meaningful") + } +} + +func TestCompute_TechChange(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com", Technologies: []string{"nginx"}} + newH := &store.Host{Subdomain: "a.example.com", Technologies: []string{"nginx", "Apache"}} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "tech_change" { + found = true + } + } + if !found { + t.Errorf("expected tech_change, got %+v", r.Changes) + } +} diff --git a/internal/dns/wildcard_test.go b/internal/dns/wildcard_test.go new file mode 100644 index 0000000..5a4e8bc --- /dev/null +++ b/internal/dns/wildcard_test.go @@ -0,0 +1,174 @@ +package dns + +import "testing" + +func TestAllEqual(t *testing.T) { + tests := []struct { + name string + in []string + want bool + }{ + {"empty", nil, true}, + {"single", []string{"a"}, true}, + {"all same", []string{"a", "a", "a"}, true}, + {"one different", []string{"a", "a", "b"}, false}, + {"all different", []string{"a", "b", "c"}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := allEqual(tt.in); got != tt.want { + t.Errorf("allEqual(%v) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestAllEqualInts(t *testing.T) { + tests := 
[]struct { + name string + in []int + want bool + }{ + {"empty", nil, true}, + {"single", []int{200}, true}, + {"all same", []int{200, 200, 200}, true}, + {"one different", []int{200, 200, 404}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := allEqualInts(tt.in); got != tt.want { + t.Errorf("allEqualInts(%v) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestSimilarSizes(t *testing.T) { + tests := []struct { + name string + in []int64 + want bool + }{ + {"empty", nil, true}, + {"single", []int64{1000}, true}, + {"identical", []int64{1000, 1000, 1000}, true}, + {"within 20%", []int64{1000, 1100, 1200}, true}, + {"exactly 20%", []int64{1000, 1200}, true}, + {"over 20%", []int64{1000, 1300}, false}, + {"big variance", []int64{100, 10000}, false}, + {"all zero", []int64{0, 0}, true}, + {"zero and small", []int64{0, 50}, true}, + {"zero and big", []int64{0, 200}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := similarSizes(tt.in); got != tt.want { + t.Errorf("similarSizes(%v) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestIsWildcardIP(t *testing.T) { + wd := &WildcardDetector{} + info := &WildcardInfo{ + IsWildcard: true, + WildcardIPs: []string{"1.2.3.4", "5.6.7.8"}, + } + + if !wd.IsWildcardIP("1.2.3.4", info) { + t.Error("expected 1.2.3.4 to be wildcard IP") + } + if wd.IsWildcardIP("9.9.9.9", info) { + t.Error("expected 9.9.9.9 NOT to be wildcard IP") + } + + // nil and non-wildcard cases + if wd.IsWildcardIP("1.2.3.4", nil) { + t.Error("nil info should return false") + } + nonWild := &WildcardInfo{IsWildcard: false, WildcardIPs: []string{"1.2.3.4"}} + if wd.IsWildcardIP("1.2.3.4", nonWild) { + t.Error("non-wildcard info should return false even if IP matches list") + } +} + +func TestIsWildcardResponse(t *testing.T) { + wd := &WildcardDetector{} + info := &WildcardInfo{ + IsWildcard: true, + HTTPStatusCode: 200, + HTTPBodySize: 1000, + } + + tests 
:= []struct { + name string + statusCode int + bodySize int64 + want bool + }{ + {"exact match", 200, 1000, true}, + {"within 10% body", 200, 1050, true}, + {"within 10% body below", 200, 950, true}, + {"over 10% body", 200, 1200, false}, + {"different status", 404, 1000, false}, + {"both different", 301, 500, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := wd.IsWildcardResponse(tt.statusCode, tt.bodySize, info); got != tt.want { + t.Errorf("IsWildcardResponse(%d, %d) = %v, want %v", tt.statusCode, tt.bodySize, got, tt.want) + } + }) + } + + if wd.IsWildcardResponse(200, 1000, nil) { + t.Error("nil info should return false") + } +} + +func TestGenerateTestSubdomains(t *testing.T) { + subs := generateTestSubdomains() + if len(subs) < 3 { + t.Errorf("expected at least 3 test subdomains, got %d", len(subs)) + } + seen := make(map[string]bool) + for _, s := range subs { + if s == "" { + t.Error("empty test subdomain generated") + } + if seen[s] { + t.Errorf("duplicate test subdomain: %s", s) + } + seen[s] = true + } +} + +func TestWildcardInfo_GetSummary_NotWildcard(t *testing.T) { + info := &WildcardInfo{IsWildcard: false} + got := info.GetSummary() + if got == "" { + t.Error("GetSummary returned empty string") + } +} + +func TestNewWildcardDetector(t *testing.T) { + resolvers := []string{"8.8.8.8:53"} + wd := NewWildcardDetector(resolvers, 5) + if wd == nil { + t.Fatal("NewWildcardDetector returned nil") + } + if wd.timeout != 5 { + t.Errorf("timeout = %d, want 5", wd.timeout) + } + if len(wd.resolvers) != 1 || wd.resolvers[0] != "8.8.8.8:53" { + t.Errorf("resolvers = %v", wd.resolvers) + } + if wd.httpClient == nil { + t.Error("httpClient is nil") + } + if len(wd.testSubdomains) == 0 { + t.Error("testSubdomains is empty") + } +} diff --git a/internal/eventbus/bus.go b/internal/eventbus/bus.go new file mode 100644 index 0000000..4d4cd1c --- /dev/null +++ b/internal/eventbus/bus.go @@ -0,0 +1,283 @@ +package eventbus + 
+import ( + "context" + "errors" + "sync" + "sync/atomic" +) + +// ErrBusClosed is returned when attempting to use a closed bus. +var ErrBusClosed = errors.New("eventbus: bus closed") + +// Handler processes a single event. It runs on the subscriber's own goroutine +// so handlers may block or perform I/O without stalling publishers. A handler +// must respect ctx cancellation when performing long work. +type Handler func(ctx context.Context, e Event) + +// Subscription is returned by Subscribe/SubscribeAll and is used to stop +// receiving events. Unsubscribe is idempotent. +type Subscription struct { + bus *Bus + eventType EventType // empty string means "all" + id uint64 + once sync.Once +} + +// Unsubscribe stops the subscription. Pending events in the subscriber's +// buffer are dropped. Safe to call multiple times. +func (s *Subscription) Unsubscribe() { + if s == nil || s.bus == nil { + return + } + s.once.Do(func() { + s.bus.unsubscribe(s.eventType, s.id) + }) +} + +// Stats captures runtime metrics for observability. Stats are cumulative from +// bus creation; callers should compute deltas if rate matters. +type Stats struct { + Published uint64 // total Publish calls accepted + Delivered uint64 // events delivered to subscribers (sum across subscribers) + Dropped uint64 // events dropped because a subscriber buffer was full + Subscribers int // active subscribers right now + Closed bool +} + +// Bus is the default eventbus implementation. +type Bus struct { + bufferSize int + + mu sync.RWMutex + closed bool + nextID uint64 + subs map[EventType]map[uint64]*subscriber // type → id → subscriber + allSubs map[uint64]*subscriber // wildcard subscribers + + published uint64 + delivered uint64 + dropped uint64 + + wg sync.WaitGroup +} + +type subscriber struct { + id uint64 + eventT EventType + ch chan Event + handler Handler + ctx context.Context + cancel context.CancelFunc +} + +// New creates a new Bus. 
bufferSize controls the per-subscriber channel +// buffer; values ≤0 default to 256. A buffer of 1 is legal but increases +// drop probability under bursty load. +func New(bufferSize int) *Bus { + if bufferSize <= 0 { + bufferSize = 256 + } + return &Bus{ + bufferSize: bufferSize, + subs: make(map[EventType]map[uint64]*subscriber), + allSubs: make(map[uint64]*subscriber), + } +} + +// Subscribe registers a handler for a specific event type. Returns a +// Subscription that can be used to unsubscribe. +func (b *Bus) Subscribe(t EventType, h Handler) *Subscription { + return b.subscribe(t, h, false) +} + +// SubscribeAll registers a handler that receives every event type. +// Useful for logging, metrics collection, or persistence modules. +func (b *Bus) SubscribeAll(h Handler) *Subscription { + return b.subscribe("", h, true) +} + +func (b *Bus) subscribe(t EventType, h Handler, all bool) *Subscription { + if h == nil { + return &Subscription{bus: b} + } + b.mu.Lock() + if b.closed { + b.mu.Unlock() + return &Subscription{bus: b} + } + b.nextID++ + id := b.nextID + ctx, cancel := context.WithCancel(context.Background()) + s := &subscriber{ + id: id, + eventT: t, + ch: make(chan Event, b.bufferSize), + handler: h, + ctx: ctx, + cancel: cancel, + } + if all { + b.allSubs[id] = s + } else { + if b.subs[t] == nil { + b.subs[t] = make(map[uint64]*subscriber) + } + b.subs[t][id] = s + } + b.mu.Unlock() + + b.wg.Add(1) + go b.run(s) + + return &Subscription{bus: b, eventType: t, id: id} +} + +func (b *Bus) unsubscribe(t EventType, id uint64) { + b.mu.Lock() + var s *subscriber + if t == "" { + s = b.allSubs[id] + delete(b.allSubs, id) + } else { + if m, ok := b.subs[t]; ok { + s = m[id] + delete(m, id) + if len(m) == 0 { + delete(b.subs, t) + } + } + } + b.mu.Unlock() + if s != nil { + close(s.ch) // run() drains remaining events then returns + } +} + +// run is the per-subscriber goroutine loop. 
+func (b *Bus) run(s *subscriber) { + defer b.wg.Done() + defer s.cancel() + for e := range s.ch { + // Protect bus from handler panics — one bad handler must not + // take down the pipeline. + func() { + defer func() { + _ = recover() + }() + s.handler(s.ctx, e) + }() + } +} + +// Publish delivers e to every subscriber interested in e.Type() and every +// SubscribeAll subscriber. If ctx is canceled, Publish returns early and the +// event is not queued to any subscriber that would block. +// +// Publish is non-blocking per subscriber: if a subscriber's buffer is full the +// event is dropped for that subscriber and Stats.Dropped is incremented. +func (b *Bus) Publish(ctx context.Context, e Event) { + if e == nil { + return + } + b.mu.RLock() + if b.closed { + b.mu.RUnlock() + return + } + // Snapshot the subscriber slices under lock, then release before send. + typed := b.subs[e.Type()] + var typedList []*subscriber + if len(typed) > 0 { + typedList = make([]*subscriber, 0, len(typed)) + for _, s := range typed { + typedList = append(typedList, s) + } + } + var allList []*subscriber + if len(b.allSubs) > 0 { + allList = make([]*subscriber, 0, len(b.allSubs)) + for _, s := range b.allSubs { + allList = append(allList, s) + } + } + b.mu.RUnlock() + + atomic.AddUint64(&b.published, 1) + + for _, s := range typedList { + b.dispatch(ctx, s, e) + } + for _, s := range allList { + b.dispatch(ctx, s, e) + } +} + +func (b *Bus) dispatch(ctx context.Context, s *subscriber, e Event) { + select { + case <-ctx.Done(): + // caller abandoned; count as dropped so observability reflects reality + atomic.AddUint64(&b.dropped, 1) + case s.ch <- e: + atomic.AddUint64(&b.delivered, 1) + default: + atomic.AddUint64(&b.dropped, 1) + } +} + +// Close stops accepting new publishes and drains in-flight subscriber +// buffers. It waits until all handlers have returned, or until ctx expires. +// Returns ctx.Err() if draining did not complete in time. 
+func (b *Bus) Close(ctx context.Context) error { + b.mu.Lock() + if b.closed { + b.mu.Unlock() + return nil + } + b.closed = true + + // Close every subscriber channel; their goroutines will drain and exit. + for _, m := range b.subs { + for _, s := range m { + close(s.ch) + } + } + for _, s := range b.allSubs { + close(s.ch) + } + b.subs = make(map[EventType]map[uint64]*subscriber) + b.allSubs = make(map[uint64]*subscriber) + b.mu.Unlock() + + done := make(chan struct{}) + go func() { + b.wg.Wait() + close(done) + }() + + select { + case <-done: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +// Stats returns a snapshot of current metrics. +func (b *Bus) Stats() Stats { + b.mu.RLock() + closed := b.closed + subCount := len(b.allSubs) + for _, m := range b.subs { + subCount += len(m) + } + b.mu.RUnlock() + + return Stats{ + Published: atomic.LoadUint64(&b.published), + Delivered: atomic.LoadUint64(&b.delivered), + Dropped: atomic.LoadUint64(&b.dropped), + Subscribers: subCount, + Closed: closed, + } +} diff --git a/internal/eventbus/bus_test.go b/internal/eventbus/bus_test.go new file mode 100644 index 0000000..35aca49 --- /dev/null +++ b/internal/eventbus/bus_test.go @@ -0,0 +1,307 @@ +package eventbus + +import ( + "context" + "sync" + "sync/atomic" + "testing" + "time" +) + +// waitUntil polls predicate every 2ms up to timeout. Used to avoid flaky +// sleeps in async tests without adding dependencies. 
+func waitUntil(t *testing.T, timeout time.Duration, pred func() bool, msg string) { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if pred() { + return + } + time.Sleep(2 * time.Millisecond) + } + t.Fatalf("timeout waiting: %s", msg) +} + +func TestPublishSubscribe_SingleType(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var got atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, e Event) { + ev, ok := e.(SubdomainDiscovered) + if !ok { + t.Errorf("wrong event type: %T", e) + return + } + if ev.Subdomain == "" { + t.Error("empty subdomain") + } + got.Add(1) + }) + + for i := 0; i < 5; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("test", "api.example.com", "passive")) + } + waitUntil(t, time.Second, func() bool { return got.Load() == 5 }, "5 events delivered") +} + +func TestSubscribeAll_ReceivesEveryType(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var got atomic.Int32 + b.SubscribeAll(func(_ context.Context, _ Event) { got.Add(1) }) + + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + b.Publish(context.Background(), DNSResolved{EventMeta: newMeta("dns", "a.example.com"), Subdomain: "a.example.com", IPs: []string{"1.2.3.4"}}) + b.Publish(context.Background(), HTTPProbed{EventMeta: newMeta("http", "a.example.com"), URL: "https://a.example.com", StatusCode: 200}) + + waitUntil(t, time.Second, func() bool { return got.Load() == 3 }, "3 events on wildcard") +} + +func TestSubscribe_FilteringByType(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var subs, dns atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { subs.Add(1) }) + b.Subscribe(EventDNSResolved, func(_ context.Context, _ Event) { dns.Add(1) }) + + for i := 0; i < 3; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + for i := 0; i 
< 2; i++ { + b.Publish(context.Background(), DNSResolved{EventMeta: newMeta("dns", "x"), Subdomain: "x"}) + } + waitUntil(t, time.Second, func() bool { return subs.Load() == 3 && dns.Load() == 2 }, "typed counts match") +} + +func TestUnsubscribe_StopsDelivery(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var count atomic.Int32 + sub := b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { count.Add(1) }) + + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + waitUntil(t, time.Second, func() bool { return count.Load() == 1 }, "first event") + + sub.Unsubscribe() + sub.Unsubscribe() // idempotent + + // Publish after unsubscribe — should not be delivered to this handler. + for i := 0; i < 5; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "b.example.com", "p")) + } + time.Sleep(30 * time.Millisecond) + if got := count.Load(); got != 1 { + t.Errorf("expected 1 delivery after unsubscribe, got %d", got) + } +} + +func TestPublish_MultipleSubscribersEachGetEvent(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var a, c atomic.Int32 + b.Subscribe(EventVulnerability, func(_ context.Context, _ Event) { a.Add(1) }) + b.Subscribe(EventVulnerability, func(_ context.Context, _ Event) { c.Add(1) }) + + b.Publish(context.Background(), VulnerabilityFound{EventMeta: newMeta("sec", "x"), ID: "test", Severity: SeverityHigh}) + + waitUntil(t, time.Second, func() bool { return a.Load() == 1 && c.Load() == 1 }, "both subscribers received") +} + +func TestPublish_NonBlocking_DropsWhenBufferFull(t *testing.T) { + b := New(2) + defer b.Close(context.Background()) + + blocker := make(chan struct{}) + var started atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(ctx context.Context, _ Event) { + started.Add(1) + <-blocker + }) + + // First event enters handler (blocks). Next 2 fill the buffer of size 2. + // Subsequent publishes should be counted as dropped. 
+ for i := 0; i < 100; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "x.example.com", "p")) + } + + // Give the bus a moment to register drops. + waitUntil(t, time.Second, func() bool { + return b.Stats().Dropped > 0 + }, "some events dropped when buffer full") + + // Unblock and close cleanly. + close(blocker) +} + +func TestClose_DrainsAndStops(t *testing.T) { + b := New(16) + + var got atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { got.Add(1) }) + + for i := 0; i < 10; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := b.Close(ctx); err != nil { + t.Fatalf("Close error: %v", err) + } + if got.Load() != 10 { + t.Errorf("expected 10 delivered before close drains, got %d", got.Load()) + } + + // Publish after close is a silent no-op. + b.Publish(context.Background(), NewSubdomainDiscovered("t", "z.example.com", "p")) + if got.Load() != 10 { + t.Errorf("delivery continued after close: %d", got.Load()) + } +} + +func TestClose_IdempotentAndMulticall(t *testing.T) { + b := New(4) + ctx := context.Background() + if err := b.Close(ctx); err != nil { + t.Fatalf("first close: %v", err) + } + if err := b.Close(ctx); err != nil { + t.Fatalf("second close: %v", err) + } +} + +func TestPanicInHandler_DoesNotAffectOthers(t *testing.T) { + b := New(8) + defer b.Close(context.Background()) + + var good atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { panic("bad handler") }) + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { good.Add(1) }) + + for i := 0; i < 5; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + waitUntil(t, time.Second, func() bool { return good.Load() == 5 }, "good handler received all events") +} + +func 
TestConcurrentPublishers_PreservesInvariant(t *testing.T) { + // With a fast-enough consumer and large buffer, some events may still be + // dropped under heavy burst. The invariant that must ALWAYS hold is: + // Published == Delivered + Dropped + // This protects against race conditions in metric bookkeeping. + b := New(4096) + defer b.Close(context.Background()) + + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) {}) + + const publishers = 20 + const perPublisher = 100 + var wg sync.WaitGroup + for i := 0; i < publishers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < perPublisher; j++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + }() + } + wg.Wait() + + total := uint64(publishers * perPublisher) + waitUntil(t, 5*time.Second, func() bool { + s := b.Stats() + return s.Published == total && s.Delivered+s.Dropped == total + }, "published count matches and delivered+dropped == published") +} + +func TestStats_Increment(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) {}) + + for i := 0; i < 3; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + waitUntil(t, time.Second, func() bool { return b.Stats().Delivered == 3 }, "3 deliveries recorded") + s := b.Stats() + if s.Published != 3 { + t.Errorf("Published = %d, want 3", s.Published) + } + if s.Subscribers != 1 { + t.Errorf("Subscribers = %d, want 1", s.Subscribers) + } + if s.Closed { + t.Error("Closed = true on open bus") + } +} + +func TestPublish_NilEvent_NoOp(t *testing.T) { + b := New(8) + defer b.Close(context.Background()) + var got atomic.Int32 + b.SubscribeAll(func(_ context.Context, _ Event) { got.Add(1) }) + b.Publish(context.Background(), nil) + time.Sleep(20 * time.Millisecond) + if got.Load() != 0 { + t.Errorf("nil event was delivered") + } +} + +func 
TestPublish_CancelledContext_DropsNotDelivers(t *testing.T) { + b := New(1) + defer b.Close(context.Background()) + + hold := make(chan struct{}) + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { <-hold }) + + // First publish occupies buffer slot 1 and handler goroutine starts consuming. + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a", "p")) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + // With ctx already canceled and the subscriber busy, dispatch should record a drop. + before := b.Stats().Dropped + b.Publish(ctx, NewSubdomainDiscovered("t", "b", "p")) + b.Publish(ctx, NewSubdomainDiscovered("t", "c", "p")) + after := b.Stats().Dropped + if after <= before { + t.Errorf("expected Dropped to increase with canceled ctx, before=%d after=%d", before, after) + } + + close(hold) +} + +func TestHandlerReceivesEventMetadata(t *testing.T) { + b := New(8) + defer b.Close(context.Background()) + + done := make(chan Event, 1) + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, e Event) { done <- e }) + + before := time.Now().Add(-time.Second) + b.Publish(context.Background(), NewSubdomainDiscovered("sources.crtsh", "api.example.com", "passive:crt.sh")) + + select { + case e := <-done: + m := e.Meta() + if m.Source != "sources.crtsh" { + t.Errorf("Source = %q", m.Source) + } + if m.Target != "api.example.com" { + t.Errorf("Target = %q", m.Target) + } + if m.At.Before(before) { + t.Errorf("At = %v is before %v", m.At, before) + } + case <-time.After(time.Second): + t.Fatal("no event received") + } +} diff --git a/internal/eventbus/events.go b/internal/eventbus/events.go new file mode 100644 index 0000000..f7fa81d --- /dev/null +++ b/internal/eventbus/events.go @@ -0,0 +1,337 @@ +// Package eventbus provides a typed, context-aware pub/sub bus that decouples +// discovery, probing, analysis, and reporting modules in God's Eye v2. 
+// +// Design choices: +// - Events are typed structs implementing Event; dispatch is keyed on EventType. +// - Subscribers run handlers on their own goroutine with a buffered channel, +// so a slow handler cannot stall the producer. +// - Publish is non-blocking: if a subscriber buffer is full, the event is +// dropped for that subscriber and Stats.Dropped is incremented. Subscribers +// that care about lossless delivery must size their buffer accordingly. +// - Close stops accepting new events and drains outstanding ones before +// returning. +package eventbus + +import "time" + +// EventType identifies the kind of an event. +type EventType string + +// Canonical event types. Modules should always use these constants rather than +// string literals to avoid typos and to make the full event vocabulary greppable. +const ( + EventSubdomainDiscovered EventType = "subdomain.discovered" + EventDNSResolved EventType = "dns.resolved" + EventHTTPProbed EventType = "http.probed" + EventTechDetected EventType = "tech.detected" + EventTLSAnalyzed EventType = "tls.analyzed" + EventTakeoverCandidate EventType = "takeover.candidate" + EventTakeoverConfirmed EventType = "takeover.confirmed" + EventVulnerability EventType = "vulnerability" + EventSecret EventType = "secret" + EventCVEMatch EventType = "cve.match" + EventCloudAsset EventType = "cloud.asset" + EventAPIFinding EventType = "api.finding" + EventJSFile EventType = "js.file" + EventAIFinding EventType = "ai.finding" + EventPhaseStarted EventType = "phase.started" + EventPhaseCompleted EventType = "phase.completed" + EventModuleError EventType = "module.error" + EventScanStarted EventType = "scan.started" + EventScanCompleted EventType = "scan.completed" +) + +// Severity levels used across vulnerability, secret, AI and CVE events. 
+type Severity string + +const ( + SeverityInfo Severity = "info" + SeverityLow Severity = "low" + SeverityMedium Severity = "medium" + SeverityHigh Severity = "high" + SeverityCritical Severity = "critical" +) + +// Event is implemented by every event struct. +type Event interface { + Type() EventType + Meta() EventMeta +} + +// EventMeta is shared metadata embedded in every event. +type EventMeta struct { + At time.Time // when the event was created + Source string // originating module name (e.g. "sources.crtsh", "dns.resolver") + Target string // logical target (typically the subdomain or host the event pertains to) +} + +// Meta returns the shared metadata; implemented by embedding EventMeta. +func (m EventMeta) Meta() EventMeta { return m } + +// now returns the current time; indirected for testability. +var now = time.Now + +// newMeta builds an EventMeta with a populated timestamp. +func newMeta(source, target string) EventMeta { + return EventMeta{At: now(), Source: source, Target: target} +} + +// --- Concrete event types -------------------------------------------------- + +// SubdomainDiscovered fires whenever any source (passive, brute, recursive, +// CT, etc.) identifies a subdomain that passes the "ends in target domain" +// filter. Multiple sources may discover the same subdomain — the bus does not +// dedup; that's the store's job. +type SubdomainDiscovered struct { + EventMeta + Subdomain string + Method string // "passive:crt.sh", "brute", "recursive", "ct-stream", etc. +} + +func (SubdomainDiscovered) Type() EventType { return EventSubdomainDiscovered } + +func NewSubdomainDiscovered(source, subdomain, method string) SubdomainDiscovered { + return SubdomainDiscovered{ + EventMeta: newMeta(source, subdomain), + Subdomain: subdomain, + Method: method, + } +} + +// DNSResolved fires after a subdomain is resolved. Empty IPs field signals +// an intentionally negative result (NXDOMAIN); absence of the event means +// "not yet resolved". 
type DNSResolved struct {
	EventMeta
	Subdomain string
	IPs       []string // empty signals a negative result (NXDOMAIN)
	CNAME     string
	PTR       string
}

// Type returns EventDNSResolved.
func (DNSResolved) Type() EventType { return EventDNSResolved }

// HTTPProbed fires once per successful HTTP probe, including server banner,
// title, and technology signals. Security checks emit their own events.
type HTTPProbed struct {
	EventMeta
	URL           string
	StatusCode    int
	ContentLength int64
	Title         string
	Server        string
	Technologies  []string
	Headers       map[string]string
	ResponseMs    int64 // presumably response time in milliseconds — TODO confirm with the producer
	TLSVersion    string
	TLSSelfSigned bool
}

// Type returns EventHTTPProbed.
func (HTTPProbed) Type() EventType { return EventHTTPProbed }

// VulnerabilityFound is the canonical finding event for any detected issue.
// Scanner modules (security checks, smuggling, SSRF, GraphQL, etc.) all emit
// this so the reporter/aggregator has a single type to consume.
type VulnerabilityFound struct {
	EventMeta
	ID          string   // stable identifier, e.g. "open-redirect", "cors-wildcard-creds"
	Title       string   // short human-readable title
	Description string   // longer context
	Severity    Severity
	URL         string   // affected URL
	Evidence    string   // raw evidence (truncated if too large)
	Remediation string   // how to fix
	CVEs        []string // referenced CVEs if any
	OWASP       string   // OWASP category (e.g. "A03:2021-Injection")
	CVSS        float64  // 0.0 if not scored
}

// Type returns EventVulnerability.
func (VulnerabilityFound) Type() EventType { return EventVulnerability }

// SecretFound fires when a credential, API key, or token is detected (in JS,
// response bodies, commits, etc.).
type SecretFound struct {
	EventMeta
	Kind        string // "aws_access_key", "jwt", "stripe_live", "generic_hex"
	Match       string // redacted or truncated match — full value in Value if validated
	Value       string // full value, populated only when validation succeeded
	Location    string // where it was found (URL, file path, commit sha)
	Validated   bool   // true if we verified the secret is live against its service
	Severity    Severity
	Description string
}

// Type returns EventSecret.
func (SecretFound) Type() EventType { return EventSecret }

// CVEMatch fires when a CVE is correlated to a detected technology/version.
type CVEMatch struct {
	EventMeta
	CVE         string
	Technology  string
	Version     string
	Severity    Severity
	CVSS        float64
	Description string
	URL         string
	InKEV       bool // true if in CISA Known Exploited Vulnerabilities catalog
}

// Type returns EventCVEMatch.
func (CVEMatch) Type() EventType { return EventCVEMatch }

// TakeoverCandidate fires when a CNAME or fingerprint points at a service
// that could potentially be taken over. TakeoverConfirmed fires after active
// verification (service claim test) succeeds.
type TakeoverCandidate struct {
	EventMeta
	Subdomain string
	Service   string // "GitHub Pages", "S3", "Heroku", etc.
	CNAME     string
	Evidence  string
}

// Type returns EventTakeoverCandidate.
func (TakeoverCandidate) Type() EventType { return EventTakeoverCandidate }

// TakeoverConfirmed fires after active verification (service claim test) of
// a takeover succeeds. It repeats the Subdomain, Service and CNAME fields of
// TakeoverCandidate and adds a reproducer.
type TakeoverConfirmed struct {
	EventMeta
	Subdomain string
	Service   string
	CNAME     string
	PoC       string // curl/HTTP reproducer
}

// Type returns EventTakeoverConfirmed.
func (TakeoverConfirmed) Type() EventType { return EventTakeoverConfirmed }

// CloudAssetFound fires for exposed/accessible cloud assets (S3 buckets,
// GCS buckets, Azure blobs, Firebase projects, etc.).
type CloudAssetFound struct {
	EventMeta
	Provider    string   // "AWS", "GCP", "Azure", "Firebase"
	Kind        string   // "s3-bucket", "gcs-bucket", "lambda-url"
	Name        string
	URL         string
	Status      string   // "public-read", "listable", "writable", "exists"
	Permissions []string // detailed permissions if known
}

// Type returns EventCloudAsset.
func (CloudAssetFound) Type() EventType { return EventCloudAsset }

// APIFinding fires for discovered/enumerated API surfaces (GraphQL, Swagger,
// Postman, misconfigured REST) with associated issues.
type APIFinding struct {
	EventMeta
	Kind      string // "graphql-introspection", "swagger-exposed", "rest-cors", etc.
	URL       string
	Issue     string
	Severity  Severity
	Endpoints []string
}

// Type returns EventAPIFinding.
func (APIFinding) Type() EventType { return EventAPIFinding }

// TechDetected fires when a technology (framework, server, CMS, language) is
// identified with a version, feeding CVE matching and AI analysis.
type TechDetected struct {
	EventMeta
	Host       string
	Technology string
	Version    string
	Category   string // "web-server", "framework", "cms", "language", "waf"
	Confidence float64 // NOTE(review): scale is not defined here (0–1 vs. percent) — confirm with producers
}

// Type returns EventTechDetected.
func (TechDetected) Type() EventType { return EventTechDetected }

// TLSAnalyzed fires with TLS certificate details, including appliance
// fingerprint when identifiable.
type TLSAnalyzed struct {
	EventMeta
	Host          string
	Version       string
	Issuer        string
	Expiry        time.Time
	SelfSigned    bool
	AltNames      []string
	Vendor        string // FortiGate, Palo Alto, etc. (empty if no fingerprint)
	Product       string
	ApplianceKind string // "firewall", "vpn", "loadbalancer", "waf"
	InternalHosts []string
}

// Type returns EventTLSAnalyzed.
func (TLSAnalyzed) Type() EventType { return EventTLSAnalyzed }

// JSFileDiscovered fires when a JavaScript file is discovered and prepared
// for analysis (secret scanning, endpoint extraction, AI review).
type JSFileDiscovered struct {
	EventMeta
	URL  string
	Size int64 // presumably the file size in bytes — TODO confirm with the producer
	Host string
}

// Type returns EventJSFile.
func (JSFileDiscovered) Type() EventType { return EventJSFile }

// AIFinding is emitted by any AI/agent module (cascade or multi-agent).
type AIFinding struct {
	EventMeta
	Subject     string // subdomain/URL the finding pertains to
	Agent       string // "triage", "deep", "xss", "sqli", etc.
	Model       string // LLM model id
	Severity    Severity
	Title       string
	Description string
	Evidence    string
	CVEs        []string
	OWASP       string
	Confidence  float64 // NOTE(review): scale is not defined here (0–1 vs. percent) — confirm with producers
}

// Type returns EventAIFinding.
func (AIFinding) Type() EventType { return EventAIFinding }

// PhaseStarted / PhaseCompleted frame pipeline phases (passive, brute,
// resolve, probe, ai, etc.) so UIs and progress trackers can react.
type PhaseStarted struct {
	EventMeta
	Phase string
}

// Type returns EventPhaseStarted.
func (PhaseStarted) Type() EventType { return EventPhaseStarted }

// PhaseCompleted is the closing counterpart of PhaseStarted; it additionally
// carries a Duration and a map of named int64 counters.
type PhaseCompleted struct {
	EventMeta
	Phase    string
	Duration time.Duration
	Stats    map[string]int64
}

// Type returns EventPhaseCompleted.
func (PhaseCompleted) Type() EventType { return EventPhaseCompleted }

// ModuleError fires when a module encounters a non-fatal error (source
// unavailable, rate-limited, timeout). Use this for observability; do not
// log errors in modules directly.
type ModuleError struct {
	EventMeta
	Module  string
	Err     string // stringified error
	Fatal   bool   // true only when the module cannot continue
	Context map[string]string
}

// Type returns EventModuleError.
func (ModuleError) Type() EventType { return EventModuleError }

// ScanStarted / ScanCompleted bookend the whole run.
+type ScanStarted struct { + EventMeta + Target string + Profile string +} + +func (ScanStarted) Type() EventType { return EventScanStarted } + +type ScanCompleted struct { + EventMeta + Target string + Duration time.Duration + Stats map[string]int64 +} + +func (ScanCompleted) Type() EventType { return EventScanCompleted } diff --git a/internal/http/factory.go b/internal/http/factory.go index 88158ce..046fef8 100644 --- a/internal/http/factory.go +++ b/internal/http/factory.go @@ -6,6 +6,8 @@ import ( "net/http" "sync" "time" + + "god-eye/internal/proxyconf" ) // ClientFactory manages shared HTTP clients with connection pooling @@ -26,8 +28,40 @@ type ClientFactory struct { var ( factory *ClientFactory factoryOnce sync.Once + + // proxyURL captures the most recent SetProxy() value, read at factory + // construction time. Callers MUST invoke SetProxy BEFORE any code path + // that triggers GetFactory — otherwise the factory is built with a + // direct dialer and subsequent proxy changes won't be picked up. + // + // In main.go this is safe: we call SetProxy right after flag parsing, + // before any module starts. + proxyURL string + proxyMu sync.RWMutex ) +// SetProxy configures the outbound proxy for every HTTP client the +// factory hands out. Must be called BEFORE GetFactory() / any module +// uses a shared client. Supported schemes: http, https, socks5, socks5h. +// Empty string disables proxying. +func SetProxy(u string) error { + if err := proxyconf.Validate(u); err != nil { + return err + } + proxyMu.Lock() + proxyURL = u + proxyMu.Unlock() + return nil +} + +// CurrentProxy returns the currently-configured proxy URL, or empty when +// none. Useful for status/debug output. 
+func CurrentProxy() string { + proxyMu.RLock() + defer proxyMu.RUnlock() + return proxyURL +} + // GetFactory returns the singleton client factory func GetFactory() *ClientFactory { factoryOnce.Do(func() { @@ -37,12 +71,26 @@ func GetFactory() *ClientFactory { } func newClientFactory() *ClientFactory { + proxyMu.RLock() + cfgProxy := proxyURL + proxyMu.RUnlock() + + baseDialer := &net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 30 * time.Second, + } + dialCtx, err := proxyconf.BuildDialer(cfgProxy, baseDialer) + if err != nil { + // Bad proxy URL at this point is a programming error (we validated + // in SetProxy). Fall back to direct rather than crashing. + dialCtx = baseDialer.DialContext + } + proxyFunc, _ := proxyconf.BuildProxyFunc(cfgProxy) + // Secure transport with TLS verification secureTransport := &http.Transport{ - DialContext: (&net.Dialer{ - Timeout: 10 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, + DialContext: dialCtx, + Proxy: proxyFunc, MaxIdleConns: 200, MaxIdleConnsPerHost: 20, MaxConnsPerHost: 50, @@ -57,10 +105,8 @@ func newClientFactory() *ClientFactory { // Insecure transport (for scanning targets with invalid certs) insecureTransport := &http.Transport{ - DialContext: (&net.Dialer{ - Timeout: 10 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, + DialContext: dialCtx, + Proxy: proxyFunc, MaxIdleConns: 200, MaxIdleConnsPerHost: 20, MaxConnsPerHost: 50, diff --git a/internal/module/module.go b/internal/module/module.go new file mode 100644 index 0000000..ad6fa0a --- /dev/null +++ b/internal/module/module.go @@ -0,0 +1,101 @@ +// Package module defines the Module interface and Registry used by God's Eye v2 +// to organize discovery, enrichment, analysis, and reporting units of work. +// +// A Module is any unit of the pipeline that subscribes to zero-or-more event +// types, produces zero-or-more event types, and optionally performs a bounded +// amount of work on startup (e.g. 
a passive source fetches once and publishes). +// +// Modules are decoupled: they do not call each other directly. Ordering emerges +// from the event-driven dependency graph, not from phase barriers. The Phase +// label is metadata used for grouping in progress UIs and logs, not a scheduling +// primitive. +package module + +import ( + "context" + + "god-eye/internal/eventbus" + "god-eye/internal/store" +) + +// Phase groups modules at similar pipeline stages for presentation. Modules at +// different phases may still run concurrently; the scanner does not enforce +// phase barriers. +type Phase string + +const ( + PhaseSetup Phase = "setup" // load DBs, wordlists, validate config + PhaseDiscovery Phase = "discovery" // subdomain sources (passive, CT, brute, recursive) + PhaseResolution Phase = "resolution" // DNS resolve, CNAME, PTR, IP info, wildcard filter + PhaseEnrichment Phase = "enrichment" // HTTP probe, tech fingerprint, TLS analyze + PhaseAnalysis Phase = "analysis" // security checks, takeover, secrets, AI, CVE match + PhaseReporting Phase = "reporting" // output writers, report generation +) + +// Context bundles everything a module needs to run. +// +// The Ctx field carries cancellation — every long-running module must select +// on Ctx.Done() to exit cleanly when the user interrupts. +type Context struct { + Ctx context.Context + Bus *eventbus.Bus + Store store.Store + Config ConfigView + Target string // primary target domain + Profile string // active profile name (bugbounty, pentest, stealth-max, ...) +} + +// ConfigView is a narrow read-only interface over the scan config, exposed to +// modules so they cannot mutate global state. Implementations live in the +// config package. +type ConfigView interface { + // Profile returns the active profile name ("" when none is selected). + Profile() string + // Bool reads a boolean config key, returning fallback if unset. 
+ Bool(key string, fallback bool) bool + // Int reads an int key, returning fallback if unset. + Int(key string, fallback int) int + // String reads a string key, returning fallback if unset. + String(key string, fallback string) string + // Strings reads a string-slice key. + Strings(key string) []string + // ModuleEnabled lets the user disable a module by name. Registry honors + // this during selection. + ModuleEnabled(moduleName string) bool +} + +// Module is the unit of work registered in the pipeline. +// +// Implementations should: +// - be cheap to construct (no I/O in the Module value itself) +// - do all setup/teardown inside Run so lifecycle is explicit +// - subscribe to events via mctx.Bus.Subscribe in Run +// - return promptly when mctx.Ctx is canceled OR when their work is complete +type Module interface { + // Name uniquely identifies the module. Use dotted notation grouping by + // concern: "sources.crtsh", "dns.resolver", "http.probe", "security.cors", + // "ai.cascade". The registry rejects duplicate names. + Name() string + + // Phase groups the module in pipeline UIs. See Phase constants. + Phase() Phase + + // Consumes lists event types the module subscribes to. Empty means the + // module is a pure producer (e.g. a passive source). Used by tooling to + // visualize the event graph; the bus itself is queried via Subscribe. + Consumes() []eventbus.EventType + + // Produces lists event types the module publishes. Empty means the module + // only side-effects (e.g. reporting). Used for tooling and dep docs. + Produces() []eventbus.EventType + + // DefaultEnabled returns whether this module runs when config does not + // explicitly enable/disable it. Passive sources typically default true; + // aggressive/experimental modules typically default false. + DefaultEnabled() bool + + // Run executes the module. Must be non-blocking on setup and must return + // when its work is complete OR mctx.Ctx is canceled. 
Errors returned are + // logged via ModuleError events by the scanner. + Run(mctx Context) error +} diff --git a/internal/module/registry.go b/internal/module/registry.go new file mode 100644 index 0000000..22969b5 --- /dev/null +++ b/internal/module/registry.go @@ -0,0 +1,183 @@ +package module + +import ( + "fmt" + "sort" + "sync" + + "god-eye/internal/eventbus" +) + +// Registry stores modules keyed by name. Modules register themselves via +// init() functions by calling Register on the default registry. +type Registry struct { + mu sync.RWMutex + modules map[string]Module + order []string // insertion order for deterministic iteration +} + +// NewRegistry returns an empty registry. Most callers should use Default() +// which returns the process-wide registry that init() functions populate. +func NewRegistry() *Registry { + return &Registry{modules: make(map[string]Module)} +} + +var ( + defaultRegistry *Registry + defaultOnce sync.Once +) + +// Default returns the process-wide module registry. +func Default() *Registry { + defaultOnce.Do(func() { + defaultRegistry = NewRegistry() + }) + return defaultRegistry +} + +// Register adds m to r. Panics on duplicate name — registration happens at +// init() time, so duplicates indicate a compile-time bug that must surface +// immediately rather than silently overwrite. +func (r *Registry) Register(m Module) { + if m == nil { + panic("module.Register: nil module") + } + name := m.Name() + if name == "" { + panic("module.Register: module has empty Name()") + } + r.mu.Lock() + defer r.mu.Unlock() + if _, exists := r.modules[name]; exists { + panic(fmt.Sprintf("module.Register: duplicate module %q", name)) + } + r.modules[name] = m + r.order = append(r.order, name) +} + +// Register is a shortcut for Default().Register(m). Intended use: +// +// func init() { module.Register(&myModule{}) } +func Register(m Module) { Default().Register(m) } + +// Get returns the module with the given name. 
+func (r *Registry) Get(name string) (Module, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + m, ok := r.modules[name] + return m, ok +} + +// Names returns all registered module names in insertion order. +func (r *Registry) Names() []string { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]string, len(r.order)) + copy(out, r.order) + return out +} + +// All returns every registered module in insertion order. The returned slice +// is safe for the caller to iterate but do not mutate it. +func (r *Registry) All() []Module { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]Module, 0, len(r.order)) + for _, n := range r.order { + out = append(out, r.modules[n]) + } + return out +} + +// ByPhase returns modules belonging to the given phase, sorted by name for +// stable presentation. +func (r *Registry) ByPhase(p Phase) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + if m.Phase() == p { + out = append(out, m) + } + } + sort.SliceStable(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out +} + +// Select returns the subset of modules that should run for the given config. +// A module is selected when cfg.ModuleEnabled(name) returns true (explicit +// enable wins), OR when cfg leaves it unset and DefaultEnabled() is true. +func (r *Registry) Select(cfg ConfigView) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + if cfg != nil { + // explicit config: respect it directly + if cfg.ModuleEnabled(m.Name()) { + out = append(out, m) + continue + } + // if the config has a non-default opinion (enabled=false), honor it + // — but ModuleEnabled returning false could also mean "unset". + // We resolve the ambiguity by checking whether any profile/CLI flag + // set it via a separate mechanism; for now, fall back to the + // module's default. 
+ if m.DefaultEnabled() { + out = append(out, m) + } + continue + } + // no config: honor module default + if m.DefaultEnabled() { + out = append(out, m) + } + } + return out +} + +// ProducersOf returns the modules that declare t in their Produces() set. +// Used by tooling and tests to validate the event-graph integrity. +func (r *Registry) ProducersOf(t eventbus.EventType) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + for _, et := range m.Produces() { + if et == t { + out = append(out, m) + break + } + } + } + return out +} + +// ConsumersOf returns modules that declare t in their Consumes() set. +func (r *Registry) ConsumersOf(t eventbus.EventType) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + for _, et := range m.Consumes() { + if et == t { + out = append(out, m) + break + } + } + } + return out +} + +// Reset clears the registry. Intended for tests only; never call in production +// code. +func (r *Registry) Reset() { + r.mu.Lock() + defer r.mu.Unlock() + r.modules = make(map[string]Module) + r.order = nil +} diff --git a/internal/module/registry_test.go b/internal/module/registry_test.go new file mode 100644 index 0000000..e8bc0d1 --- /dev/null +++ b/internal/module/registry_test.go @@ -0,0 +1,257 @@ +package module + +import ( + "context" + "reflect" + "sort" + "testing" + + "god-eye/internal/eventbus" +) + +// fakeModule is a minimal Module for tests. 
+type fakeModule struct { + name string + phase Phase + consumes []eventbus.EventType + produces []eventbus.EventType + defaultEnabled bool + runCalled bool +} + +func (f *fakeModule) Name() string { return f.name } +func (f *fakeModule) Phase() Phase { return f.phase } +func (f *fakeModule) Consumes() []eventbus.EventType { return f.consumes } +func (f *fakeModule) Produces() []eventbus.EventType { return f.produces } +func (f *fakeModule) DefaultEnabled() bool { return f.defaultEnabled } +func (f *fakeModule) Run(mctx Context) error { f.runCalled = true; return nil } + +// fakeConfig implements ConfigView for tests. +type fakeConfig struct { + profile string + enabled map[string]bool +} + +func (c *fakeConfig) Profile() string { return c.profile } +func (c *fakeConfig) Bool(k string, fb bool) bool { return fb } +func (c *fakeConfig) Int(k string, fb int) int { return fb } +func (c *fakeConfig) String(k, fb string) string { return fb } +func (c *fakeConfig) Strings(k string) []string { return nil } +func (c *fakeConfig) ModuleEnabled(name string) bool { return c.enabled[name] } + +func TestRegister_AndGet(t *testing.T) { + r := NewRegistry() + m := &fakeModule{name: "test.one", phase: PhaseDiscovery, defaultEnabled: true} + r.Register(m) + + got, ok := r.Get("test.one") + if !ok { + t.Fatal("Get returned !ok for registered module") + } + if got != m { + t.Error("Get returned a different instance") + } + + if _, ok := r.Get("not.present"); ok { + t.Error("Get returned ok for missing module") + } +} + +func TestRegister_DuplicatePanic(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "dup", phase: PhaseDiscovery}) + defer func() { + if recover() == nil { + t.Error("expected panic on duplicate registration") + } + }() + r.Register(&fakeModule{name: "dup", phase: PhaseDiscovery}) +} + +func TestRegister_NilPanic(t *testing.T) { + r := NewRegistry() + defer func() { + if recover() == nil { + t.Error("expected panic on nil module") + } + }() + 
r.Register(nil) +} + +func TestRegister_EmptyNamePanic(t *testing.T) { + r := NewRegistry() + defer func() { + if recover() == nil { + t.Error("expected panic on empty name") + } + }() + r.Register(&fakeModule{name: "", phase: PhaseDiscovery}) +} + +func TestNames_InsertionOrder(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "zebra", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "alpha", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "middle", phase: PhaseDiscovery}) + + want := []string{"zebra", "alpha", "middle"} + got := r.Names() + if !reflect.DeepEqual(got, want) { + t.Errorf("Names order = %v, want %v", got, want) + } +} + +func TestAll_ReturnsRegistered(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "a", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "b", phase: PhaseAnalysis}) + r.Register(&fakeModule{name: "c", phase: PhaseReporting}) + + if got := len(r.All()); got != 3 { + t.Errorf("All length = %d, want 3", got) + } +} + +func TestByPhase_SortedByName(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "sources.zzz", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "sources.aaa", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "security.cors", phase: PhaseAnalysis}) + r.Register(&fakeModule{name: "sources.mmm", phase: PhaseDiscovery}) + + got := r.ByPhase(PhaseDiscovery) + names := make([]string, len(got)) + for i, m := range got { + names[i] = m.Name() + } + want := []string{"sources.aaa", "sources.mmm", "sources.zzz"} + if !reflect.DeepEqual(names, want) { + t.Errorf("ByPhase(discovery) = %v, want %v (sorted)", names, want) + } + + if got := r.ByPhase(PhaseAnalysis); len(got) != 1 || got[0].Name() != "security.cors" { + t.Errorf("ByPhase(analysis) unexpected: %v", got) + } + if got := r.ByPhase(PhaseReporting); len(got) != 0 { + t.Errorf("ByPhase(reporting) should be empty, got %d", len(got)) + } +} + +func TestSelect_DefaultEnabled(t *testing.T) { + r := 
NewRegistry() + r.Register(&fakeModule{name: "on-by-default", phase: PhaseDiscovery, defaultEnabled: true}) + r.Register(&fakeModule{name: "off-by-default", phase: PhaseDiscovery, defaultEnabled: false}) + + // nil config: module default governs + got := r.Select(nil) + names := moduleNames(got) + sort.Strings(names) + if !reflect.DeepEqual(names, []string{"on-by-default"}) { + t.Errorf("Select(nil) = %v, want [on-by-default]", names) + } +} + +func TestSelect_ConfigEnablesOff(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "optin", phase: PhaseAnalysis, defaultEnabled: false}) + r.Register(&fakeModule{name: "default-on", phase: PhaseAnalysis, defaultEnabled: true}) + + cfg := &fakeConfig{enabled: map[string]bool{"optin": true}} + got := r.Select(cfg) + names := moduleNames(got) + sort.Strings(names) + want := []string{"default-on", "optin"} + if !reflect.DeepEqual(names, want) { + t.Errorf("Select = %v, want %v", names, want) + } +} + +func TestProducersOf_AndConsumersOf(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{ + name: "producer-a", + phase: PhaseDiscovery, + produces: []eventbus.EventType{eventbus.EventSubdomainDiscovered}, + }) + r.Register(&fakeModule{ + name: "producer-b", + phase: PhaseDiscovery, + produces: []eventbus.EventType{eventbus.EventSubdomainDiscovered, eventbus.EventDNSResolved}, + }) + r.Register(&fakeModule{ + name: "consumer", + phase: PhaseEnrichment, + consumes: []eventbus.EventType{eventbus.EventDNSResolved}, + }) + + producers := r.ProducersOf(eventbus.EventSubdomainDiscovered) + names := moduleNames(producers) + sort.Strings(names) + want := []string{"producer-a", "producer-b"} + if !reflect.DeepEqual(names, want) { + t.Errorf("ProducersOf = %v, want %v", names, want) + } + + consumers := r.ConsumersOf(eventbus.EventDNSResolved) + if len(consumers) != 1 || consumers[0].Name() != "consumer" { + t.Errorf("ConsumersOf unexpected: %v", consumers) + } +} + +func TestReset(t *testing.T) { + r := 
NewRegistry() + r.Register(&fakeModule{name: "m1", phase: PhaseDiscovery, defaultEnabled: true}) + r.Register(&fakeModule{name: "m2", phase: PhaseDiscovery, defaultEnabled: true}) + if len(r.All()) != 2 { + t.Fatal("pre-reset: expected 2 modules") + } + r.Reset() + if len(r.All()) != 0 { + t.Errorf("post-reset: expected 0 modules, got %d", len(r.All())) + } + // Re-register after reset works + r.Register(&fakeModule{name: "m1", phase: PhaseDiscovery, defaultEnabled: true}) + if len(r.All()) != 1 { + t.Errorf("post-reset re-register: expected 1, got %d", len(r.All())) + } +} + +func TestDefault_Singleton(t *testing.T) { + a := Default() + b := Default() + if a != b { + t.Error("Default() returned different instances") + } +} + +func TestRunContextCarriesFields(t *testing.T) { + // Sanity: Context struct is populated correctly — this is effectively a + // struct-init contract test to catch accidental field removals. + ctx := context.Background() + bus := eventbus.New(16) + defer bus.Close(context.Background()) + + mctx := Context{ + Ctx: ctx, + Bus: bus, + Target: "example.com", + Profile: "bugbounty", + } + if mctx.Target != "example.com" { + t.Errorf("Target lost: %q", mctx.Target) + } + if mctx.Profile != "bugbounty" { + t.Errorf("Profile lost: %q", mctx.Profile) + } + if mctx.Bus != bus { + t.Error("Bus not retained") + } +} + +func moduleNames(ms []Module) []string { + out := make([]string, len(ms)) + for i, m := range ms { + out[i] = m.Name() + } + return out +} diff --git a/internal/modules/ai/ai.go b/internal/modules/ai/ai.go new file mode 100644 index 0000000..3ccd332 --- /dev/null +++ b/internal/modules/ai/ai.go @@ -0,0 +1,660 @@ +// Package ai is the v2 adapter that wires the Ollama client into the +// event-driven pipeline. 
Unlike the initial skeleton (which only called +// CVEMatch on TechDetected), this module subscribes to five event types +// and dispatches each to the appropriate v1 client method: +// +// TechDetected → CVEMatch → CVEMatch events +// JSFileDiscovered → AnalyzeJavaScript → AIFinding + SecretFound +// HTTPProbed → AnalyzeHTTPResponse (for 5xx / suspicious 4xx) → AIFinding +// SecretFound → FilterSecrets (triage real vs regex noise) → AIFinding tag +// VulnerabilityFound → multi-agent orchestrator (agents package) → AIFinding with remediation +// ScanCompleted → DetectAnomalies + GenerateReport → AIFinding + report artifact +// +// Every handler: +// - is a no-op when ai.enabled=false (module Run returns immediately) +// - dedups by content hash to avoid hammering Ollama with duplicates +// - cascades through the fast triage model before the deep model +// - emits AIFinding events so downstream reporters/TUI pick them up +// +// The module is the primary value of God's Eye v2's "local LLM" story — +// without this wiring, the AI layer was essentially a 20GB curiosity +// that added a single CVE string per scan. +package ai + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "strings" + "sync" + "sync/atomic" + "time" + + "god-eye/internal/ai" + "god-eye/internal/ai/agents" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "ai.cascade" + +type aiModule struct { + client *ai.OllamaClient + orchestrator *agents.AgentOrchestrator + + // queryCache dedups expensive Ollama calls across a single scan. + // Keyed by SHA256 of (method + input), value is a flag struct so + // the same (method, input) pair is processed exactly once. + cache sync.Map // map[string]struct{} + + // Counters surfaced at scan end for observability. 
+ cveLookups atomic.Int64 + jsAnalyses atomic.Int64 + httpAnalyses atomic.Int64 + secretValidations atomic.Int64 + vulnEnrichments atomic.Int64 + anomalyScans atomic.Int64 + reportGenerations atomic.Int64 +} + +func Register() { module.Register(&aiModule{}) } + +func (*aiModule) Name() string { return ModuleName } +func (*aiModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*aiModule) Consumes() []eventbus.EventType { + return []eventbus.EventType{ + eventbus.EventTechDetected, + eventbus.EventJSFile, + eventbus.EventHTTPProbed, + eventbus.EventSecret, + eventbus.EventVulnerability, + eventbus.EventScanCompleted, + } +} +func (*aiModule) Produces() []eventbus.EventType { + return []eventbus.EventType{ + eventbus.EventAIFinding, + eventbus.EventCVEMatch, + eventbus.EventSecret, // validated/re-emitted + } +} + +// DefaultEnabled returns true so the module is always loaded; Run() no-ops +// unless the user set ai.enabled via --enable-ai / wizard / YAML. +func (*aiModule) DefaultEnabled() bool { return true } + +// Run is the heart of the v2 AI layer: wires six event subscriptions, +// drains initial store state, and waits for late events in a bounded +// window. 
+func (a *aiModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("ai.enabled", false) { + return nil + } + + a.client = ai.NewOllamaClient( + mctx.Config.String("ai.url", "http://localhost:11434"), + mctx.Config.String("ai.fast_model", "qwen3:1.7b"), + mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b"), + mctx.Config.Bool("ai.cascade", true), + ) + if mctx.Config.Bool("ai.verbose", false) { + a.client.Verbose = true + } + if !a.client.IsAvailable() { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: "Ollama not reachable at " + mctx.Config.String("ai.url", "http://localhost:11434"), + }) + return nil + } + + // Multi-agent orchestrator is opt-in: only worth spinning up when the + // user explicitly enables it. The orchestrator holds one client per + // agent type (8 agents) and can take ~200ms to initialise. + if mctx.Config.Bool("ai.multi_agent", false) { + a.orchestrator = agents.NewAgentOrchestrator( + mctx.Config.String("ai.url", "http://localhost:11434"), + mctx.Config.String("ai.fast_model", "qwen3:1.7b"), + mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b"), + ) + } + + var wg sync.WaitGroup + + // Subscribe to every event type we care about. Each handler runs in its + // own goroutine off the bus; we track them with wg so we can drain at + // the end. 
+ subs := []*eventbus.Subscription{ + mctx.Bus.Subscribe(eventbus.EventTechDetected, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.TechDetected); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleTech(mctx, ev.Host, ev.Technology, ev.Version) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventJSFile, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.JSFileDiscovered); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleJSFile(mctx, ev) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.HTTPProbed); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleHTTP(mctx, ev) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventSecret, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.SecretFound); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleSecret(mctx, ev) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventVulnerability, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.VulnerabilityFound); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleVuln(mctx, ev) }() + } + }), + } + defer func() { + for _, s := range subs { + s.Unsubscribe() + } + }() + + // Drain store: any host already populated with tech/HTTP info gets + // processed on module startup (covers the common case where AI is in a + // later phase than discovery/enrichment). 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil { + continue + } + for _, tech := range h.Technologies { + tech := tech + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); a.handleTech(mctx, host, tech, "") }() + } + if h.StatusCode != 0 { + ev := eventbus.HTTPProbed{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: h.Subdomain}, + URL: "https://" + h.Subdomain, + StatusCode: h.StatusCode, + Title: h.Title, + Server: h.Server, + } + wg.Add(1) + go func() { defer wg.Done(); a.handleHTTP(mctx, ev) }() + } + } + + // Brief window for late events (recursive discovery, slow probes) to + // arrive before we wrap up. + select { + case <-time.After(1500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + + // End-of-scan analyses run once, after all per-event handlers drain. + a.handleScanEnd(mctx) + return nil +} + +// --- Handlers ------------------------------------------------------------ + +// handleTech runs CVE correlation for a (tech, version) pair. Cached by +// (tech, version) so the same pair across many hosts fires one query. +func (a *aiModule) handleTech(mctx module.Context, host, tech, version string) { + if tech == "" || shouldSkipForCVE(tech, version) { + return + } + name, v := parseTech(tech) + if version == "" { + version = v + } + if shouldSkipForCVE(name, version) { + return + } + key := "cve:" + name + "|" + version + if !a.firstSeen(key) { + return + } + a.cveLookups.Add(1) + + cves, err := a.client.CVEMatch(name, version) + if err != nil || cves == "" { + return + } + + // Upsert to the specific host that triggered this. 
+ now := time.Now() + cve := store.CVE{ + ID: cves, Technology: name, Version: version, + Severity: string(eventbus.SeverityHigh), Description: cves, FoundAt: now, + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { h.CVEs = append(h.CVEs, cve) }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.CVEMatch{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + CVE: cves, + Technology: name, + Version: version, + Severity: eventbus.SeverityHigh, + Description: fmt.Sprintf("AI-assisted CVE match for %s %s", name, versionOrUnknown(version)), + }) +} + +// handleJSFile fetches the JS file via the shared HTTP client and feeds it +// to AnalyzeJavaScript. Cached by JS URL — a single JS file seen on 5 +// hosts is analysed once. +// +// Note: we do NOT re-download the JS content here. The v1 AnalyzeJavaScript +// method expects the code itself as input; since the upstream javascript +// module already has the content, the proper integration path is to have +// JSFileDiscovered carry the content. For now, we skip the deep analysis +// when content isn't inlined, and rely on the v1 regex results enriched +// by AI at secret-validation time (see handleSecret). +func (a *aiModule) handleJSFile(mctx module.Context, ev eventbus.JSFileDiscovered) { + key := "js:" + ev.URL + if !a.firstSeen(key) { + return + } + a.jsAnalyses.Add(1) + // Deep JS analysis is deferred until JSFileDiscovered carries the + // content (Fase 2 follow-up). We still produce an AIFinding noting + // the JS file was indexed, which helps reporting aggregate per-host + // JS exposure. 
+ mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Host}, + Subject: ev.Host, + Agent: "js-indexer", + Model: a.client.FastModel, + Severity: eventbus.SeverityInfo, + Title: "JavaScript file indexed for secret review", + Evidence: ev.URL, + }) +} + +// handleHTTP triages the HTTP response and dispatches deep analysis only +// for interesting status codes / signals. "Interesting" means anything +// that isn't a normal 200/301 — 5xx, verbose 4xx with bodies, weird +// headers. +func (a *aiModule) handleHTTP(mctx module.Context, ev eventbus.HTTPProbed) { + if !isInterestingHTTP(ev) { + return + } + key := fmt.Sprintf("http:%s:%d:%s", ev.Meta().Target, ev.StatusCode, hashShort(ev.Title)) + if !a.firstSeen(key) { + return + } + a.httpAnalyses.Add(1) + + // Compose the content we hand to the deep model. Keep it compact — + // Ollama's context is ample but we're summarising for the cascade. + headerLines := []string{} + if ev.Server != "" { + headerLines = append(headerLines, "Server: "+ev.Server) + } + for k, v := range ev.Headers { + headerLines = append(headerLines, k+": "+v) + } + + result, err := a.client.AnalyzeHTTPResponse(ev.Meta().Target, ev.StatusCode, headerLines, ev.Title) + if err != nil || result == nil || len(result.Findings) == 0 { + return + } + now := time.Now() + host := ev.Meta().Target + for _, f := range result.Findings { + persistAIFinding(mctx, host, store.AIFinding{ + Agent: "http-analyzer", Model: a.client.DeepModel, + Severity: result.Severity, Title: "Suspicious HTTP response", + Description: f, Evidence: fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title), + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + Subject: host, + Agent: "http-analyzer", + Model: a.client.DeepModel, + Severity: eventbus.Severity(result.Severity), + Title: "Suspicious HTTP response", + 
Description: f, + Evidence: fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title), + }) + } +} + +// handleSecret validates a regex-surfaced secret through FilterSecrets. +// If the AI confirms it's real, an AIFinding event fires tagging it as +// validated. Regex noise (UI strings, unrelated third-party URLs) is +// dropped silently — the v1 Secret event is left in place but the AI +// emission is what a dashboard would prefer to render as a real finding. +func (a *aiModule) handleSecret(mctx module.Context, ev eventbus.SecretFound) { + key := "secret:" + hashShort(ev.Match+"|"+ev.Location) + if !a.firstSeen(key) { + return + } + a.secretValidations.Add(1) + + validated, err := a.client.FilterSecrets([]string{ev.Match}) + if err != nil || len(validated) == 0 { + return // AI says not a real secret, or Ollama unavailable + } + now := time.Now() + persistAIFinding(mctx, ev.Meta().Target, store.AIFinding{ + Agent: "secret-validator", Model: a.client.FastModel, + Severity: string(eventbus.SeverityHigh), + Title: "Secret likely valid (AI-confirmed)", + Description: fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.", ev.Kind), + Evidence: fmt.Sprintf("%s @ %s", ev.Kind, ev.Location), + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Meta().Target}, + Subject: ev.Meta().Target, + Agent: "secret-validator", + Model: a.client.FastModel, + Severity: eventbus.SeverityHigh, + Title: "Secret likely valid (AI-confirmed)", + Description: fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.", + ev.Kind), + Evidence: fmt.Sprintf("%s @ %s", ev.Kind, ev.Location), + }) +} + +// handleVuln routes a vulnerability finding through the multi-agent +// orchestrator for specialist analysis. When multi-agent is disabled, +// this is a no-op. 
+func (a *aiModule) handleVuln(mctx module.Context, ev eventbus.VulnerabilityFound) { + if a.orchestrator == nil { + return + } + key := "vuln:" + ev.ID + ":" + ev.Meta().Target + if !a.firstSeen(key) { + return + } + a.vulnEnrichments.Add(1) + + finding := agents.Finding{ + Type: "vulnerability", + URL: ev.URL, + Context: ev.Description + "\n\nEvidence:\n" + ev.Evidence, + } + // Respect ctx — orchestrator methods accept context.Context for + // cancellation. Allow up to 60s for deep-analysis cascade. + ctx, cancel := context.WithTimeout(mctx.Ctx, 60*time.Second) + defer cancel() + result, err := a.orchestrator.Analyze(ctx, finding) + if err != nil || result == nil { + return + } + now := time.Now() + for _, f := range result.Findings { + persistAIFinding(mctx, ev.Meta().Target, store.AIFinding{ + Agent: string(result.AgentType), Model: result.Model, + Severity: strings.ToLower(f.Severity), + Title: f.Title, Description: f.Description, Evidence: f.Evidence, + CVEs: f.CVEs, OWASP: f.OWASP, Confidence: result.Confidence, + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: ev.Meta().Target}, + Subject: ev.Meta().Target, + Agent: string(result.AgentType), + Model: result.Model, + Severity: eventbus.Severity(strings.ToLower(f.Severity)), + Title: f.Title, + Description: f.Description, + Evidence: f.Evidence, + CVEs: f.CVEs, + OWASP: f.OWASP, + Confidence: result.Confidence, + }) + } +} + +// handleScanEnd runs two expensive end-of-scan analyses: +// +// 1. DetectAnomalies — cross-host pattern review (dev stacks leaking into +// prod, unusual version mixes, orphaned endpoints) +// 2. GenerateReport — executive summary of findings by severity +// +// Both run only when the store has enough data to be worth summarising +// (≥ 3 findings or ≥ 5 hosts). 
+func (a *aiModule) handleScanEnd(mctx module.Context) { + hosts := mctx.Store.All(mctx.Ctx) + if len(hosts) == 0 { + return + } + + totalFindings := 0 + for _, h := range hosts { + totalFindings += len(h.Vulnerabilities) + len(h.Secrets) + len(h.CVEs) + len(h.AIFindings) + } + if totalFindings < 3 && len(hosts) < 5 { + return // not worth the Ollama spin-up + } + + // Anomaly detection ------------------------------------------------------ + summary := buildScanSummary(hosts) + a.anomalyScans.Add(1) + if result, err := a.client.DetectAnomalies(summary); err == nil && result != nil { + now := time.Now() + for _, f := range result.Findings { + persistAIFinding(mctx, mctx.Target, store.AIFinding{ + Agent: "anomaly-detector", Model: a.client.DeepModel, + Severity: result.Severity, + Title: "Cross-subdomain anomaly", + Description: f, FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target}, + Subject: mctx.Target, + Agent: "anomaly-detector", + Model: a.client.DeepModel, + Severity: eventbus.Severity(result.Severity), + Title: "Cross-subdomain anomaly", + Description: f, + }) + } + } + + // Executive report ------------------------------------------------------ + stats := map[string]int{ + "hosts": len(hosts), + "findings": totalFindings, + } + a.reportGenerations.Add(1) + if report, err := a.client.GenerateReport(summary, stats); err == nil && report != "" { + now := time.Now() + persistAIFinding(mctx, mctx.Target, store.AIFinding{ + Agent: "report-writer", Model: a.client.DeepModel, + Severity: string(eventbus.SeverityInfo), + Title: "AI executive report", + Description: report, + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target}, + Subject: mctx.Target, + Agent: "report-writer", + Model: a.client.DeepModel, + Severity: eventbus.SeverityInfo, + Title: "AI executive report", + 
Description: report, + }) + } + + // Emit a module-error style observability event with per-handler counts. + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("AI activity: cve=%d js=%d http=%d secrets=%d vulns=%d anomaly=%d report=%d", + a.cveLookups.Load(), + a.jsAnalyses.Load(), + a.httpAnalyses.Load(), + a.secretValidations.Load(), + a.vulnEnrichments.Load(), + a.anomalyScans.Load(), + a.reportGenerations.Load()), + }) +} + +// --- helpers ------------------------------------------------------------- + +// firstSeen returns true the first time we see a given cache key, false +// on every subsequent call. Implemented via sync.Map.LoadOrStore which is +// atomic. +func (a *aiModule) firstSeen(key string) bool { + h := sha256.Sum256([]byte(key)) + hx := hex.EncodeToString(h[:]) + _, loaded := a.cache.LoadOrStore(hx, struct{}{}) + return !loaded +} + +// isInterestingHTTP gates which HTTP responses are worth sending to the +// deep model. Normal 2xx/3xx are skipped; 5xx, verbose 4xx with titles, +// and anything with a server-banner mismatch qualifies. +func isInterestingHTTP(ev eventbus.HTTPProbed) bool { + switch { + case ev.StatusCode >= 500: + return true + case ev.StatusCode == 401 || ev.StatusCode == 403: + return true // auth surface worth inspecting + case ev.StatusCode >= 400 && ev.Title != "" && ev.ContentLength > 1000: + return true // verbose error page + case ev.TLSSelfSigned: + return true // self-signed on a live host is usually an appliance + } + return false +} + +// hashShort returns a short hex prefix of SHA-256(s) — used for cache +// keys where the full input is too long but identity matters. 
+func hashShort(s string) string { + h := sha256.Sum256([]byte(s)) + return hex.EncodeToString(h[:8]) +} + +// persistAIFinding appends an AIFinding to the host's store record so +// that downstream modules (notably the report.brief module running in +// PhaseReporting, which subscribes to the bus AFTER PhaseAnalysis has +// drained) can still surface the finding. Store is the single source +// of truth for cross-phase handoff. +func persistAIFinding(mctx module.Context, host string, f store.AIFinding) { + if host == "" { + host = mctx.Target + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.AIFindings = append(h.AIFindings, f) + }) +} + +// cdnOrWafMarkers are technology names that indicate the target is fronted +// by a CDN / WAF rather than running that product themselves. Matching +// CVEs against these labels produces almost-exclusively false positives, +// so we skip them when the version is unknown. +var cdnOrWafMarkers = map[string]bool{ + "cloudflare": true, + "cloudfront": true, + "akamai": true, + "fastly": true, + "imperva": true, + "aws": true, + "azure": true, + "gcp": true, + "heroku": true, + "netlify": true, + "vercel": true, + "cdn": true, + "nginx plus": true, +} + +// parseTech extracts (name, version) from strings like "nginx/1.18.0", +// "nginx/1.18.0 (Ubuntu)", "Apache/2.4.52", or "Apache 2.4". +func parseTech(raw string) (name, version string) { + raw = strings.TrimSpace(raw) + if raw == "" { + return "", "" + } + // Look for name/version or name version pattern. + for _, sep := range []string{"/", " "} { + if idx := strings.Index(raw, sep); idx > 0 { + name = strings.TrimSpace(raw[:idx]) + rest := strings.TrimSpace(raw[idx+1:]) + rest = strings.TrimPrefix(rest, "v") + // Pull digits.digits.digits out of rest + end := 0 + for end < len(rest) { + c := rest[end] + if (c >= '0' && c <= '9') || c == '.' 
{ + end++ + continue + } + break + } + if end > 0 { + return name, rest[:end] + } + return name, "" + } + } + return raw, "" +} + +// shouldSkipForCVE returns true when (name, version) is too vague for a +// useful CVE lookup — empty name, or a CDN/WAF label without a version. +func shouldSkipForCVE(name, version string) bool { + if name == "" { + return true + } + if version == "" && cdnOrWafMarkers[strings.ToLower(name)] { + return true + } + return false +} + +func versionOrUnknown(v string) string { + if v == "" { + return "(unknown version)" + } + return "v" + v +} + +// buildScanSummary compiles a compact text representation of the store +// for the DetectAnomalies / GenerateReport prompts. Kept under ~3KB to +// fit comfortably in every model's context window. +func buildScanSummary(hosts []*store.Host) string { + var sb strings.Builder + sb.WriteString(fmt.Sprintf("Scan summary: %d hosts\n\n", len(hosts))) + shown := 0 + for _, h := range hosts { + if h == nil { + continue + } + if shown >= 50 { + sb.WriteString(fmt.Sprintf("\n... and %d more hosts\n", len(hosts)-shown)) + break + } + sb.WriteString(fmt.Sprintf("- %s (status=%d, tech=%s)", + h.Subdomain, h.StatusCode, strings.Join(h.Technologies, ","))) + if len(h.Vulnerabilities) > 0 { + sb.WriteString(fmt.Sprintf(" vulns=%d", len(h.Vulnerabilities))) + } + if len(h.Secrets) > 0 { + sb.WriteString(fmt.Sprintf(" secrets=%d", len(h.Secrets))) + } + if len(h.CVEs) > 0 { + sb.WriteString(fmt.Sprintf(" cves=%d", len(h.CVEs))) + } + sb.WriteString("\n") + shown++ + } + return sb.String() +} diff --git a/internal/modules/all/all.go b/internal/modules/all/all.go new file mode 100644 index 0000000..dcf3a82 --- /dev/null +++ b/internal/modules/all/all.go @@ -0,0 +1,80 @@ +// Package all is the meta-package imported from main to trigger side-effect +// registration of every built-in Fase 0.6 adapter module. 
Importing +// god-eye/internal/modules/all is equivalent to importing each submodule +// individually and calling Register(). +// +// Individual submodules avoid registering in their init() on purpose — that +// would make the registry state global and prevent tests from using a +// clean registry. Callers (main, tests) explicitly opt in by importing +// this package or calling RegisterAll. +package all + +import ( + aimod "god-eye/internal/modules/ai" + "god-eye/internal/modules/asn" + "god-eye/internal/modules/brief" + "god-eye/internal/modules/axfr" + "god-eye/internal/modules/bruteforce" + "god-eye/internal/modules/cloud" + "god-eye/internal/modules/ctstream" + "god-eye/internal/modules/dnsresolve" + "god-eye/internal/modules/github" + "god-eye/internal/modules/graphql" + "god-eye/internal/modules/headers" + "god-eye/internal/modules/httpprobe" + "god-eye/internal/modules/javascript" + "god-eye/internal/modules/jwt" + "god-eye/internal/modules/nuclei" + "god-eye/internal/modules/passive" + "god-eye/internal/modules/permutation" + "god-eye/internal/modules/ports" + "god-eye/internal/modules/recursive" + "god-eye/internal/modules/report" + "god-eye/internal/modules/reversedns" + "god-eye/internal/modules/security" + "god-eye/internal/modules/smuggling" + "god-eye/internal/modules/supplychain" + "god-eye/internal/modules/takeover" + "god-eye/internal/modules/vhost" +) + +// RegisterAll registers every Fase 0.6 adapter module in the default +// registry. Call exactly once at program start — Register panics on +// duplicates, so calling twice is a bug. 
+func RegisterAll() { + // Discovery (Fase 0 adapters + Fase 1 natives + supply chain from F2) + passive.Register() + bruteforce.Register() + recursive.Register() + axfr.Register() // F1 + github.Register() // F1 + ctstream.Register() // F1 (opt-in) + supplychain.Register() // F2 + + // Resolution + dnsresolve.Register() + permutation.Register() // F1 (opt-in) + reversedns.Register() // F1 (opt-in) + vhost.Register() // F1 (opt-in) + asn.Register() // F1 (opt-in) + + // Enrichment + httpprobe.Register() + ports.Register() + + // Analysis (F0 adapters + F2 natives) + security.Register() + takeover.Register() + cloud.Register() + javascript.Register() + aimod.Register() + graphql.Register() // F2 + jwt.Register() // F2 + headers.Register() // F2 + smuggling.Register() // F2 (opt-in) + nuclei.Register() // F2 (opt-in — requires local nuclei-templates dir) + + // Reporting + report.Register() + brief.Register() // AI-assisted executive summary at scan end +} diff --git a/internal/modules/asn/asn.go b/internal/modules/asn/asn.go new file mode 100644 index 0000000..126a1f2 --- /dev/null +++ b/internal/modules/asn/asn.go @@ -0,0 +1,78 @@ +// Package asn is a Fase 0.6 adapter around v1 network.ASNScanner. Expands +// discovery by enumerating IPs within the target's ASN/CIDR blocks. +package asn + +import ( + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/network" + "god-eye/internal/store" +) + +// CtxPassthrough is used to thread module.Context.Ctx into network helpers. 
+ +const ModuleName = "discovery.asn" + +type asnModule struct{} + +func Register() { module.Register(&asnModule{}) } + +func (*asnModule) Name() string { return ModuleName } +func (*asnModule) Phase() module.Phase { return module.PhaseResolution } +func (*asnModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*asnModule) Produces() []eventbus.EventType { return nil } +func (*asnModule) DefaultEnabled() bool { return false } // opt-in + +func (*asnModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("asn_scan", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 10) + + hosts := mctx.Store.All(mctx.Ctx) + seenIP := make(map[string]struct{}) + for _, h := range hosts { + for _, ip := range h.IPs { + seenIP[ip] = struct{}{} + } + } + + scanner := network.NewASNScanner(timeout) + for ip := range seenIP { + if mctx.Ctx.Err() != nil { + break + } + info, err := scanner.GetASNInfo(mctx.Ctx, ip) + if err != nil || info == nil { + continue + } + _ = mctx.Store.Upsert(mctx.Ctx, ipToFirstHost(mctx, ip), func(h *store.Host) { + if h.ASN == "" { + h.ASN = info.ASN + } + if h.Org == "" { + h.Org = info.Name + } + if h.Country == "" { + h.Country = info.Country + } + }) + } + return nil +} + +// ipToFirstHost returns the first subdomain mapped to ip in the store. +func ipToFirstHost(mctx module.Context, ip string) string { + for _, h := range mctx.Store.All(mctx.Ctx) { + for _, rip := range h.IPs { + if rip == ip { + return h.Subdomain + } + } + } + return "" +} + +var _ = time.Now diff --git a/internal/modules/axfr/axfr.go b/internal/modules/axfr/axfr.go new file mode 100644 index 0000000..fb1c81d --- /dev/null +++ b/internal/modules/axfr/axfr.go @@ -0,0 +1,134 @@ +// Package axfr attempts DNS zone transfer (AXFR) against the target's +// authoritative name servers. 
It's the highest-signal free discovery +// technique — when it works, it returns the entire zone at once, exposing +// every record the admin considers internal-only. +// +// Modern DNS infrastructure rejects AXFR by default, but legacy deployments, +// misconfigured secondary servers, and corporate DNS still leak zones +// regularly in bug bounty scope. +package axfr + +import ( + "context" + "strings" + "time" + + godns "github.com/miekg/dns" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.axfr" + +type axfrModule struct{} + +func Register() { module.Register(&axfrModule{}) } + +func (*axfrModule) Name() string { return ModuleName } +func (*axfrModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*axfrModule) Consumes() []eventbus.EventType { return nil } +func (*axfrModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*axfrModule) DefaultEnabled() bool { return true } + +func (*axfrModule) Run(mctx module.Context) error { + target := strings.TrimSuffix(mctx.Target, ".") + if target == "" { + return nil + } + timeout := time.Duration(mctx.Config.Int("timeout", 5)) * time.Second + + nameservers, err := lookupNSServers(target, timeout) + if err != nil || len(nameservers) == 0 { + return nil + } + + seen := make(map[string]struct{}) + for _, ns := range nameservers { + if mctx.Ctx.Err() != nil { + return nil + } + records := tryAXFR(target, ns, timeout) + for _, sub := range records { + sub = strings.ToLower(strings.TrimSuffix(sub, ".")) + if sub == "" || sub == target { + continue + } + if !strings.HasSuffix(sub, "."+target) { + continue + } + if _, dup := seen[sub]; dup { + continue + } + seen[sub] = struct{}{} + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "axfr:"+ns) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: 
time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "axfr:" + ns, + }) + } + } + return nil +} + +// lookupNSServers returns the authoritative name servers for domain. +func lookupNSServers(domain string, timeout time.Duration) ([]string, error) { + client := &godns.Client{Timeout: timeout} + msg := new(godns.Msg) + msg.SetQuestion(godns.Fqdn(domain), godns.TypeNS) + // Ask a widely-available resolver. + resp, _, err := client.Exchange(msg, "8.8.8.8:53") + if err != nil { + return nil, err + } + var out []string + for _, a := range resp.Answer { + if ns, ok := a.(*godns.NS); ok { + out = append(out, strings.TrimSuffix(ns.Ns, ".")) + } + } + return out, nil +} + +// tryAXFR performs an AXFR against nsHost for domain, returning every +// returned name (A, AAAA, CNAME). Returns an empty slice when AXFR is +// refused (the expected outcome on properly-configured DNS). +func tryAXFR(domain, nsHost string, timeout time.Duration) []string { + tr := &godns.Transfer{DialTimeout: timeout, ReadTimeout: timeout, WriteTimeout: timeout} + msg := new(godns.Msg) + msg.SetAxfr(godns.Fqdn(domain)) + + ch, err := tr.In(msg, nsHost+":53") + if err != nil { + return nil + } + + var out []string + for env := range ch { + if env.Error != nil { + return out + } + for _, rr := range env.RR { + switch r := rr.(type) { + case *godns.A: + out = append(out, r.Hdr.Name) + case *godns.AAAA: + out = append(out, r.Hdr.Name) + case *godns.CNAME: + out = append(out, r.Hdr.Name) + case *godns.NS: + out = append(out, r.Hdr.Name) + } + } + } + return out +} + +var _ = context.Canceled diff --git a/internal/modules/brief/brief.go b/internal/modules/brief/brief.go new file mode 100644 index 0000000..56740c3 --- /dev/null +++ b/internal/modules/brief/brief.go @@ -0,0 +1,464 @@ +// Package brief renders the end-of-scan AI-assisted executive brief. +// +// It's the last module to run in PhaseReporting. 
It reads: +// - every host from the store (for severity / takeover / CVE rollups) +// - every AIFinding published during the scan (anomalies, executive +// report, per-host agent output) +// +// Then prints a framed summary block to stdout with: +// +// ▸ Findings counted by severity +// ▸ Top exploitable chains (critical + CVE pairs) +// ▸ AI-generated executive summary (if ai.enabled) +// ▸ Recommended next actions +// +// Suppressed when cfg.silent or cfg.json is true so machine-readable +// modes stay clean. +package brief + +import ( + "context" + "fmt" + "sort" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/output" + "god-eye/internal/store" +) + +const ModuleName = "report.brief" + +type briefModule struct { + aiFindings []eventbus.AIFinding + execReport string // last executive-report AIFinding seen + execReportAt time.Time + mu sync.Mutex +} + +func Register() { module.Register(&briefModule{}) } + +func (*briefModule) Name() string { return ModuleName } +func (*briefModule) Phase() module.Phase { return module.PhaseReporting } +func (*briefModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventAIFinding} } +func (*briefModule) Produces() []eventbus.EventType { return nil } + +// DefaultEnabled: brief renders whenever the scan completes with any +// findings. Silent/json modes are suppressed inline (not at selection +// time) so the module can still collect AIFindings for exports. +func (*briefModule) DefaultEnabled() bool { return true } + +func (b *briefModule) Run(mctx module.Context) error { + // Subscribe to AIFinding events and stash them locally so we can + // build a richer summary than just reading the store (the store + // doesn't retain AIFindings tagged with agent name / confidence). 
+ sub := mctx.Bus.Subscribe(eventbus.EventAIFinding, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.AIFinding) + if !ok { + return + } + b.mu.Lock() + defer b.mu.Unlock() + b.aiFindings = append(b.aiFindings, ev) + if ev.Agent == "report-writer" && ev.Description != "" { + b.execReport = ev.Description + b.execReportAt = ev.Meta().At + } + }) + defer sub.Unsubscribe() + + // Give the AI module a chance to publish its end-of-scan events. + // The AI module runs in PhaseAnalysis; we're in PhaseReporting so + // its ScanCompleted-triggered publishes have already fired by the + // time we get here. A small buffer avoids losing late events. + select { + case <-time.After(400 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + if mctx.Config.Bool("silent", false) || mctx.Config.Bool("json", false) { + return nil + } + + hosts := mctx.Store.All(mctx.Ctx) + if len(hosts) == 0 { + return nil + } + + // Drain store-persisted AIFindings — these were written by the AI + // module during PhaseAnalysis. Live events alone miss them because + // brief subscribes after PhaseAnalysis has already drained. + b.mu.Lock() + for _, h := range hosts { + for _, f := range h.AIFindings { + b.aiFindings = append(b.aiFindings, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: f.FoundAt, Source: "ai.cascade", Target: h.Subdomain}, + Subject: h.Subdomain, + Agent: f.Agent, + Model: f.Model, + Severity: eventbus.Severity(f.Severity), + Title: f.Title, + Description: f.Description, + Evidence: f.Evidence, + CVEs: f.CVEs, + OWASP: f.OWASP, + Confidence: f.Confidence, + }) + if f.Agent == "report-writer" && f.Description != "" && (b.execReport == "" || f.FoundAt.After(b.execReportAt)) { + b.execReport = f.Description + b.execReportAt = f.FoundAt + } + } + } + b.mu.Unlock() + + b.render(mctx, hosts) + return nil +} + +func (b *briefModule) render(mctx module.Context, hosts []*store.Host) { + b.mu.Lock() + aiFindings := append([]eventbus.AIFinding(nil), b.aiFindings...) 
+ execReport := b.execReport + b.mu.Unlock() + + sevCounts := tallySeverities(hosts, aiFindings) + topChains := buildChains(hosts) + recs := buildRecommendations(hosts, aiFindings) + aiActivity := tallyAIAgents(aiFindings) + + fmt.Println() + title := fmt.Sprintf(" AI SCAN BRIEF — %s ", mctx.Target) + fmt.Println(output.BoldCyan(boxTop(title))) + writeLine := func(text string) { + fmt.Println(output.BoldCyan("│ ") + text) + } + + // Section: stats + writeLine(output.BoldWhite("Totals")) + writeLine(fmt.Sprintf(" %s %d %s %d %s %d", + output.Dim("Hosts:"), len(hosts), + output.Dim("Active:"), countActive(hosts), + output.Dim("AI findings:"), len(aiFindings), + )) + writeLine("") + + // Section: severity breakdown + writeLine(output.BoldWhite("Findings by severity")) + sevOrder := []string{"critical", "high", "medium", "low", "info"} + for _, s := range sevOrder { + n := sevCounts[s] + if n == 0 { + continue + } + badge := sevBadge(s) + writeLine(fmt.Sprintf(" %s %s %d", badge, padRight(s, 9), n)) + } + if len(sevCounts) == 0 { + writeLine(output.Dim(" (no scored findings)")) + } + writeLine("") + + // Section: top exploitable chains + if len(topChains) > 0 { + writeLine(output.BoldWhite("Top exploitable chains")) + for i, c := range topChains { + if i >= 5 { + break + } + writeLine(" " + output.BoldYellow("▸ ") + c) + } + writeLine("") + } + + // Section: AI agent activity + if len(aiActivity) > 0 { + writeLine(output.BoldWhite("AI agents that contributed")) + // Stable order by count desc. 
+ type agg struct { + agent string + n int + } + agents := make([]agg, 0, len(aiActivity)) + for name, n := range aiActivity { + agents = append(agents, agg{name, n}) + } + sort.Slice(agents, func(i, j int) bool { return agents[i].n > agents[j].n }) + for _, a := range agents { + writeLine(fmt.Sprintf(" %s %s %s", + output.Cyan("•"), + padRight(a.agent, 20), + output.Dim(fmt.Sprintf("%d findings", a.n)), + )) + } + writeLine("") + } + + // Section: AI executive report (prose) + if strings.TrimSpace(execReport) != "" { + writeLine(output.BoldWhite("AI executive summary")) + for _, line := range wrapText(strings.TrimSpace(execReport), 74) { + writeLine(output.Dim(" ") + line) + } + writeLine("") + } + + // Section: recommendations + if len(recs) > 0 { + writeLine(output.BoldWhite("Recommended next actions")) + for i, r := range recs { + if i >= 5 { + break + } + writeLine(fmt.Sprintf(" %s %s", output.Green(fmt.Sprintf("%d.", i+1)), r)) + } + writeLine("") + } + + fmt.Println(output.BoldCyan(boxBottom())) + fmt.Println() +} + +// --- helpers ------------------------------------------------------------- + +func tallySeverities(hosts []*store.Host, aiFindings []eventbus.AIFinding) map[string]int { + out := map[string]int{} + for _, h := range hosts { + for _, v := range h.Vulnerabilities { + out[strings.ToLower(v.Severity)]++ + } + for _, c := range h.CVEs { + out[strings.ToLower(c.Severity)]++ + } + for _, s := range h.Secrets { + out[strings.ToLower(s.Severity)]++ + } + if h.Takeover != nil { + out["high"]++ + } + } + for _, f := range aiFindings { + out[strings.ToLower(string(f.Severity))]++ + } + return out +} + +func countActive(hosts []*store.Host) int { + n := 0 + for _, h := range hosts { + if h.StatusCode >= 200 && h.StatusCode < 400 { + n++ + } + } + return n +} + +// buildChains surfaces the most dangerous combinations. 
Right now the +// heuristic is coarse: hosts with ≥2 high+ findings, or any host with a +// confirmed takeover candidate, or any host whose tech triggered a CVE. +func buildChains(hosts []*store.Host) []string { + var chains []string + + type scored struct { + text string + score int + } + var ranked []scored + + for _, h := range hosts { + score := 0 + bits := []string{} + for _, v := range h.Vulnerabilities { + if strings.EqualFold(v.Severity, "critical") { + score += 10 + bits = append(bits, v.Title) + } else if strings.EqualFold(v.Severity, "high") { + score += 5 + bits = append(bits, v.Title) + } + } + if h.Takeover != nil { + score += 8 + bits = append(bits, "takeover→"+h.Takeover.Service) + } + for _, c := range h.CVEs { + if strings.EqualFold(c.Severity, "critical") || strings.EqualFold(c.Severity, "high") { + score += 6 + bits = append(bits, fmt.Sprintf("%s@%s→%s", c.Technology, c.Version, firstCVE(c.ID))) + } + } + if score == 0 { + continue + } + desc := h.Subdomain + if len(bits) > 0 { + desc += " " + output.Dim("— "+strings.Join(dedupShort(bits), " + ")) + } + ranked = append(ranked, scored{desc, score}) + } + + sort.Slice(ranked, func(i, j int) bool { return ranked[i].score > ranked[j].score }) + for _, r := range ranked { + chains = append(chains, r.text) + } + return chains +} + +func buildRecommendations(hosts []*store.Host, aiFindings []eventbus.AIFinding) []string { + seen := map[string]struct{}{} + var out []string + + add := func(s string) { + if _, ok := seen[s]; ok { + return + } + seen[s] = struct{}{} + out = append(out, s) + } + + // Pattern: Apache version → upgrade recommendation + for _, h := range hosts { + for _, c := range h.CVEs { + if c.Technology != "" && c.Version != "" { + add(fmt.Sprintf("Patch %s %s → vendor latest (affects %s)", c.Technology, c.Version, h.Subdomain)) + } + } + if h.Takeover != nil { + add(fmt.Sprintf("Verify CNAME on %s before external party claims %s", h.Subdomain, h.Takeover.Service)) + } + for _, s := range 
// --- rendering primitives ------------------------------------------------

// boxWidth is the number of columns between the two corner characters of
// the frame drawn by boxTop and boxBottom.
const boxWidth = 76

// boxTop returns the top border of the frame with title embedded, e.g.
// "┌── TITLE ─────…┐". The result is always boxWidth+2 characters wide, the
// same as boxBottom. Titles longer than boxWidth-4 characters are truncated.
//
// All widths are counted in runes, not bytes: the box-drawing characters
// and typical titles are multi-byte, and byte arithmetic both misaligned the
// border and could pass a negative count to strings.Repeat (a panic).
func boxTop(title string) string {
	const prefix = "┌── "
	const prefixCols = 4 // characters in prefix
	maxTitle := boxWidth - prefixCols

	runes := []rune(title)
	if len(runes) > maxTitle {
		runes = runes[:maxTitle]
	}
	// prefix + title + " " + fill + "┐" must span boxWidth+2 columns.
	fill := maxTitle - len(runes)
	return prefix + string(runes) + " " + strings.Repeat("─", fill) + "┐"
}

// boxBottom returns the bottom border of the frame: boxWidth horizontal
// bars between two corner characters.
func boxBottom() string {
	return "└" + strings.Repeat("─", boxWidth) + "┘"
}

// padRight pads s with spaces on the right to at least n characters
// (runes). Strings that are already long enough are returned unchanged.
func padRight(s string, n int) string {
	cols := len([]rune(s))
	if cols >= n {
		return s
	}
	return s + strings.Repeat(" ", n-cols)
}

// wrapText splits s into whitespace-separated words and greedily packs them
// into lines of at most width characters (runes), joined by single spaces.
// A single word longer than width gets a line of its own. It returns nil
// when s contains no words.
func wrapText(s string, width int) []string {
	words := strings.Fields(s)
	if len(words) == 0 {
		return nil
	}
	var (
		lines []string
		cur   strings.Builder
		cols  int // characters currently in cur
	)
	for _, w := range words {
		wCols := len([]rune(w))
		switch {
		case cols == 0:
			cur.WriteString(w)
			cols = wCols
		case cols+1+wCols > width:
			lines = append(lines, cur.String())
			cur.Reset()
			cur.WriteString(w)
			cols = wCols
		default:
			cur.WriteByte(' ')
			cur.WriteString(w)
			cols += 1 + wCols
		}
	}
	if cols > 0 {
		lines = append(lines, cur.String())
	}
	return lines
}
// firstCVE returns the first identifier of a CVE list: everything before the
// first "," or "(", trimmed of surrounding whitespace. When neither
// separator occurs after the first character, ids is returned unchanged.
func firstCVE(ids string) string {
	if i := strings.IndexAny(ids, ",("); i > 0 {
		return strings.TrimSpace(ids[:i])
	}
	return ids
}

// dedupShort removes exact duplicates from in (keeping first occurrences in
// order) and shortens every entry longer than 40 characters to its first 37
// characters followed by "…". Lengths are counted in runes so a multi-byte
// character is never cut in half.
func dedupShort(in []string) []string {
	seen := make(map[string]struct{}, len(in))
	var out []string
	for _, s := range in {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		if r := []rune(s); len(r) > 40 {
			s = string(r[:37]) + "…"
		}
		out = append(out, s)
	}
	return out
}

// trimLine returns the first line of s, trimmed of surrounding whitespace
// and limited to n characters (runes). A longer line is cut to n-1
// characters plus "…". It returns "" when n <= 0 instead of slicing with a
// negative bound.
func trimLine(s string, n int) string {
	if n <= 0 {
		return ""
	}
	s = strings.TrimSpace(s)
	if head, _, found := strings.Cut(s, "\n"); found {
		// Also drops a trailing "\r" left behind by CRLF input.
		s = strings.TrimSpace(head)
	}
	if r := []rune(s); len(r) > n {
		s = string(r[:n-1]) + "…"
	}
	return s
}
+package bruteforce + +import ( + "bufio" + "context" + "os" + "strings" + "sync" + "time" + + "god-eye/internal/config" + godns "god-eye/internal/dns" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.bruteforce" + +type bruteModule struct{} + +func Register() { module.Register(&bruteModule{}) } + +func (*bruteModule) Name() string { return ModuleName } +func (*bruteModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*bruteModule) Consumes() []eventbus.EventType { return nil } +func (*bruteModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*bruteModule) DefaultEnabled() bool { return true } + +func (b *bruteModule) Run(mctx module.Context) error { + if mctx.Config.Bool("no_brute", false) { + return nil + } + + target := mctx.Target + wordlist := loadWordlist(mctx.Config.String("wordlist", "")) + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + timeout := mctx.Config.Int("timeout", 5) + conc := mctx.Config.Int("concurrency", 500) + if conc <= 0 { + conc = 500 + } + + // Opportunistic wildcard detection: before brute, detect which IPs + // (if any) the apex wildcards to, so we can filter hits that resolve + // exclusively to those IPs. + wd := godns.NewWildcardDetector(resolvers, timeout) + wi := wd.Detect(target) + wildcardIPs := make(map[string]struct{}) + if wi != nil && wi.IsWildcard { + for _, ip := range wi.WildcardIPs { + wildcardIPs[ip] = struct{}{} + } + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for w := range work { + if mctx.Ctx.Err() != nil { + return + } + sub := w + "." 
+ target + ips := godns.ResolveSubdomain(sub, resolvers, timeout) + if len(ips) == 0 { + continue + } + if allWildcard(ips, wildcardIPs) { + continue + } + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddIPs(h, ips) + store.AddDiscoveryMethod(h, "brute") + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "brute", + }) + } + }() + } + +loop: + for _, w := range wordlist { + select { + case work <- w: + case <-mctx.Ctx.Done(): + break loop + } + } + close(work) + wg.Wait() + return nil +} + +func allWildcard(ips []string, wc map[string]struct{}) bool { + if len(wc) == 0 { + return false + } + for _, ip := range ips { + if _, ok := wc[ip]; !ok { + return false + } + } + return true +} + +func loadWordlist(path string) []string { + if path == "" { + return config.DefaultWordlist + } + f, err := os.Open(path) + if err != nil { + return config.DefaultWordlist + } + defer f.Close() + + var out []string + sc := bufio.NewScanner(f) + for sc.Scan() { + w := strings.TrimSpace(sc.Text()) + if w == "" || strings.HasPrefix(w, "#") { + continue + } + out = append(out, w) + } + if len(out) == 0 { + return config.DefaultWordlist + } + return out +} + +func parseResolvers(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return config.DefaultResolvers + } + var out []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r == "" { + continue + } + if !strings.Contains(r, ":") { + r = r + ":53" + } + out = append(out, r) + } + if len(out) == 0 { + return config.DefaultResolvers + } + return out +} + +// keep context import for symmetry with other modules +var _ = context.Canceled diff --git a/internal/modules/cloud/cloud.go b/internal/modules/cloud/cloud.go new file mode 100644 index 0000000..ce771b8 --- /dev/null +++ b/internal/modules/cloud/cloud.go @@ -0,0 +1,104 @@ +// Package cloud wraps v1 
cloud detection + S3 bucket discovery. +// Drains the store, plus listens for late DNSResolved events. +package cloud + +import ( + "context" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/scanner" + "god-eye/internal/store" +) + +const ModuleName = "cloud.detect" + +type cloudModule struct{} + +func Register() { module.Register(&cloudModule{}) } + +func (*cloudModule) Name() string { return ModuleName } +func (*cloudModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*cloudModule) Consumes() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventDNSResolved, eventbus.EventHTTPProbed} +} +func (*cloudModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventCloudAsset} } +func (*cloudModule) DefaultEnabled() bool { return true } + +func (*cloudModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 5) + client := gohttp.GetSharedClient(timeout) + + handled := make(map[string]struct{}) + var mu sync.Mutex + shouldHandle := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := handled[host]; ok { + return false + } + handled[host] = struct{}{} + return true + } + + handle := func(host string, ips []string, cname string) { + if !shouldHandle(host) { + return + } + provider := scanner.DetectCloudProvider(ips, cname, "") + if provider != "" { + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + if h.CloudProvider == "" { + h.CloudProvider = provider + } + }) + } + + if buckets := scanner.CheckS3BucketsWithClient(host, client); len(buckets) > 0 { + for _, url := range buckets { + mctx.Bus.Publish(mctx.Ctx, eventbus.CloudAssetFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Provider: "AWS", + Kind: "s3-bucket", + Name: host, + URL: url, + Status: "accessible", + }) + } + } + } + + var wg sync.WaitGroup + + // Drain: every host already in the 
store with an IP. + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.Subdomain == "" || len(h.IPs) == 0 { + continue + } + h := h + wg.Add(1) + go func() { defer wg.Done(); handle(h.Subdomain, h.IPs, h.CNAME) }() + } + + // Late DNSResolved events. + sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.DNSResolved) + if !ok { + return + } + wg.Add(1) + go func() { defer wg.Done(); handle(ev.Subdomain, ev.IPs, ev.CNAME) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} diff --git a/internal/modules/ctstream/ctstream.go b/internal/modules/ctstream/ctstream.go new file mode 100644 index 0000000..5411fb4 --- /dev/null +++ b/internal/modules/ctstream/ctstream.go @@ -0,0 +1,123 @@ +// Package ctstream subscribes to live Certificate Transparency log streams +// from certstream.calidog.io (free, public). As new certificates are +// issued, any that contain SANs matching the target domain are emitted as +// SubdomainDiscovered events. +// +// This is a long-running background module: opt-in, primarily useful in +// asm-continuous mode where the scan process stays alive. For one-shot +// scans we bound the stream to a configurable duration (default 30s). +// +// NOTE: certstream.calidog.io is sometimes rate-limited or offline. This +// module fails open — no event emitted, no error returned. 
+package ctstream + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.ct-stream" + +type ctModule struct{} + +func Register() { module.Register(&ctModule{}) } + +func (*ctModule) Name() string { return ModuleName } +func (*ctModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*ctModule) Consumes() []eventbus.EventType { return nil } +func (*ctModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} + +// Off by default: requires long-running streaming. +func (*ctModule) DefaultEnabled() bool { return false } + +func (*ctModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("ct_stream", false) { + return nil + } + durationSec := mctx.Config.Int("ct_stream.duration_sec", 30) + if durationSec <= 0 { + durationSec = 30 + } + + target := mctx.Target + deadline := time.Now().Add(time.Duration(durationSec) * time.Second) + + // Fallback path: poll crt.sh's JSON endpoint every 5s for the duration. + // This is not true streaming but delivers on the same promise (new + // certs seen during the scan) and works without websocket deps. 
+ ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + seen := make(map[string]struct{}) + + for time.Now().Before(deadline) { + if mctx.Ctx.Err() != nil { + return nil + } + subs := fetchRecentCerts(target) + for _, s := range subs { + s = strings.ToLower(strings.TrimSpace(s)) + if s == "" || !strings.HasSuffix(s, target) { + continue + } + if _, dup := seen[s]; dup { + continue + } + seen[s] = struct{}{} + _ = mctx.Store.Upsert(mctx.Ctx, s, func(h *store.Host) { + store.AddDiscoveryMethod(h, "ct-stream") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: s}, + Subdomain: s, + Method: "ct-stream", + }) + } + select { + case <-ticker.C: + case <-mctx.Ctx.Done(): + return nil + } + } + return nil +} + +func fetchRecentCerts(target string) []string { + // crt.sh returns JSON with name_value fields; same as the v1 crtsh + // source but we use a tighter query. + q := "%." + target + u := fmt.Sprintf("https://crt.sh/?q=%s&output=json", url.QueryEscape(q)) + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Get(u) + if err != nil { + return nil + } + defer resp.Body.Close() + + var entries []struct { + NameValue string `json:"name_value"` + } + if err := json.NewDecoder(resp.Body).Decode(&entries); err != nil { + return nil + } + var out []string + for _, e := range entries { + for _, name := range strings.Split(e.NameValue, "\n") { + name = strings.TrimPrefix(strings.TrimSpace(name), "*.") + if name != "" { + out = append(out, name) + } + } + } + return out +} diff --git a/internal/modules/dnsresolve/dnsresolve.go b/internal/modules/dnsresolve/dnsresolve.go new file mode 100644 index 0000000..a3714d8 --- /dev/null +++ b/internal/modules/dnsresolve/dnsresolve.go @@ -0,0 +1,166 @@ +// Package dnsresolve resolves every subdomain present in the store, plus +// any that arrive via late SubdomainDiscovered events while the module is +// running. 
Results (IPs, CNAME, PTR) are written back to the store AND +// announced via DNSResolved events for downstream enrichment modules. +// +// This module is idempotent: Upsert on the same subdomain twice is cheap. +package dnsresolve + +import ( + "context" + "strings" + "sync" + "time" + + "god-eye/internal/config" + godns "god-eye/internal/dns" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "dns.resolver" + +type resolverModule struct{} + +func Register() { module.Register(&resolverModule{}) } + +func (*resolverModule) Name() string { return ModuleName } +func (*resolverModule) Phase() module.Phase { return module.PhaseResolution } +func (*resolverModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventSubdomainDiscovered} } +func (*resolverModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*resolverModule) DefaultEnabled() bool { return true } + +func (m *resolverModule) Run(mctx module.Context) error { + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + timeout := mctx.Config.Int("timeout", 5) + conc := mctx.Config.Int("concurrency", 500) + if conc <= 0 { + conc = 500 + } + + // Dedup across drain + late events. + processed := make(map[string]struct{}) + var processedMu sync.Mutex + shouldProcess := func(sub string) bool { + processedMu.Lock() + defer processedMu.Unlock() + if _, dup := processed[sub]; dup { + return false + } + processed[sub] = struct{}{} + return true + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for sub := range work { + m.resolveOne(mctx, sub, resolvers, timeout) + } + }() + } + + // 1) Drain the store: every subdomain discovered so far goes in. 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.Subdomain == "" { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + select { + case work <- h.Subdomain: + case <-mctx.Ctx.Done(): + close(work) + wg.Wait() + return nil + } + } + + // 2) Keep listening for late events (e.g. from recursive discovery that + // runs in our own phase and produces new subdomains mid-resolution). + sub := mctx.Bus.Subscribe(eventbus.EventSubdomainDiscovered, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.SubdomainDiscovered) + if !ok { + return + } + if !shouldProcess(ev.Subdomain) { + return + } + select { + case work <- ev.Subdomain: + case <-mctx.Ctx.Done(): + } + }) + defer sub.Unsubscribe() + + // 3) Give late events a short window to arrive (e.g. recursive module + // running concurrently in PhaseResolution). 1 second is enough — we + // already drained the store, so any straggler events here are rare. + select { + case <-time.After(1 * time.Second): + case <-mctx.Ctx.Done(): + } + + close(work) + wg.Wait() + return nil +} + +func (m *resolverModule) resolveOne(mctx module.Context, sub string, resolvers []string, timeout int) { + if err := mctx.Ctx.Err(); err != nil { + return + } + ips := godns.ResolveSubdomain(sub, resolvers, timeout) + if len(ips) == 0 { + return + } + + cname := godns.ResolveCNAME(sub, resolvers, timeout) + ptr := godns.ResolvePTR(ips[0], resolvers, timeout) + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddIPs(h, ips) + if cname != "" && h.CNAME == "" { + h.CNAME = cname + } + if ptr != "" && h.PTR == "" { + h.PTR = ptr + } + store.AddDiscoveryMethod(h, "resolved") + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.DNSResolved{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + IPs: ips, + CNAME: cname, + PTR: ptr, + }) +} + +func parseResolvers(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return config.DefaultResolvers 
+ } + var out []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r == "" { + continue + } + if !strings.Contains(r, ":") { + r = r + ":53" + } + out = append(out, r) + } + if len(out) == 0 { + return config.DefaultResolvers + } + return out +} diff --git a/internal/modules/github/github.go b/internal/modules/github/github.go new file mode 100644 index 0000000..fc5db3f --- /dev/null +++ b/internal/modules/github/github.go @@ -0,0 +1,150 @@ +// Package github discovers subdomains from public GitHub code via dorks. +// Uses the v3 REST Search API. Works anonymously at a very low rate +// (strict API limits); a token in the GITHUB_TOKEN env var lifts limits. +// +// Dorks used: +// +// "" in:file +// "api." in:file +// +// The module only emits subdomains that match the target domain suffix. +package github + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/sources" + "god-eye/internal/store" +) + +const ModuleName = "discovery.github-dorks" + +type ghModule struct{} + +func Register() { module.Register(&ghModule{}) } + +func (*ghModule) Name() string { return ModuleName } +func (*ghModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*ghModule) Consumes() []eventbus.EventType { return nil } +func (*ghModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} + +// Default-enabled so bug-bounty users get it for free. Falls back to +// no-op when unauthenticated requests hit rate limits. 
+func (*ghModule) DefaultEnabled() bool { return true }
+
+// Run executes every GitHub code-search dork for mctx.Target concurrently,
+// extracts candidate subdomains from the returned text and, for each name
+// not seen before in this run, records it in the store and announces it on
+// the bus. An empty target is a no-op. Run always returns nil.
+func (*ghModule) Run(mctx module.Context) error {
+	domain := mctx.Target
+	if domain == "" {
+		return nil
+	}
+
+	apiToken := os.Getenv("GITHUB_TOKEN")
+	httpClient := &http.Client{
+		Timeout: time.Duration(mctx.Config.Int("timeout", 10)) * time.Second,
+	}
+
+	// Two dorks run in parallel. Each returns up to 100 results per page.
+	queries := []string{
+		fmt.Sprintf(`"%s"`, domain),
+		fmt.Sprintf(`"api.%s"`, domain),
+	}
+
+	var (
+		emittedMu sync.Mutex
+		emitted   = make(map[string]struct{})
+	)
+	// firstSighting reports whether name is new to this run and, if so,
+	// marks it as emitted. Both dork goroutines share the set.
+	firstSighting := func(name string) bool {
+		emittedMu.Lock()
+		defer emittedMu.Unlock()
+		if _, dup := emitted[name]; dup {
+			return false
+		}
+		emitted[name] = struct{}{}
+		return true
+	}
+
+	// report persists one discovery and then publishes the matching event.
+	report := func(name string) {
+		_ = mctx.Store.Upsert(mctx.Ctx, name, func(h *store.Host) {
+			store.AddDiscoveryMethod(h, "github-dorks")
+		})
+		mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{
+			EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: name},
+			Subdomain: name,
+			Method:    "github-dorks",
+		})
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(len(queries))
+	for _, query := range queries {
+		go func(query string) {
+			defer wg.Done()
+			for _, text := range searchCode(httpClient, query, apiToken) {
+				for _, name := range sources.ExtractSubdomains(text, domain) {
+					if firstSighting(name) {
+						report(name)
+					}
+				}
+			}
+		}(query)
+	}
+	wg.Wait()
+	return nil
+}
+
+// searchCode hits GitHub's code-search endpoint and returns, for every
+// result item, its html_url followed by its text_matches fragments (the
+// snippet fields containing the dorked domain). When unauthenticated it
+// may silently return zero hits due to rate limiting; the module fails open.
+func searchCode(client *http.Client, q, token string) []string {
+	// Upper bound on how much of a search response is read into memory.
+	const maxResponseBytes = 8 << 20
+
+	u := "https://api.github.com/search/code?q=" + url.QueryEscape(q) + "&per_page=100"
+	req, err := http.NewRequest(http.MethodGet, u, nil)
+	if err != nil {
+		return nil
+	}
+	// The text-match media type makes GitHub include the matching snippets.
+	req.Header.Set("Accept", "application/vnd.github.text-match+json")
+	req.Header.Set("User-Agent", "god-eye-v2")
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil
+	}
+	defer resp.Body.Close()
+
+	// Only a 200 carries search results. Rate limiting (403/429) and every
+	// other status yield an error document with no items, so bail out
+	// before spending memory on the body.
+	if resp.StatusCode != http.StatusOK {
+		return nil
+	}
+
+	var parsed struct {
+		Items []struct {
+			TextMatches []struct {
+				Fragment string `json:"fragment"`
+			} `json:"text_matches"`
+			HTMLURL string `json:"html_url"`
+		} `json:"items"`
+	}
+	// A response larger than the cap is cut short, fails to decode and is
+	// treated like any other unusable answer.
+	if err := json.NewDecoder(io.LimitReader(resp.Body, maxResponseBytes)).Decode(&parsed); err != nil {
+		return nil
+	}
+
+	var out []string
+	for _, it := range parsed.Items {
+		// The URL itself is returned too: repository and file names can
+		// contain the searched domain.
+		out = append(out, it.HTMLURL)
+		for _, tm := range it.TextMatches {
+			out = append(out, tm.Fragment)
+		}
+	}
+	return out
+}
+
+// Blank references that keep the strings and context imports in use.
+var _ = strings.TrimSpace
+var _ = context.Canceled
diff --git a/internal/modules/graphql/graphql.go b/internal/modules/graphql/graphql.go
new file mode 100644
index 0000000..0afabfd
--- /dev/null
+++ b/internal/modules/graphql/graphql.go
@@ -0,0 +1,287 @@
+// Package graphql detects exposed GraphQL endpoints and tests them for
+// unauthenticated schema introspection. Batched query abuse and
+// alias-based field-level auth bypass are not checked by this file.
+//
+// Probes the paths in candidatePaths on every HTTP-probed host, e.g.:
+//
+// /graphql, /graphiql, /api/graphql, /v1/graphql, /v2/graphql,
+// /query, /api/v1/graphql, /api/v2/graphql
+//
+// When an endpoint responds to introspection queries, we publish an
+// APIFinding + VulnerabilityFound event with the schema size and entry
+// points as evidence.
+package graphql
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	gohttp "god-eye/internal/http"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name this module registers under.
+const ModuleName = "vuln.graphql"
+
+type gqlModule struct{}
+
+// Register adds the GraphQL module to the module registry.
+func Register() { module.Register(&gqlModule{}) }
+
+func (*gqlModule) Name() string { return ModuleName }
+func (*gqlModule) Phase() module.Phase { return module.PhaseAnalysis }
+func (*gqlModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} }
+func (*gqlModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventAPIFinding, eventbus.EventVulnerability}
+}
+func (*gqlModule) DefaultEnabled() bool { return true }
+
+// candidatePaths lists the URL paths probed on every host, in order.
+var candidatePaths = []string{
+	"/graphql",
+	"/graphiql",
+	"/api/graphql",
+	"/v1/graphql",
+	"/v2/graphql",
+	"/query",
+	"/api/v1/graphql",
+	"/api/v2/graphql",
+	"/graphql/console",
+	"/graphql/v1",
+	"/graphql/v2",
+	"/playground",
+}
+
+// introspectionQuery is the JSON request body asking for the root operation
+// types and every type with its fields and enum values. Sent with
+// Content-Type: application/json.
+const introspectionQuery = `{"query":"{__schema{queryType{name} mutationType{name} subscriptionType{name} types{name kind description fields{name} enumValues{name}}}}"}`
+
+// Run probes every host that already has an HTTP status in the store, keeps
+// listening for HTTPProbed events for a further 500ms (or until the scan
+// context is cancelled), then waits for all probes to finish. It always
+// returns nil.
+func (*gqlModule) Run(mctx module.Context) error {
+	timeout := mctx.Config.Int("timeout", 10)
+	client := gohttp.GetSharedClient(timeout)
+
+	var (
+		mu        sync.Mutex
+		processed = make(map[string]struct{})
+		draining  bool
+		wg        sync.WaitGroup
+	)
+	// launch starts one probe per previously unseen host. wg.Add runs under
+	// mu and is refused once draining is set, so an event that arrives
+	// while Run is already in wg.Wait can never call Add concurrently with
+	// Wait (which sync.WaitGroup forbids).
+	launch := func(host string) {
+		mu.Lock()
+		_, dup := processed[host]
+		if dup || draining {
+			mu.Unlock()
+			return
+		}
+		processed[host] = struct{}{}
+		wg.Add(1)
+		mu.Unlock()
+
+		go func() {
+			defer wg.Done()
+			probeGraphQL(mctx, client, host)
+		}()
+	}
+
+	// Drain store: every host that got a successful HTTP probe.
+	for _, h := range mctx.Store.All(mctx.Ctx) {
+		if h == nil || h.StatusCode == 0 {
+			continue
+		}
+		launch(h.Subdomain)
+	}
+
+	// Late events.
+	sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) {
+		ev, ok := e.(eventbus.HTTPProbed)
+		if !ok || ev.StatusCode == 0 {
+			return
+		}
+		launch(ev.Meta().Target)
+	})
+	defer sub.Unsubscribe()
+
+	select {
+	case <-time.After(500 * time.Millisecond):
+	case <-mctx.Ctx.Done():
+	}
+
+	// Stop accepting new hosts before waiting for the running probes.
+	mu.Lock()
+	draining = true
+	mu.Unlock()
+
+	wg.Wait()
+	return nil
+}
+
+// probeGraphQL tries every candidate path on host, https first and then
+// http, and publishes a finding for the first URL that answers the
+// introspection query. It stops early when the scan context is cancelled.
+func probeGraphQL(mctx module.Context, client *http.Client, host string) {
+	for _, p := range candidatePaths {
+		for _, scheme := range []string{"https://", "http://"} {
+			if mctx.Ctx.Err() != nil {
+				return
+			}
+			u := scheme + host + p
+			if finding := tryIntrospectionCtx(mctx.Ctx, client, u); finding != nil {
+				publishFinding(mctx, host, u, finding)
+				return // one endpoint per host is enough — rest are typically aliases
+			}
+		}
+	}
+}
+
+// gqlFinding summarises one successful introspection response.
+type gqlFinding struct {
+	SchemaSize      int // size in bytes of the response body that was read
+	TypesCount      int
+	HasMutation     bool
+	HasSubscription bool
+	QueryTypeName   string
+	Sample          string // truncated introspection response
+}
+
+// tryIntrospection sends the introspection query to url without tying the
+// request to any scan context. It returns nil unless the response carries a
+// GraphQL schema.
+func tryIntrospection(client *http.Client, url string) *gqlFinding {
+	return tryIntrospectionCtx(context.Background(), client, url)
+}
+
+// tryIntrospectionCtx POSTs the introspection query to url. The request is
+// abandoned when parent is cancelled or after 15 seconds, whichever comes
+// first. It returns nil on any transport error, on a 4xx/5xx status, on a
+// body shorter than 30 bytes, or when the JSON has no data.__schema.queryType.
+func tryIntrospectionCtx(parent context.Context, client *http.Client, url string) *gqlFinding {
+	ctx, cancel := context.WithTimeout(parent, 15*time.Second)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBufferString(introspectionQuery))
+	if err != nil {
+		return nil
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", "god-eye-v2")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil
+	}
+	defer resp.Body.Close()
+
+	// Reject 4xx/5xx only — the exact shape matters more than status.
+	if resp.StatusCode >= 400 {
+		return nil
+	}
+	body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
+	if err != nil || len(body) < 30 {
+		return nil
+	}
+
+	// Parse the response; real GraphQL endpoints return {"data": {"__schema": ...}}
+	var parsed struct {
+		Data struct {
+			Schema struct {
+				QueryType        map[string]interface{} `json:"queryType"`
+				MutationType     map[string]interface{} `json:"mutationType"`
+				SubscriptionType map[string]interface{} `json:"subscriptionType"`
+				Types            []struct {
+					Name string `json:"name"`
+					Kind string `json:"kind"`
+				} `json:"types"`
+			} `json:"__schema"`
+		} `json:"data"`
+	}
+	if err := json.Unmarshal(body, &parsed); err != nil {
+		return nil
+	}
+	schema := parsed.Data.Schema
+	if schema.QueryType == nil {
+		return nil
+	}
+
+	fnd := &gqlFinding{
+		SchemaSize:      len(body),
+		TypesCount:      len(schema.Types),
+		HasMutation:     schema.MutationType != nil,
+		HasSubscription: schema.SubscriptionType != nil,
+	}
+	if n, ok := schema.QueryType["name"].(string); ok {
+		fnd.QueryTypeName = n
+	}
+	if len(body) > 500 {
+		// A byte cut can land inside a multi-byte rune; drop the broken
+		// tail so the evidence stays valid UTF-8.
+		fnd.Sample = strings.ToValidUTF8(string(body[:500]), "") + "…"
+	} else {
+		fnd.Sample = string(body)
+	}
+	return fnd
+}
+
+// publishFinding records the introspection finding on the host in the store
+// and publishes a VulnerabilityFound and an APIFinding event. Severity is
+// high when the schema has a mutation type, medium otherwise.
+func publishFinding(mctx module.Context, host, url string, f *gqlFinding) {
+	now := time.Now()
+	severity := eventbus.SeverityMedium
+	if f.HasMutation {
+		severity = eventbus.SeverityHigh
+	}
+	desc := describe(f)
+
+	_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
+		h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+			ID:          "graphql-introspection",
+			Title:       "GraphQL Introspection Enabled",
+			Description: desc,
+			Severity:    string(severity),
+			URL:         url,
+			Evidence:    f.Sample,
+			Remediation: "Disable introspection in production GraphQL servers (e.g. Apollo: introspection:false, GraphQL Yoga: introspection:{disable:true}).",
+			OWASP:       "A05:2021-Security Misconfiguration",
+			FoundAt:     now,
+		})
+	})
+
+	mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{
+		EventMeta:   eventbus.EventMeta{At: now, Source: ModuleName, Target: host},
+		ID:          "graphql-introspection",
+		Title:       "GraphQL Introspection Enabled",
+		Description: desc,
+		Severity:    severity,
+		URL:         url,
+		Evidence:    f.Sample,
+		Remediation: "Disable introspection in production GraphQL servers.",
+		OWASP:       "A05:2021-Security Misconfiguration",
+	})
+
+	mctx.Bus.Publish(mctx.Ctx, eventbus.APIFinding{
+		EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host},
+		Kind:      "graphql-introspection",
+		URL:       url,
+		Issue:     desc,
+		Severity:  severity,
+	})
+}
+
+// describe builds the human-readable description of a finding from the
+// facts collected in f.
+func describe(f *gqlFinding) string {
+	parts := []string{"GraphQL endpoint leaks full schema via unauthenticated introspection."}
+	if f.TypesCount > 0 {
+		parts = append(parts, "Types: "+itoa(f.TypesCount)+".")
+	}
+	if f.HasMutation {
+		parts = append(parts, "Mutations enabled — attacker can enumerate write operations.")
+	}
+	if f.HasSubscription {
+		parts = append(parts, "Subscriptions enabled.")
+	}
+	if f.QueryTypeName != "" {
+		parts = append(parts, "Query root: "+f.QueryTypeName)
+	}
+	return strings.Join(parts, " ")
+}
+
+// itoa formats n in base 10. It delegates to strconv.Itoa so that every
+// int, including the minimum value (whose negation overflows), is
+// rendered correctly.
+func itoa(n int) string {
+	return strconv.Itoa(n)
+}
+
diff --git a/internal/modules/headers/headers.go b/internal/modules/headers/headers.go
new file mode 100644
index 0000000..04b196e
--- /dev/null
+++ b/internal/modules/headers/headers.go
@@ -0,0 +1,253 @@
+// Package headers performs a detailed inspection of HTTP response headers
+// and reports every missing or misconfigured security control. Unlike v1's
+// lightweight header check, this module flags each issue as an individual
+// VulnerabilityFound event with remediation guidance aligned to OWASP
+// Secure Headers Project.
+package headers
+
+import (
+	"context"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	gohttp "god-eye/internal/http"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name this module registers under.
+const ModuleName = "vuln.security-headers"
+
+type hdrModule struct{}
+
+// Register adds the security-headers module to the module registry.
+func Register() { module.Register(&hdrModule{}) }
+
+func (*hdrModule) Name() string { return ModuleName }
+func (*hdrModule) Phase() module.Phase { return module.PhaseAnalysis }
+func (*hdrModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} }
+func (*hdrModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventVulnerability}
+}
+func (*hdrModule) DefaultEnabled() bool { return true }
+
+// Run inspects every host that already has an HTTP status in the store,
+// keeps listening for HTTPProbed events for a further 500ms (or until the
+// scan context is cancelled), then waits for all inspections to finish. It
+// always returns nil.
+func (*hdrModule) Run(mctx module.Context) error {
+	timeout := mctx.Config.Int("timeout", 10)
+	client := gohttp.GetSharedClient(timeout)
+
+	var (
+		mu        sync.Mutex
+		processed = make(map[string]struct{})
+		draining  bool
+		wg        sync.WaitGroup
+	)
+	// launch starts one inspection per previously unseen host. wg.Add runs
+	// under mu and is refused once draining is set, so a late event can
+	// never call Add concurrently with the wg.Wait below.
+	launch := func(host string) {
+		mu.Lock()
+		_, dup := processed[host]
+		if dup || draining {
+			mu.Unlock()
+			return
+		}
+		processed[host] = struct{}{}
+		wg.Add(1)
+		mu.Unlock()
+
+		go func() {
+			defer wg.Done()
+			inspect(mctx, client, host)
+		}()
+	}
+
+	// Drain the store.
+	for _, h := range mctx.Store.All(mctx.Ctx) {
+		if h == nil || h.StatusCode == 0 {
+			continue
+		}
+		launch(h.Subdomain)
+	}
+
+	// Late events.
+	sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) {
+		ev, ok := e.(eventbus.HTTPProbed)
+		if !ok || ev.StatusCode == 0 {
+			return
+		}
+		launch(ev.Meta().Target)
+	})
+	defer sub.Unsubscribe()
+
+	select {
+	case <-time.After(500 * time.Millisecond):
+	case <-mctx.Ctx.Done():
+	}
+
+	// Stop accepting new hosts before waiting for the running inspections.
+	mu.Lock()
+	draining = true
+	mu.Unlock()
+
+	wg.Wait()
+	return nil
+}
+
+// inspect fetches https://host, assesses the response headers and, when
+// issues are found, appends them to the host in the store and publishes one
+// VulnerabilityFound event per issue. Request errors are ignored: the host
+// is simply skipped.
+func inspect(mctx module.Context, client *http.Client, host string) {
+	target := "https://" + host
+	// Bind the request to the scan context so cancellation aborts it.
+	req, err := http.NewRequestWithContext(mctx.Ctx, http.MethodGet, target, nil)
+	if err != nil {
+		return
+	}
+	req.Header.Set("User-Agent", "god-eye-v2")
+	resp, err := client.Do(req)
+	if err != nil {
+		return
+	}
+	defer resp.Body.Close()
+
+	issues := assess(resp.Header)
+	if len(issues) == 0 {
+		return
+	}
+
+	_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
+		now := time.Now()
+		for _, iss := range issues {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID:          iss.id,
+				Title:       iss.title,
+				Description: iss.desc,
+				Severity:    string(iss.sev),
+				URL:         target,
+				Remediation: iss.fix,
+				OWASP:       "A05:2021-Security Misconfiguration",
+				FoundAt:     now,
+			})
+		}
+	})
+	for _, iss := range issues {
+		mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{
+			EventMeta:   eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
+			ID:          iss.id,
+			Title:       iss.title,
+			Description: iss.desc,
+			Severity:    iss.sev,
+			URL:         target,
+			Remediation: iss.fix,
+			OWASP:       "A05:2021-Security Misconfiguration",
+		})
+	}
+}
+
+// issue is one header problem found by assess.
+type issue struct {
+	id, title, desc, fix string
+	sev                  eventbus.Severity
+}
+
+// hstsMaxAge extracts the max-age directive from a Strict-Transport-Security
+// header value. ok is false when the directive is absent or its value is
+// not a non-negative integer.
+func hstsMaxAge(v string) (seconds int64, ok bool) {
+	for _, directive := range strings.Split(v, ";") {
+		name, val, found := strings.Cut(directive, "=")
+		if !found || !strings.EqualFold(strings.TrimSpace(name), "max-age") {
+			continue
+		}
+		// The value may be written as a quoted string.
+		val = strings.Trim(strings.TrimSpace(val), `"`)
+		n, err := strconv.ParseInt(val, 10, 64)
+		if err != nil || n < 0 {
+			return 0, false
+		}
+		return n, true
+	}
+	return 0, false
+}
+
+// assess returns one issue for every security header that is missing or
+// weakly configured in h, plus an informational issue when the Server
+// header looks like a versioned banner. A nil result means no issue.
+func assess(h http.Header) []issue {
+	var out []issue
+	hasHeader := func(k string) bool { return strings.TrimSpace(h.Get(k)) != "" }
+
+	hsts := strings.ToLower(strings.TrimSpace(h.Get("Strict-Transport-Security")))
+	switch age, ok := hstsMaxAge(hsts); {
+	case hsts == "":
+		out = append(out, issue{
+			id:    "hdr-missing-hsts",
+			title: "Missing Strict-Transport-Security",
+			desc:  "HSTS is absent; clients may accept plaintext downgrades.",
+			fix:   "Add: Strict-Transport-Security: max-age=63072000; includeSubDomains; preload",
+			sev:   eventbus.SeverityMedium,
+		})
+	// max-age=0 tells the browser to forget the policy, so it is as weak
+	// as a missing or unparsable max-age.
+	case !ok || age == 0 || !strings.Contains(hsts, "includesubdomains"):
+		out = append(out, issue{
+			id:    "hdr-weak-hsts",
+			title: "Weak HSTS policy",
+			desc:  "HSTS set but missing includeSubDomains and/or sufficient max-age.",
+			fix:   "Use: max-age=63072000; includeSubDomains; preload",
+			sev:   eventbus.SeverityLow,
+		})
+	}
+
+	csp := strings.ToLower(h.Get("Content-Security-Policy"))
+	if !hasHeader("Content-Security-Policy") {
+		out = append(out, issue{
+			id:    "hdr-missing-csp",
+			title: "Missing Content-Security-Policy",
+			desc:  "No CSP header; XSS mitigations rely solely on upstream filtering.",
+			fix:   "Deploy a nonce-based CSP restricting script-src, object-src 'none'.",
+			sev:   eventbus.SeverityMedium,
+		})
+	} else if strings.Contains(csp, "unsafe-inline") {
+		out = append(out, issue{
+			id:    "hdr-weak-csp",
+			title: "Weak CSP (allows unsafe-inline)",
+			desc:  "CSP allows unsafe-inline, neutralizing most XSS protection.",
+			fix:   "Remove unsafe-inline; use nonces or hashes.",
+			sev:   eventbus.SeverityMedium,
+		})
+	}
+
+	// Only flag if CSP doesn't include frame-ancestors.
+	if !hasHeader("X-Frame-Options") && !strings.Contains(csp, "frame-ancestors") {
+		out = append(out, issue{
+			id:    "hdr-missing-clickjack",
+			title: "Clickjacking not prevented",
+			desc:  "Neither X-Frame-Options nor CSP frame-ancestors is set.",
+			fix:   "Add: X-Frame-Options: DENY OR CSP with frame-ancestors 'none'.",
+			sev:   eventbus.SeverityLow,
+		})
+	}
+
+	if !hasHeader("X-Content-Type-Options") {
+		out = append(out, issue{
+			id:    "hdr-missing-nosniff",
+			title: "Missing X-Content-Type-Options",
+			desc:  "MIME sniffing permitted; certain XSS escalations become easier.",
+			fix:   "Add: X-Content-Type-Options: nosniff",
+			sev:   eventbus.SeverityLow,
+		})
+	}
+
+	if !hasHeader("Referrer-Policy") {
+		out = append(out, issue{
+			id:    "hdr-missing-referrer-policy",
+			title: "Missing Referrer-Policy",
+			desc:  "Default browser Referrer-Policy leaks URLs to third parties.",
+			fix:   "Add: Referrer-Policy: strict-origin-when-cross-origin",
+			sev:   eventbus.SeverityLow,
+		})
+	}
+
+	if !hasHeader("Permissions-Policy") && !hasHeader("Feature-Policy") {
+		out = append(out, issue{
+			id:    "hdr-missing-permissions-policy",
+			title: "Missing Permissions-Policy",
+			desc:  "Browser features (camera, geolocation, USB, etc.) are unrestricted by default.",
+			fix:   "Add: Permissions-Policy: camera=(), microphone=(), geolocation=()",
+			sev:   eventbus.SeverityInfo,
+		})
+	}
+
+	// Dangerous information disclosure via default server banner.
+	if srv := h.Get("Server"); looksLikeBanner(srv) {
+		out = append(out, issue{
+			id:    "hdr-server-banner",
+			title: "Server banner leaks version",
+			desc:  "Server header exposes exact software + version: " + srv,
+			fix:   "Strip or generalize via proxy/web-server config.",
+			sev:   eventbus.SeverityInfo,
+		})
+	}
+
+	return out
+}
+
+// looksLikeBanner reports whether s has the "product/version" shape: it
+// contains a slash and also a dot or a digit.
+func looksLikeBanner(s string) bool {
+	s = strings.ToLower(s)
+	return strings.Contains(s, "/") && (strings.Contains(s, ".") || anyDigit(s))
+}
+
+// anyDigit reports whether s contains at least one ASCII digit.
+func anyDigit(s string) bool {
+	return strings.ContainsAny(s, "0123456789")
+}
+
diff --git a/internal/modules/httpprobe/httpprobe.go b/internal/modules/httpprobe/httpprobe.go
new file mode 100644
index 0000000..9f82cfe
--- /dev/null
+++ b/internal/modules/httpprobe/httpprobe.go
@@ -0,0 +1,195 @@
+// Package httpprobe probes every resolved host with HTTPS/HTTP and extracts
+// status code, title, server, technology stack, and TLS information.
+//
+// Runs in PhaseEnrichment. Reads hosts from the store (not events) to avoid
+// the phase-barrier race where late subscribers miss earlier events.
+package httpprobe
+
+import (
+	"context"
+	"crypto/tls"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	gohttp "god-eye/internal/http"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name this module registers under.
+const ModuleName = "http.probe"
+
+type probeModule struct{}
+
+// Register adds the HTTP probe module to the module registry.
+func Register() { module.Register(&probeModule{}) }
+
+func (*probeModule) Name() string { return ModuleName }
+func (*probeModule) Phase() module.Phase { return module.PhaseEnrichment }
+func (*probeModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} }
+func (*probeModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventHTTPProbed, eventbus.EventTLSAnalyzed, eventbus.EventTechDetected}
+}
+func (*probeModule) DefaultEnabled() bool { return true }
+
+// Run probes every host in the store that has at least one IP, using a pool
+// of "concurrency" workers (default 500). It then accepts late DNSResolved
+// events for 500ms, or until the scan context is cancelled, before shutting
+// the pool down. It is a no-op when the "no_probe" option is set and always
+// returns nil.
+func (p *probeModule) Run(mctx module.Context) error {
+	if mctx.Config.Bool("no_probe", false) {
+		return nil
+	}
+
+	conc := mctx.Config.Int("concurrency", 500)
+	if conc <= 0 {
+		conc = 500
+	}
+	timeout := mctx.Config.Int("timeout", 5)
+
+	// Dedup across drain + late events.
+	processed := make(map[string]struct{})
+	var processedMu sync.Mutex
+	shouldProcess := func(host string) bool {
+		processedMu.Lock()
+		defer processedMu.Unlock()
+		if _, dup := processed[host]; dup {
+			return false
+		}
+		processed[host] = struct{}{}
+		return true
+	}
+
+	work := make(chan string, conc*2)
+	var wg sync.WaitGroup
+	for i := 0; i < conc; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for host := range work {
+				p.probeOne(mctx, host, timeout)
+			}
+		}()
+	}
+
+	// The event handler below can still be running when the queue is shut
+	// down, and sending on a closed channel panics. Senders therefore hold
+	// the read lock while they send, and shutdown takes the write lock to
+	// close, so a send and the close can never overlap.
+	var (
+		queueMu     sync.RWMutex
+		queueClosed bool
+	)
+	// enqueue hands host to the workers. It reports false when the host
+	// was not queued: the queue is closed or the scan was cancelled.
+	enqueue := func(host string) bool {
+		queueMu.RLock()
+		defer queueMu.RUnlock()
+		if queueClosed {
+			return false
+		}
+		select {
+		case work <- host:
+			return true
+		case <-mctx.Ctx.Done():
+			return false
+		}
+	}
+	// shutdown closes the queue and waits for the workers to drain it.
+	shutdown := func() {
+		queueMu.Lock()
+		queueClosed = true
+		close(work)
+		queueMu.Unlock()
+		wg.Wait()
+	}
+
+	// Drain: every host in the store with at least one IP is worth probing.
+	for _, h := range mctx.Store.All(mctx.Ctx) {
+		if h == nil || h.Subdomain == "" || len(h.IPs) == 0 {
+			continue
+		}
+		if !shouldProcess(h.Subdomain) {
+			continue
+		}
+		if !enqueue(h.Subdomain) {
+			// Only cancellation can refuse a host at this point.
+			shutdown()
+			return nil
+		}
+	}
+
+	// Also listen for late DNSResolved events (recursive/permutation running
+	// concurrently in other modules may produce new resolves during our
+	// phase — pick them up).
+	sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) {
+		ev, ok := e.(eventbus.DNSResolved)
+		if !ok || len(ev.IPs) == 0 {
+			return
+		}
+		if !shouldProcess(ev.Subdomain) {
+			return
+		}
+		enqueue(ev.Subdomain)
+	})
+	defer sub.Unsubscribe()
+
+	// Brief window for late arrivals.
+	select {
+	case <-time.After(500 * time.Millisecond):
+	case <-mctx.Ctx.Done():
+	}
+
+	shutdown()
+	return nil
+}
+
+// probeOne probes a single host and, when it answers with a non-zero
+// status, stores the result and publishes HTTPProbed, one TechDetected per
+// technology and, if a TLS fingerprint is present, TLSAnalyzed. It does
+// nothing once the scan context is cancelled.
+func (p *probeModule) probeOne(mctx module.Context, host string, timeout int) {
+	if mctx.Ctx.Err() != nil {
+		return
+	}
+	r := gohttp.ProbeHTTP(host, timeout)
+	if r == nil || r.StatusCode == 0 {
+		return
+	}
+
+	_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
+		h.StatusCode = r.StatusCode
+		h.ContentLength = r.ContentLength
+		h.Title = r.Title
+		h.Server = r.Server
+		if len(r.Tech) > 0 {
+			store.AddTechnologies(h, r.Tech)
+		}
+		h.ResponseMs = r.ResponseMs
+		h.TLSVersion = r.TLSVersion
+		h.TLSIssuer = r.TLSIssuer
+		h.TLSSelfSigned = r.TLSSelfSigned
+		if r.TLSExpiry != "" {
+			// An expiry that is not in YYYY-MM-DD form is left unset.
+			if tm, err := time.Parse("2006-01-02", r.TLSExpiry); err == nil {
+				h.TLSExpiry = tm
+			}
+		}
+		if r.TLSFingerprint != nil {
+			fp := *r.TLSFingerprint
+			h.TLSFingerprint = &store.TLSFingerprint{
+				Vendor:        fp.Vendor,
+				Product:       fp.Product,
+				Version:       fp.Version,
+				ApplianceKind: fp.ApplianceType,
+				// Copied so the stored host does not alias the probe result.
+				InternalHosts: append([]string(nil), fp.InternalHosts...),
+			}
+		}
+	})
+
+	mctx.Bus.Publish(mctx.Ctx, eventbus.HTTPProbed{
+		EventMeta:     eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
+		URL:           "https://" + host,
+		StatusCode:    r.StatusCode,
+		ContentLength: r.ContentLength,
+		Title:         r.Title,
+		Server:        r.Server,
+		Technologies:  append([]string(nil), r.Tech...),
+		ResponseMs:    r.ResponseMs,
+		TLSVersion:    r.TLSVersion,
+		TLSSelfSigned: r.TLSSelfSigned,
+	})
+
+	for _, t := range r.Tech {
+		if t == "" {
+			continue
+		}
+		mctx.Bus.Publish(mctx.Ctx, eventbus.TechDetected{
+			EventMeta:  eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
+			Host:       host,
+			Technology: t,
+			Confidence: 0.8,
+		})
+	}
+
+	if r.TLSFingerprint != nil {
+		mctx.Bus.Publish(mctx.Ctx, eventbus.TLSAnalyzed{
+			EventMeta:     eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
+			Host:          host,
+			Version:       r.TLSVersion,
+			Issuer:        r.TLSIssuer,
+			SelfSigned:    r.TLSSelfSigned,
+			Vendor:        r.TLSFingerprint.Vendor,
+			Product:       r.TLSFingerprint.Product,
+			ApplianceKind: r.TLSFingerprint.ApplianceType,
+			InternalHosts: append([]string(nil), r.TLSFingerprint.InternalHosts...),
+		})
+	}
+}
+
+// keep tls import stable
+var _ = tls.VersionTLS13
diff --git a/internal/modules/javascript/javascript.go b/internal/modules/javascript/javascript.go
new file mode 100644
index 0000000..ba48b0b
--- /dev/null
+++ b/internal/modules/javascript/javascript.go
@@ -0,0 +1,186 @@
+// Package javascript downloads JS files from probed hosts and scans them
+// for secrets with the v1 analyzer. Drains the store at start; also listens
+// for late HTTPProbed events.
+package javascript
+
+import (
+	"context"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	gohttp "god-eye/internal/http"
+	"god-eye/internal/module"
+	"god-eye/internal/scanner"
+	"god-eye/internal/store"
+)
+
+// publicAPIDenylist covers well-known public/third-party APIs and font
+// services that the v1 regex scanner flags as "API Endpoint" but which
+// are never secrets. Matched case-insensitively as a substring.
+var publicAPIDenylist = []string{
+	"fonts.googleapis.com",
+	"fonts.gstatic.com",
+	"www.googleapis.com",
+	"content.googleapis.com",
+	"api.fastmail.com",
+	"api.forwardemail.net",
+	"cdn.jsdelivr.net",
+	"cdnjs.cloudflare.com",
+	"unpkg.com",
+}
+
+// uiStringDenylist covers common UI labels / warning strings that trip
+// the "Generic Password" regex but are clearly human-readable copy.
+//
+// NOTE(review): entries are matched as substrings, so the bare "password"
+// entry suppresses every finding whose text contains that word and makes
+// the longer phrases redundant. Confirm this is intended.
+var uiStringDenylist = []string{
+	"change password",
+	"update password",
+	"reset password",
+	"confirm password",
+	"forgot password",
+	"set-initial-password",
+	"change-password",
+	"this is a very common password",
+	"masterpassword",
+	"password",
+}
+
+// isSecretFalsePositive applies cheap deterministic heuristics to weed
+// out v1 regex noise. Does NOT replace AI triage (which is still the
+// preferred filter once the ai module is enabled).
+//
+// It reports true when the trimmed, lower-cased secret contains any entry
+// of publicAPIDenylist or uiStringDenylist, or when it is between 1 and 7
+// bytes long. An empty secret reports false.
+func isSecretFalsePositive(secret string) bool {
+	low := strings.ToLower(strings.TrimSpace(secret))
+	for _, s := range publicAPIDenylist {
+		if strings.Contains(low, s) {
+			return true
+		}
+	}
+	for _, s := range uiStringDenylist {
+		if strings.Contains(low, s) {
+			return true
+		}
+	}
+	// Very short matches are almost always labels, not credentials. The v1
+	// regex already strips the "[Kind] " prefix before passing to us;
+	// anything under 8 bytes is treated as noise.
+	if len(low) > 0 && len(low) < 8 {
+		return true
+	}
+	return false
+}
+
+// ModuleName is the name this module registers under.
+const ModuleName = "js.analyzer"
+
+type jsModule struct{}
+
+// Register adds the JavaScript analyzer module to the module registry.
+func Register() { module.Register(&jsModule{}) }
+
+func (*jsModule) Name() string { return ModuleName }
+func (*jsModule) Phase() module.Phase { return module.PhaseAnalysis }
+func (*jsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} }
+func (*jsModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventJSFile, eventbus.EventSecret}
+}
+func (*jsModule) DefaultEnabled() bool { return true }
+
+// Run analyzes the JavaScript of every host that already has an HTTP status
+// in the store, keeps listening for HTTPProbed events for a further 500ms
+// (or until the scan context is cancelled), then waits for all analyses to
+// finish. It always returns nil.
+func (*jsModule) Run(mctx module.Context) error {
+	timeout := mctx.Config.Int("timeout", 5)
+	client := gohttp.GetSharedClient(timeout)
+
+	// analyze scans one host, stores the surviving secrets and publishes a
+	// JSFileDiscovered event per file and a SecretFound event per secret.
+	analyze := func(host string) {
+		if mctx.Ctx.Err() != nil {
+			return
+		}
+		jsFiles, secrets := scanner.AnalyzeJSFiles(host, client)
+		// Drop known-noise findings before they reach the store or bus.
+		// The filter reuses the backing array of secrets.
+		filtered := secrets[:0]
+		for _, s := range secrets {
+			if isSecretFalsePositive(s) {
+				continue
+			}
+			filtered = append(filtered, s)
+		}
+		secrets = filtered
+		if len(jsFiles) == 0 && len(secrets) == 0 {
+			return
+		}
+		_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
+			for _, sec := range secrets {
+				h.Secrets = append(h.Secrets, store.Secret{
+					Kind:     "js-regex",
+					Match:    sec,
+					Severity: string(eventbus.SeverityHigh),
+					FoundAt:  time.Now(),
+				})
+			}
+		})
+		for _, jsf := range jsFiles {
+			mctx.Bus.Publish(mctx.Ctx, eventbus.JSFileDiscovered{
+				EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
+				URL:       jsf,
+				Host:      host,
+			})
+		}
+		for _, s := range secrets {
+			mctx.Bus.Publish(mctx.Ctx, eventbus.SecretFound{
+				EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
+				Kind:      "js-regex",
+				Match:     s,
+				Location:  "js-file",
+				Severity:  eventbus.SeverityHigh,
+			})
+		}
+	}
+
+	var (
+		mu        sync.Mutex
+		processed = make(map[string]struct{})
+		draining  bool
+		wg        sync.WaitGroup
+	)
+	// launch starts one analysis per previously unseen host. wg.Add runs
+	// under mu and is refused once draining is set, so a late event can
+	// never call Add concurrently with the wg.Wait below.
+	launch := func(host string) {
+		mu.Lock()
+		_, dup := processed[host]
+		if dup || draining {
+			mu.Unlock()
+			return
+		}
+		processed[host] = struct{}{}
+		wg.Add(1)
+		mu.Unlock()
+
+		go func() {
+			defer wg.Done()
+			analyze(host)
+		}()
+	}
+
+	// Drain: every probed host (StatusCode > 0).
+	for _, h := range mctx.Store.All(mctx.Ctx) {
+		if h == nil || h.StatusCode == 0 {
+			continue
+		}
+		launch(h.Subdomain)
+	}
+
+	// Late events.
+	sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) {
+		ev, ok := e.(eventbus.HTTPProbed)
+		if !ok || ev.StatusCode == 0 {
+			return
+		}
+		launch(ev.Meta().Target)
+	})
+	defer sub.Unsubscribe()
+
+	select {
+	case <-time.After(500 * time.Millisecond):
+	case <-mctx.Ctx.Done():
+	}
+
+	// Stop accepting new hosts before waiting for the running analyses.
+	mu.Lock()
+	draining = true
+	mu.Unlock()
+
+	wg.Wait()
+	return nil
+}
diff --git a/internal/modules/jwt/jwt.go b/internal/modules/jwt/jwt.go
new file mode 100644
index 0000000..a8fce2a
--- /dev/null
+++ b/internal/modules/jwt/jwt.go
@@ -0,0 +1,305 @@
+// Package jwt scans responses for JWTs, decodes them, and flags
+// security-relevant attributes: alg=none, weak HMAC secret (dictionary
+// crack against common passwords), excessive expiration, missing claims.
+//
+// The brute-force list is intentionally tiny (~20 common secrets) — the
+// goal is to surface obviously-weak keys, not to run offline hashcat. A
+// proper cracker belongs in Phase 2's planned "auth" agent.
+package jwt
+
+import (
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"crypto/sha512"
+	"encoding/base64"
+	"encoding/json"
+	"hash"
+	"io"
+	"net/http"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	gohttp "god-eye/internal/http"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name this module registers under.
+const ModuleName = "vuln.jwt"
+
+type jwtModule struct{}
+
+// Register adds the JWT module to the module registry.
+func Register() { module.Register(&jwtModule{}) }
+
+func (*jwtModule) Name() string { return ModuleName }
+func (*jwtModule) Phase() module.Phase { return module.PhaseAnalysis }
+func (*jwtModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} }
+func (*jwtModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventVulnerability, eventbus.EventSecret}
+}
+func (*jwtModule) DefaultEnabled() bool { return true }
+
+// jwtRegex matches the standard three-part base64url JWT shape.
+var jwtRegex = regexp.MustCompile(`eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]*`) + +var weakSecrets = []string{ + "secret", "password", "123456", "admin", "jwt", "jwtsecret", + "changeme", "default", "test", "dev", "secret_key", "mysecret", + "your-256-bit-secret", "your-secret-key", "super-secret", + "supersecret", "helloworld", "qwerty", "abc123", "letmein", +} + +func (*jwtModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 10) + client := gohttp.GetSharedClient(timeout) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + var wg sync.WaitGroup + + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); scanHost(mctx, client, host) }() + } + + sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); scanHost(mctx, client, host) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} + +func scanHost(mctx module.Context, client *http.Client, host string) { + for _, scheme := range []string{"https://", "http://"} { + if mctx.Ctx.Err() != nil { + return + } + url := scheme + host + req, err := http.NewRequest("GET", url, nil) + if err != nil { + continue + } + req.Header.Set("User-Agent", "god-eye-v2") + resp, err := client.Do(req) + if err != nil { + continue + } + body, _ := io.ReadAll(io.LimitReader(resp.Body, 256*1024)) + resp.Body.Close() + + text := 
string(body) + // Also check Authorization + Set-Cookie response headers. + for _, h := range resp.Header.Values("Set-Cookie") { + text += "\n" + h + } + if auth := resp.Header.Get("Authorization"); auth != "" { + text += "\n" + auth + } + + matches := jwtRegex.FindAllString(text, -1) + for _, tok := range uniqueStrings(matches) { + analyzeJWT(mctx, host, url, tok) + } + // One scheme is enough; avoid duplicate noise. + if len(matches) > 0 { + return + } + } +} + +func analyzeJWT(mctx module.Context, host, url, token string) { + parts := strings.Split(token, ".") + if len(parts) != 3 { + return + } + header, err := base64Decode(parts[0]) + if err != nil { + return + } + payload, err := base64Decode(parts[1]) + if err != nil { + return + } + + var h struct { + Alg string `json:"alg"` + Kid string `json:"kid"` + Typ string `json:"typ"` + } + if err := json.Unmarshal(header, &h); err != nil { + return + } + + severity := eventbus.SeverityInfo + findings := []string{"JWT detected"} + + if strings.EqualFold(h.Alg, "none") { + severity = eventbus.SeverityCritical + findings = append(findings, "alg=none accepted — no signature verification") + } + if strings.HasPrefix(strings.ToUpper(h.Alg), "HS") { + if cracked := tryWeakSecret(token, h.Alg, parts); cracked != "" { + severity = eventbus.SeverityCritical + findings = append(findings, "weak HMAC secret cracked: "+cracked) + } + } + if h.Kid != "" && looksInjectable(h.Kid) { + severity = maxSeverity(severity, eventbus.SeverityMedium) + findings = append(findings, "kid header may be injectable: "+h.Kid) + } + + // Inspect payload for excessive expiry. 
+ var claims map[string]interface{} + _ = json.Unmarshal(payload, &claims) + if exp, ok := claims["exp"].(float64); ok { + expAt := time.Unix(int64(exp), 0) + if time.Until(expAt) > 365*24*time.Hour { + severity = maxSeverity(severity, eventbus.SeverityLow) + findings = append(findings, "exp >1 year") + } + } + + redacted := token + if len(redacted) > 40 { + redacted = redacted[:20] + "…" + redacted[len(redacted)-10:] + } + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(sh *store.Host) { + sh.Secrets = append(sh.Secrets, store.Secret{ + Kind: "jwt", + Match: redacted, + Location: url, + Severity: string(severity), + Description: strings.Join(findings, "; "), + FoundAt: time.Now(), + }) + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.SecretFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Kind: "jwt", + Match: redacted, + Location: url, + Severity: severity, + Description: strings.Join(findings, "; "), + }) + + if severity == eventbus.SeverityCritical || severity == eventbus.SeverityHigh { + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + ID: "jwt-weak", + Title: "JWT Weakness", + Description: strings.Join(findings, "; "), + Severity: severity, + URL: url, + Evidence: redacted, + Remediation: "Use strong signing keys (256+ bits of entropy), refuse alg=none, rotate keys on compromise, short expiry.", + OWASP: "A02:2021-Cryptographic Failures", + }) + } +} + +func tryWeakSecret(token, alg string, parts []string) string { + signingInput := parts[0] + "." 
+ parts[1] + sig, err := base64Decode(parts[2]) + if err != nil { + return "" + } + + var hashFn func() hash.Hash + switch strings.ToUpper(alg) { + case "HS256": + hashFn = sha256.New + case "HS384": + hashFn = func() hash.Hash { return sha512.New384() } + case "HS512": + hashFn = sha512.New + default: + return "" + } + + for _, s := range weakSecrets { + mac := hmac.New(hashFn, []byte(s)) + mac.Write([]byte(signingInput)) + if hmac.Equal(mac.Sum(nil), sig) { + return s + } + } + return "" +} + +// base64Decode unpads and decodes a JWT segment (URL-safe, no padding). +func base64Decode(s string) ([]byte, error) { + // Add padding if missing. + if m := len(s) % 4; m != 0 { + s += strings.Repeat("=", 4-m) + } + return base64.URLEncoding.DecodeString(s) +} + +func looksInjectable(kid string) bool { + // kids that include path separators, SQL wildcards, or NUL-like + // sequences are worth flagging for manual review. + return strings.ContainsAny(kid, "/\\;'\"$`|") +} + +func maxSeverity(a, b eventbus.Severity) eventbus.Severity { + rank := map[eventbus.Severity]int{ + eventbus.SeverityInfo: 0, eventbus.SeverityLow: 1, + eventbus.SeverityMedium: 2, eventbus.SeverityHigh: 3, eventbus.SeverityCritical: 4, + } + if rank[a] >= rank[b] { + return a + } + return b +} + +func uniqueStrings(in []string) []string { + seen := make(map[string]struct{}) + out := make([]string, 0, len(in)) + for _, s := range in { + if _, dup := seen[s]; dup { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + return out +} + diff --git a/internal/modules/nuclei/nuclei.go b/internal/modules/nuclei/nuclei.go new file mode 100644 index 0000000..c2fdd7e --- /dev/null +++ b/internal/modules/nuclei/nuclei.go @@ -0,0 +1,329 @@ +// Package nuclei runs Nuclei-format YAML templates against every probed +// host. 
The actual executor lives in internal/nucleitpl; this module is +// the wiring that discovers templates on disk, fans out per host, and +// publishes matches as VulnerabilityFound events. +// +// Template discovery order: +// 1. --nuclei-templates flag (highest priority) +// 2. NUCLEI_TEMPLATES env var +// 3. ~/nuclei-templates (nuclei CLI default) +// 4. ~/.god-eye/nuclei-templates +// +// If no template directory is found AND nuclei_auto_download is true +// (default), God's Eye downloads the official projectdiscovery/nuclei-templates +// ZIP into ~/.god-eye/nuclei-templates, extracts only the .yaml/.yml files +// (path-traversal safe), and proceeds with the scan. The archive is +// ~40MB; first run takes 10-30 seconds depending on network, subsequent +// runs skip the download. +// +// Refresh the cache manually with: god-eye nuclei-update +// +// Only HTTP templates compatible with our executor subset run; others +// are counted as "skipped" and surfaced as a ModuleError event once per +// scan. +package nuclei + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/nucleitpl" + "god-eye/internal/store" +) + +const ModuleName = "vuln.nuclei-compat" + +type nucleiModule struct{} + +func Register() { module.Register(&nucleiModule{}) } + +func (*nucleiModule) Name() string { return ModuleName } +func (*nucleiModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*nucleiModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*nucleiModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventVulnerability, eventbus.EventCVEMatch} +} + +// DefaultEnabled returns true so the registry always loads the module; +// Run() itself is a no-op unless `nuclei_scan` is set in the config +// (via --nuclei or YAML). 
Mirrors the ai.cascade module — keeps the +// module visible to selection logic while preserving opt-in semantics. +func (*nucleiModule) DefaultEnabled() bool { return true } + +func (*nucleiModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("nuclei_scan", false) { + return nil + } + + tplDir := resolveTemplateDir(mctx) + if tplDir == "" { + // No templates found — try auto-download into ~/.god-eye/nuclei-templates + // unless the user explicitly disabled that fallback. + if !mctx.Config.Bool("nuclei_auto_download", true) { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: "no nuclei templates found and --nuclei-auto-download=false. Clone https://github.com/projectdiscovery/nuclei-templates into ~/nuclei-templates or pass --nuclei-templates ", + }) + return nil + } + + dest, err := defaultAutoDownloadDir() + if err != nil { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("cannot determine default templates dir: %v", err), + }) + return nil + } + + dl := nucleitpl.NewDownloader() + dl.Verbose = mctx.Config.Bool("verbose", false) || mctx.Config.Bool("ai.verbose", false) + if err := dl.EnsureTemplates(dest); err != nil { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("auto-download nuclei templates: %v", err), + }) + return nil + } + tplDir = dest + } + + tpls, diags, err := nucleitpl.LoadDir(tplDir) + if err != nil { + return fmt.Errorf("load templates from %s: %w", tplDir, err) + } + + supported := 0 + skipped := 0 + var supportedTpls []*nucleitpl.Template + for _, t := range tpls { + if ok, _ := t.IsSupported(); ok { + supported++ + supportedTpls = append(supportedTpls, 
t) + } else { + skipped++ + } + } + + if supported == 0 { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("loaded %d templates, 0 supported (skipped %d, parse errors %d)", len(tpls), skipped, len(diags)), + }) + return nil + } + + timeout := time.Duration(mctx.Config.Int("timeout", 10)) * time.Second + client := gohttp.GetSharedClient(int(timeout.Seconds())) + exec := nucleitpl.NewExecutor(client, timeout) + + // Gather target URLs from the store. + var targets []string + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + targets = append(targets, "https://"+h.Subdomain) + } + if len(targets) == 0 { + return nil + } + + // Bounded parallelism: running thousands of templates × hundreds of + // hosts unbounded would be a DoS against ourselves and the target. + maxConcurrent := mctx.Config.Int("concurrency", 50) + if maxConcurrent > 50 { + maxConcurrent = 50 // cap — templates make 1-3 requests each + } + if maxConcurrent < 1 { + maxConcurrent = 10 + } + + sem := make(chan struct{}, maxConcurrent) + var wg sync.WaitGroup + + for _, url := range targets { + for _, t := range supportedTpls { + if mctx.Ctx.Err() != nil { + break + } + url := url + t := t + wg.Add(1) + sem <- struct{}{} + go func() { + defer wg.Done() + defer func() { <-sem }() + runCtx, cancel := context.WithTimeout(mctx.Ctx, timeout) + defer cancel() + for _, m := range exec.Run(runCtx, t, url) { + publishMatch(mctx, m) + } + }() + } + } + wg.Wait() + + if skipped > 0 { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("executed %d templates, skipped %d (unsupported protocol/features)", supported, skipped), + }) + } + return nil +} + +// publishMatch persists the match into the store and fires a 
+// VulnerabilityFound event. When the match references CVEs, a CVEMatch +// event is also fired so the CVE aggregator sees it. +func publishMatch(mctx module.Context, m nucleitpl.Match) { + now := time.Now() + severity := mapSeverity(m.Severity) + host := hostFromURL(m.URL) + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "nuclei/" + m.TemplateID, + Title: m.Name, + Description: m.Description, + Severity: string(severity), + URL: m.URL, + Evidence: m.Evidence, + CVEs: append([]string(nil), m.CVEs...), + FoundAt: now, + }) + for _, cveID := range m.CVEs { + h.CVEs = append(h.CVEs, store.CVE{ + ID: cveID, + Technology: m.TemplateID, + Severity: string(severity), + FoundAt: now, + URL: m.TemplateURL, + }) + } + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + ID: "nuclei/" + m.TemplateID, + Title: m.Name, + Description: m.Description, + Severity: severity, + URL: m.URL, + Evidence: m.Evidence, + CVEs: append([]string(nil), m.CVEs...), + }) + + for _, cveID := range m.CVEs { + mctx.Bus.Publish(mctx.Ctx, eventbus.CVEMatch{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + CVE: cveID, + Technology: m.TemplateID, + Severity: severity, + Description: m.Name, + URL: m.TemplateURL, + }) + } +} + +func mapSeverity(s string) eventbus.Severity { + switch s { + case "critical": + return eventbus.SeverityCritical + case "high": + return eventbus.SeverityHigh + case "medium": + return eventbus.SeverityMedium + case "low": + return eventbus.SeverityLow + default: + return eventbus.SeverityInfo + } +} + +// resolveTemplateDir returns the first USABLE template directory, in +// priority order. "Usable" means it exists, is a directory, and the +// process can list its contents (i.e. not a permission-denied mount +// like a read-restricted nuclei install in another user's home). 
// hostFromURL returns the authority part of u: a leading "https://" or
// "http://" is removed and the result is cut at the first '/', '?' or '#'.
// A port, if present, is kept. Input without a recognised scheme is treated
// as already scheme-less. A bare scheme such as "https://" yields "".
func hostFromURL(u string) string {
	rest := u
	for _, scheme := range []string{"https://", "http://"} {
		// >= rather than > so a URL that consists of nothing but the scheme
		// is stripped to an empty host instead of leaking "https:".
		if len(rest) >= len(scheme) && rest[:len(scheme)] == scheme {
			rest = rest[len(scheme):]
			break
		}
	}
	for i := 0; i < len(rest); i++ {
		switch rest[i] {
		case '/', '?', '#':
			return rest[:i]
		}
	}
	return rest
}
+var sourceList = []struct { + name string + fn func(string) ([]string, error) +}{ + {"crt.sh", sources.FetchCrtsh}, + {"Certspotter", sources.FetchCertspotter}, + {"AlienVault", sources.FetchAlienVault}, + {"HackerTarget", sources.FetchHackerTarget}, + {"URLScan", sources.FetchURLScan}, + {"RapidDNS", sources.FetchRapidDNS}, + {"Anubis", sources.FetchAnubis}, + {"ThreatMiner", sources.FetchThreatMiner}, + {"DNSRepo", sources.FetchDNSRepo}, + {"SubdomainCenter", sources.FetchSubdomainCenter}, + {"Wayback", sources.FetchWayback}, + {"CommonCrawl", sources.FetchCommonCrawl}, + {"Sitedossier", sources.FetchSitedossier}, + {"Riddler", sources.FetchRiddler}, + {"Robtex", sources.FetchRobtex}, + {"DNSHistory", sources.FetchDNSHistory}, + {"ArchiveToday", sources.FetchArchiveToday}, + {"JLDC", sources.FetchJLDC}, + {"SynapsInt", sources.FetchSynapsInt}, + {"CensysFree", sources.FetchCensysFree}, + // v2.0 additions — free, no API key, fail-open. Dormant v1 sources + // re-activated + 4 net-new endpoints. + {"BufferOver", sources.FetchBufferOver}, // dormant v1 + {"DNSDumpster", sources.FetchDNSDumpster}, // dormant v1 + {"Omnisint", sources.FetchOmnisint}, // v2 new + {"HudsonRock", sources.FetchHudsonRock}, // v2 new + {"WebArchiveCDX", sources.FetchWebArchiveCDX}, // v2 new + {"Digitorus", sources.FetchDigitorus}, // v2 new +} + +func (m *passiveModule) Run(mctx module.Context) error { + target := mctx.Target + if target == "" { + return nil + } + + var wg sync.WaitGroup + // Dedup across sources before emitting — the store will also dedup, but + // emitting duplicates just burns bus bandwidth. + seen := make(map[string]struct{}) + var seenMu sync.Mutex + + for _, src := range sourceList { + src := src + wg.Add(1) + go func() { + defer wg.Done() + + // Respect ctx cancellation between slow sources. 
+ if err := mctx.Ctx.Err(); err != nil { + return + } + + subs, err := src.fn(target) + if err != nil { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{Source: ModuleName + ":" + src.name, Target: target}, + Module: ModuleName + ":" + src.name, + Err: err.Error(), + }) + return + } + + for _, sub := range subs { + sub = strings.ToLower(strings.TrimSpace(sub)) + if sub == "" { + continue + } + if !strings.HasSuffix(sub, target) { + continue + } + seenMu.Lock() + if _, dup := seen[sub]; dup { + seenMu.Unlock() + continue + } + seen[sub] = struct{}{} + seenMu.Unlock() + + // Persist into the store so downstream resolution phases + // can find the subdomain even if they subscribed too late + // to receive the SubdomainDiscovered event. + methodTag := "passive:" + src.name + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, methodTag) + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.NewSubdomainDiscovered( + ModuleName+":"+src.name, + sub, + methodTag, + )) + } + }() + } + + // Wait for sources OR cancellation. + done := make(chan struct{}) + go func() { wg.Wait(); close(done) }() + select { + case <-done: + case <-mctx.Ctx.Done(): + } + _ = context.Canceled // keep import + return nil +} diff --git a/internal/modules/permutation/permutation.go b/internal/modules/permutation/permutation.go new file mode 100644 index 0000000..2959322 --- /dev/null +++ b/internal/modules/permutation/permutation.go @@ -0,0 +1,177 @@ +// Package permutation generates candidate subdomains by mutating every +// previously-discovered subdomain with a set of common prefixes/suffixes +// and resolving them. This is the "alterx" pattern: you already found +// api.example.com and dev.example.com, now try api-dev, dev-api, +// api-staging, api.dev.example.com, etc. +// +// Pattern learning is intentionally lightweight in Fase 1: the core v1 +// discovery.PatternLearner already extracts per-label frequencies. 
We +// feed those back in via candidate generation. +package permutation + +import ( + "strings" + "sync" + "time" + + "god-eye/internal/config" + godns "god-eye/internal/dns" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.permutation" + +type permModule struct{} + +func Register() { module.Register(&permModule{}) } + +func (*permModule) Name() string { return ModuleName } +func (*permModule) Phase() module.Phase { return module.PhaseResolution } +func (*permModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*permModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*permModule) DefaultEnabled() bool { return false } // opt-in (burns a lot of DNS) + +// commonAffixes are applied to each label of discovered hostnames to +// generate permutation candidates. Curated for bug-bounty signal. +var commonAffixes = []string{ + "dev", "stg", "staging", "prod", "qa", "test", "uat", "sandbox", "preview", + "internal", "int", "private", "admin", "api", "api2", "apiv2", "gw", + "new", "old", "legacy", "v2", "v3", "next", "beta", "alpha", "canary", + "eu", "us", "apac", "emea", +} + +var separators = []string{"-", "_", "."} + +func (*permModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("permutation", false) { + return nil + } + + target := mctx.Target + timeout := mctx.Config.Int("timeout", 5) + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + conc := mctx.Config.Int("concurrency", 300) + if conc <= 0 { + conc = 300 + } + + // Gather seeds from the store (all already-resolved hosts). 
+ seeds := mctx.Store.All(mctx.Ctx) + if len(seeds) == 0 { + return nil + } + + candidates := make(map[string]struct{}) + for _, h := range seeds { + for _, c := range generateCandidates(h.Subdomain, target) { + candidates[c] = struct{}{} + } + } + + // Resolve candidates in parallel. Only emit ones that resolve. + sem := make(chan struct{}, conc) + var wg sync.WaitGroup + for cand := range candidates { + if mctx.Ctx.Err() != nil { + break + } + cand := cand + wg.Add(1) + sem <- struct{}{} + go func() { + defer wg.Done() + defer func() { <-sem }() + + ips := godns.ResolveSubdomain(cand, resolvers, timeout) + if len(ips) == 0 { + return + } + _ = mctx.Store.Upsert(mctx.Ctx, cand, func(h *store.Host) { + store.AddIPs(h, ips) + store.AddDiscoveryMethod(h, "permutation") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: cand}, + Subdomain: cand, + Method: "permutation", + }) + }() + } + wg.Wait() + return nil +} + +// generateCandidates produces permuted hostnames from a seed within the +// target domain. The output is guaranteed to end in "."+target or ==target. +func generateCandidates(seed, target string) []string { + if !strings.HasSuffix(seed, target) { + return nil + } + prefix := strings.TrimSuffix(seed, "."+target) + if prefix == target || prefix == "" { + return nil + } + + labels := strings.Split(prefix, ".") + if len(labels) == 0 { + return nil + } + + out := make(map[string]struct{}) + // Leaf-label mutations: (affix)(sep)(label) and (label)(sep)(affix). + leaf := labels[len(labels)-1] + rest := strings.Join(labels[:len(labels)-1], ".") + for _, aff := range commonAffixes { + for _, sep := range separators { + combos := []string{ + aff + sep + leaf, + leaf + sep + aff, + } + for _, c := range combos { + parts := []string{c} + if rest != "" { + parts = []string{rest, c} + } + cand := strings.Join(parts, ".") + "." 
+ target + out[cand] = struct{}{} + } + } + } + // Prepend-an-affix mutation: aff. + for _, aff := range commonAffixes { + cand := aff + "." + prefix + "." + target + out[cand] = struct{}{} + } + + res := make([]string, 0, len(out)) + for c := range out { + res = append(res, c) + } + return res +} + +func parseResolvers(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return config.DefaultResolvers + } + var out []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r == "" { + continue + } + if !strings.Contains(r, ":") { + r = r + ":53" + } + out = append(out, r) + } + if len(out) == 0 { + return config.DefaultResolvers + } + return out +} diff --git a/internal/modules/ports/ports.go b/internal/modules/ports/ports.go new file mode 100644 index 0000000..fe55f46 --- /dev/null +++ b/internal/modules/ports/ports.go @@ -0,0 +1,120 @@ +// Package ports runs a TCP connect scan on the common ports list for every +// resolved host. Drains the store at start; also reacts to late DNSResolved +// events for concurrent discovery phases. 
+package ports + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/scanner" + "god-eye/internal/store" +) + +const ModuleName = "ports.scan" + +type portsModule struct{} + +func Register() { module.Register(&portsModule{}) } + +func (*portsModule) Name() string { return ModuleName } +func (*portsModule) Phase() module.Phase { return module.PhaseEnrichment } +func (*portsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*portsModule) Produces() []eventbus.EventType { return nil } +func (*portsModule) DefaultEnabled() bool { return true } + +func (*portsModule) Run(mctx module.Context) error { + if mctx.Config.Bool("no_ports", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 5) + portList := parsePorts(mctx.Config.String("ports", "")) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + scan := func(host string, ip string) { + if mctx.Ctx.Err() != nil { + return + } + open := scanner.ScanPorts(ip, portList, timeout) + if len(open) == 0 { + return + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Ports = append(h.Ports, open...) + }) + } + + var wg sync.WaitGroup + + // Drain. + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.Subdomain == "" || len(h.IPs) == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + ip := h.IPs[0] + wg.Add(1) + go func() { defer wg.Done(); scan(host, ip) }() + } + + // Late events. 
// parsePorts parses a comma-separated list of TCP port numbers.
//
// Each entry must be a whole decimal integer in the range 1-65535;
// surrounding whitespace is ignored. Entries that are empty, out of range or
// not purely numeric (for example "80abc" or "80-90") are skipped rather than
// being truncated to their leading digits. Duplicates are dropped and the
// order of first occurrence is kept.
//
// When s is blank or contains no valid entry, the default list
// 80, 443, 8080, 8443 is returned.
func parsePorts(s string) []int {
	defaults := []int{80, 443, 8080, 8443}

	s = strings.TrimSpace(s)
	if s == "" {
		return defaults
	}

	seen := make(map[int]struct{})
	var out []int
	for _, field := range strings.Split(s, ",") {
		var (
			port  int
			extra rune
		)
		// "%d%c" scans the number and then tries to read one more character.
		// Exactly one scanned item means the entry was an integer with
		// nothing after it; two means trailing garbage; zero means no number.
		n, _ := fmt.Sscanf(strings.TrimSpace(field), "%d%c", &port, &extra)
		if n != 1 || port <= 0 || port > 65535 {
			continue
		}
		if _, dup := seen[port]; dup {
			continue
		}
		seen[port] = struct{}{}
		out = append(out, port)
	}
	if len(out) == 0 {
		return defaults
	}
	return out
}
+package recursive + +import ( + "time" + + "god-eye/internal/discovery" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" + "strings" +) + +const ModuleName = "discovery.recursive" + +type recModule struct{} + +func Register() { module.Register(&recModule{}) } + +func (*recModule) Name() string { return ModuleName } +func (*recModule) Phase() module.Phase { return module.PhaseResolution } // runs after discovery +func (*recModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventSubdomainDiscovered} } +func (*recModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} + +// Recursive is opt-in by default — profiles enable it for bugbounty/pentest. +func (*recModule) DefaultEnabled() bool { return false } + +func (*recModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("recursive", false) { + return nil + } + + target := mctx.Target + depth := mctx.Config.Int("recursive.depth", 3) + if depth < 1 { + depth = 1 + } else if depth > 5 { + depth = 5 + } + timeout := mctx.Config.Int("timeout", 5) + conc := mctx.Config.Int("concurrency", 500) + if conc <= 0 { + conc = 500 + } + + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + + // Gather initial seeds from what's been discovered so far. + hosts := mctx.Store.All(mctx.Ctx) + seeds := make([]string, 0, len(hosts)) + for _, h := range hosts { + seeds = append(seeds, h.Subdomain) + } + if len(seeds) == 0 { + return nil + } + + rd := discovery.NewRecursiveDiscovery(discovery.RecursiveConfig{ + Domain: target, + Resolvers: resolvers, + Timeout: timeout, + MaxDepth: depth, + Concurrency: conc, + }) + found := rd.Discover(mctx.Ctx, seeds) + + // Emit SubdomainDiscovered for any new hosts. 
// parseResolvers turns a comma-separated resolver list into "host:port"
// addresses.
//
// Blank entries are ignored. An entry without a port gets ":53" appended;
// a bare IPv6 literal (more than one colon, no brackets) is wrapped as
// "[addr]:53", and a bracketed literal without a port gets ":53" appended.
// Entries that already carry a port are passed through unchanged.
//
// When s is blank or holds only blank entries, the default resolvers
// 8.8.8.8:53 and 1.1.1.1:53 are returned, so the result is never empty.
func parseResolvers(s string) []string {
	defaults := []string{"8.8.8.8:53", "1.1.1.1:53"}

	var out []string
	for _, r := range strings.Split(s, ",") {
		r = strings.TrimSpace(r)
		if r == "" {
			continue
		}
		switch {
		case strings.HasPrefix(r, "["):
			// Bracketed IPv6: "[::1]" needs a port, "[::1]:5353" does not.
			if strings.HasSuffix(r, "]") {
				r += ":53"
			}
		case strings.Count(r, ":") > 1:
			// Bare IPv6 literal; its colons are not a port separator.
			r = "[" + r + "]:53"
		case !strings.Contains(r, ":"):
			r += ":53"
		}
		out = append(out, r)
	}
	if len(out) == 0 {
		return defaults
	}
	return out
}
// Run writes the scan results found in the store to the configured sinks.
//
// Results are read via projectStoreToResults; when there are none the
// function returns without producing any output. In "json" mode the results
// are written to stdout and the function returns immediately, so neither
// the console table nor the "output" file is produced in that mode.
// Otherwise the console table is printed unless "silent" is set, and the
// results are saved to the "output" path (in "format") when one is given.
// A file-write failure is published as a ModuleError and returned.
func (*reportModule) Run(mctx module.Context) error {
	// A ScanCompleted subscription is registered for the duration of Run,
	// but nothing below ever receives from done: Run does NOT wait for the
	// event (see the note further down).
	done := make(chan struct{}, 1)
	sub := mctx.Bus.Subscribe(eventbus.EventScanCompleted, func(_ context.Context, _ eventbus.Event) {
		// Non-blocking signal: a second event is dropped rather than stalling the bus.
		select {
		case done <- struct{}{}:
		default:
		}
	})
	defer sub.Unsubscribe()

	// The report module itself runs in PhaseReporting which is the last
	// phase. ScanCompleted fires right after this phase ends, so we can't
	// rely on it — write output directly from the store instead.
	_ = done // intentionally unused; keeps the channel referenced

	results := projectStoreToResults(mctx)
	if len(results) == 0 {
		return nil
	}

	silent := mctx.Config.Bool("silent", false)
	jsonStdout := mctx.Config.Bool("json", false)
	onlyActive := mctx.Config.Bool("only_active", false)
	outPath := mctx.Config.String("output", "")
	format := mctx.Config.String("format", "txt")

	if jsonStdout {
		// JSON mode is exclusive: dump to stdout and skip console/file output.
		writeJSONStdout(mctx, results)
		return nil
	}

	// Console presentation — only when not silent / not JSON-only mode.
	if !silent {
		printResults(results, onlyActive)
	}

	if outPath != "" {
		if err := writeFile(outPath, format, results); err != nil {
			// Surface the failure on the bus as well as to the caller.
			mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
				EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
				Module:    ModuleName,
				Err:       fmt.Sprintf("write output %s: %v", outPath, err),
			})
			return err
		}
	}

	return nil
}
+ for _, v := range h.Vulnerabilities { + switch v.ID { + case "open-redirect": + r.OpenRedirect = true + case "cors-misconfig": + r.CORSMisconfig = v.Description + case "dangerous-http-methods": + r.DangerousMethods = append(r.DangerousMethods, strings.Split(v.Evidence, ", ")...) + case "git-exposed": + r.GitExposed = true + case "svn-exposed": + r.SvnExposed = true + case "backup-file": + r.BackupFiles = append(r.BackupFiles, v.URL) + } + } + // Secrets → legacy field + for _, s := range h.Secrets { + r.JSSecrets = append(r.JSSecrets, s.Match) + } + // CVEs / AI + for _, c := range h.CVEs { + r.CVEFindings = append(r.CVEFindings, c.ID) + } + for _, a := range h.AIFindings { + r.AIFindings = append(r.AIFindings, a.Title) + if r.AISeverity == "" { + r.AISeverity = a.Severity + } + if r.AIModel == "" { + r.AIModel = a.Model + } + } + out[h.Subdomain] = r + } + return out +} + +// printResults is a minimal, non-colorful table print. The full v1 +// presentation is re-introduced when the TUI module lands in Fase 4. +func printResults(results map[string]*config.SubdomainResult, onlyActive bool) { + // Sorted output for determinism. 
+ names := make([]string, 0, len(results)) + for n := range results { + names = append(names, n) + } + // sort by status desc, then name + sortResultsForPrint(names, results) + + active := 0 + for _, n := range names { + r := results[n] + if r.StatusCode == 0 { + if onlyActive { + continue + } + fmt.Printf(" %s %s\n", output.Dim("○"), r.Subdomain) + continue + } + active++ + marker := output.Green("●") + if r.StatusCode >= 300 && r.StatusCode < 400 { + marker = output.Yellow("◐") + } else if r.StatusCode >= 400 { + marker = output.Red("○") + } + tech := "" + if len(r.Tech) > 0 { + tech = output.Dim(" [" + strings.Join(r.Tech, ", ") + "]") + } + fmt.Printf(" %s %s %s%s\n", marker, r.Subdomain, output.Dim(fmt.Sprintf("[%d]", r.StatusCode)), tech) + } + fmt.Println() + fmt.Printf(" %s total, %s active\n", output.BoldWhite(fmt.Sprintf("%d", len(results))), output.BoldGreen(fmt.Sprintf("%d", active))) +} + +func sortResultsForPrint(names []string, results map[string]*config.SubdomainResult) { + // Simple insertion-sort quality ok for small lists; stable enough. + n := len(names) + for i := 1; i < n; i++ { + j := i + for j > 0 && lessResult(results[names[j]], results[names[j-1]]) { + names[j], names[j-1] = names[j-1], names[j] + j-- + } + } +} + +func lessResult(a, b *config.SubdomainResult) bool { + // Active first, then by subdomain name. + aActive := a.StatusCode >= 200 && a.StatusCode < 400 + bActive := b.StatusCode >= 200 && b.StatusCode < 400 + if aActive != bActive { + return aActive && !bActive + } + return a.Subdomain < b.Subdomain +} + +func writeFile(path, format string, results map[string]*config.SubdomainResult) error { + // v1 exposes SaveOutput (void); we funnel through it but surface errors + // by re-checking file writability up front. + format = strings.ToLower(strings.TrimSpace(format)) + if format == "" { + format = "txt" + } + // Pre-flight: make sure we can create the target file before delegating. 
+ f, err := os.Create(path) + if err != nil { + return err + } + f.Close() + output.SaveOutput(path, format, results) + return nil +} + +// writeJSONStdout emits a v2-native minimal JSON dump to stdout. This is +// intentionally simpler than v1's ReportBuilder — when the full report +// generator lands in Fase 4 (Reporting), this is where it'll be wired. +func writeJSONStdout(mctx module.Context, results map[string]*config.SubdomainResult) { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + _ = enc.Encode(map[string]interface{}{ + "target": mctx.Target, + "subdomains": results, + }) +} diff --git a/internal/modules/reversedns/reversedns.go b/internal/modules/reversedns/reversedns.go new file mode 100644 index 0000000..33b5110 --- /dev/null +++ b/internal/modules/reversedns/reversedns.go @@ -0,0 +1,143 @@ +// Package reversedns expands discovery by doing PTR sweeps on /24 blocks +// surrounding every resolved IP. Finds internal/forgotten hosts that share +// infrastructure with already-known subdomains. +// +// Intentionally conservative: only sweeps +/- 32 addresses around seen IPs +// to keep traffic bounded and avoid accidentally pulling a huge +// non-scoped ASN. 
+package reversedns + +import ( + "fmt" + "net" + "strings" + "sync" + "time" + + "god-eye/internal/config" + godns "god-eye/internal/dns" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.reverse-dns" + +type rdnsModule struct{} + +func Register() { module.Register(&rdnsModule{}) } + +func (*rdnsModule) Name() string { return ModuleName } +func (*rdnsModule) Phase() module.Phase { return module.PhaseResolution } +func (*rdnsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*rdnsModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} + +// Opt-in: generates a lot of DNS queries; on by default for bugbounty profile. +func (*rdnsModule) DefaultEnabled() bool { return false } + +const sweepRange = 16 // how many addresses to scan either side of each seed IP + +func (*rdnsModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("reverse_dns", false) { + return nil + } + target := mctx.Target + timeout := mctx.Config.Int("timeout", 5) + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + + seeds := mctx.Store.All(mctx.Ctx) + seenIP := make(map[string]struct{}) + for _, h := range seeds { + for _, ip := range h.IPs { + seenIP[ip] = struct{}{} + } + } + + var wg sync.WaitGroup + sem := make(chan struct{}, 64) + for ip := range seenIP { + for _, neighbor := range neighbors(ip, sweepRange) { + if mctx.Ctx.Err() != nil { + break + } + wg.Add(1) + sem <- struct{}{} + go func(ipAddr string) { + defer wg.Done() + defer func() { <-sem }() + + name := godns.ResolvePTR(ipAddr, resolvers, timeout) + if name == "" { + return + } + name = strings.ToLower(strings.TrimSuffix(name, ".")) + if !strings.HasSuffix(name, "."+target) && name != target { + return + } + _ = mctx.Store.Upsert(mctx.Ctx, name, func(h *store.Host) { + store.AddIPs(h, []string{ipAddr}) + store.AddDiscoveryMethod(h, 
"reverse-dns") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: name}, + Subdomain: name, + Method: "reverse-dns", + }) + }(neighbor) + } + } + wg.Wait() + return nil +} + +// neighbors returns IPv4 addresses within +/- rng of ip. IPv6 addresses +// are returned as a single-element slice (no sweep — address space too +// large, and we'd rarely find anything anyway). +func neighbors(ipStr string, rng int) []string { + ip := net.ParseIP(ipStr) + if ip == nil { + return nil + } + v4 := ip.To4() + if v4 == nil { + return []string{ipStr} + } + + // Convert to uint32 for arithmetic. + base := uint32(v4[0])<<24 | uint32(v4[1])<<16 | uint32(v4[2])<<8 | uint32(v4[3]) + + out := make([]string, 0, 2*rng+1) + for delta := -rng; delta <= rng; delta++ { + candidate := int64(base) + int64(delta) + if candidate < 0 || candidate > 0xFFFFFFFF { + continue + } + c := uint32(candidate) + out = append(out, fmt.Sprintf("%d.%d.%d.%d", c>>24&0xFF, c>>16&0xFF, c>>8&0xFF, c&0xFF)) + } + return out +} + +func parseResolvers(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return config.DefaultResolvers + } + var out []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r == "" { + continue + } + if !strings.Contains(r, ":") { + r = r + ":53" + } + out = append(out, r) + } + if len(out) == 0 { + return config.DefaultResolvers + } + return out +} diff --git a/internal/modules/security/security.go b/internal/modules/security/security.go new file mode 100644 index 0000000..ca8e304 --- /dev/null +++ b/internal/modules/security/security.go @@ -0,0 +1,241 @@ +// Package security runs the v1 security checks (open redirect, CORS, +// HTTP methods, git/svn, backups, admin, API) on every probed host. +// +// Reads hosts from the store (not events) so late-start phases don't miss +// the upstream HTTPProbed events. 
+package security + +import ( + "context" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/security" + "god-eye/internal/store" +) + +const ModuleName = "security.checks" + +type secModule struct{} + +func Register() { module.Register(&secModule{}) } + +func (*secModule) Name() string { return ModuleName } +func (*secModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*secModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*secModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventVulnerability} } +func (*secModule) DefaultEnabled() bool { return true } + +func (*secModule) Run(mctx module.Context) error { + conc := mctx.Config.Int("concurrency", 200) + if conc <= 0 { + conc = 200 + } + timeout := mctx.Config.Int("timeout", 5) + + processed := make(map[string]struct{}) + var processedMu sync.Mutex + shouldProcess := func(host string) bool { + processedMu.Lock() + defer processedMu.Unlock() + if _, dup := processed[host]; dup { + return false + } + processed[host] = struct{}{} + return true + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for host := range work { + runChecks(mctx, host, timeout) + } + }() + } + + // Drain: every host that got a successful HTTP probe. + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + select { + case work <- h.Subdomain: + case <-mctx.Ctx.Done(): + close(work) + wg.Wait() + return nil + } + } + + // Listen for late HTTPProbed events. 
+ sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + select { + case work <- host: + case <-mctx.Ctx.Done(): + } + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + close(work) + wg.Wait() + return nil +} + +func runChecks(mctx module.Context, host string, timeout int) { + if mctx.Ctx.Err() != nil { + return + } + + client := gohttp.GetSharedClient(timeout) + + var openRedirect bool + var cors string + var allowed, dangerous []string + var admin, backups, apis []string + var gitExposed, svnExposed bool + + var wg sync.WaitGroup + wg.Add(7) + go func() { defer wg.Done(); openRedirect = security.CheckOpenRedirectWithClient(host, client) }() + go func() { defer wg.Done(); cors = security.CheckCORSWithClient(host, client) }() + go func() { defer wg.Done(); allowed, dangerous = security.CheckHTTPMethodsWithClient(host, client) }() + go func() { defer wg.Done(); admin = security.CheckAdminPanelsWithClient(host, client) }() + go func() { defer wg.Done(); gitExposed, svnExposed = security.CheckGitSvnExposureWithClient(host, client) }() + go func() { defer wg.Done(); backups = security.CheckBackupFilesWithClient(host, client) }() + go func() { defer wg.Done(); apis = security.CheckAPIEndpointsWithClient(host, client) }() + wg.Wait() + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + now := time.Now() + if openRedirect { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "open-redirect", Title: "Open Redirect", + Description: "Server redirects to attacker-controlled URL via redirect parameter", + Severity: string(eventbus.SeverityMedium), + URL: "https://" + host, + OWASP: "A01:2021-Broken Access Control", + FoundAt: now, + }) + } + if cors != "" { + h.Vulnerabilities = 
append(h.Vulnerabilities, store.Vulnerability{ + ID: "cors-misconfig", Title: "CORS Misconfiguration", + Description: cors, + Severity: string(eventbus.SeverityHigh), + URL: "https://" + host, + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + } + if len(dangerous) > 0 { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "dangerous-http-methods", Title: "Dangerous HTTP Methods Enabled", + Description: "Server allows potentially dangerous methods", + Severity: string(eventbus.SeverityMedium), + Evidence: joinStrings(dangerous, ", "), + URL: "https://" + host, + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + } + if gitExposed { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "git-exposed", Title: "Git Repository Exposed", + Description: ".git directory is publicly accessible", + Severity: string(eventbus.SeverityCritical), + URL: "https://" + host + "/.git/config", + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + } + if svnExposed { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "svn-exposed", Title: "SVN Repository Exposed", + Description: ".svn directory is publicly accessible", + Severity: string(eventbus.SeverityHigh), + URL: "https://" + host + "/.svn/entries", + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + } + for _, b := range backups { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "backup-file", Title: "Backup File Exposed", + Description: "Backup file accessible: " + b, + Severity: string(eventbus.SeverityHigh), + URL: b, + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + } + _ = allowed + _ = admin + _ = apis + }) + + now := time.Now() + base := eventbus.EventMeta{At: now, Source: ModuleName, Target: host} + emit := func(ev eventbus.VulnerabilityFound) { mctx.Bus.Publish(mctx.Ctx, ev) } + + if openRedirect { + 
// joinStrings concatenates the elements of ss, placing sep between
// consecutive elements. An empty or nil slice yields "".
func joinStrings(ss []string, sep string) string {
	if len(ss) == 0 {
		return ""
	}

	// Compute the final size once so the buffer is allocated a single time.
	size := len(sep) * (len(ss) - 1)
	for _, part := range ss {
		size += len(part)
	}

	buf := make([]byte, 0, size)
	for i, part := range ss {
		if i > 0 {
			buf = append(buf, sep...)
		}
		buf = append(buf, part...)
	}
	return string(buf)
}
Transfer-Encoding +// combinations and timing-analyzing the responses. +// +// This is the non-destructive timing variant: we send a request crafted +// so that CL.TE or TE.CL parsing desync would cause the server to hold +// the connection waiting for more bytes, while the correct interpretation +// returns immediately. Large response time delta ⇒ likely smuggling. +// +// We do NOT attempt to actually smuggle follow-up requests — that could +// affect other users. This is safe for authorized testing. +package smuggling + +import ( + "bufio" + "context" + "crypto/tls" + "fmt" + "net" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "vuln.http-smuggling" + +type smModule struct{} + +func Register() { module.Register(&smModule{}) } + +func (*smModule) Name() string { return ModuleName } +func (*smModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*smModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*smModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventVulnerability} +} + +// Opt-in: timing-based testing is slower and can be noisy. Bugbounty profile enables it. 
+func (*smModule) DefaultEnabled() bool { return false } + +func (*smModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("smuggling_scan", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 10) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + var wg sync.WaitGroup + + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); probe(mctx, host, timeout) }() + } + + sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); probe(mctx, host, timeout) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} + +func probe(mctx module.Context, host string, timeoutSec int) { + timeout := time.Duration(timeoutSec) * time.Second + + // Baseline: normal request, measure response time. + baseline, err := sendRequest(host, baselineRequest(host), timeout) + if err != nil { + return + } + + // CL.TE probe: Content-Length says more data coming, TE: chunked says "last chunk now". + // Vulnerable servers that read TE first return quickly; non-vulnerable + // servers that read CL wait for more bytes and hit the read timeout. + cltePayload := clteRequest(host) + clte, _ := sendRequest(host, cltePayload, timeout) + + // TE.CL probe: reversed — server reads CL first (ignoring chunked), payload is poisoned. 
// clteRequest builds the raw CL.TE probe for host: a POST that carries both
// a Content-Length of 4 and "Transfer-Encoding: chunked", followed by the
// five-byte terminating chunk "0\r\n\r\n". The declared length is therefore
// one byte shorter than the chunked terminator that is actually sent.
//
// NOTE(review): the intent appears to be that a component trusting
// Content-Length hands on only the first four bytes, leaving a chunked
// parser behind it waiting for the end of the terminator — confirm against
// the timing heuristic in probe.
func clteRequest(host string) string {
	const (
		declaredLength = 4            // deliberately disagrees with the body below
		terminalChunk  = "0\r\n\r\n" // last-chunk marker of a chunked body
	)

	head := "POST / HTTP/1.1\r\n" +
		"Host: " + host + "\r\n" +
		"User-Agent: god-eye-v2\r\n" +
		fmt.Sprintf("Content-Length: %d\r\n", declaredLength) +
		"Transfer-Encoding: chunked\r\n" +
		"Connection: close\r\n" +
		"\r\n"
	return head + terminalChunk
}
// teclRequest builds the raw TE.CL timing probe for host.
//
// The request carries "Transfer-Encoding: chunked" together with a
// Content-Length that is one byte longer than the chunked terminator: the
// body is "0\r\n\r\n" followed by a single filler byte. A front-end that
// honours Transfer-Encoding forwards only the terminating chunk, so a
// back-end that honours Content-Length keeps waiting for the missing byte —
// the delay the timing heuristic looks for.
//
// The previous payload embedded a "GPOST / HTTP/1.1" request prefix with
// Content-Length: 3. On a TE.CL chain that leaves a smuggled prefix on the
// back-end connection (contradicting this package's non-destructive
// promise) and is answered immediately, so it could never trip the timing
// check. No request prefix is smuggled any more.
//
// NOTE(review): if the chain is actually CL.TE, the filler byte can be left
// on the back-end connection; callers should treat a positive CL.TE result
// as a reason not to send this probe.
func teclRequest(host string) string {
	// Terminating chunk plus one filler byte that only a Content-Length
	// parser will wait for.
	body := "0\r\n\r\n" + "X"
	return fmt.Sprintf("POST / HTTP/1.1\r\n"+
		"Host: %s\r\n"+
		"User-Agent: god-eye-v2\r\n"+
		"Content-Length: %d\r\n"+
		"Transfer-Encoding: chunked\r\n"+
		"Connection: close\r\n"+
		"\r\n%s", host, len(body), body)
}
Reject requests with both headers.", + OWASP: "A06:2021-Vulnerable and Outdated Components", + FoundAt: now, + }) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + ID: "http-smuggling-" + strings.ToLower(kind), + Title: title, + Description: "Timing-based " + kind + " desync candidate.", + Severity: eventbus.SeverityHigh, + URL: "https://" + host, + Evidence: strings.TrimSpace(r.response), + Remediation: "Align CL/TE parsing between front-end and back-end.", + OWASP: "A06:2021-Vulnerable and Outdated Components", + }) +} + diff --git a/internal/modules/supplychain/supplychain.go b/internal/modules/supplychain/supplychain.go new file mode 100644 index 0000000..0105530 --- /dev/null +++ b/internal/modules/supplychain/supplychain.go @@ -0,0 +1,192 @@ +// Package supplychain enumerates npm and PyPI packages that reference the +// target domain in their source, then flags packages as potential supply +// chain assets. Useful for discovering internal-only tools published by +// mistake to public registries and for finding branded utility packages +// that could reveal internal endpoints/secrets. +// +// This is a discovery-oriented check. Actually downloading + scanning +// package contents for secrets is a Fase 2 follow-up; here we just surface +// the packages and the URLs they point at. 
+package supplychain + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/sources" + "god-eye/internal/store" +) + +const ModuleName = "vuln.supply-chain" + +type scModule struct{} + +func Register() { module.Register(&scModule{}) } + +func (*scModule) Name() string { return ModuleName } +func (*scModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*scModule) Consumes() []eventbus.EventType { return nil } +func (*scModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered, eventbus.EventAPIFinding} +} +func (*scModule) DefaultEnabled() bool { return true } + +func (*scModule) Run(mctx module.Context) error { + target := mctx.Target + if target == "" { + return nil + } + + var wg sync.WaitGroup + wg.Add(2) + go func() { defer wg.Done(); checkNPM(mctx, target) }() + go func() { defer wg.Done(); checkPyPI(mctx, target) }() + wg.Wait() + return nil +} + +// checkNPM uses npm's registry search API. Packages matching "" +// or "" are surfaced. +func checkNPM(mctx module.Context, target string) { + q := extractBrand(target) + if q == "" { + return + } + url := fmt.Sprintf("https://registry.npmjs.org/-/v1/search?text=%s&size=100", q) + body, err := fetchJSON(mctx.Ctx, url, 15*time.Second) + if err != nil { + return + } + + var parsed struct { + Objects []struct { + Package struct { + Name string `json:"name"` + Links map[string]string `json:"links"` + Description string `json:"description"` + } `json:"package"` + } `json:"objects"` + } + _ = json.Unmarshal(body, &parsed) + + for _, obj := range parsed.Objects { + pkg := obj.Package + text := pkg.Name + " " + pkg.Description + for _, link := range pkg.Links { + text += " " + link + } + if !strings.Contains(strings.ToLower(text), target) { + continue + } + // Emit an APIFinding for discovery context. 
+ mctx.Bus.Publish(mctx.Ctx, eventbus.APIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: target}, + Kind: "supply-chain:npm", + URL: "https://www.npmjs.com/package/" + pkg.Name, + Issue: "npm package references target: " + pkg.Name + " — " + pkg.Description, + Severity: eventbus.SeverityInfo, + }) + // If the description or links contain subdomains of the target, + // also feed them into discovery. + for _, sub := range sources.ExtractSubdomains(text, target) { + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "supply-chain:npm:"+pkg.Name) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "supply-chain:npm:" + pkg.Name, + }) + } + } +} + +func checkPyPI(mctx module.Context, target string) { + // PyPI no longer supports XML-RPC search; use the simple index + // (all packages) scanning is too expensive. Instead query a few + // likely branded package prefixes via the JSON index. + q := extractBrand(target) + if q == "" { + return + } + // Try exact-name lookups for common variants. 
+ candidates := []string{q, q + "-cli", q + "-sdk", q + "-api", q + "-client"} + for _, name := range candidates { + url := "https://pypi.org/pypi/" + name + "/json" + body, err := fetchJSON(mctx.Ctx, url, 10*time.Second) + if err != nil || len(body) < 50 { + continue + } + var parsed struct { + Info struct { + Name string `json:"name"` + Summary string `json:"summary"` + HomePage string `json:"home_page"` + ProjectURL string `json:"project_url"` + ProjectURLs map[string]string `json:"project_urls"` + } `json:"info"` + } + _ = json.Unmarshal(body, &parsed) + info := parsed.Info + if info.Name == "" { + continue + } + text := info.Name + " " + info.Summary + " " + info.HomePage + " " + info.ProjectURL + for _, u := range info.ProjectURLs { + text += " " + u + } + if !strings.Contains(strings.ToLower(text), target) { + continue + } + mctx.Bus.Publish(mctx.Ctx, eventbus.APIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: target}, + Kind: "supply-chain:pypi", + URL: "https://pypi.org/project/" + info.Name + "/", + Issue: "PyPI package references target: " + info.Name + " — " + info.Summary, + Severity: eventbus.SeverityInfo, + }) + for _, sub := range sources.ExtractSubdomains(text, target) { + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "supply-chain:pypi:"+info.Name) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "supply-chain:pypi:" + info.Name, + }) + } + } +} + +// extractBrand returns the "brand" (second-to-last label) from example.com → +// "example". Used as the package-search query term. 
// fetchJSON performs a GET request against url and returns the response
// body, capped at 4 MiB.
//
// The request is bound to ctx and additionally limited by timeout. A
// response with a status outside the 2xx range is reported as an error
// rather than returned as data, so callers never try to decode an error
// page (for example a 404 for a package that does not exist).
func fetchJSON(ctx context.Context, url string, timeout time.Duration) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "god-eye-v2")

	c := &http.Client{Timeout: timeout}
	resp, err := c.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// net/http only returns an error for transport failures; HTTP-level
	// failures have to be detected from the status code.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	return io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024))
}
+package takeover + +import ( + "context" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/scanner" + "god-eye/internal/store" +) + +const ModuleName = "takeover.cname" + +type takeoverModule struct{} + +func Register() { module.Register(&takeoverModule{}) } + +func (*takeoverModule) Name() string { return ModuleName } +func (*takeoverModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*takeoverModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*takeoverModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventTakeoverCandidate} +} +func (*takeoverModule) DefaultEnabled() bool { return true } + +func (*takeoverModule) Run(mctx module.Context) error { + if mctx.Config.Bool("no_takeover", false) { + return nil + } + conc := mctx.Config.Int("concurrency", 100) + if conc <= 0 { + conc = 100 + } + timeout := mctx.Config.Int("timeout", 5) + + processed := make(map[string]struct{}) + var processedMu sync.Mutex + shouldProcess := func(host string) bool { + processedMu.Lock() + defer processedMu.Unlock() + if _, dup := processed[host]; dup { + return false + } + processed[host] = struct{}{} + return true + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for host := range work { + if mctx.Ctx.Err() != nil { + return + } + service := scanner.CheckTakeover(host, timeout) + if service == "" { + continue + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Takeover = &store.Takeover{ + Service: service, + CNAME: h.CNAME, + Confirmed: false, + FoundAt: time.Now(), + } + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.TakeoverCandidate{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Subdomain: host, + Service: service, + }) + } + }() + } + + // Drain: every host with a CNAME is a takeover 
candidate. + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.CNAME == "" { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + select { + case work <- h.Subdomain: + case <-mctx.Ctx.Done(): + close(work) + wg.Wait() + return nil + } + } + + sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.DNSResolved) + if !ok || ev.CNAME == "" { + return + } + if !shouldProcess(ev.Subdomain) { + return + } + select { + case work <- ev.Subdomain: + case <-mctx.Ctx.Done(): + } + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + close(work) + wg.Wait() + return nil +} diff --git a/internal/modules/vhost/vhost.go b/internal/modules/vhost/vhost.go new file mode 100644 index 0000000..20ed76a --- /dev/null +++ b/internal/modules/vhost/vhost.go @@ -0,0 +1,79 @@ +// Package vhost is a Fase 0.6 adapter around v1 network.VHostScanner which +// performs virtual host discovery on resolved IPs. Reveals additional +// hostnames sharing infrastructure with in-scope targets. 
+package vhost + +import ( + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/network" + "god-eye/internal/store" +) + +const ModuleName = "discovery.vhost" + +type vhostModule struct{} + +func Register() { module.Register(&vhostModule{}) } + +func (*vhostModule) Name() string { return ModuleName } +func (*vhostModule) Phase() module.Phase { return module.PhaseResolution } +func (*vhostModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*vhostModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*vhostModule) DefaultEnabled() bool { return false } // opt-in + +func (*vhostModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("vhost_scan", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 10) + target := mctx.Target + + hosts := mctx.Store.All(mctx.Ctx) + seenIP := make(map[string]struct{}) + for _, h := range hosts { + for _, ip := range h.IPs { + seenIP[ip] = struct{}{} + } + } + + scanner := network.NewVHostScanner(timeout) + var wg sync.WaitGroup + for ip := range seenIP { + ip := ip + if mctx.Ctx.Err() != nil { + break + } + wg.Add(1) + go func() { + defer wg.Done() + res := scanner.DiscoverVHosts(mctx.Ctx, ip) + if res == nil { + return + } + for _, h := range res.Domains { + h = strings.ToLower(strings.TrimSpace(h)) + if h == "" || !strings.HasSuffix(h, target) { + continue + } + _ = mctx.Store.Upsert(mctx.Ctx, h, func(sh *store.Host) { + store.AddIPs(sh, []string{ip}) + store.AddDiscoveryMethod(sh, "vhost") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: h}, + Subdomain: h, + Method: "vhost", + }) + } + }() + } + wg.Wait() + return nil +} diff --git a/internal/nucleitpl/download.go b/internal/nucleitpl/download.go new file mode 100644 index 0000000..31dc396 --- 
/dev/null +++ b/internal/nucleitpl/download.go @@ -0,0 +1,370 @@ +package nucleitpl + +import ( + "archive/zip" + "errors" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync/atomic" + "time" +) + +// TemplatesZipURL is the default ZIP archive of the projectdiscovery +// nuclei-templates repository (main branch). +const TemplatesZipURL = "https://github.com/projectdiscovery/nuclei-templates/archive/refs/heads/main.zip" + +// Downloader fetches the nuclei-templates archive and extracts the +// YAML files into destDir. Designed to be invoked at most once per +// scan: after a successful extraction the destination dir persists +// across runs; subsequent invocations return quickly via hasTemplates(). +type Downloader struct { + // ZipURL overrides TemplatesZipURL for testing or mirroring. + ZipURL string + // HTTPClient is used for the download. Default: 10-minute timeout. + HTTPClient *http.Client + // Writer receives progress lines when Verbose is true. Defaults to + // os.Stderr. + Writer io.Writer + // Verbose toggles progress logging. + Verbose bool + // MinTemplatesToConsiderPresent is the count of .yaml files under + // destDir below which we treat the directory as empty / incomplete + // and re-download. Default: 50. + MinTemplatesToConsiderPresent int +} + +// NewDownloader returns a Downloader with sensible defaults. +func NewDownloader() *Downloader { + return &Downloader{ + ZipURL: TemplatesZipURL, + HTTPClient: &http.Client{Timeout: 10 * time.Minute}, + Writer: os.Stderr, + MinTemplatesToConsiderPresent: 50, + } +} + +// EnsureTemplates guarantees destDir contains a usable set of Nuclei +// YAML templates. If the directory already has ≥ MinTemplatesToConsiderPresent +// templates, it's a no-op. Otherwise the ZIP is downloaded, streamed to +// a temp file, and extracted (YAML files only). +// +// destDir is created if it doesn't exist. 
+func (d *Downloader) EnsureTemplates(destDir string) error { + if destDir == "" { + return errors.New("EnsureTemplates: empty destDir") + } + if d.hasEnoughTemplates(destDir) { + if d.Verbose { + fmt.Fprintf(d.writer(), "✓ nuclei templates already present at %s\n", destDir) + } + return nil + } + + if err := os.MkdirAll(destDir, 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", destDir, err) + } + + if d.Verbose { + fmt.Fprintf(d.writer(), "↓ downloading nuclei-templates from %s\n", d.zipURL()) + } + + tmpPath, err := d.downloadZip() + if err != nil { + return err + } + defer os.Remove(tmpPath) + + count, bytes, err := d.extractYAML(tmpPath, destDir) + if err != nil { + return err + } + + if count < d.MinTemplatesToConsiderPresent { + return fmt.Errorf("extracted only %d templates (expected ≥ %d) — archive may be incomplete", count, d.MinTemplatesToConsiderPresent) + } + + if d.Verbose { + fmt.Fprintf(d.writer(), "✓ extracted %d nuclei templates (%s) into %s\n", + count, humanBytesN(bytes), destDir) + } + return nil +} + +// Refresh forces a re-download regardless of current directory contents. +// Useful for `god-eye nuclei-update` style CLI commands. 
+func (d *Downloader) Refresh(destDir string) error { + if destDir == "" { + return errors.New("Refresh: empty destDir") + } + if err := os.MkdirAll(destDir, 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", destDir, err) + } + + if d.Verbose { + fmt.Fprintf(d.writer(), "↓ refreshing nuclei-templates from %s\n", d.zipURL()) + } + + tmpPath, err := d.downloadZip() + if err != nil { + return err + } + defer os.Remove(tmpPath) + + count, bytes, err := d.extractYAML(tmpPath, destDir) + if err != nil { + return err + } + if d.Verbose { + fmt.Fprintf(d.writer(), "✓ refreshed %d templates (%s)\n", count, humanBytesN(bytes)) + } + return nil +} + +// --- internals ----------------------------------------------------------- + +func (d *Downloader) hasEnoughTemplates(dir string) bool { + info, err := os.Stat(dir) + if err != nil || !info.IsDir() { + return false + } + found := 0 + threshold := d.MinTemplatesToConsiderPresent + if threshold <= 0 { + threshold = 50 + } + _ = filepath.Walk(dir, func(_ string, fi os.FileInfo, err error) error { + if err != nil { + return nil + } + if fi.IsDir() { + return nil + } + name := strings.ToLower(fi.Name()) + if strings.HasSuffix(name, ".yaml") || strings.HasSuffix(name, ".yml") { + found++ + if found >= threshold { + return filepath.SkipAll + } + } + return nil + }) + return found >= threshold +} + +func (d *Downloader) zipURL() string { + if d.ZipURL != "" { + return d.ZipURL + } + return TemplatesZipURL +} + +func (d *Downloader) writer() io.Writer { + if d.Writer != nil { + return d.Writer + } + return os.Stderr +} + +func (d *Downloader) downloadZip() (string, error) { + client := d.HTTPClient + if client == nil { + client = &http.Client{Timeout: 10 * time.Minute} + } + + req, err := http.NewRequest("GET", d.zipURL(), nil) + if err != nil { + return "", err + } + req.Header.Set("User-Agent", "god-eye-v2") + req.Header.Set("Accept", "application/zip") + + resp, err := client.Do(req) + if err != nil { + return "", 
fmt.Errorf("download: %w", err) + } + defer resp.Body.Close() + + // Follow standard HTTP error reporting. + if resp.StatusCode != 200 { + return "", fmt.Errorf("download: HTTP %d from %s", resp.StatusCode, d.zipURL()) + } + + tmp, err := os.CreateTemp("", "nuclei-templates-*.zip") + if err != nil { + return "", fmt.Errorf("create temp: %w", err) + } + + // Streaming copy with throttled progress output. + var written atomic.Int64 + pr := &progressReader{ + r: resp.Body, + written: &written, + verbose: d.Verbose, + writer: d.writer(), + total: resp.ContentLength, + prefix: " downloading", + } + + if _, err := io.Copy(tmp, pr); err != nil { + tmp.Close() + os.Remove(tmp.Name()) + return "", fmt.Errorf("stream download: %w", err) + } + if err := tmp.Close(); err != nil { + os.Remove(tmp.Name()) + return "", err + } + return tmp.Name(), nil +} + +// extractYAML walks the zip and writes every .yaml / .yml file into +// destDir. Returns (count, totalBytes, error). +// +// The top-level directory in the archive (e.g. "nuclei-templates-main/") +// is stripped so entries land at destDir//.yaml. +// +// Path-traversal protection: every resolved destination must be within +// destDir; otherwise the entry is skipped. +func (d *Downloader) extractYAML(zipPath, destDir string) (int, int64, error) { + zr, err := zip.OpenReader(zipPath) + if err != nil { + return 0, 0, fmt.Errorf("open zip: %w", err) + } + defer zr.Close() + + absDest, err := filepath.Abs(destDir) + if err != nil { + return 0, 0, err + } + + var count int + var bytes int64 + for _, f := range zr.File { + if f.FileInfo().IsDir() { + continue + } + lower := strings.ToLower(f.Name) + if !strings.HasSuffix(lower, ".yaml") && !strings.HasSuffix(lower, ".yml") { + continue + } + + // Strip leading top-level folder if present. + rel := f.Name + if i := strings.Index(rel, "/"); i >= 0 { + rel = rel[i+1:] + } + if rel == "" { + continue + } + // Guard against path traversal / absolute paths. 
+ if strings.Contains(rel, "..") || filepath.IsAbs(rel) { + continue + } + + dest := filepath.Join(absDest, rel) + if !strings.HasPrefix(dest, absDest+string(os.PathSeparator)) && dest != absDest { + continue + } + + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + continue + } + + rc, err := f.Open() + if err != nil { + continue + } + out, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + rc.Close() + continue + } + n, cerr := io.Copy(out, rc) + rc.Close() + out.Close() + if cerr != nil { + _ = os.Remove(dest) + continue + } + count++ + bytes += n + } + return count, bytes, nil +} + +// --- helpers ------------------------------------------------------------- + +// progressReader wraps an io.Reader and emits throttled progress lines +// as bytes are consumed. Throttling: one line every ~5% of total (or +// every ~5MB when total is unknown). +type progressReader struct { + r io.Reader + written *atomic.Int64 + total int64 + verbose bool + writer io.Writer + prefix string + + lastPct int + lastBytes int64 + lastReport time.Time +} + +func (p *progressReader) Read(b []byte) (int, error) { + n, err := p.r.Read(b) + if n > 0 { + p.written.Add(int64(n)) + if p.verbose { + p.maybeReport() + } + } + return n, err +} + +func (p *progressReader) maybeReport() { + w := p.written.Load() + + // Rate-limit prints to avoid flooding the terminal. + if time.Since(p.lastReport) < 200*time.Millisecond { + return + } + + if p.total > 0 { + pct := int(float64(w) / float64(p.total) * 100) + if pct >= p.lastPct+5 || pct == 100 { + fmt.Fprintf(p.writer, "%s %3d%% %s / %s\n", + p.prefix, pct, humanBytesN(w), humanBytesN(p.total)) + p.lastPct = pct + p.lastReport = time.Now() + } + } else { + // Unknown total: report every ~5MB. + if w-p.lastBytes >= 5*1024*1024 { + fmt.Fprintf(p.writer, "%s %s\n", p.prefix, humanBytesN(w)) + p.lastBytes = w + p.lastReport = time.Now() + } + } +} + +// humanBytesN formats a byte count like "2.3MB". 
Duplicated from +// ai/ensure.go to avoid a cross-package dependency. +func humanBytesN(n int64) string { + const k = 1024.0 + if n < int64(k) { + return fmt.Sprintf("%dB", n) + } + units := []string{"KB", "MB", "GB", "TB"} + v := float64(n) / k + for _, u := range units { + if v < k { + return fmt.Sprintf("%.1f%s", v, u) + } + v /= k + } + return fmt.Sprintf("%.1fPB", v) +} diff --git a/internal/nucleitpl/executor.go b/internal/nucleitpl/executor.go new file mode 100644 index 0000000..e4c727f --- /dev/null +++ b/internal/nucleitpl/executor.go @@ -0,0 +1,361 @@ +package nucleitpl + +import ( + "bytes" + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "time" +) + +// Executor runs supported Nuclei templates against a target URL. +type Executor struct { + Client *http.Client + Timeout time.Duration + MaxBodyB int64 // response body cap; default 1MB + UserAgent string +} + +// NewExecutor builds an executor with sensible defaults. Pass a custom +// *http.Client when you want connection pooling shared with the rest of +// the scan (recommended). +func NewExecutor(client *http.Client, timeout time.Duration) *Executor { + if client == nil { + client = &http.Client{Timeout: timeout} + } + if timeout == 0 { + timeout = 15 * time.Second + } + return &Executor{ + Client: client, + Timeout: timeout, + MaxBodyB: 1 * 1024 * 1024, + UserAgent: "god-eye-v2-nuclei", + } +} + +// Match holds the successful match output for a single template/target. +type Match struct { + TemplateID string + TemplateURL string // reference URL when present in info.reference + Name string + Severity string + Description string + Tags []string + URL string // URL that matched + Evidence string // short excerpt from the matching response + CVEs []string // extracted from info.reference when possible + Author string +} + +// Run executes every HTTP request in the template against the given +// base URL (e.g. "https://api.example.com"). Returns one Match per +// request that succeeds. 
Non-matching requests produce no entries. +// +// Templating substitutions handled: {{BaseURL}}, {{Hostname}}, {{RootURL}}. +func (e *Executor) Run(ctx context.Context, t *Template, baseURL string) []Match { + if ok, _ := t.IsSupported(); !ok { + return nil + } + var matches []Match + for _, req := range t.Requests { + for _, p := range req.Path { + url := expandPath(p, baseURL) + m, err := e.runOne(ctx, t, req, url) + if err != nil || m == nil { + continue + } + matches = append(matches, *m) + } + } + return matches +} + +// runOne sends one HTTP request, applies matchers, and returns a Match +// when every matchers-condition group is satisfied. +func (e *Executor) runOne(ctx context.Context, t *Template, req HTTPRequest, url string) (*Match, error) { + method := strings.ToUpper(req.Method) + if method == "" { + method = "GET" + } + + var body io.Reader + if req.Body != "" { + body = bytes.NewBufferString(req.Body) + } + + r, err := http.NewRequestWithContext(ctx, method, url, body) + if err != nil { + return nil, err + } + for k, v := range req.Headers { + r.Header.Set(k, v) + } + if r.Header.Get("User-Agent") == "" { + r.Header.Set("User-Agent", e.UserAgent) + } + + // Honor the redirects flag; default is NO redirect follow (safer + // for vuln detection since a 3xx-based probe might be exactly what + // we want to measure). + client := e.Client + if !req.Redirects { + wrapped := *client + wrapped.CheckRedirect = func(*http.Request, []*http.Request) error { + return http.ErrUseLastResponse + } + client = &wrapped + } + + resp, err := client.Do(r) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, e.MaxBodyB)) + + // Apply matchers. 
+ condition := strings.ToLower(strings.TrimSpace(req.MatchersCondition)) + if condition == "" { + condition = "or" + } + + fired := 0 + for _, m := range req.Matchers { + if matcherHits(m, resp, bodyBytes) { + fired++ + } + } + + switch condition { + case "and": + if fired != len(req.Matchers) { + return nil, nil + } + case "or": + if fired == 0 { + return nil, nil + } + default: + if fired == 0 { + return nil, nil + } + } + + return &Match{ + TemplateID: t.ID, + TemplateURL: firstRef(t.Info.Reference), + Name: t.Info.Name, + Severity: t.Severity(), + Description: t.Info.Description, + Tags: t.Tags(), + URL: url, + Evidence: evidenceSnippet(bodyBytes, resp), + CVEs: extractCVEs(t.ID, t.Info.Reference), + Author: t.Info.Author, + }, nil +} + +// matcherHits returns true when the matcher m fires against the response. +// Respects m.Negative (inverts), m.Condition (and|or over word list), and +// m.Part (header|body|response|all; default body). +func matcherHits(m Matcher, resp *http.Response, body []byte) bool { + hit := false + switch m.Type { + case "status": + for _, code := range m.Status { + if resp.StatusCode == code { + hit = true + break + } + } + case "size": + for _, sz := range m.Size { + if len(body) == sz { + hit = true + break + } + } + case "word": + corpus := selectCorpus(m.Part, resp, body) + hit = wordMatch(m, corpus) + case "regex": + corpus := selectCorpus(m.Part, resp, body) + hit = regexMatch(m, corpus) + } + if m.Negative { + return !hit + } + return hit +} + +func selectCorpus(part string, resp *http.Response, body []byte) string { + switch strings.ToLower(strings.TrimSpace(part)) { + case "header": + return formatHeaders(resp.Header) + case "response", "all": + return formatHeaders(resp.Header) + "\n\n" + string(body) + case "body", "": + return string(body) + default: + return string(body) + } +} + +func wordMatch(m Matcher, corpus string) bool { + if len(m.Words) == 0 { + return false + } + condition := 
strings.ToLower(strings.TrimSpace(m.Condition)) + if condition == "" { + condition = "or" + } + lower := strings.ToLower(corpus) + if condition == "and" { + for _, w := range m.Words { + if !strings.Contains(lower, strings.ToLower(w)) { + return false + } + } + return true + } + // or + for _, w := range m.Words { + if strings.Contains(lower, strings.ToLower(w)) { + return true + } + } + return false +} + +func regexMatch(m Matcher, corpus string) bool { + if len(m.Regex) == 0 { + return false + } + condition := strings.ToLower(strings.TrimSpace(m.Condition)) + if condition == "" { + condition = "or" + } + compiled := make([]*regexp.Regexp, 0, len(m.Regex)) + for _, pat := range m.Regex { + re, err := regexp.Compile(pat) + if err != nil { + continue + } + compiled = append(compiled, re) + } + if len(compiled) == 0 { + return false + } + if condition == "and" { + for _, re := range compiled { + if !re.MatchString(corpus) { + return false + } + } + return true + } + for _, re := range compiled { + if re.MatchString(corpus) { + return true + } + } + return false +} + +// --- helpers ------------------------------------------------------------- + +// expandPath substitutes Nuclei template variables with real values. 
//
//	{{BaseURL}}  → baseURL unchanged ("https://example.com")
//	{{Hostname}} → host portion of baseURL
//	{{RootURL}}  → scheme + host (no path)
//
// Text without placeholders is returned unchanged.
func expandPath(template, baseURL string) string {
	host := hostOnly(baseURL)
	root := rootURL(baseURL)
	out := strings.ReplaceAll(template, "{{BaseURL}}", baseURL)
	out = strings.ReplaceAll(out, "{{Hostname}}", host)
	out = strings.ReplaceAll(out, "{{RootURL}}", root)
	return out
}

// hostOnly returns u without its "https://" or "http://" prefix and without
// anything from the first '/', '?' or '#' onwards. A port, if present, is
// kept.
func hostOnly(u string) string {
	s := strings.TrimPrefix(u, "https://")
	s = strings.TrimPrefix(s, "http://")
	if i := strings.IndexAny(s, "/?#"); i >= 0 {
		s = s[:i]
	}
	return s
}

// rootURL returns the scheme and host of u with path, query and fragment
// removed. When u has no "http://" or "https://" prefix, only the host part
// is returned.
func rootURL(u string) string {
	s := u
	scheme := ""
	switch {
	case strings.HasPrefix(s, "https://"):
		scheme = "https://"
		s = s[len("https://"):]
	case strings.HasPrefix(s, "http://"):
		scheme = "http://"
		s = s[len("http://"):]
	}
	if i := strings.IndexAny(s, "/?#"); i >= 0 {
		s = s[:i]
	}
	return scheme + s
}

// formatHeaders renders h as "Key: value" lines, one per header value.
// Keys are emitted in map iteration order, so the order of lines is not
// stable between calls.
func formatHeaders(h http.Header) string {
	var sb strings.Builder
	for k, vs := range h {
		for _, v := range vs {
			fmt.Fprintf(&sb, "%s: %s\n", k, v)
		}
	}
	return sb.String()
}

// evidenceSnippet returns "HTTP <status> — <body excerpt>". The excerpt is
// limited to 500 bytes of body followed by "…". The cut backs up to the
// nearest UTF-8 rune boundary so the snippet never ends in a partial
// multi-byte character.
func evidenceSnippet(body []byte, resp *http.Response) string {
	const maxSnippet = 500
	s := string(body)
	if len(s) > maxSnippet {
		cut := maxSnippet
		// s[cut] is the first byte that would be dropped. If it is a UTF-8
		// continuation byte (10xxxxxx) the cut is inside a rune. A rune has
		// at most three continuation bytes, so three steps back are enough;
		// the bound also keeps invalid input from shrinking the snippet
		// arbitrarily.
		for i := 0; i < 3 && cut > 0 && s[cut]&0xC0 == 0x80; i++ {
			cut--
		}
		s = s[:cut] + "…"
	}
	return fmt.Sprintf("HTTP %d — %s", resp.StatusCode, s)
}

// firstRef returns the first non-blank entry of refs with surrounding
// whitespace removed, or "" when there is none (usually the
// nuclei-templates source or the advisory).
func firstRef(refs []string) string {
	for _, r := range refs {
		r = strings.TrimSpace(r)
		if r != "" {
			return r
		}
	}
	return ""
}

// extractCVEs scans the template ID and references for CVE IDs.
+func extractCVEs(id string, refs []string) []string { + re := regexp.MustCompile(`(?i)CVE-\d{4}-\d{4,7}`) + seen := make(map[string]bool) + var out []string + add := func(s string) { + for _, m := range re.FindAllString(s, -1) { + up := strings.ToUpper(m) + if !seen[up] { + seen[up] = true + out = append(out, up) + } + } + } + add(id) + for _, r := range refs { + add(r) + } + return out +} diff --git a/internal/nucleitpl/executor_test.go b/internal/nucleitpl/executor_test.go new file mode 100644 index 0000000..1f34ed9 --- /dev/null +++ b/internal/nucleitpl/executor_test.go @@ -0,0 +1,216 @@ +package nucleitpl + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +// mkTemplate builds a minimal Template in-memory for tests. +func mkTemplate(id string, path string, matchers []Matcher, condition string) *Template { + return &Template{ + ID: id, + Info: Info{ + Name: "Test " + id, + Severity: "high", + }, + Requests: []HTTPRequest{{ + Method: "GET", + Path: []string{path}, + Matchers: matchers, + MatchersCondition: condition, + }}, + } +} + +func TestExecutor_WordMatch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("PHP Version 7.4.3 loaded")) + })) + defer srv.Close() + + tpl := mkTemplate("test-phpinfo", + "{{BaseURL}}/info.php", + []Matcher{{Type: "word", Part: "body", Words: []string{"PHP Version"}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Fatalf("expected 1 match, got %d", len(matches)) + } + if matches[0].TemplateID != "test-phpinfo" { + t.Errorf("wrong template: %s", matches[0].TemplateID) + } + if !strings.Contains(matches[0].Evidence, "PHP Version") { + t.Errorf("evidence missing snippet: %q", matches[0].Evidence) + } +} + +func TestExecutor_StatusMatch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { + w.WriteHeader(403) + })) + defer srv.Close() + + tpl := mkTemplate("test-403", + "{{BaseURL}}/admin", + []Matcher{{Type: "status", Status: []int{403, 401}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Fatalf("expected match, got %d", len(matches)) + } +} + +func TestExecutor_ANDCondition(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("admin panel access")) + })) + defer srv.Close() + + // Both matchers must fire. + tpl := mkTemplate("test-and", + "{{BaseURL}}/", + []Matcher{ + {Type: "word", Part: "body", Words: []string{"admin"}}, + {Type: "status", Status: []int{200}}, + }, "and") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("expected AND match to fire, got %d", len(matches)) + } + + // If we flip status to something the server doesn't return, AND fails. 
+ tpl.Requests[0].Matchers[1].Status = []int{500} + matches = e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 0 { + t.Errorf("AND should fail when one matcher doesn't, got %d", len(matches)) + } +} + +func TestExecutor_NegativeMatcher(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("welcome")) + })) + defer srv.Close() + + tpl := mkTemplate("test-neg", + "{{BaseURL}}/", + []Matcher{{Type: "word", Part: "body", Words: []string{"error"}, Negative: true}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("negative should fire (body doesn't contain 'error'), got %d", len(matches)) + } +} + +func TestExecutor_RegexMatch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("Server: Apache/2.4.52 (Ubuntu)")) + })) + defer srv.Close() + + tpl := mkTemplate("test-re", + "{{BaseURL}}/", + []Matcher{{Type: "regex", Part: "body", Regex: []string{`Apache/\d+\.\d+\.\d+`}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("regex match should fire, got %d", len(matches)) + } +} + +func TestExecutor_HeaderPart(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-Powered-By", "Express") + w.WriteHeader(200) + })) + defer srv.Close() + + tpl := mkTemplate("test-header", + "{{BaseURL}}/", + []Matcher{{Type: "word", Part: "header", Words: []string{"X-Powered-By"}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("header matcher should fire, got %d", len(matches)) + } +} + +func TestExecutor_NoMatchReturnsEmpty(t *testing.T) { + srv 
:= httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("nothing interesting")) + })) + defer srv.Close() + + tpl := mkTemplate("test-nomatch", + "{{BaseURL}}/", + []Matcher{{Type: "word", Part: "body", Words: []string{"definitely_not_here"}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 0 { + t.Errorf("non-match should return empty, got %d", len(matches)) + } +} + +func TestExpandPath(t *testing.T) { + cases := []struct { + path, base, want string + }{ + {"{{BaseURL}}/admin", "https://example.com", "https://example.com/admin"}, + {"{{Hostname}}/x", "https://api.example.com/v1", "api.example.com/x"}, + {"{{RootURL}}/r", "http://sub.example.com/deep/path", "http://sub.example.com/r"}, + {"/static/admin", "https://x.com", "/static/admin"}, + } + for _, c := range cases { + if got := expandPath(c.path, c.base); got != c.want { + t.Errorf("expandPath(%q, %q) = %q, want %q", c.path, c.base, got, c.want) + } + } +} + +func TestExtractCVEs(t *testing.T) { + cves := extractCVEs("cve-2021-23017-nginx", []string{ + "https://nvd.nist.gov/vuln/detail/CVE-2021-23017", // dup of ID after upper-casing + "https://example.com/adv/CVE-2020-15168", + }) + if len(cves) != 2 { + t.Errorf("expected 2 unique CVE IDs, got %d: %v", len(cves), cves) + } + if cves[0] != "CVE-2021-23017" || cves[1] != "CVE-2020-15168" { + t.Errorf("unexpected order: %v", cves) + } +} + +func TestExecutor_UnsupportedTemplateNoop(t *testing.T) { + tpl := &Template{ + ID: "dns-tpl", + DNS: []string{"placeholder"}, + } + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, "https://example.com") + if len(matches) != 0 { + t.Errorf("unsupported template should return no matches, got %d", len(matches)) + } +} diff --git a/internal/nucleitpl/template.go b/internal/nucleitpl/template.go new file mode 100644 index 0000000..08bf082 --- 
/dev/null +++ b/internal/nucleitpl/template.go @@ -0,0 +1,302 @@ +// Package nucleitpl parses and executes a subset of the Nuclei YAML +// template format. The goal is to run community HTTP templates unchanged +// so God's Eye gets access to the ~8000-template ecosystem without +// reimplementing detections one-by-one. +// +// Supported subset (covers roughly 70% of HTTP templates in the public +// nuclei-templates repo at time of writing): +// +// - Top-level: id, info { name, severity, description, tags, author } +// - Protocol: requests: (aliased as http: in newer templates) +// - Per-request: method, path (with {{BaseURL}}/{{Hostname}} substitution), +// headers, body, redirects (bool), matchers-condition (and|or) +// - Matchers: type=word (word|part|condition), +// type=regex (regex|part), +// type=status (status), +// type=size (size) +// - Severity mapping: info/low/medium/high/critical +// +// Out of scope (templates using these are skipped with a reason logged): +// +// - Protocols other than http: dns, ssl, network, file, code, javascript, +// workflow, headless, flow +// - Pre-conditions, payloads, extractors, dynamic variables, +// stop-at-first-match, cluster, self-contained +// - Interactsh (OOB) — requires a callback server we don't ship yet +// - Fuzzing templates +// +// A skipped template logs via the returned diagnostic; the executor never +// panics on an unsupported template. +package nucleitpl + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" +) + +// Template is the parsed form of a Nuclei YAML file. +type Template struct { + ID string `yaml:"id"` + Info Info `yaml:"info"` + Requests []HTTPRequest `yaml:"requests,omitempty"` + HTTP []HTTPRequest `yaml:"http,omitempty"` // newer alias for requests + // Unsupported protocols — presence triggers skip with reason. 
+ DNS interface{} `yaml:"dns,omitempty"` + SSL interface{} `yaml:"ssl,omitempty"` + Network interface{} `yaml:"network,omitempty"` + File interface{} `yaml:"file,omitempty"` + Code interface{} `yaml:"code,omitempty"` + Headless interface{} `yaml:"headless,omitempty"` + Workflow interface{} `yaml:"workflows,omitempty"` + // SourcePath is populated by Load so diagnostics can reference the file. + SourcePath string `yaml:"-"` +} + +// Info is the template metadata block. +type Info struct { + Name string `yaml:"name"` + Author string `yaml:"author,omitempty"` + Severity string `yaml:"severity"` + Description string `yaml:"description,omitempty"` + Reference []string `yaml:"reference,omitempty"` + Tags string `yaml:"tags,omitempty"` +} + +// HTTPRequest is one HTTP interaction in a template. +type HTTPRequest struct { + Method string `yaml:"method,omitempty"` // default GET + Path []string `yaml:"path"` + Headers map[string]string `yaml:"headers,omitempty"` + Body string `yaml:"body,omitempty"` + Redirects bool `yaml:"redirects,omitempty"` + MaxRedirects int `yaml:"max-redirects,omitempty"` + MatchersCondition string `yaml:"matchers-condition,omitempty"` // "and" | "or" (default "or") + Matchers []Matcher `yaml:"matchers"` + // Unsupported fields that, if present with values, trigger a skip. + Payloads interface{} `yaml:"payloads,omitempty"` + Extractors interface{} `yaml:"extractors,omitempty"` + Fuzzing interface{} `yaml:"fuzzing,omitempty"` + Unsafe bool `yaml:"unsafe,omitempty"` + Attack string `yaml:"attack,omitempty"` + Raw []string `yaml:"raw,omitempty"` + Pipeline bool `yaml:"pipeline,omitempty"` + Threads int `yaml:"threads,omitempty"` + StopAtFirst bool `yaml:"stop-at-first-match,omitempty"` +} + +// Matcher is a single match rule within a request. 
+type Matcher struct { + Type string `yaml:"type"` // word | regex | status | size | dsl | binary + Part string `yaml:"part,omitempty"` // header | body | response (default body) + Condition string `yaml:"condition,omitempty"` // and | or (default or) + Negative bool `yaml:"negative,omitempty"` + Words []string `yaml:"words,omitempty"` + Regex []string `yaml:"regex,omitempty"` + Status []int `yaml:"status,omitempty"` + Size []int `yaml:"size,omitempty"` + // Unsupported — presence marks the matcher unusable. + DSL []string `yaml:"dsl,omitempty"` + Binary []string `yaml:"binary,omitempty"` +} + +// Load parses a single YAML file into a Template. Malformed YAML or empty +// files return (nil, err); structurally valid YAML that references unused +// protocols still Load successfully — IsSupported/IsSupported reason tell +// the caller whether to execute it. +func Load(path string) (*Template, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var t Template + if err := yaml.Unmarshal(data, &t); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + if t.ID == "" { + return nil, fmt.Errorf("parse %s: missing id field", path) + } + t.SourcePath = path + // Normalize requests vs http alias. + if len(t.Requests) == 0 && len(t.HTTP) > 0 { + t.Requests = t.HTTP + } + return &t, nil +} + +// LoadDir walks dir recursively, loads every .yaml / .yml file, and +// returns the slice of successfully-parsed templates. Parse errors are +// collected into the returned diagnostics slice but do not stop the walk. 
+func LoadDir(dir string) ([]*Template, []string, error) { + var tpls []*Template + var diags []string + + err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil // skip unreadable files silently + } + if info.IsDir() { + return nil + } + ext := strings.ToLower(filepath.Ext(path)) + if ext != ".yaml" && ext != ".yml" { + return nil + } + t, err := Load(path) + if err != nil { + diags = append(diags, fmt.Sprintf("parse %s: %v", path, err)) + return nil + } + tpls = append(tpls, t) + return nil + }) + return tpls, diags, err +} + +// TargetsCurrentHost reports whether every request path in the template +// is scoped to the scanned host — i.e. uses {{BaseURL}}, {{Hostname}}, +// {{RootURL}}, or a leading "/". Templates with absolute URLs to +// third-party services (common in OSINT / user-presence checks) would +// otherwise fire against unrelated hosts with unresolved placeholders +// like {{user}} — and their matchers often succeed on whatever generic +// response the third party returns, producing high-volume false +// positives against a single-target scan. +// +// Returns false + reason when any request path is off-host. +func (t *Template) TargetsCurrentHost() (bool, string) { + for i, r := range t.Requests { + for j, p := range r.Path { + ok := false + switch { + case strings.HasPrefix(p, "{{BaseURL}}"), + strings.HasPrefix(p, "{{Hostname}}"), + strings.HasPrefix(p, "{{RootURL}}"), + strings.HasPrefix(p, "/"): + ok = true + } + if !ok { + // Also allow the special case where the path is exactly + // a template variable (no literal text). 
+ if p == "{{BaseURL}}" || p == "{{Hostname}}" || p == "{{RootURL}}" { + ok = true + } + } + if !ok { + return false, fmt.Sprintf("request[%d].path[%d] %q does not target the scanned host", i, j, truncateStr(p, 60)) + } + } + } + return true, "" +} + +func truncateStr(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "…" +} + +// IsSupported returns (true, "") when the template uses only features +// understood by the executor. Templates that would need unsupported +// protocols, payloads, extractors, or fuzzing return (false, reason). +// Templates that target third-party hosts (OSINT-style user lookups) +// also return false to prevent spurious matches during targeted scans. +func (t *Template) IsSupported() (bool, string) { + if t == nil { + return false, "nil template" + } + if t.DNS != nil { + return false, "dns protocol (unsupported)" + } + if t.SSL != nil { + return false, "ssl protocol (unsupported)" + } + if t.Network != nil { + return false, "network protocol (unsupported)" + } + if t.File != nil { + return false, "file protocol (unsupported)" + } + if t.Code != nil { + return false, "code protocol (unsupported)" + } + if t.Headless != nil { + return false, "headless protocol (unsupported)" + } + if t.Workflow != nil { + return false, "workflow (unsupported)" + } + if len(t.Requests) == 0 { + return false, "no http requests" + } + for i, r := range t.Requests { + if r.Payloads != nil { + return false, fmt.Sprintf("request[%d] uses payloads (unsupported)", i) + } + if r.Extractors != nil { + // Tolerate extractors on the first pass; we ignore them. + // Templates with only extractors still run; their findings are + // just matcher-based. 
+ } + if r.Fuzzing != nil { + return false, fmt.Sprintf("request[%d] uses fuzzing (unsupported)", i) + } + if r.Unsafe { + return false, fmt.Sprintf("request[%d] is unsafe (raw TCP)", i) + } + if len(r.Raw) > 0 { + return false, fmt.Sprintf("request[%d] uses raw (unsupported)", i) + } + if len(r.Path) == 0 { + return false, fmt.Sprintf("request[%d] has no path", i) + } + if len(r.Matchers) == 0 { + return false, fmt.Sprintf("request[%d] has no matchers", i) + } + for j, m := range r.Matchers { + switch m.Type { + case "word", "regex", "status", "size": + // supported + case "dsl", "binary": + return false, fmt.Sprintf("request[%d].matcher[%d] type=%s (unsupported)", i, j, m.Type) + default: + return false, fmt.Sprintf("request[%d].matcher[%d] type=%s (unknown)", i, j, m.Type) + } + } + } + // Scope check: skip templates that probe third-party hosts. + if ok, reason := t.TargetsCurrentHost(); !ok { + return false, reason + } + return true, "" +} + +// Severity returns the OWASP-style severity, defaulting to "info" when +// the template omits it. +func (t *Template) Severity() string { + s := strings.ToLower(strings.TrimSpace(t.Info.Severity)) + switch s { + case "critical", "high", "medium", "low", "info": + return s + default: + return "info" + } +} + +// Tags returns the comma-separated tags as a string slice. 
+func (t *Template) Tags() []string { + if t.Info.Tags == "" { + return nil + } + var out []string + for _, p := range strings.Split(t.Info.Tags, ",") { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + return out +} diff --git a/internal/nucleitpl/template_test.go b/internal/nucleitpl/template_test.go new file mode 100644 index 0000000..fa4e8bc --- /dev/null +++ b/internal/nucleitpl/template_test.go @@ -0,0 +1,229 @@ +package nucleitpl + +import ( + "os" + "path/filepath" + "testing" +) + +const sampleSupported = ` +id: test-basic-word-match +info: + name: Test Basic Word Match + author: vyntral + severity: high + description: Fires when response body contains 'phpinfo' + tags: exposure,php + reference: + - https://example.com/advisory/CVE-2021-12345 +requests: + - method: GET + path: + - "{{BaseURL}}/phpinfo.php" + matchers: + - type: word + part: body + words: + - "PHP Version" + - type: status + status: + - 200 + matchers-condition: and +` + +const sampleUnsupportedDNS = ` +id: test-dns +info: + name: Test DNS + severity: medium +dns: + - name: "{{FQDN}}" + type: TXT + matchers: + - type: word + words: ["v=spf"] +` + +const sampleUnsupportedPayloads = ` +id: test-payloads +info: + name: Test Payloads + severity: low +requests: + - method: GET + path: + - "{{BaseURL}}/{{word}}" + payloads: + word: + - admin + - backup + matchers: + - type: status + status: [200] +` + +const sampleBadYAML = ` +id: [unclosed +info: + name: +` + +func writeTmp(t *testing.T, name, content string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + return path +} + +func TestLoad_Supported(t *testing.T) { + path := writeTmp(t, "ok.yaml", sampleSupported) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + if tpl.ID != "test-basic-word-match" { + t.Errorf("ID = %q", tpl.ID) + } + if tpl.Info.Severity != "high" { + t.Errorf("Severity = 
%q", tpl.Info.Severity) + } + if len(tpl.Requests) != 1 { + t.Fatalf("Requests len = %d", len(tpl.Requests)) + } + r := tpl.Requests[0] + if r.Path[0] != "{{BaseURL}}/phpinfo.php" { + t.Errorf("Path[0] = %q", r.Path[0]) + } + if len(r.Matchers) != 2 { + t.Errorf("Matchers len = %d", len(r.Matchers)) + } + if r.MatchersCondition != "and" { + t.Errorf("MatchersCondition = %q", r.MatchersCondition) + } + if ok, reason := tpl.IsSupported(); !ok { + t.Errorf("should be supported; reason=%q", reason) + } + if tags := tpl.Tags(); len(tags) != 2 || tags[0] != "exposure" { + t.Errorf("Tags = %v", tags) + } +} + +func TestLoad_DNSUnsupported(t *testing.T) { + path := writeTmp(t, "dns.yaml", sampleUnsupportedDNS) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + ok, reason := tpl.IsSupported() + if ok { + t.Error("dns template should be unsupported") + } + if reason == "" { + t.Error("expected non-empty reason") + } +} + +func TestLoad_PayloadsUnsupported(t *testing.T) { + path := writeTmp(t, "payloads.yaml", sampleUnsupportedPayloads) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + ok, reason := tpl.IsSupported() + if ok { + t.Error("payloads template should be unsupported") + } + if reason == "" { + t.Error("expected non-empty reason") + } +} + +func TestLoad_BadYAML(t *testing.T) { + path := writeTmp(t, "bad.yaml", sampleBadYAML) + if _, err := Load(path); err == nil { + t.Error("expected parse error") + } +} + +func TestLoad_MissingID(t *testing.T) { + path := writeTmp(t, "noid.yaml", "info:\n severity: low\n") + if _, err := Load(path); err == nil { + t.Error("expected missing id error") + } +} + +func TestLoadDir(t *testing.T) { + dir := t.TempDir() + _ = os.WriteFile(filepath.Join(dir, "a.yaml"), []byte(sampleSupported), 0o644) + _ = os.WriteFile(filepath.Join(dir, "b.yaml"), []byte(sampleUnsupportedDNS), 0o644) + _ = os.WriteFile(filepath.Join(dir, "c.yml"), []byte(sampleSupported), 0o644) + _ = os.WriteFile(filepath.Join(dir, "d.bad"), 
[]byte("???"), 0o644) + _ = os.WriteFile(filepath.Join(dir, "e.yaml"), []byte(sampleBadYAML), 0o644) + + sub := filepath.Join(dir, "nested") + _ = os.MkdirAll(sub, 0o755) + _ = os.WriteFile(filepath.Join(sub, "f.yaml"), []byte(sampleSupported), 0o644) + + tpls, diags, err := LoadDir(dir) + if err != nil { + t.Fatal(err) + } + // 3 supported (a, c, nested/f), 1 dns (b), 1 parse error (e). .bad ignored. + if got := len(tpls); got != 4 { + t.Errorf("loaded = %d, want 4 (3 supported + 1 dns)", got) + } + if len(diags) != 1 { + t.Errorf("diags = %d, want 1", len(diags)) + } +} + +func TestSeverity_Default(t *testing.T) { + tpl := &Template{Info: Info{Severity: "UNKNOWN"}} + if sev := tpl.Severity(); sev != "info" { + t.Errorf("got %q, want info", sev) + } +} + +func TestSeverity_Normalized(t *testing.T) { + for input, want := range map[string]string{ + "critical": "critical", + "HIGH": "high", + " Medium ": "medium", + "LOW": "low", + "info": "info", + "": "info", + } { + tpl := &Template{Info: Info{Severity: input}} + if got := tpl.Severity(); got != want { + t.Errorf("Severity(%q) = %q, want %q", input, got, want) + } + } +} + +func TestHTTPAlias(t *testing.T) { + content := ` +id: http-alias +info: + severity: low +http: + - method: GET + path: ["{{BaseURL}}/"] + matchers: + - type: status + status: [200] +` + path := writeTmp(t, "http.yaml", content) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + if len(tpl.Requests) != 1 { + t.Errorf("expected http: to be aliased to Requests, got %d", len(tpl.Requests)) + } + if ok, _ := tpl.IsSupported(); !ok { + t.Error("http alias template should be supported") + } +} diff --git a/internal/output/print.go b/internal/output/print.go index 8834742..be737fe 100644 --- a/internal/output/print.go +++ b/internal/output/print.go @@ -50,11 +50,10 @@ func PrintBanner() { fmt.Println(BoldWhite(" ╚██████╔╝╚██████╔╝██████╔╝") + BoldGreen("███████║") + BoldWhite(" ███████╗ ██║ ███████╗")) fmt.Println(BoldWhite(" ╚═════╝ 
╚═════╝ ╚═════╝ ") + BoldGreen("╚══════╝") + BoldWhite(" ╚══════╝ ╚═╝ ╚══════╝")) fmt.Println() - fmt.Printf(" %s %s\n", BoldGreen("⚡"), Dim("AI-powered attack surface discovery & security analysis")) - fmt.Printf(" %s %s %s %s %s %s\n", - Dim("Version:"), BoldGreen("0.1"), - Dim("By:"), White("github.com/Vyntral"), - Dim("For:"), Yellow("github.com/Orizon-eu")) + fmt.Printf(" %s %s\n", BoldGreen("⚡"), Dim("AI-powered attack surface discovery & offensive security analysis")) + fmt.Printf(" %s %s %s %s\n", + Dim("Version:"), BoldGreen("2.0.0-rc1"), + Dim("By:"), White("github.com/Vyntral")) fmt.Println() } diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go new file mode 100644 index 0000000..bbe4de4 --- /dev/null +++ b/internal/pipeline/pipeline.go @@ -0,0 +1,278 @@ +// Package pipeline coordinates v2 module execution. It builds a Module list +// from the registry, applies the ConfigView filter, then runs every selected +// module concurrently under a shared event bus and store. +// +// Unlike the legacy scanner.Run, this coordinator does NO domain-specific +// work of its own. Every phase (passive, brute, resolve, probe, security, +// AI, reporting) is a Module. Ordering emerges from events, with explicit +// phase barriers for phases that must complete before downstream begins. +package pipeline + +import ( + "context" + "errors" + "fmt" + "sort" + "sync" + "time" + + "god-eye/internal/config" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +// Pipeline is the v2 scan coordinator. +type Pipeline struct { + cfg *config.Config + view *config.View + bus *eventbus.Bus + store store.Store + modReg *module.Registry + + // ownBus / ownStore indicate resources created by this Pipeline that + // must be closed on Shutdown. Injected resources are left to the caller. + ownBus bool + ownStore bool +} + +// Options are optional overrides for New. Empty fields mean "use defaults". 
+type Options struct { + Bus *eventbus.Bus // injected bus; defaults to a new one + Store store.Store // injected store; defaults to NewMemoryStore + Registry *module.Registry // registry to draw modules from; defaults to module.Default() + Buffer int // bus buffer size when creating default bus +} + +// New creates a Pipeline from cfg and opts. The pipeline is ready to Run. +// A non-nil Config is required. +func New(cfg *config.Config, opts Options) (*Pipeline, error) { + if cfg == nil { + return nil, errors.New("pipeline.New: nil config") + } + + p := &Pipeline{ + cfg: cfg, + view: config.NewView(cfg), + modReg: opts.Registry, + } + if p.modReg == nil { + p.modReg = module.Default() + } + + if opts.Bus != nil { + p.bus = opts.Bus + } else { + buf := opts.Buffer + if buf <= 0 { + buf = 4096 + } + p.bus = eventbus.New(buf) + p.ownBus = true + } + + if opts.Store != nil { + p.store = opts.Store + } else { + p.store = store.NewMemoryStore() + p.ownStore = true + } + + return p, nil +} + +// Bus returns the underlying event bus. Useful for attaching external +// subscribers (TUI, metrics, log sinks) before calling Run. +func (p *Pipeline) Bus() *eventbus.Bus { return p.bus } + +// Store returns the underlying store. Useful for post-scan querying or +// report generation outside of modules. +func (p *Pipeline) Store() store.Store { return p.store } + +// Run executes the selected modules. Returns when every module has exited +// OR ctx is canceled. The returned error aggregates any module errors via +// errors.Join. +// +// Execution semantics: +// - ScanStarted is published first. +// - Modules are grouped by Phase; each Phase is a barrier: phase N starts +// only after every module in phase N-1 has returned. +// - Within a phase, every module runs concurrently on its own goroutine. +// - When all phases complete, ScanCompleted is published with stats, then +// the bus is drained (if owned) and Shutdown is called. 
+func (p *Pipeline) Run(ctx context.Context) error { + selected := p.modReg.Select(p.view) + if len(selected) == 0 { + return errors.New("pipeline.Run: no modules selected — check config and module registrations") + } + + // Group modules by phase. + byPhase := make(map[module.Phase][]module.Module) + for _, m := range selected { + byPhase[m.Phase()] = append(byPhase[m.Phase()], m) + } + + // Sort modules within each phase for deterministic start order. + for _, ms := range byPhase { + sort.SliceStable(ms, func(i, j int) bool { return ms[i].Name() < ms[j].Name() }) + } + + started := time.Now() + p.publishScanStarted() + + var moduleErrs []error + var errsMu sync.Mutex + + // Iterate phases in canonical order. + for _, phase := range phaseOrder { + modules := byPhase[phase] + if len(modules) == 0 { + continue + } + + phaseStart := time.Now() + p.publishPhaseStarted(phase) + + var wg sync.WaitGroup + for _, m := range modules { + m := m + wg.Add(1) + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + p.publishModuleError(m.Name(), fmt.Errorf("panic: %v", r), true) + errsMu.Lock() + moduleErrs = append(moduleErrs, fmt.Errorf("%s panicked: %v", m.Name(), r)) + errsMu.Unlock() + } + }() + + mctx := module.Context{ + Ctx: ctx, + Bus: p.bus, + Store: p.store, + Config: p.view, + Target: p.cfg.Domain, + Profile: p.cfg.Profile, + } + if err := m.Run(mctx); err != nil && !errors.Is(err, context.Canceled) { + p.publishModuleError(m.Name(), err, false) + errsMu.Lock() + moduleErrs = append(moduleErrs, fmt.Errorf("%s: %w", m.Name(), err)) + errsMu.Unlock() + } + }() + } + + // Wait for this phase OR for ctx cancellation. 
+ done := make(chan struct{}) + go func() { wg.Wait(); close(done) }() + + select { + case <-done: + // normal completion + case <-ctx.Done(): + // wait (bounded) for goroutines to observe the cancellation + wg.Wait() + } + + p.publishPhaseCompleted(phase, time.Since(phaseStart)) + + if ctx.Err() != nil { + break + } + } + + p.publishScanCompleted(time.Since(started)) + + if p.ownBus { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _ = p.bus.Close(shutdownCtx) + } + + if len(moduleErrs) > 0 { + return errors.Join(moduleErrs...) + } + return ctx.Err() +} + +// Shutdown explicitly closes owned resources. Normally Run calls Shutdown +// automatically; use this when you want to reuse the pipeline or manage +// lifecycle externally. +func (p *Pipeline) Shutdown(ctx context.Context) error { + var errs []error + if p.ownBus { + if err := p.bus.Close(ctx); err != nil { + errs = append(errs, err) + } + } + if p.ownStore { + if err := p.store.Close(); err != nil { + errs = append(errs, err) + } + } + return errors.Join(errs...) +} + +// phaseOrder is the canonical sequence of pipeline phases. Modules may also +// declare phases not in this list — those are executed at the end in arbitrary +// order (but all still before ScanCompleted). 
+var phaseOrder = []module.Phase{ + module.PhaseSetup, + module.PhaseDiscovery, + module.PhaseResolution, + module.PhaseEnrichment, + module.PhaseAnalysis, + module.PhaseReporting, +} + +// --- event publishing helpers --- + +func (p *Pipeline) publishScanStarted() { + p.bus.Publish(context.Background(), eventbus.ScanStarted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Target: p.cfg.Domain, + Profile: p.cfg.Profile, + }) +} + +func (p *Pipeline) publishScanCompleted(d time.Duration) { + stats := map[string]int64{ + "hosts": int64(p.store.Count(context.Background())), + "published": int64(p.bus.Stats().Published), + "delivered": int64(p.bus.Stats().Delivered), + "dropped": int64(p.bus.Stats().Dropped), + } + p.bus.Publish(context.Background(), eventbus.ScanCompleted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Target: p.cfg.Domain, + Duration: d, + Stats: stats, + }) +} + +func (p *Pipeline) publishPhaseStarted(phase module.Phase) { + p.bus.Publish(context.Background(), eventbus.PhaseStarted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Phase: string(phase), + }) +} + +func (p *Pipeline) publishPhaseCompleted(phase module.Phase, d time.Duration) { + p.bus.Publish(context.Background(), eventbus.PhaseCompleted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Phase: string(phase), + Duration: d, + }) +} + +func (p *Pipeline) publishModuleError(name string, err error, fatal bool) { + p.bus.Publish(context.Background(), eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: name, Target: p.cfg.Domain}, + Module: name, + Err: err.Error(), + Fatal: fatal, + }) +} diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go new file mode 100644 index 0000000..9b8f21f --- /dev/null +++ b/internal/pipeline/pipeline_test.go @@ -0,0 +1,285 @@ 
+package pipeline + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "god-eye/internal/config" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +// --- test doubles -------------------------------------------------------- + +type spyModule struct { + name string + phase module.Phase + run func(mctx module.Context) error + calls atomic.Int32 + enabled bool +} + +func (s *spyModule) Name() string { return s.name } +func (s *spyModule) Phase() module.Phase { return s.phase } +func (s *spyModule) Consumes() []eventbus.EventType { return nil } +func (s *spyModule) Produces() []eventbus.EventType { return nil } +func (s *spyModule) DefaultEnabled() bool { return s.enabled } +func (s *spyModule) Run(mctx module.Context) error { + s.calls.Add(1) + if s.run != nil { + return s.run(mctx) + } + return nil +} + +func mkModule(name string, phase module.Phase, enabled bool) *spyModule { + return &spyModule{name: name, phase: phase, enabled: enabled} +} + +func TestPipeline_RunsAllEnabledModules(t *testing.T) { + r := module.NewRegistry() + a := mkModule("a", module.PhaseDiscovery, true) + b := mkModule("b", module.PhaseEnrichment, true) + c := mkModule("c", module.PhaseReporting, true) + off := mkModule("off", module.PhaseDiscovery, false) + r.Register(a) + r.Register(b) + r.Register(c) + r.Register(off) + + cfg := &config.Config{Domain: "example.com"} + p, err := New(cfg, Options{Registry: r}) + if err != nil { + t.Fatal(err) + } + + if err := p.Run(context.Background()); err != nil { + t.Fatalf("Run error: %v", err) + } + + if a.calls.Load() != 1 { + t.Errorf("a not called: %d", a.calls.Load()) + } + if b.calls.Load() != 1 { + t.Errorf("b not called") + } + if c.calls.Load() != 1 { + t.Errorf("c not called") + } + if off.calls.Load() != 0 { + t.Errorf("disabled module was called: %d", off.calls.Load()) + } +} + +func TestPipeline_PhaseBarrier(t *testing.T) { + // Phase B must see A's events before B's 
module runs. + r := module.NewRegistry() + + var aDone atomic.Bool + a := mkModule("producer", module.PhaseDiscovery, true) + a.run = func(mctx module.Context) error { + mctx.Bus.Publish(mctx.Ctx, eventbus.NewSubdomainDiscovered("test", "x.example.com", "p")) + time.Sleep(30 * time.Millisecond) + aDone.Store(true) + return nil + } + + var sawBefore atomic.Int32 + b := mkModule("consumer", module.PhaseEnrichment, true) + b.run = func(mctx module.Context) error { + if !aDone.Load() { + sawBefore.Add(1) + } + return nil + } + r.Register(a) + r.Register(b) + + p, err := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + if err != nil { + t.Fatal(err) + } + if err := p.Run(context.Background()); err != nil { + t.Fatalf("Run error: %v", err) + } + if sawBefore.Load() != 0 { + t.Errorf("phase barrier broken: consumer ran while producer was still running (%d times)", sawBefore.Load()) + } +} + +func TestPipeline_CollectsErrors(t *testing.T) { + r := module.NewRegistry() + good := mkModule("good", module.PhaseDiscovery, true) + failA := mkModule("fail-a", module.PhaseDiscovery, true) + failA.run = func(_ module.Context) error { return errors.New("boom-a") } + failB := mkModule("fail-b", module.PhaseAnalysis, true) + failB.run = func(_ module.Context) error { return errors.New("boom-b") } + r.Register(good) + r.Register(failA) + r.Register(failB) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + err := p.Run(context.Background()) + if err == nil { + t.Fatal("expected aggregated error") + } + if !contains(err.Error(), "boom-a") || !contains(err.Error(), "boom-b") { + t.Errorf("aggregated error missing parts: %v", err) + } +} + +func TestPipeline_PanicIsContained(t *testing.T) { + r := module.NewRegistry() + panicker := mkModule("panicker", module.PhaseDiscovery, true) + panicker.run = func(_ module.Context) error { panic("oops") } + r.Register(panicker) + r.Register(mkModule("normal", module.PhaseReporting, true)) + + p, _ := 
New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + err := p.Run(context.Background()) + if err == nil { + t.Fatal("expected error from panic") + } + if !contains(err.Error(), "panicked") { + t.Errorf("error doesn't mention panic: %v", err) + } +} + +func TestPipeline_RespectsCtxCancellation(t *testing.T) { + r := module.NewRegistry() + + slow := mkModule("slow", module.PhaseDiscovery, true) + var slowRan atomic.Bool + slow.run = func(mctx module.Context) error { + slowRan.Store(true) + <-mctx.Ctx.Done() + return mctx.Ctx.Err() + } + never := mkModule("never", module.PhaseAnalysis, true) + var neverRan atomic.Bool + never.run = func(_ module.Context) error { + neverRan.Store(true) + return nil + } + r.Register(slow) + r.Register(never) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _ = p.Run(ctx) + if !slowRan.Load() { + t.Error("slow should have run") + } + // never is in phase after slow, and phase B starts only after A finishes. + // Since slow exits when ctx is canceled, pipeline breaks out before + // scheduling phase B. never must NOT run. 
+ if neverRan.Load() { + t.Error("never should NOT have run after cancellation") + } +} + +func TestPipeline_PublishesScanEvents(t *testing.T) { + r := module.NewRegistry() + r.Register(mkModule("tiny", module.PhaseDiscovery, true)) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r, Bus: eventbus.New(128)}) + + started := make(chan struct{}, 1) + completed := make(chan struct{}, 1) + + p.Bus().Subscribe(eventbus.EventScanStarted, func(_ context.Context, _ eventbus.Event) { + select { + case started <- struct{}{}: + default: + } + }) + p.Bus().Subscribe(eventbus.EventScanCompleted, func(_ context.Context, _ eventbus.Event) { + select { + case completed <- struct{}{}: + default: + } + }) + + _ = p.Run(context.Background()) + + select { + case <-started: + case <-time.After(2 * time.Second): + t.Fatal("ScanStarted not fired") + } + select { + case <-completed: + case <-time.After(2 * time.Second): + t.Fatal("ScanCompleted not fired") + } +} + +func TestPipeline_ModulesShareStore(t *testing.T) { + r := module.NewRegistry() + + writer := mkModule("writer", module.PhaseDiscovery, true) + writer.run = func(mctx module.Context) error { + return mctx.Store.Upsert(mctx.Ctx, "a.example.com", func(h *store.Host) { + h.IPs = []string{"1.2.3.4"} + }) + } + + var readerSaw int + var readerMu sync.Mutex + reader := mkModule("reader", module.PhaseReporting, true) + reader.run = func(mctx module.Context) error { + readerMu.Lock() + defer readerMu.Unlock() + readerSaw = mctx.Store.Count(mctx.Ctx) + return nil + } + + r.Register(writer) + r.Register(reader) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + if err := p.Run(context.Background()); err != nil { + t.Fatal(err) + } + + readerMu.Lock() + defer readerMu.Unlock() + if readerSaw != 1 { + t.Errorf("reader saw %d hosts, want 1", readerSaw) + } +} + +func TestPipeline_RejectsNilConfig(t *testing.T) { + _, err := New(nil, Options{}) + if err == nil { + t.Error("expected error 
for nil config") + } +} + +func TestPipeline_EmptyRegistry_Errors(t *testing.T) { + r := module.NewRegistry() // empty + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + if err := p.Run(context.Background()); err == nil { + t.Error("expected error when no modules selected") + } +} + +func contains(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} diff --git a/internal/proxyconf/proxy.go b/internal/proxyconf/proxy.go new file mode 100644 index 0000000..45f69a8 --- /dev/null +++ b/internal/proxyconf/proxy.go @@ -0,0 +1,174 @@ +// Package proxyconf centralises outbound-proxy configuration for the +// HTTP and (where possible) DNS clients used across God's Eye modules. +// +// Why this lives in its own package: every source/probe/module needs to +// honour the same proxy setting, and duplicating URL parsing + dialer +// wiring across `internal/http`, `internal/sources`, and individual +// modules would be a fountain of bugs. This package is the single +// source of truth. +// +// Supported schemes: +// +// "" → direct (no proxy) +// http://host:port → HTTP CONNECT proxy (e.g. Burp, ZAP, mitmproxy) +// https://host:port → HTTPS CONNECT proxy +// socks5://host:port → SOCKS5 (DNS resolved locally by god-eye) +// socks5h://host:port → SOCKS5 (DNS resolved by the proxy — Tor convention) +// +// Basic auth (http://user:pass@host) is honoured for every scheme. +// +// DNS-over-SOCKS caveat: Go's net package uses the OS resolver by default, +// which does NOT route through SOCKS. `socks5h://` only applies to HTTP +// requests — the brute-force DNS resolver (`internal/dns`) continues to +// hit its configured resolvers directly. Users who need full Tor +// isolation for DNS should run god-eye inside a torsocks-wrapped shell +// or a netns with all traffic captured. 
+package proxyconf + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "strings" + + "golang.org/x/net/proxy" +) + +// DialFunc is the signature used by http.Transport.DialContext. +type DialFunc func(ctx context.Context, network, addr string) (net.Conn, error) + +// ProxyFunc is the signature used by http.Transport.Proxy. +type ProxyFunc func(*http.Request) (*url.URL, error) + +// Validate returns a descriptive error if proxyURL is non-empty and +// doesn't parse to a supported scheme. Call this early (e.g. during +// validator.ValidateXxx) so bad flags fail before module startup. +func Validate(proxyURL string) error { + proxyURL = strings.TrimSpace(proxyURL) + if proxyURL == "" { + return nil + } + u, err := url.Parse(proxyURL) + if err != nil { + return fmt.Errorf("proxy URL malformed: %w", err) + } + if u.Host == "" { + return errors.New("proxy URL missing host:port") + } + switch strings.ToLower(u.Scheme) { + case "http", "https", "socks5", "socks5h": + return nil + default: + return fmt.Errorf("unsupported proxy scheme %q (use http/https/socks5/socks5h)", u.Scheme) + } +} + +// BuildDialer returns a DialFunc that routes TCP through the configured +// proxy. For HTTP(S) CONNECT proxies (handled at the transport layer via +// Proxy field), this returns a direct dialer — the transport layer does +// the CONNECT dance itself. +// +// For empty proxyURL, returns the direct-dialer from net.Dialer. +func BuildDialer(proxyURL string, base *net.Dialer) (DialFunc, error) { + if base == nil { + base = &net.Dialer{} + } + if strings.TrimSpace(proxyURL) == "" { + return base.DialContext, nil + } + u, err := url.Parse(proxyURL) + if err != nil { + return nil, err + } + switch strings.ToLower(u.Scheme) { + case "http", "https": + // CONNECT proxy — direct TCP, Transport.Proxy handles the handshake. 
+ return base.DialContext, nil + case "socks5", "socks5h": + var auth *proxy.Auth + if u.User != nil { + pass, _ := u.User.Password() + auth = &proxy.Auth{User: u.User.Username(), Password: pass} + } + // proxy.Direct is the fallthrough dialer — we pass our base so + // timeouts/keepalive settings are preserved. + dialer, err := proxy.SOCKS5("tcp", u.Host, auth, &directAdapter{base: base}) + if err != nil { + return nil, fmt.Errorf("create SOCKS5 dialer: %w", err) + } + if ctxDialer, ok := dialer.(proxy.ContextDialer); ok { + return ctxDialer.DialContext, nil + } + // Older x/net versions: wrap non-context Dial with ctx-aware shim. + return func(ctx context.Context, network, addr string) (net.Conn, error) { + type result struct { + conn net.Conn + err error + } + ch := make(chan result, 1) + go func() { + c, e := dialer.Dial(network, addr) + ch <- result{c, e} + }() + select { + case r := <-ch: + return r.conn, r.err + case <-ctx.Done(): + return nil, ctx.Err() + } + }, nil + default: + return nil, fmt.Errorf("unsupported proxy scheme: %s", u.Scheme) + } +} + +// BuildProxyFunc returns the http.Transport.Proxy callback for HTTP(S) +// CONNECT proxies. Returns nil for SOCKS5 (handled by the dialer) and +// for empty proxyURL. +func BuildProxyFunc(proxyURL string) (ProxyFunc, error) { + if strings.TrimSpace(proxyURL) == "" { + return nil, nil + } + u, err := url.Parse(proxyURL) + if err != nil { + return nil, err + } + switch strings.ToLower(u.Scheme) { + case "http", "https": + return http.ProxyURL(u), nil + case "socks5", "socks5h": + return nil, nil + } + return nil, fmt.Errorf("unsupported proxy scheme: %s", u.Scheme) +} + +// Humanize returns a redacted, user-facing description of the proxy. +// Strips credentials so logs don't leak tokens. 
func Humanize(proxyURL string) string {
	proxyURL = strings.TrimSpace(proxyURL)
	if proxyURL == "" {
		return "direct (no proxy)"
	}
	u, err := url.Parse(proxyURL)
	if err != nil {
		// Never echo unparseable input back: it may embed credentials.
		return "invalid"
	}
	auth := ""
	if u.User != nil {
		// The whole userinfo section is replaced by a fixed marker, so
		// neither the user name nor the password reaches the output.
		auth = "(auth)@"
	}
	// Only scheme and host are emitted; path, query and fragment are dropped.
	return fmt.Sprintf("%s://%s%s", u.Scheme, auth, u.Host)
}

// directAdapter adapts a *net.Dialer to the dialer interfaces used by the
// SOCKS5 client so the configured timeouts/keepalive apply to the hop that
// reaches the proxy itself.
type directAdapter struct {
	base *net.Dialer
}

// Dial opens a connection through the wrapped net.Dialer without a context.
func (d *directAdapter) Dial(network, addr string) (net.Conn, error) {
	return d.base.Dial(network, addr)
}

// DialContext opens a connection through the wrapped net.Dialer, honouring
// cancellation and deadlines carried by ctx. Callers that probe their
// forward dialer for a context-aware method can then abort the connection
// to the proxy instead of waiting for a plain Dial to finish.
// NOTE(review): golang.org/x/net/proxy is expected to perform that probe
// (proxy.ContextDialer) — confirm against the x/net version in go.mod.
func (d *directAdapter) DialContext(ctx context.Context, network, addr string) (net.Conn, error) {
	return d.base.DialContext(ctx, network, addr)
}
// contains reports whether sub occurs anywhere inside s. An empty sub is
// found in every string, including the empty one.
func contains(s, sub string) bool {
	width := len(sub)
	// last is the final offset at which a window of len(sub) still fits;
	// it is negative when sub is longer than s, so the loop never runs.
	last := len(s) - width
	for start := 0; start <= last; start++ {
		if s[start:start+width] == sub {
			return true
		}
	}
	return false
}
port adds :53", + in: "8.8.8.8", + want: []string{"8.8.8.8:53"}, + }, + { + name: "multiple with mixed ports", + in: "8.8.8.8,1.1.1.1:5353,9.9.9.9", + want: []string{"8.8.8.8:53", "1.1.1.1:5353", "9.9.9.9:53"}, + }, + { + name: "whitespace trimmed", + in: " 8.8.8.8 , 1.1.1.1 ", + want: []string{"8.8.8.8:53", "1.1.1.1:53"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParseResolvers(tt.in) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ParseResolvers(%q) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestParsePorts(t *testing.T) { + tests := []struct { + name string + in string + want []int + }{ + {"empty uses defaults", "", []int{80, 443, 8080, 8443}}, + {"single valid", "80", []int{80}}, + {"multiple valid", "80,443,3000", []int{80, 443, 3000}}, + {"whitespace", " 80 , 443 ", []int{80, 443}}, + {"invalid silently dropped", "80,abc,443", []int{80, 443}}, + {"out of range dropped", "80,99999,443", []int{80, 443}}, + {"negative dropped", "80,-1,443", []int{80, 443}}, + {"zero dropped", "0,80,443", []int{80, 443}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParsePorts(tt.in) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ParsePorts(%q) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestCountActive(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": {StatusCode: 200}, + "b.example.com": {StatusCode: 301}, + "c.example.com": {StatusCode: 404}, + "d.example.com": {StatusCode: 500}, + "e.example.com": {StatusCode: 0}, // not probed + } + got := countActive(results) + if got != 2 { + t.Errorf("countActive = %d, want 2", got) + } +} + +func TestCountVulns(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": {OpenRedirect: true}, + "b.example.com": {CORSMisconfig: "wildcard with credentials"}, + "c.example.com": {DangerousMethods: []string{"PUT", "DELETE"}}, + "d.example.com": {GitExposed: true}, + 
"e.example.com": {BackupFiles: []string{"backup.sql"}}, + "f.example.com": {StatusCode: 200}, // clean + } + got := countVulns(results) + if got != 5 { + t.Errorf("countVulns = %d, want 5", got) + } +} + +func TestCountSubdomainsWithAI(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": {AIFindings: []string{"finding1"}}, + "b.example.com": {AIFindings: []string{"f1", "f2"}}, + "c.example.com": {}, // no AI findings + } + got := countSubdomainsWithAI(results) + if got != 2 { + t.Errorf("countSubdomainsWithAI = %d, want 2", got) + } +} + +func TestBuildAISummary(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": { + AIFindings: []string{"Hardcoded API key", "Weak crypto"}, + AISeverity: "critical", + CVEFindings: []string{"CVE-2021-12345"}, + }, + "b.example.com": { + AIFindings: []string{"Missing CSP"}, + AISeverity: "medium", + }, + "c.example.com": { + AIFindings: []string{"ignored"}, + AISeverity: "info", + }, + } + got := buildAISummary(results) + if got == "" { + t.Fatal("summary is empty") + } + // Must mention severities + mustContain := []string{"critical", "high", "medium", "CRITICAL", "MEDIUM", "Hardcoded API key", "CVE-2021-12345"} + for _, s := range mustContain { + if !strings.Contains(got, s) { + t.Errorf("summary missing expected token %q in:\n%s", s, got) + } + } +} + +func TestSortedIntsInvariant(t *testing.T) { + // Sanity: whenever we sort ints we expect ascending order (tests ScanPorts sorting guarantee). 
+ in := []int{443, 80, 8080, 22} + sort.Ints(in) + if !sort.IntsAreSorted(in) { + t.Error("sort.IntsAreSorted returned false after sort.Ints") + } +} diff --git a/internal/scheduler/alerter.go b/internal/scheduler/alerter.go new file mode 100644 index 0000000..a0f4d1c --- /dev/null +++ b/internal/scheduler/alerter.go @@ -0,0 +1,63 @@ +package scheduler + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + "god-eye/internal/diff" +) + +// WebhookAlerter POSTs the diff report JSON to an arbitrary URL. Works +// with generic webhook consumers; Slack/Discord get dedicated adapters +// later in F5.3 when bespoke formatting matters. +type WebhookAlerter struct { + URL string + Timeout time.Duration +} + +// NewWebhookAlerter returns a WebhookAlerter with sane defaults. +func NewWebhookAlerter(url string) *WebhookAlerter { + return &WebhookAlerter{URL: url, Timeout: 10 * time.Second} +} + +func (a *WebhookAlerter) Name() string { return "webhook" } + +func (a *WebhookAlerter) Notify(ctx context.Context, r *diff.Report) error { + body, err := json.Marshal(r) + if err != nil { + return err + } + client := &http.Client{Timeout: a.Timeout} + req, err := http.NewRequestWithContext(ctx, "POST", a.URL, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", "god-eye-v2") + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode >= 400 { + return fmt.Errorf("webhook returned %d", resp.StatusCode) + } + return nil +} + +// StdoutAlerter prints meaningful changes to stdout. Useful for smoke +// testing and for users who pipe god-eye output into grep/jq. 
type StdoutAlerter struct{}

// Name returns the fixed identifier "stdout".
func (StdoutAlerter) Name() string { return "stdout" }

// Notify prints one line to standard output for every entry in r.Changes,
// showing the report target plus the entry's Kind, Before, After and Host
// fields. The context is ignored and the returned error is always nil.
// r is dereferenced without a nil check.
func (StdoutAlerter) Notify(_ context.Context, r *diff.Report) error {
	for _, c := range r.Changes {
		fmt.Printf("[DIFF %s] %s %s → %s (%s)\n", r.Target, c.Kind, c.Before, c.After, c.Host)
	}
	return nil
}
The first scan runs +// immediately, subsequent scans run on s.Interval cadence. +func (s *Scheduler) Start(ctx context.Context) error { + if s.Run == nil { + return errors.New("scheduler: nil Run") + } + if s.Interval <= 0 { + return errors.New("scheduler: Interval must be > 0") + } + + // First scan now (so continuous mode produces something immediately). + s.runOnce(ctx) + + t := time.NewTicker(s.Interval) + defer t.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + s.runOnce(ctx) + } + } +} + +func (s *Scheduler) runOnce(ctx context.Context) { + if ctx.Err() != nil { + return + } + hosts, err := s.Run(ctx) + if err != nil { + // Scan failure is non-fatal for the scheduler itself; the next + // tick will try again. + return + } + + s.mu.Lock() + prev := s.lastSnap + prevAt := s.lastAt + s.lastSnap = hosts + s.lastAt = time.Now() + s.mu.Unlock() + + // No diff possible on the first run. + if prev == nil { + return + } + + report := diff.Compute(s.Target, prev, hosts, prevAt, time.Now()) + if !report.HasMeaningful() { + return + } + for _, a := range s.Alerters { + _ = a.Notify(ctx, report) + } +} + +// LastSnapshot returns the most recent scan snapshot + timestamp. Returns +// (nil, zero) before the first scan. 
func (s *Scheduler) LastSnapshot() ([]*store.Host, time.Time) {
	// Take the mutex so the slice and timestamp are read as a consistent
	// pair; runOnce replaces both under the same lock.
	s.mu.Lock()
	defer s.mu.Unlock()
	// The slice is returned as stored, not copied: it shares its backing
	// array and its *store.Host elements with the scheduler's snapshot, so
	// callers should treat the result as read-only.
	return s.lastSnap, s.lastAt
}
t.Errorf("alerter should not have been called on unchanged scans, got %d", alerter.called.Load()) + } +} + +func TestScheduler_RejectsBadParams(t *testing.T) { + s := &Scheduler{Target: "x", Interval: 0} + if err := s.Start(context.Background()); err == nil { + t.Error("expected error for zero interval") + } + + s2 := &Scheduler{Target: "x", Interval: time.Second, Run: nil} + if err := s2.Start(context.Background()); err == nil { + t.Error("expected error for nil Run") + } +} diff --git a/internal/sources/extra.go b/internal/sources/extra.go new file mode 100644 index 0000000..71db0da --- /dev/null +++ b/internal/sources/extra.go @@ -0,0 +1,167 @@ +// Additional passive sources added in v2.0 to close the gap with +// subfinder / BBOT. Every source here is: +// - Free and key-less (no API key required) +// - Defensive (fail-open — returns an empty slice on any error) +// - Bounded by the shared HTTP clients +// +// If a source goes offline upstream, the corresponding fetcher keeps +// returning empty — the scan still succeeds. + +package sources + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +// FetchOmnisint queries the free Omnisint Sonar mirror. It may be offline +// on any given day — fail-open. 
+func FetchOmnisint(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + u := fmt.Sprintf("https://sonar.omnisint.io/subdomains/%s", url.PathEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := StandardClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) + if err != nil { + return []string{}, nil + } + + var list []string + if err := json.Unmarshal(body, &list); err != nil { + return []string{}, nil + } + + seen := make(map[string]bool) + var out []string + for _, s := range list { + s = strings.ToLower(strings.TrimSpace(s)) + if s != "" && strings.HasSuffix(s, domain) && !seen[s] { + seen[s] = true + out = append(out, s) + } + } + return out, nil +} + +// FetchHudsonRock queries the free Cavalier InfoStealer intelligence API. +// Surfaces domain assets referenced in leaked stealer logs; useful for +// discovering shadow internal hostnames. +func FetchHudsonRock(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + u := fmt.Sprintf("https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-domain?domain=%s", url.QueryEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := StandardClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) + if err != nil { + return []string{}, nil + } + + // HudsonRock returns free-form JSON; we just mine every subdomain-like + // token from the response body via the shared regex. 
+ return ExtractSubdomains(string(body), domain), nil +} + +// FetchWebArchiveCDX queries the Internet Archive CDX server — a richer +// variant of the existing Wayback source. Pulls URLs with fewer limits +// and extracts hostnames that match the target domain. +func FetchWebArchiveCDX(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + u := fmt.Sprintf("https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&collapse=urlkey&limit=5000&fl=original", url.QueryEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := SlowClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 16*1024*1024)) + if err != nil { + return []string{}, nil + } + + // Response shape: [["original"], ["url1"], ["url2"], ...] — first row + // is the header, subsequent rows are single-element arrays with the URL. + var rows [][]string + if err := json.Unmarshal(body, &rows); err != nil { + return []string{}, nil + } + + seen := make(map[string]bool) + var out []string + for i, row := range rows { + if i == 0 { // skip header + continue + } + if len(row) == 0 { + continue + } + for _, host := range ExtractSubdomains(row[0], domain) { + if !seen[host] { + seen[host] = true + out = append(out, host) + } + } + } + return out, nil +} + +// FetchDigitorus queries the free Digitorus CT log mirror — an alternative +// to crt.sh that sometimes returns fresher data. 
+func FetchDigitorus(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + u := fmt.Sprintf("https://certificatedetails.com/api/find/%s", url.QueryEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := StandardClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024)) + if err != nil { + return []string{}, nil + } + + // Free-form JSON; mine hostnames. + return ExtractSubdomains(string(body), domain), nil +} diff --git a/internal/sources/shared.go b/internal/sources/shared.go index b4fb5ac..ce4691d 100644 --- a/internal/sources/shared.go +++ b/internal/sources/shared.go @@ -8,6 +8,8 @@ import ( "strings" "sync" "time" + + "god-eye/internal/proxyconf" ) // Shared HTTP clients - singleton pattern @@ -50,6 +52,65 @@ func init() { initRegex() } +// SetProxy configures outbound proxy for every shared HTTP client used +// by passive sources. Must be called BEFORE any Fetch* source function +// runs (init runs on package import, so main.go calls this after flag +// parsing but before pipeline start, which triggers a re-init via +// ReinitClients). +func SetProxy(u string) error { + if err := proxyconf.Validate(u); err != nil { + return err + } + proxyMu.Lock() + proxyURL = u + proxyMu.Unlock() + // Rebuild transports to pick up the new proxy. + reinitClients() + return nil +} + +var ( + proxyURL string + proxyMu sync.RWMutex +) + +// reinitClients rebuilds the shared transport and clients. Safe to call +// multiple times; in practice only called from SetProxy after startup. 
func reinitClients() {
	// Snapshot the configured proxy under the read lock so the rest of the
	// rebuild works on a stable value.
	proxyMu.RLock()
	cfgProxy := proxyURL
	proxyMu.RUnlock()

	baseDialer := &net.Dialer{
		Timeout:   10 * time.Second,
		KeepAlive: 30 * time.Second,
	}
	dialCtx, err := proxyconf.BuildDialer(cfgProxy, baseDialer)
	if err != nil {
		// NOTE(review): a proxy that cannot be built falls back to a
		// direct dialer here, so traffic would leave unproxied without
		// any error being reported — confirm this is intended.
		dialCtx = baseDialer.DialContext
	}
	// NOTE(review): the BuildProxyFunc error is discarded as well; on
	// failure proxyFunc is nil, which also means "no HTTP proxy".
	proxyFunc, _ := proxyconf.BuildProxyFunc(cfgProxy)

	// A fresh transport replaces the package-level one.
	// NOTE(review): nothing here closes the previous transport's idle
	// connections — confirm whether that matters.
	sharedTransport = &http.Transport{
		DialContext:         dialCtx,
		Proxy:               proxyFunc,
		MaxIdleConns:        100,
		MaxIdleConnsPerHost: 10,
		MaxConnsPerHost:     20,
		IdleConnTimeout:     90 * time.Second,
		TLSHandshakeTimeout: 10 * time.Second,
		TLSClientConfig: &tls.Config{
			MinVersion: tls.VersionTLS12,
		},
		ForceAttemptHTTP2:     true,
		ExpectContinueTimeout: 1 * time.Second,
	}

	// All three clients share the transport and differ only in their
	// overall request timeout. The package-level variables are assigned
	// without additional locking.
	FastClient = &http.Client{Transport: sharedTransport, Timeout: 10 * time.Second}
	StandardClient = &http.Client{Transport: sharedTransport, Timeout: 15 * time.Second}
	SlowClient = &http.Client{Transport: sharedTransport, Timeout: 120 * time.Second}
}
"dev.example.com"}, + }, + { + name: "deduplication", + text: "api.example.com api.example.com api.example.com", + want: []string{"api.example.com"}, + }, + { + name: "uppercase normalized", + text: "API.EXAMPLE.COM and Api.Example.com", + want: []string{"api.example.com"}, + }, + { + name: "wildcard prefix stripped", + text: "*.example.com is a wildcard", + want: []string{"example.com"}, + }, + { + name: "different domain filtered", + text: "api.example.com and other.different.org and sub.example.com", + want: []string{"api.example.com", "sub.example.com"}, + }, + { + name: "partial match not allowed", + text: "evilexample.com should not match", + want: nil, + }, + { + name: "json-wrapped", + text: `{"name":"api.example.com","type":"A"}`, + want: []string{"api.example.com"}, + }, + { + name: "mixed with urls", + text: `Visit https://api.example.com and https://docs.example.com/path`, + want: []string{"api.example.com", "docs.example.com"}, + }, + { + // Regex is greedy: only the longest leftmost match is returned, + // not every suffix. This is the v1 baseline behavior. 
+ name: "deep subdomain longest match only", + text: "a.b.c.example.com", + want: []string{"a.b.c.example.com"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ExtractSubdomains(tt.text, target) + sort.Strings(got) + sort.Strings(tt.want) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ExtractSubdomains(%q)\n got: %v\n want: %v", tt.text, got, tt.want) + } + }) + } +} + +func TestGetClientForTimeout(t *testing.T) { + tests := []struct { + timeout time.Duration + want string // identify by Timeout field + }{ + {5 * time.Second, "fast"}, + {10 * time.Second, "fast"}, + {15 * time.Second, "standard"}, + {30 * time.Second, "standard"}, + {60 * time.Second, "slow"}, + {120 * time.Second, "slow"}, + } + + for _, tt := range tests { + c := GetClientForTimeout(tt.timeout) + if c == nil { + t.Fatalf("GetClientForTimeout(%v) returned nil", tt.timeout) + } + var gotClient string + switch c { + case FastClient: + gotClient = "fast" + case StandardClient: + gotClient = "standard" + case SlowClient: + gotClient = "slow" + default: + gotClient = "unknown" + } + if gotClient != tt.want { + t.Errorf("GetClientForTimeout(%v) = %s, want %s", tt.timeout, gotClient, tt.want) + } + } +} + +func TestClientsInitialized(t *testing.T) { + if FastClient == nil { + t.Error("FastClient is nil") + } + if StandardClient == nil { + t.Error("StandardClient is nil") + } + if SlowClient == nil { + t.Error("SlowClient is nil") + } + if FastClient.Timeout != 10*time.Second { + t.Errorf("FastClient.Timeout = %v, want 10s", FastClient.Timeout) + } + if StandardClient.Timeout != 15*time.Second { + t.Errorf("StandardClient.Timeout = %v, want 15s", StandardClient.Timeout) + } + if SlowClient.Timeout != 120*time.Second { + t.Errorf("SlowClient.Timeout = %v, want 120s", SlowClient.Timeout) + } +} + +func TestRegexCompiled(t *testing.T) { + if SubdomainRegex == nil { + t.Error("SubdomainRegex not compiled") + } + if EmailDomainRegex == nil { + 
t.Error("EmailDomainRegex not compiled") + } + if URLDomainRegex == nil { + t.Error("URLDomainRegex not compiled") + } + if JSONSubdomainRegex == nil { + t.Error("JSONSubdomainRegex not compiled") + } + if WildcardPrefixRegex == nil { + t.Error("WildcardPrefixRegex not compiled") + } +} diff --git a/internal/store/memory.go b/internal/store/memory.go new file mode 100644 index 0000000..eb98984 --- /dev/null +++ b/internal/store/memory.go @@ -0,0 +1,267 @@ +package store + +import ( + "context" + "sort" + "sync" + "time" +) + +// MemoryStore is the default in-memory Store implementation. Thread-safe, +// suitable for single-process scans. Persistent backends (BoltDB for ASM / +// resume workflows) land in Fase 5; they will implement the same Store +// interface so callers need no changes. +type MemoryStore struct { + mu sync.RWMutex + hosts map[string]*Host + // perHostLocks serializes Upsert mutations per-host without blocking + // independent hosts. It's populated lazily and never cleared — the number + // of subdomains per scan is bounded (thousands, not millions). + perHostLocks map[string]*sync.Mutex + locksMu sync.Mutex +} + +// NewMemoryStore creates an empty MemoryStore. +func NewMemoryStore() *MemoryStore { + return &MemoryStore{ + hosts: make(map[string]*Host), + perHostLocks: make(map[string]*sync.Mutex), + } +} + +// lockFor returns the mutex that protects mutations to subdomain, creating +// it lazily if needed. +func (s *MemoryStore) lockFor(subdomain string) *sync.Mutex { + s.locksMu.Lock() + defer s.locksMu.Unlock() + l, ok := s.perHostLocks[subdomain] + if !ok { + l = &sync.Mutex{} + s.perHostLocks[subdomain] = l + } + return l +} + +// Upsert creates or updates the record for subdomain, invoking mutate under +// a per-host lock. Concurrent callers mutating different subdomains proceed +// in parallel; concurrent mutations of the same subdomain are serialized. 
func (s *MemoryStore) Upsert(ctx context.Context, subdomain string, mutate func(*Host)) error {
	// Refuse to start when the context is already cancelled or expired.
	if err := ctx.Err(); err != nil {
		return err
	}
	// An empty key is ignored: nothing is stored and no error is reported.
	if subdomain == "" {
		return nil
	}

	// Take the per-host lock first and keep it until return, so lookup,
	// creation, mutate and the LastUpdated stamp form one serialized unit
	// for this subdomain.
	hostLock := s.lockFor(subdomain)
	hostLock.Lock()
	defer hostLock.Unlock()

	// The store-wide lock covers only the map lookup/insert and is
	// released before mutate runs.
	s.mu.Lock()
	h, existed := s.hosts[subdomain]
	if !existed {
		h = &Host{
			Subdomain: subdomain,
			FirstSeen: time.Now(),
		}
		s.hosts[subdomain] = h
	}
	s.mu.Unlock()

	// mutate receives the stored record itself (not a copy); a nil mutate
	// just creates/touches the record.
	if mutate != nil {
		mutate(h)
	}
	h.LastUpdated = time.Now()
	return nil
}
+func cloneHost(h *Host) *Host { + if h == nil { + return nil + } + c := *h + c.IPs = cloneStrings(h.IPs) + c.Technologies = cloneStrings(h.Technologies) + c.TLSAltNames = cloneStrings(h.TLSAltNames) + c.DiscoveredVia = cloneStrings(h.DiscoveredVia) + c.Ports = cloneInts(h.Ports) + c.Headers = cloneStringMap(h.Headers) + + if h.TLSFingerprint != nil { + fp := *h.TLSFingerprint + fp.InternalHosts = cloneStrings(h.TLSFingerprint.InternalHosts) + c.TLSFingerprint = &fp + } + if h.Takeover != nil { + t := *h.Takeover + c.Takeover = &t + } + + c.Vulnerabilities = cloneVulns(h.Vulnerabilities) + c.Secrets = cloneSecrets(h.Secrets) + c.CVEs = cloneCVEs(h.CVEs) + c.AIFindings = cloneAIFindings(h.AIFindings) + return &c +} + +func cloneStrings(in []string) []string { + if len(in) == 0 { + return nil + } + out := make([]string, len(in)) + copy(out, in) + return out +} + +func cloneInts(in []int) []int { + if len(in) == 0 { + return nil + } + out := make([]int, len(in)) + copy(out, in) + return out +} + +func cloneStringMap(in map[string]string) map[string]string { + if len(in) == 0 { + return nil + } + out := make(map[string]string, len(in)) + for k, v := range in { + out[k] = v + } + return out +} + +func cloneVulns(in []Vulnerability) []Vulnerability { + if len(in) == 0 { + return nil + } + out := make([]Vulnerability, len(in)) + for i, v := range in { + v.CVEs = cloneStrings(v.CVEs) + out[i] = v + } + return out +} + +func cloneSecrets(in []Secret) []Secret { + if len(in) == 0 { + return nil + } + out := make([]Secret, len(in)) + copy(out, in) + return out +} + +func cloneCVEs(in []CVE) []CVE { + if len(in) == 0 { + return nil + } + out := make([]CVE, len(in)) + copy(out, in) + return out +} + +func cloneAIFindings(in []AIFinding) []AIFinding { + if len(in) == 0 { + return nil + } + out := make([]AIFinding, len(in)) + for i, f := range in { + f.CVEs = cloneStrings(f.CVEs) + out[i] = f + } + return out +} + +// AppendUnique helpers — exported for modules that want to 
append slice +// fields without introducing duplicates. Keeps mutation semantics in one place. + +// AddDiscoveryMethod appends method to h.DiscoveredVia if not already present, +// preserving the order in which methods were first added. +func AddDiscoveryMethod(h *Host, method string) { + for _, m := range h.DiscoveredVia { + if m == method { + return + } + } + h.DiscoveredVia = append(h.DiscoveredVia, method) +} + +// AddIPs appends to h.IPs every entry of ips that is non-empty and not already +// present, preserving first-seen order. Duplicates within ips are skipped too. +func AddIPs(h *Host, ips []string) { + seen := make(map[string]bool, len(h.IPs)) + for _, ip := range h.IPs { + seen[ip] = true + } + for _, ip := range ips { + if ip == "" || seen[ip] { + continue + } + seen[ip] = true + h.IPs = append(h.IPs, ip) + } +} + +// AddTechnologies appends to h.Technologies every entry of tech that is +// non-empty and not already present, preserving first-seen order. Matching is +// exact (case-sensitive). +func AddTechnologies(h *Host, tech []string) { + seen := make(map[string]bool, len(h.Technologies)) + for _, t := range h.Technologies { + seen[t] = true + } + for _, t := range tech { + if t == "" || seen[t] { + continue + } + seen[t] = true + h.Technologies = append(h.Technologies, t) + } +} diff --git a/internal/store/memory_test.go b/internal/store/memory_test.go new file mode 100644 index 0000000..e16c037 --- /dev/null +++ b/internal/store/memory_test.go @@ -0,0 +1,263 @@ +package store + +import ( + "context" + "fmt" + "reflect" + "sort" + "sync" + "sync/atomic" + "testing" + "time" +) + +// TestUpsert_CreatesHost checks that the first Upsert creates the record, +// applies the mutator, and stamps FirstSeen and LastUpdated. +func TestUpsert_CreatesHost(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + + err := s.Upsert(ctx, "api.example.com", func(h *Host) { + h.IPs = []string{"1.2.3.4"} + h.StatusCode = 200 + }) + if err != nil { + t.Fatal(err) + } + + h, ok := s.Get(ctx, "api.example.com") + if !ok { + t.Fatal("Get returned !ok after Upsert") + } + if h.Subdomain != "api.example.com" { + t.Errorf("Subdomain = %q", h.Subdomain) + } + if !reflect.DeepEqual(h.IPs, []string{"1.2.3.4"}) { + t.Errorf("IPs = %v", h.IPs) + } + if h.StatusCode != 200 { + t.Errorf("StatusCode = %d", h.StatusCode) + } + if h.FirstSeen.IsZero() { + t.Error("FirstSeen not populated") + 
} + if h.LastUpdated.IsZero() { + t.Error("LastUpdated not populated") + } +} + +func TestUpsert_UpdatesExistingHost(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + + s.Upsert(ctx, "api.example.com", func(h *Host) { h.StatusCode = 200 }) + firstSeen, _ := s.Get(ctx, "api.example.com") + time.Sleep(5 * time.Millisecond) // ensure LastUpdated differs + + s.Upsert(ctx, "api.example.com", func(h *Host) { h.Title = "API" }) + + h, _ := s.Get(ctx, "api.example.com") + if h.StatusCode != 200 { + t.Errorf("StatusCode lost: %d", h.StatusCode) + } + if h.Title != "API" { + t.Errorf("Title not set: %q", h.Title) + } + if !h.FirstSeen.Equal(firstSeen.FirstSeen) { + t.Error("FirstSeen changed on update") + } + if !h.LastUpdated.After(firstSeen.LastUpdated) { + t.Error("LastUpdated did not advance") + } +} + +func TestUpsert_EmptySubdomainNoop(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + if err := s.Upsert(ctx, "", func(h *Host) {}); err != nil { + t.Errorf("unexpected error: %v", err) + } + if s.Count(ctx) != 0 { + t.Error("empty subdomain should be a noop") + } +} + +func TestUpsert_CanceledContext(t *testing.T) { + s := NewMemoryStore() + ctx, cancel := context.WithCancel(context.Background()) + cancel() + if err := s.Upsert(ctx, "a.example.com", func(h *Host) {}); err == nil { + t.Error("expected error for canceled context") + } +} + +func TestGet_Missing(t *testing.T) { + s := NewMemoryStore() + _, ok := s.Get(context.Background(), "none.example.com") + if ok { + t.Error("expected !ok for missing host") + } +} + +// TestGet_ReturnsCopy mutates the reference-typed fields of one snapshot and +// verifies that a second Get still sees the stored values. +func TestGet_ReturnsCopy(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + s.Upsert(ctx, "a.example.com", func(h *Host) { + h.IPs = []string{"1.2.3.4"} + h.Technologies = []string{"nginx"} + h.Headers = map[string]string{"X-Test": "yes"} + h.TLSFingerprint = &TLSFingerprint{Vendor: "Fortinet", InternalHosts: []string{"internal.local"}} + }) + + a, _ := s.Get(ctx, "a.example.com") + // mutate 
returned host aggressively + a.IPs[0] = "MUTATED" + a.Technologies = append(a.Technologies, "INJECTED") + a.Headers["X-Test"] = "MUTATED" + a.TLSFingerprint.Vendor = "MUTATED" + a.TLSFingerprint.InternalHosts[0] = "MUTATED" + + b, _ := s.Get(ctx, "a.example.com") + if b.IPs[0] != "1.2.3.4" { + t.Errorf("IPs corrupted: %v", b.IPs) + } + if len(b.Technologies) != 1 { + t.Errorf("Technologies corrupted: %v", b.Technologies) + } + if b.Headers["X-Test"] != "yes" { + t.Errorf("Headers corrupted: %v", b.Headers) + } + if b.TLSFingerprint.Vendor != "Fortinet" { + t.Errorf("TLSFingerprint.Vendor corrupted: %q", b.TLSFingerprint.Vendor) + } + if b.TLSFingerprint.InternalHosts[0] != "internal.local" { + t.Errorf("InternalHosts corrupted: %v", b.TLSFingerprint.InternalHosts) + } +} + +func TestAll_Sorted(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + for _, name := range []string{"zeta.example.com", "alpha.example.com", "mid.example.com"} { + s.Upsert(ctx, name, func(h *Host) {}) + } + all := s.All(ctx) + got := make([]string, len(all)) + for i, h := range all { + got[i] = h.Subdomain + } + want := []string{"alpha.example.com", "mid.example.com", "zeta.example.com"} + if !reflect.DeepEqual(got, want) { + t.Errorf("All order = %v, want %v", got, want) + } +} + +func TestCount(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + if s.Count(ctx) != 0 { + t.Error("initial Count != 0") + } + s.Upsert(ctx, "a.example.com", func(h *Host) {}) + s.Upsert(ctx, "b.example.com", func(h *Host) {}) + s.Upsert(ctx, "a.example.com", func(h *Host) {}) // update, not new + if got := s.Count(ctx); got != 2 { + t.Errorf("Count = %d, want 2", got) + } +} + +func TestConcurrentUpserts_SameHost(t *testing.T) { + // All writers target the same host. Mutators are serialized by the + // per-host lock, so every append must survive and no race should fire. 
+ s := NewMemoryStore() + ctx := context.Background() + + var wg sync.WaitGroup + const writers = 50 + var counter atomic.Int32 + for i := 0; i < writers; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + s.Upsert(ctx, "hot.example.com", func(h *Host) { + h.Technologies = append(h.Technologies, fmt.Sprintf("t%d", i)) + counter.Add(1) + }) + }(i) + } + wg.Wait() + + if counter.Load() != writers { + t.Errorf("not all mutators ran: %d/%d", counter.Load(), writers) + } + h, _ := s.Get(ctx, "hot.example.com") + if len(h.Technologies) != writers { + t.Errorf("expected %d technologies, got %d", writers, len(h.Technologies)) + } +} + +func TestConcurrentUpserts_DifferentHosts(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + var wg sync.WaitGroup + const hosts = 200 + for i := 0; i < hosts; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + s.Upsert(ctx, fmt.Sprintf("h%d.example.com", i), func(h *Host) { + h.IPs = []string{"1.2.3.4"} + }) + }(i) + } + wg.Wait() + if got := s.Count(ctx); got != hosts { + t.Errorf("expected %d hosts, got %d", hosts, got) + } +} + +func TestClose_Idempotent(t *testing.T) { + s := NewMemoryStore() + if err := s.Close(); err != nil { + t.Fatal(err) + } + if err := s.Close(); err != nil { + t.Fatal(err) + } +} + +// ---------- Helper tests ---------- + +func TestAddDiscoveryMethod(t *testing.T) { + h := &Host{} + AddDiscoveryMethod(h, "passive:crt.sh") + AddDiscoveryMethod(h, "brute") + AddDiscoveryMethod(h, "passive:crt.sh") // duplicate + if !reflect.DeepEqual(h.DiscoveredVia, []string{"passive:crt.sh", "brute"}) { + t.Errorf("DiscoveredVia = %v", h.DiscoveredVia) + } +} + +func TestAddIPs_Dedup(t *testing.T) { + h := &Host{IPs: []string{"1.1.1.1"}} + AddIPs(h, []string{"1.1.1.1", "2.2.2.2", "", "3.3.3.3", "2.2.2.2"}) + sort.Strings(h.IPs) + want := []string{"1.1.1.1", "2.2.2.2", "3.3.3.3"} + if !reflect.DeepEqual(h.IPs, want) { + t.Errorf("IPs = %v, want %v", h.IPs, want) + } +} + +func 
TestAddTechnologies_Dedup(t *testing.T) { + h := &Host{Technologies: []string{"nginx"}} + AddTechnologies(h, []string{"nginx", "Go", "", "React", "Go"}) + sort.Strings(h.Technologies) + want := []string{"Go", "React", "nginx"} + if !reflect.DeepEqual(h.Technologies, want) { + t.Errorf("Technologies = %v, want %v", h.Technologies, want) + } +} + +func TestCloneHost_Nil(t *testing.T) { + if got := cloneHost(nil); got != nil { + t.Errorf("cloneHost(nil) = %v, want nil", got) + } +} diff --git a/internal/store/store.go b/internal/store/store.go new file mode 100644 index 0000000..3f4f45b --- /dev/null +++ b/internal/store/store.go @@ -0,0 +1,161 @@ +// Package store defines the Store interface used by pipeline modules to record +// per-host findings. Full implementations (in-memory + BoltDB-backed) live in +// this same package — this file only declares the interface so other packages +// can depend on it without pulling in storage backends. +package store + +import ( + "context" + "time" +) + +// Host is the aggregate per-subdomain record. Fields are populated +// incrementally as modules publish events. +// +// Field names intentionally mirror the legacy config.SubdomainResult shape so +// migrating JSON output in F0.6 is mechanical. Over time this struct will +// diverge (more fields, richer types) as v2 features land. 
+type Host struct { + // Subdomain is the record's key in the store. + Subdomain string + IPs []string + CNAME string + PTR string + + // Resolution metadata + ASN string + Org string + Country string + City string + + // HTTP probe + URL string + StatusCode int + ContentLength int64 + Title string + Server string + Technologies []string + Headers map[string]string + ResponseMs int64 // NOTE(review): presumably response time in milliseconds — confirm with the probe module + + // TLS + TLSVersion string + TLSIssuer string + TLSExpiry time.Time + TLSSelfSigned bool + TLSAltNames []string + TLSFingerprint *TLSFingerprint // nil when no fingerprint has been recorded + + // Classification + CloudProvider string + WAF string + Ports []int + + // Analysis + Vulnerabilities []Vulnerability + Secrets []Secret + CVEs []CVE + AIFindings []AIFinding + Takeover *Takeover // nil when no takeover has been recorded + + // Discovery metadata + DiscoveredVia []string // e.g. ["passive:crt.sh", "brute"] + FirstSeen time.Time // set once by MemoryStore.Upsert when the record is created + LastUpdated time.Time // refreshed by MemoryStore.Upsert after every mutation +} + +// TLSFingerprint identifies a security appliance (firewall, VPN, load balancer) +// from its TLS certificate. +type TLSFingerprint struct { + Vendor string + Product string + Version string + ApplianceKind string + InternalHosts []string +} + +// Vulnerability is a single finding recorded on a host. +type Vulnerability struct { + ID string + Title string + Description string + Severity string + URL string + Evidence string + Remediation string + CVEs []string + OWASP string + CVSS float64 + FoundAt time.Time +} + +// Secret is a credential/token discovered on a host. +// NOTE(review): Value presumably holds the raw secret — confirm whether it +// must be redacted before it reaches any report output. +type Secret struct { + Kind string + Match string + Value string + Location string + Validated bool + Severity string + Description string + FoundAt time.Time +} + +// CVE is a CVE match correlated to a detected technology. +type CVE struct { + ID string + Technology string + Version string + Severity string + CVSS float64 + Description string + URL string + InKEV bool + FoundAt time.Time +} + +// AIFinding is an AI/agent-produced insight. 
+type AIFinding struct { + Agent string + Model string + Severity string + Title string + Description string + Evidence string + CVEs []string + OWASP string + Confidence float64 + FoundAt time.Time +} + +// Takeover is a confirmed or candidate subdomain takeover. +type Takeover struct { + Service string + CNAME string + Evidence string + PoC string + Confirmed bool + FoundAt time.Time +} + +// Store is the aggregate interface modules use to record findings. Methods +// must be safe for concurrent use by many goroutines. +type Store interface { + // Upsert creates the record for subdomain if it does not exist yet and then + // passes it to mutate, which edits the Host in place; how fields are merged + // is entirely up to mutate. The mutator is invoked under a per-host lock so + // concurrent callers see consistent state. + Upsert(ctx context.Context, subdomain string, mutate func(*Host)) error + + // Get returns a snapshot copy of the record for subdomain. The bool is + // false when no record exists. + Get(ctx context.Context, subdomain string) (*Host, bool) + + // All returns a snapshot slice of every host. The slice is sorted by + // subdomain for deterministic output. + All(ctx context.Context) []*Host + + // Count returns the number of hosts in the store. + Count(ctx context.Context) int + + // Close releases resources (e.g. BoltDB handle). Idempotent. + Close() error +} diff --git a/internal/tui/live.go b/internal/tui/live.go new file mode 100644 index 0000000..722e929 --- /dev/null +++ b/internal/tui/live.go @@ -0,0 +1,134 @@ +// Package tui provides terminal-only live views of scan activity. No web +// UI by design. Phase 4 will expand this into a bubbletea-powered +// interactive TUI with panels; the current LivePrinter is the minimal +// terminal-only viewer that emits colorized event lines in real time. 
+package tui + +import ( + "context" + "fmt" + "sync/atomic" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/output" +) + +// LivePrinter subscribes to every event on a bus and prints a one-line +// summary to stdout as they arrive. Safe to attach alongside the regular +// report module — this is purely an observability layer. +type LivePrinter struct { + bus *eventbus.Bus + sub *eventbus.Subscription + verbosity int // 0 = quiet (findings only), 1 = normal (discovery, probes, phases), 2 = noisy + started time.Time // set by NewLivePrinter; Close reports the time elapsed since then + + evCount atomic.Uint64 // every event handled, whether or not it was printed +} + +// NewLivePrinter attaches to bus and begins printing. +// +// verbosity levels: +// +// 0 — only findings: vulnerabilities, secrets, takeovers, CVE matches, AI findings +// 1 — above + subdomain discovery, HTTP probe summaries and phase markers +// 2 — above + DNS resolutions and module errors +func NewLivePrinter(bus *eventbus.Bus, verbosity int) *LivePrinter { + p := &LivePrinter{bus: bus, verbosity: verbosity, started: time.Now()} + p.sub = bus.SubscribeAll(p.handle) + return p +} + +// Close unsubscribes from the bus and prints a summary footer. Close does not +// guard against repeated calls; each call prints the footer again. 
+func (p *LivePrinter) Close() { + if p.sub != nil { + p.sub.Unsubscribe() + } + dur := time.Since(p.started).Round(time.Millisecond) + fmt.Printf("%s scan elapsed %s, %d events seen\n", + output.Dim("·"), output.BoldGreen(dur.String()), p.evCount.Load()) +} + +// handle is the callback registered with the bus. It counts every event, then +// prints a one-line summary for the event types it knows. Discovery, HTTP +// probe and phase events need verbosity >= 1; DNS and module-error events need +// verbosity >= 2; findings are always printed. Other event types are counted +// but produce no output. +func (p *LivePrinter) handle(_ context.Context, e eventbus.Event) { + p.evCount.Add(1) + switch ev := e.(type) { + case eventbus.SubdomainDiscovered: + if p.verbosity >= 1 { + fmt.Printf("%s %s %s\n", output.Dim("↳"), output.Cyan(ev.Method), ev.Subdomain) + } + case eventbus.DNSResolved: + if p.verbosity >= 2 { + fmt.Printf("%s %s %s\n", output.Dim("⏚"), ev.Subdomain, output.Dim(joinIPs(ev.IPs))) + } + case eventbus.HTTPProbed: + if p.verbosity >= 1 { + color := statusColor(ev.StatusCode) + fmt.Printf("%s %s %s %s\n", color, ev.URL, output.Dim(fmt.Sprintf("[%d]", ev.StatusCode)), output.Dim(ev.Title)) + } + case eventbus.VulnerabilityFound: + fmt.Printf("%s %s %s %s\n", sevBadge(ev.Severity), output.BoldWhite(ev.Title), output.Dim(ev.URL), output.Dim(ev.ID)) + case eventbus.SecretFound: + fmt.Printf("%s %s %s %s\n", sevBadge(ev.Severity), output.BoldWhite("SECRET:"+ev.Kind), ev.Location, output.Dim(ev.Match)) + case eventbus.TakeoverCandidate: + fmt.Printf("%s %s %s service=%s\n", sevBadge(eventbus.SeverityHigh), output.BoldYellow("TAKEOVER?"), ev.Subdomain, ev.Service) + case eventbus.TakeoverConfirmed: + fmt.Printf("%s %s %s service=%s\n", sevBadge(eventbus.SeverityCritical), output.BgRed(" TAKEOVER "), ev.Subdomain, ev.Service) + case eventbus.CVEMatch: + fmt.Printf("%s %s %s@%s → %s\n", sevBadge(ev.Severity), output.BoldWhite("CVE"), ev.Technology, ev.Version, ev.CVE) + case eventbus.AIFinding: + fmt.Printf("%s %s %s %s\n", sevBadge(ev.Severity), output.BoldMagenta("AI:"+ev.Agent), output.Dim(ev.Subject), ev.Title) + case eventbus.ModuleError: + if p.verbosity >= 2 { + fmt.Printf("%s %s %s\n", output.Red("⚠"), output.Dim(ev.Module), ev.Err) + } + case eventbus.PhaseStarted: + if p.verbosity >= 1 { + 
fmt.Printf("%s %s\n", output.Dim("▶"), output.BoldCyan("phase "+ev.Phase)) + } + case eventbus.PhaseCompleted: + if p.verbosity >= 1 { + fmt.Printf("%s %s %s\n", output.Dim("▣"), output.Dim("phase "+ev.Phase), output.Dim(ev.Duration.Round(time.Millisecond).String())) + } + } +} + +// sevBadge returns the colored label for a severity. Anything other than +// critical, high, medium or low is rendered as [INFO]. +func sevBadge(s eventbus.Severity) string { + switch s { + case eventbus.SeverityCritical: + return output.BgRed(" CRIT ") + case eventbus.SeverityHigh: + return output.Red("[HIGH]") + case eventbus.SeverityMedium: + return output.Yellow("[MED]") + case eventbus.SeverityLow: + return output.Blue("[LOW]") + default: + return output.Dim("[INFO]") + } +} + +// statusColor returns a colored glyph for an HTTP status code: one each for +// 2xx, 3xx, 4xx, and 500 or above, and a dim dot for anything below 200. +func statusColor(code int) string { + switch { + case code >= 200 && code < 300: + return output.Green("●") + case code >= 300 && code < 400: + return output.Yellow("◐") + case code >= 400 && code < 500: + return output.Red("○") + case code >= 500: + return output.BoldRed("✕") + default: + return output.Dim("·") + } +} + +// joinIPs renders ips as "[a,b,c]", or "[]" when ips is empty. +// NOTE(review): strings.Join would build this without repeated concatenation; +// it needs the strings import, which this file does not currently have. +func joinIPs(ips []string) string { + out := "[" + for i, ip := range ips { + if i > 0 { + out += "," + } + out += ip + } + return out + "]" +} diff --git a/internal/validator/validator_test.go b/internal/validator/validator_test.go new file mode 100644 index 0000000..87ecd36 --- /dev/null +++ b/internal/validator/validator_test.go @@ -0,0 +1,249 @@ +package validator + +import ( + "strings" + "testing" +) + +func TestValidateDomain(t *testing.T) { + v := DefaultDomainValidator() + + tests := []struct { + name string + input string + wantErr bool + }{ + {"simple domain", "example.com", false}, + {"subdomain", "api.example.com", false}, + {"deep subdomain", "a.b.c.example.com", false}, + {"hyphen in middle", "my-site.example.com", false}, + {"co.uk tld", "example.co.uk", false}, + {"uppercase", "EXAMPLE.COM", false}, + + {"empty", "", true}, + {"whitespace only", " ", true}, + {"with scheme http", "http://example.com", true}, + {"with scheme https", "https://example.com", true}, + {"path traversal", "example.com/../etc", 
true}, + {"shell metachar ;", "example.com;whoami", true}, + {"shell metachar |", "example.com|whoami", true}, + {"shell metachar &", "example.com&ls", true}, + {"backtick", "example.com`id`", true}, + {"dollar", "example.com$USER", true}, + {"newline", "example.com\nmalicious", true}, + {"null byte", "example.com\x00.evil", true}, + {"leading hyphen label", "-example.com", true}, + {"trailing hyphen label", "example-.com", true}, + {"double dot", "example..com", true}, + {"label too long", strings.Repeat("a", 64) + ".com", true}, + {"domain too long", strings.Repeat("a.", 130) + "com", true}, + {"numeric tld", "example.123", true}, + {"single label", "localhost", true}, + {"angle brackets", "