diff --git a/cmd/god-eye/main.go b/cmd/god-eye/main.go index 0cddfc9..01a2c7a 100644 --- a/cmd/god-eye/main.go +++ b/cmd/god-eye/main.go @@ -1,18 +1,39 @@ package main import ( + "context" "fmt" "os" + "os/signal" + "syscall" + "time" "github.com/spf13/cobra" "god-eye/internal/ai" "god-eye/internal/config" + "god-eye/internal/diff" + "god-eye/internal/modules/all" + "god-eye/internal/nucleitpl" "god-eye/internal/output" + "god-eye/internal/pipeline" + gohttp "god-eye/internal/http" + "god-eye/internal/proxyconf" "god-eye/internal/scanner" + "god-eye/internal/scheduler" + "god-eye/internal/sources" + "god-eye/internal/store" + "god-eye/internal/tui" "god-eye/internal/validator" + "god-eye/internal/wizard" ) +var _ = diff.Compute // ensure diff import is kept in the dependency graph + +// rootCmdRef is set by main() so helpers can query which flags cobra saw +// explicitly on the command line (via Flags().Changed). +var rootCmdRef *cobra.Command + func main() { var cfg config.Config @@ -33,6 +54,20 @@ Examples: god-eye -d example.com --stealth moderate Moderate stealth (evasion mode) god-eye -d example.com --stealth paranoid Maximum stealth (very slow)`, Run: func(cmd *cobra.Command, args []string) { + // If no target given and stdin is a TTY, launch the interactive wizard. + // Explicit --wizard also triggers it even with a target present (user + // wants to review defaults). + if (cfg.Domain == "" && wizard.IsInteractive()) || cfg.Wizard { + if err := runWizard(&cfg); err != nil { + if err == wizard.ErrCancelled { + fmt.Println(output.Yellow("cancelled.")) + os.Exit(130) + } + fmt.Println(output.Red("[-]"), "wizard:", err) + os.Exit(1) + } + } + if cfg.Domain == "" { fmt.Println(output.Red("[-]"), "Domain is required. 
Use -d flag.") cmd.Help() @@ -58,6 +93,27 @@ Examples: fmt.Println(output.Red("[-]"), "Invalid resolvers:", err.Error()) os.Exit(1) } + if err := proxyconf.Validate(cfg.Proxy); err != nil { + fmt.Println(output.Red("[-]"), "Invalid --proxy:", err.Error()) + os.Exit(1) + } + // Propagate proxy config to every HTTP client before anything + // else spins up. This must happen after validation and before + // the pipeline/scanner starts. + if cfg.Proxy != "" { + if err := gohttp.SetProxy(cfg.Proxy); err != nil { + fmt.Println(output.Red("[-]"), "proxy (http factory):", err.Error()) + os.Exit(1) + } + if err := sources.SetProxy(cfg.Proxy); err != nil { + fmt.Println(output.Red("[-]"), "proxy (sources):", err.Error()) + os.Exit(1) + } + if !cfg.Silent { + fmt.Printf("%s Routing HTTP through %s\n", + output.BoldCyan("β›“"), output.BoldWhite(proxyconf.Humanize(cfg.Proxy))) + } + } if err := validator.ValidateConcurrency(cfg.Concurrency); err != nil { fmt.Println(output.Red("[-]"), "Invalid concurrency:", err.Error()) os.Exit(1) @@ -111,6 +167,10 @@ Examples: fmt.Println() } + if cfg.UsePipeline { + runPipeline(cfg) + return + } scanner.Run(cfg) }, } @@ -135,8 +195,8 @@ Examples: // AI flags rootCmd.Flags().BoolVar(&cfg.EnableAI, "enable-ai", false, "Enable AI-powered analysis with Ollama (includes CVE search)") rootCmd.Flags().StringVar(&cfg.AIUrl, "ai-url", "http://localhost:11434", "Ollama API URL") - rootCmd.Flags().StringVar(&cfg.AIFastModel, "ai-fast-model", "deepseek-r1:1.5b", "Fast triage model") - rootCmd.Flags().StringVar(&cfg.AIDeepModel, "ai-deep-model", "qwen2.5-coder:7b", "Deep analysis model (supports function calling)") + rootCmd.Flags().StringVar(&cfg.AIFastModel, "ai-fast-model", "qwen3:1.7b", "Fast triage model (Ollama tag)") + rootCmd.Flags().StringVar(&cfg.AIDeepModel, "ai-deep-model", "qwen2.5-coder:14b", "Deep analysis model (Ollama tag, supports function calling)") rootCmd.Flags().BoolVar(&cfg.AICascade, "ai-cascade", true, "Use cascade (fast triage + 
deep analysis)") rootCmd.Flags().BoolVar(&cfg.AIDeepAnalysis, "ai-deep", false, "Enable deep AI analysis on all findings") rootCmd.Flags().BoolVar(&cfg.MultiAgent, "multi-agent", false, "Enable multi-agent orchestration (8 specialized AI agents)") @@ -144,6 +204,27 @@ Examples: // Stealth flags rootCmd.Flags().StringVar(&cfg.StealthMode, "stealth", "", "Stealth mode: light, moderate, aggressive, paranoid (reduces detection)") + // v2 pipeline flags + rootCmd.Flags().BoolVar(&cfg.UsePipeline, "pipeline", false, "Use v2 event-driven pipeline (experimental, parity with v1 verified by F0.7)") + rootCmd.Flags().BoolVar(&cfg.Wizard, "wizard", false, "Force the interactive setup wizard even when -d is set") + rootCmd.Flags().StringVar(&cfg.Profile, "profile", "", "Apply named scan profile (bugbounty, pentest, asm-continuous, stealth-max, quick)") + rootCmd.Flags().StringVar(&cfg.ConfigFile, "config", "", "Path to YAML config file (overrides auto-discovery)") + + // Stash the rootCmd in a package var so runPipeline can check which + // flags the user set explicitly (cobra is the only thing that knows). + rootCmdRef = rootCmd + rootCmd.Flags().BoolVar(&cfg.Live, "live", false, "Stream colorized scan events live to the terminal (v2 only)") + rootCmd.Flags().IntVar(&cfg.LiveVerbosity, "live-verbosity", 1, "Live view verbosity: 0=findings-only, 1=normal, 2=noisy") + rootCmd.Flags().StringVar(&cfg.AIProfile, "ai-profile", "", "AI tier: lean (16GB), balanced (32GB), heavy/max (64GB+). 
Overrides --ai-fast-model/--ai-deep-model unless those are also set explicitly.") + rootCmd.Flags().BoolVar(&cfg.AIVerbose, "ai-verbose", false, "Log every Ollama query (model, prompt/response size, duration) to stderr") + rootCmd.Flags().BoolVar(&cfg.AutoPullModels, "ai-auto-pull", true, "Auto-download missing Ollama models before the scan starts") + rootCmd.Flags().BoolVar(&cfg.NucleiScan, "nuclei", false, "Run Nuclei-format YAML templates against every probed host") + rootCmd.Flags().StringVar(&cfg.NucleiTemplates, "nuclei-templates", "", "Path to Nuclei templates directory (default: $NUCLEI_TEMPLATES, then ~/nuclei-templates, then ~/.god-eye/nuclei-templates)") + rootCmd.Flags().BoolVar(&cfg.NucleiAutoDownload, "nuclei-auto-download", true, "Auto-download nuclei-templates ZIP from GitHub when no local dir is found") + rootCmd.Flags().StringVar(&cfg.Proxy, "proxy", "", "Route outbound HTTP through a proxy. Supported: http://host:port, https://host:port, socks5://host:port, socks5h://host:port (Tor). Basic auth via http://user:pass@host.") + rootCmd.Flags().DurationVar(&cfg.MonitorInterval, "monitor-interval", 0, "Run in continuous monitoring mode, re-scanning every N (e.g. 6h, 24h). 
Emits diffs.") + rootCmd.Flags().StringVar(&cfg.MonitorWebhook, "monitor-webhook", "", "Webhook URL to POST diff reports to in monitoring mode") + // Recursive discovery flags (enabled by default with --enable-ai) rootCmd.Flags().BoolVar(&cfg.Recursive, "recursive", false, "Enable recursive subdomain discovery with pattern learning") rootCmd.Flags().IntVar(&cfg.RecursiveDepth, "recursive-depth", 3, "Maximum recursion depth (1-5)") @@ -224,7 +305,288 @@ This data is used for instant, offline CVE lookups during scans.`, } rootCmd.AddCommand(dbInfoCmd) + // nuclei-update: force refresh of the auto-downloaded Nuclei template cache + nucleiUpdateCmd := &cobra.Command{ + Use: "nuclei-update", + Short: "Download / refresh Nuclei YAML templates cache", + Long: `Fetches the official projectdiscovery/nuclei-templates ZIP archive +and extracts every .yaml/.yml file into ~/.god-eye/nuclei-templates. + +Safe to re-run: existing templates are overwritten in-place. The cache +is ~40MB on disk and ships thousands of detections that the compat +layer executes when --nuclei is on.`, + Run: func(cmd *cobra.Command, args []string) { + home, err := os.UserHomeDir() + if err != nil { + fmt.Println(output.Red("[-]"), "cannot find home dir:", err) + os.Exit(1) + } + dest := home + "/.god-eye/nuclei-templates" + + fmt.Println(output.BoldCyan("πŸ“₯ Refreshing Nuclei templates…")) + fmt.Printf(" %s %s\n", output.Dim("destination:"), output.BoldWhite(dest)) + + // Pull up the downloader. Inline import to keep the subcommand + // lightweight when not invoked. + dl := nucleitpl.NewDownloader() + dl.Verbose = true + if err := dl.Refresh(dest); err != nil { + fmt.Println(output.Red("[-]"), "refresh failed:", err) + os.Exit(1) + } + fmt.Println(output.Green("βœ“ Nuclei templates refreshed.")) + }, + } + rootCmd.AddCommand(nucleiUpdateCmd) + if err := rootCmd.Execute(); err != nil { os.Exit(1) } } + +// runPipeline is the v2 entry point. 
Registers every adapter module, loads +// optional YAML + profile, and runs the event-driven pipeline under a +// signal-aware context. +func runPipeline(cfg config.Config) { + // Side-effect registration of all adapter modules (F0.6). + all.RegisterAll() + + // Load YAML config if present. --config wins over auto-discovery. + path := cfg.ConfigFile + if path == "" { + path = config.FindConfigFile() + } + if path != "" { + if y, err := config.LoadYAML(path); err != nil { + fmt.Println(output.Red("[-]"), "config:", err.Error()) + os.Exit(1) + } else if y != nil { + config.ApplyYAML(&cfg, y) + } + } + + // Apply named scan profile if set. + if cfg.Profile != "" { + p, ok := config.ProfileByName(cfg.Profile) + if !ok { + fmt.Println(output.Red("[-]"), "unknown profile:", cfg.Profile) + os.Exit(1) + } + config.ApplyProfile(&cfg, p) + if !cfg.Silent { + fmt.Printf("%s Profile %s applied: %s\n", output.Green("βœ“"), output.BoldCyan(p.Name), output.Dim(p.Description)) + } + } + + // Apply AI tier profile (lean/balanced/heavy). Respects explicit + // --ai-fast-model / --ai-deep-model overrides. + if cfg.AIProfile != "" { + p, ok := config.AIProfileByName(cfg.AIProfile) + if !ok { + fmt.Println(output.Red("[-]"), "unknown AI profile:", cfg.AIProfile, + "β€” valid: lean, balanced, heavy") + os.Exit(1) + } + overrideFast := rootCmdRef != nil && rootCmdRef.Flags().Changed("ai-fast-model") + overrideDeep := rootCmdRef != nil && rootCmdRef.Flags().Changed("ai-deep-model") + config.ApplyAIProfile(&cfg, p, overrideFast, overrideDeep) + if !cfg.Silent { + fmt.Printf("%s AI profile %s: %s\n", + output.Green("βœ“"), output.BoldCyan(p.Name), output.Dim(p.Description)) + fmt.Printf(" %s %s %s %s\n", + output.Dim("triage:"), output.BoldWhite(cfg.AIFastModel), + output.Dim("deep:"), output.BoldWhite(cfg.AIDeepModel)) + } + } + + // Handle Ctrl-C gracefully. Set this up BEFORE the model-ensure step + // so long downloads can be interrupted cleanly. 
+ ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) + go func() { + <-sigCh + fmt.Println() + fmt.Println(output.Yellow("⚠ Interrupted β€” shutting down...")) + cancel() + }() + + // Ensure Ollama models are present before scan starts. + if cfg.EnableAI && cfg.AutoPullModels { + if err := ensureAIModels(ctx, &cfg); err != nil { + if ctx.Err() == context.Canceled { + os.Exit(130) + } + fmt.Println(output.Red("[-]"), "AI setup:", err) + os.Exit(1) + } + } + + // Continuous monitoring mode: run the scan on an interval, diff and alert. + if cfg.MonitorInterval > 0 { + runMonitor(ctx, cfg) + return + } + + p, err := pipeline.New(&cfg, pipeline.Options{}) + if err != nil { + fmt.Println(output.Red("[-]"), err) + os.Exit(1) + } + + var live *tui.LivePrinter + if cfg.Live { + live = tui.NewLivePrinter(p.Bus(), cfg.LiveVerbosity) + } + + if err := p.Run(ctx); err != nil { + if ctx.Err() == context.Canceled { + if live != nil { + live.Close() + } + os.Exit(130) + } + fmt.Println(output.Red("[!]"), "pipeline error:", err) + os.Exit(1) + } + + if live != nil { + live.Close() + } +} + +// runMonitor implements the asm-continuous mode: a single pipeline.Run +// wrapped in scheduler.Scheduler that ticks at MonitorInterval, diffs +// against the previous snapshot, and alerts on meaningful changes. 
+func runMonitor(ctx context.Context, cfg config.Config) { + scan := func(scanCtx context.Context) ([]*store.Host, error) { + p, err := pipeline.New(&cfg, pipeline.Options{}) + if err != nil { + return nil, err + } + if err := p.Run(scanCtx); err != nil { + return nil, err + } + return p.Store().All(scanCtx), nil + } + + s := scheduler.New(cfg.Domain, cfg.MonitorInterval, scan) + s.AddAlerter(scheduler.StdoutAlerter{}) + if cfg.MonitorWebhook != "" { + s.AddAlerter(scheduler.NewWebhookAlerter(cfg.MonitorWebhook)) + } + + fmt.Printf("%s Monitoring %s every %s β€” Ctrl-C to stop\n", + output.BoldGreen("β–£"), output.BoldCyan(cfg.Domain), cfg.MonitorInterval) + + if err := s.Start(ctx); err != nil && !errorIs(err, context.Canceled) { + fmt.Println(output.Red("[!]"), "monitor error:", err) + os.Exit(1) + } +} + +// runWizard starts the interactive setup, then folds the user's choices +// back into cfg. Forces pipeline mode (wizard is v2-only by design). +func runWizard(cfg *config.Config) error { + choice, err := wizard.Run(context.Background(), wizard.Options{ + In: os.Stdin, + Out: os.Stdout, + OllamaURL: cfg.AIUrl, + }) + if err != nil { + return err + } + + cfg.Domain = validator.SanitizeDomain(choice.Target) + cfg.UsePipeline = true + cfg.Live = choice.Live + cfg.LiveVerbosity = choice.LiveVerbosity + cfg.Output = choice.Output + if choice.Format != "" { + cfg.Format = choice.Format + } + + // Scan profile name threads through --profile application (later). + if choice.ScanProfile != "" { + cfg.Profile = choice.ScanProfile + } + + // ASM-continuous interval translates into a duration flag. + if choice.MonitorInterval != "" { + d, parseErr := time.ParseDuration(choice.MonitorInterval) + if parseErr != nil { + return fmt.Errorf("invalid interval %q: %w", choice.MonitorInterval, parseErr) + } + cfg.MonitorInterval = d + } + + // AI tier. 
+ if choice.AIProfile != "" { + cfg.EnableAI = true + cfg.AIProfile = choice.AIProfile + cfg.AIVerbose = choice.AIVerbose + cfg.AutoPullModels = choice.AIAutoPull + } else { + cfg.EnableAI = false + } + + return nil +} + +// ensureAIModels checks the Ollama server and downloads any missing models. +// Prints progress when --ai-verbose is on. Fails open on unreachable +// Ollama β€” the AI module itself will no-op gracefully. +func ensureAIModels(ctx context.Context, cfg *config.Config) error { + e := ai.NewModelEnsurer(cfg.AIUrl) + e.Verbose = cfg.AIVerbose || cfg.Verbose + e.Writer = os.Stderr + + if err := e.Reachable(ctx); err != nil { + if !cfg.Silent { + fmt.Println(output.Yellow("⚠ "), err.Error()) + fmt.Println(output.Dim(" AI modules will no-op for this run. Start `ollama serve` to enable.")) + } + return nil + } + + models := []string{} + if cfg.AIFastModel != "" { + models = append(models, cfg.AIFastModel) + } + if cfg.AIDeepModel != "" && cfg.AIDeepModel != cfg.AIFastModel { + models = append(models, cfg.AIDeepModel) + } + if len(models) == 0 { + return nil + } + + if !cfg.Silent { + fmt.Printf("%s Checking Ollama models: %s\n", + output.BoldCyan("βš™"), output.Dim(fmt.Sprintf("%v", models))) + } + if err := e.EnsureAll(ctx, models); err != nil { + return err + } + if !cfg.Silent { + fmt.Printf("%s Models ready\n", output.Green("βœ“")) + } + return nil +} + +// errorIs is a thin wrapper for errors.Is that only pulls errors into +// main when needed. 
// errorIs reports whether target appears in err's chain of wrapped errors.
// It is a dependency-free stand-in for errors.Is: the chain is followed
// through single-error Unwrap methods only, and links are compared with ==.
// A nil err never matches, not even a nil target.
func errorIs(err, target error) bool {
	for cur := err; cur != nil; {
		if cur == target {
			return true
		}
		wrapped, ok := cur.(interface{ Unwrap() error })
		if !ok {
			break
		}
		cur = wrapped.Unwrap()
	}
	return false
}
// Package agent defines the Phase 3 AI agentic v2 interfaces: Planner,
// Worker, and Tool. Unlike the Phase 0.6 adapters that merely wrap v1 Ollama
// calls, a v2 Agent plans multi-step investigations and executes tools
// via the event bus.
//
// The Agent lifecycle:
//
//  1. Planner receives the target + existing store snapshot, produces a
//     Plan (ordered list of Tasks).
//  2. Each Task is dispatched to a Worker (specialized agent: XSS, auth,
//     API, crypto, secrets, etc.) with a Tool set.
//  3. Workers call Tools (dns_resolve, http_request, check_sqli_blind,
//     fetch_js, query_cve, ...) and reason over the results.
//  4. Results feed back into Plan revision; new Tasks may be scheduled.
//
// This file defines the contracts only. Implementations land incrementally;
// for now a Basic Planner delegates to the Phase 0.6 v1 Ollama wrapper,
// and a native tool-using implementation follows.
package agent

import (
	"context"
	"time"

	"god-eye/internal/eventbus"
	"god-eye/internal/store"
)

// Tool is a capability an agent can invoke. Tools should be idempotent
// where possible and must respect ctx cancellation.
type Tool interface {
	// Name is the machine identifier (e.g., "http_request", "dns_resolve").
	// Used in tool-call serialization for LLMs.
	Name() string

	// Description is a short human-readable blurb used in the LLM tool
	// descriptor. Keep it action-oriented: "fetch an HTTP URL and return
	// the response headers + first 2KB of body".
	Description() string

	// Schema returns the JSON-schema of the tool's argument object. Used
	// to build function-calling descriptors and to validate inputs.
	Schema() map[string]interface{}

	// Call invokes the tool with the given arguments. Returns a JSON-encoded
	// result (often just a text summary). Errors should be returned rather
	// than swallowed: the agent decides how to react.
	Call(ctx context.Context, args map[string]interface{}) (string, error)
}

// Task is a single unit of agent work.
type Task struct {
	ID          string
	Kind        string            // e.g. "investigate-xss", "audit-auth", "chain-finding"
	Description string            // natural-language goal the worker pursues
	Subject     string            // target URL / subdomain / evidence the task focuses on
	Context     map[string]string // additional hints for the worker
	CreatedAt   time.Time
}

// Plan is an ordered list of Tasks produced by the Planner.
type Plan struct {
	Target string
	Tasks  []Task
	Reason string // planner's rationale, logged for debugging
}

// Planner decides what to investigate next given the current store state.
type Planner interface {
	// Plan produces a new Plan. Called at the start of the analysis phase
	// and whenever enough new evidence accumulates to justify replanning.
	Plan(ctx context.Context, target string, storeSnap store.Store, bus *eventbus.Bus) (*Plan, error)

	// Name identifies the planner implementation for logs.
	Name() string
}

// Worker executes a single Task using a Toolset.
type Worker interface {
	// Name identifies the worker (usually its specialization, e.g. "xss",
	// "auth", "api", "crypto").
	Name() string

	// CanHandle reports whether the worker is a good fit for task. Workers
	// are consulted in priority order.
	CanHandle(task Task) bool

	// Execute carries out the task. The worker may call tools, update the
	// store via bus events (VulnerabilityFound, SecretFound, AIFinding),
	// and return a short natural-language summary for the planner.
	Execute(ctx context.Context, task Task, tools Toolset, bus *eventbus.Bus, st store.Store) (summary string, err error)
}

// Toolset is an indexed collection of Tools available to a worker, keyed by
// tool name. It is intentionally separate from Registry so workers receive a
// curated subset (e.g., a "crypto" worker gets oracle-style tools but not
// "send_slack").
type Toolset map[string]Tool

// Get returns the named tool, or nil if absent. It is safe to call on a nil
// Toolset, since reading a missing key from a nil map yields the zero value.
func (ts Toolset) Get(name string) Tool { return ts[name] }
Order is not guaranteed. +func (ts Toolset) Names() []string { + out := make([]string, 0, len(ts)) + for n := range ts { + out = append(out, n) + } + return out +} diff --git a/internal/agent/tools.go b/internal/agent/tools.go new file mode 100644 index 0000000..958e232 --- /dev/null +++ b/internal/agent/tools.go @@ -0,0 +1,141 @@ +package agent + +import ( + "context" + "crypto/tls" + "encoding/json" + "errors" + "io" + "net/http" + "time" + + godns "god-eye/internal/dns" +) + +// --- built-in tools ------------------------------------------------------- +// +// These tools cover the minimum needed for a planner to investigate +// discovered hosts without reinventing basic primitives. Fase 3 workers +// receive curated subsets via Toolset. + +// HTTPRequestTool fetches an arbitrary URL and returns status, headers, +// and (truncated) body. Maximum 64KB body returned. +type HTTPRequestTool struct { + Client *http.Client +} + +func NewHTTPRequestTool(timeoutSec int) *HTTPRequestTool { + return &HTTPRequestTool{ + Client: &http.Client{ + Timeout: time.Duration(timeoutSec) * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + }, + } +} + +func (t *HTTPRequestTool) Name() string { return "http_request" } +func (t *HTTPRequestTool) Description() string { return "Fetch an HTTP(S) URL and return status + headers + first 64KB of body." 
} + +func (t *HTTPRequestTool) Schema() map[string]interface{} { + return map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "url": map[string]interface{}{"type": "string"}, + "method": map[string]interface{}{"type": "string", "default": "GET"}, + "headers": map[string]interface{}{ + "type": "object", + "additionalProperties": map[string]interface{}{"type": "string"}, + }, + }, + "required": []string{"url"}, + } +} + +func (t *HTTPRequestTool) Call(ctx context.Context, args map[string]interface{}) (string, error) { + url, _ := args["url"].(string) + if url == "" { + return "", errors.New("url is required") + } + method, _ := args["method"].(string) + if method == "" { + method = "GET" + } + req, err := http.NewRequestWithContext(ctx, method, url, nil) + if err != nil { + return "", err + } + if hdrs, ok := args["headers"].(map[string]interface{}); ok { + for k, v := range hdrs { + if s, ok := v.(string); ok { + req.Header.Set(k, s) + } + } + } + req.Header.Set("User-Agent", "god-eye-v2-agent") + + resp, err := t.Client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, _ := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) + out := map[string]interface{}{ + "status_code": resp.StatusCode, + "headers": flattenHeaders(resp.Header), + "body": string(body), + } + b, _ := json.Marshal(out) + return string(b), nil +} + +// DNSResolveTool resolves a hostname to A/CNAME/PTR records. +type DNSResolveTool struct { + Resolvers []string + TimeoutSec int +} + +func NewDNSResolveTool(resolvers []string, timeoutSec int) *DNSResolveTool { + if len(resolvers) == 0 { + resolvers = []string{"8.8.8.8:53", "1.1.1.1:53"} + } + return &DNSResolveTool{Resolvers: resolvers, TimeoutSec: timeoutSec} +} + +func (t *DNSResolveTool) Name() string { return "dns_resolve" } +func (t *DNSResolveTool) Description() string { return "Resolve a hostname to A/CNAME/PTR records." 
} + +func (t *DNSResolveTool) Schema() map[string]interface{} { + return map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "hostname": map[string]interface{}{"type": "string"}, + }, + "required": []string{"hostname"}, + } +} + +func (t *DNSResolveTool) Call(_ context.Context, args map[string]interface{}) (string, error) { + name, _ := args["hostname"].(string) + if name == "" { + return "", errors.New("hostname is required") + } + ips := godns.ResolveSubdomain(name, t.Resolvers, t.TimeoutSec) + cname := godns.ResolveCNAME(name, t.Resolvers, t.TimeoutSec) + out := map[string]interface{}{"ips": ips, "cname": cname} + b, _ := json.Marshal(out) + return string(b), nil +} + +func flattenHeaders(h http.Header) map[string]string { + out := make(map[string]string, len(h)) + for k, vs := range h { + if len(vs) == 0 { + continue + } + out[k] = vs[0] + } + return out +} diff --git a/internal/ai/ensure.go b/internal/ai/ensure.go new file mode 100644 index 0000000..bdb65e1 --- /dev/null +++ b/internal/ai/ensure.go @@ -0,0 +1,275 @@ +package ai + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// ModelEnsurer verifies that a given list of Ollama models is present on +// the local server, and pulls any that are missing. Designed for the +// pre-scan warmup: God's Eye should not crash mid-scan because a model +// wasn't downloaded β€” EnsureAll fixes that before the pipeline starts. +type ModelEnsurer struct { + BaseURL string + Client *http.Client + Verbose bool + Writer io.Writer // where progress is printed; defaults to os.Stdout if nil +} + +// NewModelEnsurer constructs an ensurer against the given Ollama base URL +// (e.g. "http://localhost:11434"). The HTTP client has no timeout because +// a fresh pull of a 30B model can legitimately take 10+ minutes. 
+func NewModelEnsurer(baseURL string) *ModelEnsurer { + if baseURL == "" { + baseURL = "http://localhost:11434" + } + return &ModelEnsurer{ + BaseURL: strings.TrimRight(baseURL, "/"), + Client: &http.Client{Timeout: 0}, + } +} + +// Installed returns the set of model tags currently available on the +// Ollama server, keyed by the full name (e.g. "qwen3:1.7b"). +func (e *ModelEnsurer) Installed(ctx context.Context) (map[string]bool, error) { + req, err := http.NewRequestWithContext(ctx, "GET", e.BaseURL+"/api/tags", nil) + if err != nil { + return nil, err + } + c := &http.Client{Timeout: 10 * time.Second} + resp, err := c.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return nil, fmt.Errorf("ollama /api/tags returned %d", resp.StatusCode) + } + + var body struct { + Models []struct { + Name string `json:"name"` + } `json:"models"` + } + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + return nil, err + } + out := make(map[string]bool, len(body.Models)) + for _, m := range body.Models { + out[m.Name] = true + } + return out, nil +} + +// Pull streams a model pull from Ollama, printing progress lines when +// Verbose is true. Uses POST /api/pull with stream=true; each JSON line +// reports status + optional {total, completed} for byte-level progress. 
+func (e *ModelEnsurer) Pull(ctx context.Context, model string) error { + payload := map[string]interface{}{"name": model, "stream": true} + body, _ := json.Marshal(payload) + req, err := http.NewRequestWithContext(ctx, "POST", e.BaseURL+"/api/pull", bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + resp, err := e.Client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return fmt.Errorf("ollama /api/pull returned %d: %s", resp.StatusCode, strings.TrimSpace(string(b))) + } + + scanner := bufio.NewScanner(resp.Body) + // Progress events can be large; bump the scanner buffer. + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + + var lastStatus string + var lastPct int + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + continue + } + var ev struct { + Status string `json:"status"` + Digest string `json:"digest,omitempty"` + Total int64 `json:"total,omitempty"` + Completed int64 `json:"completed,omitempty"` + Error string `json:"error,omitempty"` + } + if err := json.Unmarshal(line, &ev); err != nil { + continue + } + if ev.Error != "" { + return fmt.Errorf("pull %s: %s", model, ev.Error) + } + if !e.Verbose { + continue + } + w := e.writer() + if ev.Total > 0 && ev.Completed > 0 { + pct := int(float64(ev.Completed) / float64(ev.Total) * 100) + // Throttle: new status line always; otherwise only print when + // the percentage has moved β‰₯5 points since the last emission + // (or reaches a final 100% for this status exactly once). 
+ switch { + case ev.Status != lastStatus: + fmt.Fprintf(w, " %-24s %3d%% %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total)) + lastStatus = ev.Status + lastPct = pct + case pct >= lastPct+5 && pct < 100: + fmt.Fprintf(w, " %-24s %3d%% %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total)) + lastPct = pct + case pct == 100 && lastPct < 100: + fmt.Fprintf(w, " %-24s %3d%% %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total)) + lastPct = 100 + } + } else if ev.Status != lastStatus { + fmt.Fprintf(w, " %s\n", ev.Status) + lastStatus = ev.Status + lastPct = 0 + } + } + return scanner.Err() +} + +// EnsureAll checks every name in models. For each missing one it calls Pull. +// Already-present models are skipped. Returns on the first error. +// +// Name matching is generous: Ollama sometimes tags models as "qwen3:1.7b" +// and sometimes as "qwen3:1.7b-instruct-fp16", so we accept exact match, +// a ":latest" variant, or the bare model name with no tag. 
+func (e *ModelEnsurer) EnsureAll(ctx context.Context, models []string) error { + installed, err := e.Installed(ctx) + if err != nil { + return fmt.Errorf("query ollama: %w", err) + } + + unique := dedup(models) + missing := []string{} + for _, m := range unique { + if alreadyInstalled(installed, m) { + if e.Verbose { + fmt.Fprintf(e.writer(), "βœ“ %s already installed\n", m) + } + continue + } + missing = append(missing, m) + } + + if len(missing) == 0 { + return nil + } + + if e.Verbose { + fmt.Fprintf(e.writer(), "↓ Pulling %d missing model(s): %s\n", len(missing), strings.Join(missing, ", ")) + } + for _, m := range missing { + if err := ctx.Err(); err != nil { + return err + } + if e.Verbose { + fmt.Fprintf(e.writer(), "↓ %s\n", m) + } + if err := e.Pull(ctx, m); err != nil { + return fmt.Errorf("pull %s: %w", m, err) + } + if e.Verbose { + fmt.Fprintf(e.writer(), "βœ“ %s ready\n", m) + } + } + return nil +} + +// Reachable reports whether the Ollama server answers /api/tags. Callers +// should check this before EnsureAll to surface a friendly message. +func (e *ModelEnsurer) Reachable(ctx context.Context) error { + c := &http.Client{Timeout: 3 * time.Second} + req, err := http.NewRequestWithContext(ctx, "GET", e.BaseURL+"/api/tags", nil) + if err != nil { + return err + } + resp, err := c.Do(req) + if err != nil { + return errors.New("ollama not reachable at " + e.BaseURL + " (is `ollama serve` running?)") + } + resp.Body.Close() + if resp.StatusCode != 200 { + return fmt.Errorf("ollama at %s returned %d", e.BaseURL, resp.StatusCode) + } + return nil +} + +func (e *ModelEnsurer) writer() io.Writer { + if e.Writer != nil { + return e.Writer + } + return stdout +} + +var stdout io.Writer // populated by main via SetStdout; nil writer would fmt-print to os.Stdout + +// SetStdout installs the writer used when ModelEnsurer.Writer is nil. main.go +// sets this to os.Stdout; tests can set it to a bytes.Buffer. 
+func SetStdout(w io.Writer) { stdout = w } + +func alreadyInstalled(installed map[string]bool, model string) bool { + if installed[model] { + return true + } + if installed[model+":latest"] { + return true + } + if strings.Contains(model, ":") { + base := strings.SplitN(model, ":", 2)[0] + if installed[base] || installed[base+":latest"] { + return true + } + } + return false +} + +func dedup(ss []string) []string { + seen := make(map[string]struct{}, len(ss)) + out := make([]string, 0, len(ss)) + for _, s := range ss { + s = strings.TrimSpace(s) + if s == "" { + continue + } + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + return out +} + +func humanBytes(n int64) string { + const k = 1024.0 + if n < int64(k) { + return fmt.Sprintf("%dB", n) + } + units := []string{"KB", "MB", "GB", "TB"} + v := float64(n) / k + for _, u := range units { + if v < k { + return fmt.Sprintf("%.1f%s", v, u) + } + v /= k + } + return fmt.Sprintf("%.1fPB", v) +} diff --git a/internal/ai/ensure_test.go b/internal/ai/ensure_test.go new file mode 100644 index 0000000..5b4329e --- /dev/null +++ b/internal/ai/ensure_test.go @@ -0,0 +1,214 @@ +package ai + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestAlreadyInstalled(t *testing.T) { + installed := map[string]bool{ + "qwen3:1.7b": true, + "qwen2.5-coder:14b": true, + "custom-model:latest": true, + } + cases := []struct { + model string + want bool + }{ + {"qwen3:1.7b", true}, + {"qwen2.5-coder:14b", true}, + {"custom-model", true}, // via :latest fallback + {"llama3:8b", false}, + {"qwen3", false}, // bare name: only matches when ":latest" is installed (it isn't) + } + for _, c := range cases { + if got := alreadyInstalled(installed, c.model); got != c.want { + t.Errorf("alreadyInstalled(%q) = %v, want %v", c.model, got, c.want) + } + } +} + +func TestDedup(t *testing.T) { + got := dedup([]string{"a", "b", "a", "", "c", 
" b "}) + want := []string{"a", "b", "c"} + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("index %d: got %q want %q", i, got[i], want[i]) + } + } +} + +func TestHumanBytes(t *testing.T) { + cases := []struct { + in int64 + want string + }{ + {0, "0B"}, + {512, "512B"}, + {1024, "1.0KB"}, + {1024 * 1024, "1.0MB"}, + {1024 * 1024 * 1024, "1.0GB"}, + {int64(2.5 * 1024 * 1024 * 1024), "2.5GB"}, + } + for _, c := range cases { + if got := humanBytes(c.in); got != c.want { + t.Errorf("humanBytes(%d) = %q, want %q", c.in, got, c.want) + } + } +} + +func TestInstalled_ParsesTagsResponse(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/tags" { + http.NotFound(w, r) + return + } + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "models": []map[string]string{ + {"name": "qwen3:1.7b"}, + {"name": "qwen2.5-coder:14b"}, + }, + }) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + got, err := e.Installed(context.Background()) + if err != nil { + t.Fatal(err) + } + if !got["qwen3:1.7b"] || !got["qwen2.5-coder:14b"] { + t.Errorf("missing expected models: %v", got) + } +} + +func TestInstalled_Non200ReturnsError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "nope", http.StatusInternalServerError) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + if _, err := e.Installed(context.Background()); err == nil { + t.Error("expected error on non-200") + } +} + +func TestReachable(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(`{"models":[]}`)) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + if err := e.Reachable(context.Background()); err != nil { + t.Errorf("expected reachable, got %v", err) + } +} + +func 
TestReachable_Unreachable(t *testing.T) { + e := NewModelEnsurer("http://127.0.0.1:1") // nothing listens here + if err := e.Reachable(context.Background()); err == nil { + t.Error("expected unreachable error") + } +} + +func TestPull_StreamsProgress(t *testing.T) { + // Fake Ollama that emits a few NDJSON status events. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/pull" { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/x-ndjson") + events := []string{ + `{"status":"pulling manifest"}`, + `{"status":"downloading","digest":"sha256:abc","total":1048576,"completed":524288}`, + `{"status":"downloading","digest":"sha256:abc","total":1048576,"completed":1048576}`, + `{"status":"verifying sha256 digest"}`, + `{"status":"writing manifest"}`, + `{"status":"success"}`, + } + for _, e := range events { + w.Write([]byte(e + "\n")) + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + } + } + })) + defer srv.Close() + + buf := &bytes.Buffer{} + e := NewModelEnsurer(srv.URL) + e.Verbose = true + e.Writer = buf + + if err := e.Pull(context.Background(), "fake:1b"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + out := buf.String() + if !strings.Contains(out, "pulling manifest") { + t.Errorf("missing 'pulling manifest' in output: %q", out) + } + if !strings.Contains(out, "success") { + t.Errorf("missing 'success' in output: %q", out) + } +} + +func TestPull_ErrorBubblesUp(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(`{"error":"model not found"}` + "\n")) + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + err := e.Pull(context.Background(), "nonexistent") + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "model not found") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestEnsureAll_SkipsInstalled_PullsMissing(t 
*testing.T) { + pullCalls := map[string]int{} + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/tags": + _ = json.NewEncoder(w).Encode(map[string]interface{}{ + "models": []map[string]string{{"name": "already-here:1b"}}, + }) + case "/api/pull": + var body struct { + Name string `json:"name"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + pullCalls[body.Name]++ + w.Write([]byte(`{"status":"success"}` + "\n")) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + e := NewModelEnsurer(srv.URL) + if err := e.EnsureAll(context.Background(), []string{"already-here:1b", "missing-a:7b", "missing-b:14b"}); err != nil { + t.Fatal(err) + } + if pullCalls["already-here:1b"] > 0 { + t.Errorf("should not have pulled already-here") + } + if pullCalls["missing-a:7b"] != 1 || pullCalls["missing-b:14b"] != 1 { + t.Errorf("missing models not pulled correctly: %v", pullCalls) + } +} diff --git a/internal/ai/ollama.go b/internal/ai/ollama.go index a472cff..7587419 100644 --- a/internal/ai/ollama.go +++ b/internal/ai/ollama.go @@ -4,7 +4,9 @@ import ( "bytes" "encoding/json" "fmt" + "io" "net/http" + "os" "strings" "time" ) @@ -12,10 +14,27 @@ import ( // OllamaClient handles communication with local Ollama instance type OllamaClient struct { BaseURL string - FastModel string // deepseek-r1:1.5b for quick triage - DeepModel string // qwen2.5-coder:7b for deep analysis + FastModel string // qwen3:1.7b for quick triage (lean default) + DeepModel string // qwen2.5-coder:14b for deep analysis (lean default) Timeout time.Duration EnableCascade bool + + // Verbose controls whether every query is logged with timing + sizes. + // Writes to VerboseLogger or stderr when nil. Toggle via --ai-verbose. + Verbose bool + VerboseLogger io.Writer +} + +// logVerbose writes a single line to the verbose logger when Verbose is on. 
+func (c *OllamaClient) logVerbose(format string, args ...interface{}) { + if !c.Verbose { + return + } + w := c.VerboseLogger + if w == nil { + w = os.Stderr + } + fmt.Fprintf(w, "[ai] "+format+"\n", args...) } // OllamaRequest represents the request payload for Ollama API @@ -51,10 +70,10 @@ func NewOllamaClient(baseURL, fastModel, deepModel string, enableCascade bool) * baseURL = "http://localhost:11434" } if fastModel == "" { - fastModel = "deepseek-r1:1.5b" + fastModel = "qwen3:1.7b" } if deepModel == "" { - deepModel = "qwen2.5-coder:7b" + deepModel = "qwen2.5-coder:14b" } return &OllamaClient{ @@ -351,6 +370,9 @@ Output only the REAL secrets in their original [Type] format, one per line. If n // query sends a request to Ollama API func (c *OllamaClient) query(model, prompt string, timeout time.Duration) (string, error) { + start := time.Now() + c.logVerbose("β†’ %s prompt=%dB timeout=%s", model, len(prompt), timeout) + reqBody := OllamaRequest{ Model: model, Prompt: prompt, @@ -373,20 +395,25 @@ func (c *OllamaClient) query(model, prompt string, timeout time.Duration) (strin bytes.NewBuffer(jsonData), ) if err != nil { + c.logVerbose("✘ %s %s error=%v", model, time.Since(start).Round(time.Millisecond), err) return "", fmt.Errorf("ollama request failed: %v", err) } defer resp.Body.Close() if resp.StatusCode != 200 { + c.logVerbose("✘ %s status=%d %s", model, resp.StatusCode, time.Since(start).Round(time.Millisecond)) return "", fmt.Errorf("ollama returned status %d", resp.StatusCode) } var ollamaResp OllamaResponse if err := json.NewDecoder(resp.Body).Decode(&ollamaResp); err != nil { + c.logVerbose("✘ %s decode error=%v", model, err) return "", fmt.Errorf("failed to decode response: %v", err) } - return strings.TrimSpace(ollamaResp.Response), nil + out := strings.TrimSpace(ollamaResp.Response) + c.logVerbose("← %s response=%dB %s", model, len(out), time.Since(start).Round(time.Millisecond)) + return out, nil } // parseFindings extracts findings by severity 
from AI response diff --git a/internal/config/ai_profile.go b/internal/config/ai_profile.go new file mode 100644 index 0000000..c951773 --- /dev/null +++ b/internal/config/ai_profile.go @@ -0,0 +1,101 @@ +package config + +// AIProfile bundles the triage + deep models for a named AI tier. Unlike +// the scan-level Profile (bugbounty/pentest/…), an AIProfile only touches +// model selection β€” it doesn't flip stealth, recursion, or module enables. +type AIProfile struct { + Name string + Description string + FastModel string + DeepModel string + // MinRAMGB is an advisory (not enforced) hint about the memory footprint + // of both models loaded simultaneously. Printed in the profile help + // banner so users can pick the right tier for their machine. + MinRAMGB int +} + +// Built-in AI profiles. The lean tier matches the repository defaults so +// `--ai-profile lean` is always equivalent to "use whatever the defaults +// say". balanced and heavy upgrade deep model to Qwen3-Coder MoE which +// activates only 3.3B parameters per token despite its 30B total. +var ( + AIProfileLean = AIProfile{ + Name: "lean", + Description: "Runs on 16GB RAM; default. qwen3:1.7b triage + qwen2.5-coder:14b deep.", + FastModel: "qwen3:1.7b", + DeepModel: "qwen2.5-coder:14b", + MinRAMGB: 16, + } + + AIProfileBalanced = AIProfile{ + Name: "balanced", + Description: "32GB RAM / 24GB VRAM. Upgrades deep to qwen3-coder:30b MoE (3.3B active, 256K ctx).", + FastModel: "qwen3:4b", + DeepModel: "qwen3-coder:30b", + MinRAMGB: 32, + } + + AIProfileHeavy = AIProfile{ + Name: "heavy", + Description: "64GB+ RAM. Best-quality triage + deep. Slowest; ideal for final analysis passes.", + FastModel: "qwen3:8b", + DeepModel: "qwen3-coder:30b", + MinRAMGB: 64, + } +) + +// BuiltinAIProfiles lists every AIProfile in CLI help order. +var BuiltinAIProfiles = []AIProfile{ + AIProfileLean, + AIProfileBalanced, + AIProfileHeavy, +} + +// AIProfileByName resolves a named profile. 
Lookup is case-insensitive +// and tolerates the common alias "max" β†’ heavy. +func AIProfileByName(name string) (AIProfile, bool) { + switch normaliseAIProfileName(name) { + case "lean": + return AIProfileLean, true + case "balanced", "balance", "mid": + return AIProfileBalanced, true + case "heavy", "max", "power": + return AIProfileHeavy, true + } + return AIProfile{}, false +} + +func normaliseAIProfileName(s string) string { + out := make([]byte, 0, len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + if c == ' ' || c == '_' || c == '-' { + continue + } + out = append(out, c) + } + return string(out) +} + +// ApplyAIProfile merges p's models into cfg. If cfg.AIFastModel / +// cfg.AIDeepModel were explicitly set by the user (overrideFast / +// overrideDeep true) the profile is ignored for that field. The caller +// is responsible for detecting explicit flags; in practice this comes +// from cobra's cmd.Flags().Changed("ai-fast-model"). 
+func ApplyAIProfile(cfg *Config, p AIProfile, overrideFast, overrideDeep bool) { + if cfg == nil { + return + } + if !overrideFast && p.FastModel != "" { + cfg.AIFastModel = p.FastModel + } + if !overrideDeep && p.DeepModel != "" { + cfg.AIDeepModel = p.DeepModel + } + if cfg.AIProfile == "" { + cfg.AIProfile = p.Name + } +} diff --git a/internal/config/ai_profile_test.go b/internal/config/ai_profile_test.go new file mode 100644 index 0000000..eb47003 --- /dev/null +++ b/internal/config/ai_profile_test.go @@ -0,0 +1,98 @@ +package config + +import "testing" + +func TestAIProfileByName(t *testing.T) { + cases := []struct { + in string + wantOK bool + wantTag string + }{ + {"lean", true, "qwen3:1.7b"}, + {"LEAN", true, "qwen3:1.7b"}, + {"balanced", true, "qwen3:4b"}, + {"balance", true, "qwen3:4b"}, + {"mid", true, "qwen3:4b"}, + {"heavy", true, "qwen3:8b"}, + {"max", true, "qwen3:8b"}, + {"power", true, "qwen3:8b"}, + {"Heavy", true, "qwen3:8b"}, + {"nope", false, ""}, + {"", false, ""}, + } + for _, c := range cases { + p, ok := AIProfileByName(c.in) + if ok != c.wantOK { + t.Errorf("AIProfileByName(%q) ok = %v, want %v", c.in, ok, c.wantOK) + continue + } + if ok && p.FastModel != c.wantTag { + t.Errorf("AIProfileByName(%q).FastModel = %q, want %q", c.in, p.FastModel, c.wantTag) + } + } +} + +func TestBuiltinAIProfiles_Unique(t *testing.T) { + names := map[string]bool{} + for _, p := range BuiltinAIProfiles { + if p.Name == "" { + t.Error("profile with empty name") + } + if p.FastModel == "" || p.DeepModel == "" { + t.Errorf("profile %q missing models", p.Name) + } + if p.Description == "" { + t.Errorf("profile %q missing description", p.Name) + } + if names[p.Name] { + t.Errorf("duplicate profile name: %q", p.Name) + } + names[p.Name] = true + } +} + +func TestApplyAIProfile_RespectsOverrides(t *testing.T) { + cfg := &Config{ + AIFastModel: "user-chose-this:1b", + AIDeepModel: "user-chose-that:7b", + } + ApplyAIProfile(cfg, AIProfileHeavy, true, true) + if 
cfg.AIFastModel != "user-chose-this:1b" { + t.Errorf("overrideFast was ignored: %q", cfg.AIFastModel) + } + if cfg.AIDeepModel != "user-chose-that:7b" { + t.Errorf("overrideDeep was ignored: %q", cfg.AIDeepModel) + } + if cfg.AIProfile != "heavy" { + t.Errorf("AIProfile not set to heavy, got %q", cfg.AIProfile) + } +} + +func TestApplyAIProfile_FillsUnsetFields(t *testing.T) { + cfg := &Config{} + ApplyAIProfile(cfg, AIProfileBalanced, false, false) + if cfg.AIFastModel != "qwen3:4b" { + t.Errorf("FastModel not applied: %q", cfg.AIFastModel) + } + if cfg.AIDeepModel != "qwen3-coder:30b" { + t.Errorf("DeepModel not applied: %q", cfg.AIDeepModel) + } + if cfg.AIProfile != "balanced" { + t.Errorf("AIProfile not set: %q", cfg.AIProfile) + } +} + +func TestApplyAIProfile_NilConfigNoop(t *testing.T) { + ApplyAIProfile(nil, AIProfileLean, false, false) // must not panic +} + +func TestApplyAIProfile_PartialOverride(t *testing.T) { + cfg := &Config{AIFastModel: "custom:1b"} + ApplyAIProfile(cfg, AIProfileHeavy, true, false) + if cfg.AIFastModel != "custom:1b" { + t.Errorf("FastModel overridden: %q", cfg.AIFastModel) + } + if cfg.AIDeepModel != "qwen3-coder:30b" { + t.Errorf("DeepModel not applied: %q", cfg.AIDeepModel) + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 4962716..30524b0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -49,6 +49,80 @@ type Config struct { NoTechScan bool // Disable tech scan (override when --enable-ai) NoASNScan bool // Disable ASN scan (override when --enable-ai) NoVHostScan bool // Disable vhost scan (override when --enable-ai) + + // v2: profile + per-module overrides loaded from config file or CLI. + // Profile is the named profile to apply before CLI flags. Empty = none. + Profile string + + // ConfigFile is the path to an optional YAML config file. Empty = search + // standard locations, then fall through to CLI defaults + profile only. 
+ ConfigFile string + + // ModuleSettings is a flat map of module-name β†’ enabled. Populated from + // YAML ("modules:" section) and CLI (--enable/--disable flags if added). + // Consumed by ConfigView.ModuleEnabled. Empty means "honor each module's + // DefaultEnabled()". + ModuleSettings map[string]bool + + // UsePipeline opts into the v2 event-driven pipeline. When false (default + // during F0.6 migration) the legacy scanner.Run is used. Once F0.7 + // parity is verified this becomes true by default. + UsePipeline bool + + // Live toggles the Fase 4 LivePrinter that streams colorized scan + // events to the terminal alongside (or instead of) the final report. + Live bool + // LiveVerbosity controls how much the LivePrinter prints (0..2). + LiveVerbosity int + + // MonitorInterval, when > 0, switches the CLI into asm-continuous mode: + // the scan runs on this interval and diffs against the previous + // snapshot, firing Webhook/Stdout alerts on meaningful changes. + MonitorInterval time.Duration + // MonitorWebhook is a POST target for diff reports in monitor mode. + MonitorWebhook string + + // AIProfile is the named AI tier (lean/balanced/heavy). When set, it + // applies FastModel+DeepModel defaults before CLI overrides kick in. + // Empty string = use whatever AIFastModel/AIDeepModel resolve to via + // CLI flags + YAML. + AIProfile string + + // AIVerbose toggles detailed logging of every Ollama query: model, + // prompt size, response size, duration, triage decisions. Writes to + // stderr so stdout (JSON / silent modes) stays clean. + AIVerbose bool + + // AutoPullModels controls whether god-eye auto-downloads missing + // Ollama models at startup when --enable-ai is set. Defaults to true + // β€” flip to false if you want scan failures instead of silent pulls. + AutoPullModels bool + + // Wizard forces the interactive setup flow even when -d is present, + // so users can preview/tweak defaults. 
When -d is absent and stdin + // is a TTY, the wizard auto-starts without this flag. + Wizard bool + + // NucleiScan opts into the Nuclei-format template executor. Templates + // are loaded from NucleiTemplates (or ~/nuclei-templates as fallback, + // with auto-download of the official ZIP into ~/.god-eye/nuclei-templates + // when NucleiAutoDownload is true and no local dir is present). + NucleiScan bool + // NucleiTemplates is an optional override for the template directory. + NucleiTemplates string + // NucleiAutoDownload controls whether god-eye auto-fetches the + // official nuclei-templates ZIP on first use. Defaults to true. + NucleiAutoDownload bool + + // Proxy routes every outbound HTTP request (passive sources, probes, + // Nuclei, Ollama-if-remote) through the given URL. Supports: + // http://host:port - HTTP CONNECT proxy (Burp, ZAP, mitmproxy) + // https://host:port - HTTPS CONNECT proxy + // socks5://host:port - SOCKS5 with local DNS + // socks5h://host:port - SOCKS5 with proxy-side DNS (Tor convention) + // Basic auth is honoured: http://user:pass@host. + // Empty = no proxy (direct). 
+ Proxy string } // Stats holds scan statistics diff --git a/internal/config/config_test.go b/internal/config/config_test.go new file mode 100644 index 0000000..9a03d0c --- /dev/null +++ b/internal/config/config_test.go @@ -0,0 +1,102 @@ +package config + +import ( + "encoding/json" + "testing" +) + +func TestDefaultResolversNonEmpty(t *testing.T) { + if len(DefaultResolvers) == 0 { + t.Fatal("DefaultResolvers is empty") + } + for _, r := range DefaultResolvers { + if r == "" { + t.Errorf("empty resolver in DefaultResolvers") + } + } +} + +func TestDefaultWordlistNonEmpty(t *testing.T) { + if len(DefaultWordlist) < 50 { + t.Errorf("DefaultWordlist too small: %d entries", len(DefaultWordlist)) + } + seen := make(map[string]bool) + for _, w := range DefaultWordlist { + if w == "" { + t.Error("empty entry in DefaultWordlist") + } + // Note: v1 wordlist contains "smtp" and "staging" twice β€” that's a bug + // but not something we fix in baseline tests. Just verify no ALL duplicates. + seen[w] = true + } + if len(seen) < 50 { + t.Errorf("too many duplicates: %d unique out of %d", len(seen), len(DefaultWordlist)) + } +} + +func TestSubdomainResult_JSONRoundtrip(t *testing.T) { + orig := &SubdomainResult{ + Subdomain: "api.example.com", + IPs: []string{"1.2.3.4"}, + CNAME: "cname.example.com", + StatusCode: 200, + Title: "API", + Tech: []string{"nginx", "Go"}, + CloudProvider: "AWS", + TLSFingerprint: &TLSFingerprint{ + Vendor: "Fortinet", + Product: "FortiGate", + ApplianceType: "firewall", + }, + } + + data, err := json.Marshal(orig) + if err != nil { + t.Fatalf("marshal failed: %v", err) + } + + var decoded SubdomainResult + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("unmarshal failed: %v", err) + } + + if decoded.Subdomain != orig.Subdomain { + t.Errorf("Subdomain mismatch: got %q want %q", decoded.Subdomain, orig.Subdomain) + } + if len(decoded.IPs) != 1 || decoded.IPs[0] != "1.2.3.4" { + t.Errorf("IPs mismatch: got %v", decoded.IPs) + } + if 
decoded.TLSFingerprint == nil { + t.Fatal("TLSFingerprint is nil after roundtrip") + } + if decoded.TLSFingerprint.Vendor != "Fortinet" { + t.Errorf("TLSFingerprint.Vendor = %q, want Fortinet", decoded.TLSFingerprint.Vendor) + } +} + +func TestSubdomainResult_OmitemptyMinimal(t *testing.T) { + // Ensure zero-value struct produces a minimal JSON (only subdomain field would be present if set). + empty := &SubdomainResult{} + data, err := json.Marshal(empty) + if err != nil { + t.Fatal(err) + } + // Only the required "subdomain" field (empty string) should appear β€” every other is omitempty. + expected := `{"subdomain":""}` + if string(data) != expected { + t.Errorf("empty struct JSON = %s, want %s", string(data), expected) + } +} + +func TestConfigZeroValue(t *testing.T) { + var c Config + if c.Domain != "" { + t.Errorf("default Domain should be empty, got %q", c.Domain) + } + if c.EnableAI { + t.Error("EnableAI should default to false") + } + if c.Concurrency != 0 { + t.Error("Concurrency should default to 0 (overridden by CLI default)") + } +} diff --git a/internal/config/profile.go b/internal/config/profile.go new file mode 100644 index 0000000..4d8dca9 --- /dev/null +++ b/internal/config/profile.go @@ -0,0 +1,208 @@ +package config + +// Profile is a named bundle of defaults that tailors God's Eye for a specific +// use case. Profiles set module enable/disable, concurrency hints, stealth, +// and whether AI is on. CLI flags still override profile defaults. 
type Profile struct {
	// Name is the identifier matched (exactly, case-sensitively) by
	// ProfileByName; Description is the human-readable summary.
	Name        string
	Description string

	// Core tuning. ApplyProfile copies Concurrency/Timeout into the config
	// only while the config still holds zero or the cobra default
	// (1000 / 5), and Stealth only while StealthMode is empty.
	// NOTE(review): Timeout is presumably in seconds; confirm against the
	// CLI flag definition.
	Concurrency int
	Timeout     int
	Stealth     string // off, light, moderate, aggressive, paranoid

	// Feature toggles (nil means "use module default"). ApplyProfile copies
	// a non-nil value only while the matching config field is still false,
	// so a profile can switch these on but never clears a flag that is
	// already set.
	AI         *bool
	MultiAgent *bool
	Recursive  *bool
	NoBrute    *bool
	NoProbe    *bool
	NoPorts    *bool
	NoTakeover *bool

	// Advanced feature flags (nil = use module default). These are merged
	// through applyPtrBool against the config's enable/disable flag pair.
	CloudScan   *bool
	APIScan     *bool
	SecretsScan *bool
	TechScan    *bool
	ASNScan     *bool
	VHostScan   *bool

	// Per-module overrides (explicit enable/disable). Entries already
	// present in Config.ModuleSettings are not overwritten by ApplyProfile.
	Modules map[string]bool
}

// ProfileBugBounty is tuned for bug-bounty recon: broad discovery, AI on,
// secrets+tech+cloud scanning on, stealth off (speed matters).
var ProfileBugBounty = Profile{
	Name:        "bugbounty",
	Description: "Aggressive recon for bug-bounty: broad discovery, AI on, secrets/cloud/API/tech scanning, stealth off.",
	Concurrency: 1000,
	Timeout:     5,
	Stealth:     "off",
	AI:          ptrTrue(),
	MultiAgent:  ptrTrue(),
	Recursive:   ptrTrue(),
	CloudScan:   ptrTrue(),
	APIScan:     ptrTrue(),
	SecretsScan: ptrTrue(),
	TechScan:    ptrTrue(),
	ASNScan:     ptrTrue(),
	VHostScan:   ptrTrue(),
}

// ProfilePentest is tuned for authorized penetration tests: stealth light,
// full enrichment, AI on for deeper analysis.
var ProfilePentest = Profile{
	Name:        "pentest",
	Description: "Authorized pentest: full enrichment with light stealth to avoid basic rate limits.",
	Concurrency: 300,
	Timeout:     10,
	Stealth:     "light",
	AI:          ptrTrue(),
	MultiAgent:  ptrTrue(),
	Recursive:   ptrTrue(),
	CloudScan:   ptrTrue(),
	APIScan:     ptrTrue(),
	SecretsScan: ptrTrue(),
	TechScan:    ptrTrue(),
	ASNScan:     ptrTrue(),
	VHostScan:   ptrTrue(),
}

// ProfileASMContinuous is tuned for attack-surface monitoring: reduced depth
// per run, designed to be re-run periodically with diff engine (Phase 5).
// Stealth moderate to stay below detection thresholds when running daily.
+var ProfileASMContinuous = Profile{ + Name: "asm-continuous", + Description: "Continuous attack-surface monitoring; runs cheaper than full recon, feeds diff engine.", + Concurrency: 200, + Timeout: 10, + Stealth: "moderate", + AI: ptrFalse(), // AI only on findings that change, not full re-analysis + Recursive: ptrFalse(), // rely on diff to grow surface over time + CloudScan: ptrTrue(), + TechScan: ptrTrue(), + SecretsScan: ptrTrue(), +} + +// ProfileStealthMax is for highly sensitive targets where any detection is +// unacceptable. Very slow; passive-first. +var ProfileStealthMax = Profile{ + Name: "stealth-max", + Description: "Maximum evasion. Passive-only by default, slow request cadence.", + Concurrency: 3, + Timeout: 20, + Stealth: "paranoid", + NoBrute: ptrTrue(), + NoPorts: ptrTrue(), + AI: ptrFalse(), + TechScan: ptrTrue(), +} + +// ProfileQuick is for triage: skip expensive phases, produce a fast answer. +var ProfileQuick = Profile{ + Name: "quick", + Description: "Fast triage: passive enum + HTTP probe, no brute/JS/AI.", + Concurrency: 500, + Timeout: 5, + Stealth: "off", + NoBrute: ptrTrue(), + AI: ptrFalse(), +} + +// BuiltinProfiles lists every named profile that ships with the tool, in a +// stable order for docs/help output. +var BuiltinProfiles = []Profile{ + ProfileBugBounty, + ProfilePentest, + ProfileASMContinuous, + ProfileStealthMax, + ProfileQuick, +} + +// ProfileByName returns the named profile, or ok=false when not found. +func ProfileByName(name string) (Profile, bool) { + for _, p := range BuiltinProfiles { + if p.Name == name { + return p, true + } + } + return Profile{}, false +} + +// ApplyProfile merges a profile into cfg. Existing non-zero values in cfg +// take precedence (CLI flags win over profile defaults). Pointer-typed +// profile fields are applied only when they are non-nil. 
+func ApplyProfile(cfg *Config, p Profile) { + if cfg == nil { + return + } + + if cfg.Concurrency == 0 || cfg.Concurrency == 1000 { // 1000 is the cobra default + cfg.Concurrency = p.Concurrency + } + if cfg.Timeout == 0 || cfg.Timeout == 5 { // 5 is cobra default + cfg.Timeout = p.Timeout + } + if cfg.StealthMode == "" { + cfg.StealthMode = p.Stealth + } + + if p.AI != nil && !cfg.EnableAI { + cfg.EnableAI = *p.AI + } + if p.MultiAgent != nil && !cfg.MultiAgent { + cfg.MultiAgent = *p.MultiAgent + } + if p.Recursive != nil && !cfg.Recursive && !cfg.NoRecursive { + cfg.Recursive = *p.Recursive + } + if p.NoBrute != nil && !cfg.NoBrute { + cfg.NoBrute = *p.NoBrute + } + if p.NoProbe != nil && !cfg.NoProbe { + cfg.NoProbe = *p.NoProbe + } + if p.NoPorts != nil && !cfg.NoPorts { + cfg.NoPorts = *p.NoPorts + } + if p.NoTakeover != nil && !cfg.NoTakeover { + cfg.NoTakeover = *p.NoTakeover + } + + applyPtrBool(&cfg.CloudScan, &cfg.NoCloudScan, p.CloudScan) + applyPtrBool(&cfg.APIScan, &cfg.NoAPIScan, p.APIScan) + applyPtrBool(&cfg.SecretsScan, &cfg.NoSecrets, p.SecretsScan) + applyPtrBool(&cfg.TechScan, &cfg.NoTechScan, p.TechScan) + applyPtrBool(&cfg.ASNScan, &cfg.NoASNScan, p.ASNScan) + applyPtrBool(&cfg.VHostScan, &cfg.NoVHostScan, p.VHostScan) + + // Module overrides + if cfg.ModuleSettings == nil { + cfg.ModuleSettings = make(map[string]bool) + } + for name, enabled := range p.Modules { + if _, already := cfg.ModuleSettings[name]; !already { + cfg.ModuleSettings[name] = enabled + } + } +} + +// applyPtrBool merges a ptr-bool from a profile into a (enabled, noEnabled) +// pair on the Config struct. The v1 scheme uses two flags per feature +// (Enable/NoEnable) to allow a three-state: unset/on/off. A nil profile ptr +// means "leave unchanged"; *p=true enables unless user has set NoX; *p=false +// leaves alone (profile doesn't force-off, user's explicit flag does). 
+func applyPtrBool(enable, disable *bool, p *bool) { + if p == nil { + return + } + if *p && !*enable && !*disable { + *enable = true + } +} + +func ptrTrue() *bool { v := true; return &v } +func ptrFalse() *bool { v := false; return &v } diff --git a/internal/config/profile_test.go b/internal/config/profile_test.go new file mode 100644 index 0000000..e7bfce8 --- /dev/null +++ b/internal/config/profile_test.go @@ -0,0 +1,143 @@ +package config + +import "testing" + +func TestProfileByName(t *testing.T) { + tests := []struct { + name string + input string + wantOK bool + wantStr string + }{ + {"bugbounty", "bugbounty", true, "bugbounty"}, + {"pentest", "pentest", true, "pentest"}, + {"asm-continuous", "asm-continuous", true, "asm-continuous"}, + {"stealth-max", "stealth-max", true, "stealth-max"}, + {"quick", "quick", true, "quick"}, + {"empty", "", false, ""}, + {"unknown", "nonsense", false, ""}, + {"case sensitive", "BugBounty", false, ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := ProfileByName(tt.input) + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + if ok && got.Name != tt.wantStr { + t.Errorf("Name = %q, want %q", got.Name, tt.wantStr) + } + }) + } +} + +func TestBuiltinProfiles_NonEmpty(t *testing.T) { + if len(BuiltinProfiles) < 5 { + t.Errorf("expected β‰₯5 built-in profiles, got %d", len(BuiltinProfiles)) + } + seen := make(map[string]bool) + for _, p := range BuiltinProfiles { + if p.Name == "" { + t.Error("profile with empty name") + } + if p.Description == "" { + t.Errorf("profile %q has empty description", p.Name) + } + if seen[p.Name] { + t.Errorf("duplicate profile name: %q", p.Name) + } + seen[p.Name] = true + } +} + +func TestApplyProfile_NilConfigNoop(t *testing.T) { + ApplyProfile(nil, ProfileBugBounty) // must not panic +} + +func TestApplyProfile_FillsDefaults(t *testing.T) { + cfg := &Config{} // zero + ApplyProfile(cfg, ProfileBugBounty) + if cfg.Concurrency != 
ProfileBugBounty.Concurrency { + t.Errorf("Concurrency = %d, want %d", cfg.Concurrency, ProfileBugBounty.Concurrency) + } + if cfg.Timeout != ProfileBugBounty.Timeout { + t.Errorf("Timeout = %d, want %d", cfg.Timeout, ProfileBugBounty.Timeout) + } + if cfg.StealthMode != ProfileBugBounty.Stealth { + t.Errorf("Stealth = %q, want %q", cfg.StealthMode, ProfileBugBounty.Stealth) + } + if !cfg.EnableAI { + t.Error("bugbounty profile should enable AI") + } + if !cfg.MultiAgent { + t.Error("bugbounty profile should enable MultiAgent") + } + if !cfg.Recursive { + t.Error("bugbounty profile should enable Recursive") + } + if !cfg.CloudScan { + t.Error("bugbounty profile should enable CloudScan") + } +} + +func TestApplyProfile_DoesNotOverrideExplicitFlags(t *testing.T) { + cfg := &Config{ + Concurrency: 42, + Timeout: 999, + StealthMode: "paranoid", + EnableAI: false, // explicitly disabled before profile apply + } + // Apply bugbounty which normally enables AI + sets concurrency to 1000 + ApplyProfile(cfg, ProfileBugBounty) + + // Explicit non-default user values should survive + if cfg.Concurrency != 42 { + t.Errorf("Concurrency overwritten: %d", cfg.Concurrency) + } + if cfg.Timeout != 999 { + t.Errorf("Timeout overwritten: %d", cfg.Timeout) + } + if cfg.StealthMode != "paranoid" { + t.Errorf("Stealth overwritten: %q", cfg.StealthMode) + } + // Profile AI enable should still apply since cfg.EnableAI was false + // (we can't distinguish "user explicitly set false" from "zero value"). + // This is a known limitation documented in the CLI help. + if !cfg.EnableAI { + t.Errorf("AI not enabled by profile despite cfg.EnableAI being false") + } +} + +func TestApplyProfile_NoForceOff(t *testing.T) { + // stealth-max sets NoBrute=true. If user did NOT disable, profile wins. 
+ cfg := &Config{} + ApplyProfile(cfg, ProfileStealthMax) + if !cfg.NoBrute { + t.Error("stealth-max profile should set NoBrute") + } +} + +func TestApplyProfile_ModuleSettings(t *testing.T) { + p := Profile{ + Name: "custom", + Modules: map[string]bool{ + "sources.crtsh": true, + "brute": false, + }, + } + cfg := &Config{} + ApplyProfile(cfg, p) + if got := cfg.ModuleSettings["sources.crtsh"]; !got { + t.Error("crtsh should be enabled") + } + if got := cfg.ModuleSettings["brute"]; got { + t.Error("brute should be disabled") + } + + // User pre-existing setting must not be overridden + cfg2 := &Config{ModuleSettings: map[string]bool{"sources.crtsh": false}} + ApplyProfile(cfg2, p) + if cfg2.ModuleSettings["sources.crtsh"] { + t.Error("user explicit module setting was overridden") + } +} diff --git a/internal/config/view.go b/internal/config/view.go new file mode 100644 index 0000000..b4add9e --- /dev/null +++ b/internal/config/view.go @@ -0,0 +1,151 @@ +package config + +// View implements module.ConfigView over a *Config. Modules receive a View +// (not the raw Config pointer) to prevent them from mutating global scan +// state β€” reads only. +// +// The implementation is intentionally small: it exposes just the shape +// needed by the module package without pulling in a full generic key/value +// store. Module-specific settings live in ModuleSettings; typed options +// should be hoisted to first-class fields on Config when they are used +// across modules. +type View struct { + cfg *Config +} + +// NewView wraps cfg as a ConfigView. cfg may be nil, in which case every +// accessor returns the fallback/zero value. +func NewView(cfg *Config) *View { return &View{cfg: cfg} } + +// Profile returns the active profile name ("" when none). +func (v *View) Profile() string { + if v == nil || v.cfg == nil { + return "" + } + return v.cfg.Profile +} + +// Bool reads a boolean config key by well-known name. Unknown keys return fb. 
+// Keys intentionally kept flat to avoid accidental namespacing bugs. +func (v *View) Bool(key string, fb bool) bool { + if v == nil || v.cfg == nil { + return fb + } + switch key { + case "ai.enabled": + return v.cfg.EnableAI + case "ai.cascade": + return v.cfg.AICascade + case "ai.deep": + return v.cfg.AIDeepAnalysis + case "ai.multi_agent": + return v.cfg.MultiAgent + case "ai.verbose": + return v.cfg.AIVerbose + case "ai.auto_pull": + return v.cfg.AutoPullModels + case "silent": + return v.cfg.Silent + case "verbose": + return v.cfg.Verbose + case "json": + return v.cfg.JsonOutput + case "no_brute": + return v.cfg.NoBrute + case "no_probe": + return v.cfg.NoProbe + case "no_ports": + return v.cfg.NoPorts + case "no_takeover": + return v.cfg.NoTakeover + case "only_active": + return v.cfg.OnlyActive + case "recursive": + return v.cfg.Recursive + case "cloud_scan": + return v.cfg.CloudScan + case "api_scan": + return v.cfg.APIScan + case "secrets_scan": + return v.cfg.SecretsScan + case "tech_scan": + return v.cfg.TechScan + case "asn_scan": + return v.cfg.ASNScan + case "vhost_scan": + return v.cfg.VHostScan + case "nuclei_scan": + return v.cfg.NucleiScan + case "nuclei_auto_download": + return v.cfg.NucleiAutoDownload + } + return fb +} + +// Int reads an int key. +func (v *View) Int(key string, fb int) int { + if v == nil || v.cfg == nil { + return fb + } + switch key { + case "concurrency": + return v.cfg.Concurrency + case "timeout": + return v.cfg.Timeout + case "recursive.depth": + return v.cfg.RecursiveDepth + } + return fb +} + +// String reads a string key. 
+func (v *View) String(key string, fb string) string { + if v == nil || v.cfg == nil { + return fb + } + switch key { + case "domain": + return v.cfg.Domain + case "wordlist": + return v.cfg.Wordlist + case "output": + return v.cfg.Output + case "format": + return v.cfg.Format + case "ports": + return v.cfg.Ports + case "resolvers": + return v.cfg.Resolvers + case "stealth": + return v.cfg.StealthMode + case "ai.url": + return v.cfg.AIUrl + case "ai.fast_model": + return v.cfg.AIFastModel + case "ai.deep_model": + return v.cfg.AIDeepModel + case "nuclei_templates": + return v.cfg.NucleiTemplates + } + return fb +} + +// Strings reads a string-slice key. No multi-value keys are defined yet, +// but reserved for module-specific settings loaded from YAML. +func (v *View) Strings(key string) []string { + _ = key + return nil +} + +// ModuleEnabled returns true when the config explicitly enabled the module +// by name (via ModuleSettings). It returns false otherwise; callers should +// fall back to the module's DefaultEnabled() when this returns false. 
+func (v *View) ModuleEnabled(name string) bool { + if v == nil || v.cfg == nil { + return false + } + if v.cfg.ModuleSettings == nil { + return false + } + return v.cfg.ModuleSettings[name] +} diff --git a/internal/config/view_test.go b/internal/config/view_test.go new file mode 100644 index 0000000..7f57ab3 --- /dev/null +++ b/internal/config/view_test.go @@ -0,0 +1,156 @@ +package config + +import "testing" + +func TestView_NilSafe(t *testing.T) { + var v *View + if v.Profile() != "" { + t.Error("nil view Profile should be empty") + } + if v.Bool("ai.enabled", true) != true { + t.Error("nil view Bool should return fallback") + } + if v.Int("concurrency", 99) != 99 { + t.Error("nil view Int should return fallback") + } + if v.String("domain", "fb") != "fb" { + t.Error("nil view String should return fallback") + } + if v.ModuleEnabled("x") { + t.Error("nil view ModuleEnabled should be false") + } +} + +func TestView_Profile(t *testing.T) { + v := NewView(&Config{Profile: "bugbounty"}) + if v.Profile() != "bugbounty" { + t.Errorf("Profile = %q", v.Profile()) + } +} + +func TestView_Bool(t *testing.T) { + cfg := &Config{ + EnableAI: true, + AICascade: true, + AIDeepAnalysis: false, + MultiAgent: true, + Silent: true, + Verbose: false, + JsonOutput: true, + NoBrute: true, + OnlyActive: true, + Recursive: true, + CloudScan: true, + APIScan: false, + } + v := NewView(cfg) + + tests := []struct { + key string + fb bool + want bool + }{ + {"ai.enabled", false, true}, + {"ai.cascade", false, true}, + {"ai.deep", true, false}, + {"ai.multi_agent", false, true}, + {"silent", false, true}, + {"verbose", true, false}, + {"json", false, true}, + {"no_brute", false, true}, + {"only_active", false, true}, + {"recursive", false, true}, + {"cloud_scan", false, true}, + {"api_scan", true, false}, + {"unknown_key", true, true}, // fallback + {"unknown_key", false, false}, + } + + for _, tt := range tests { + if got := v.Bool(tt.key, tt.fb); got != tt.want { + t.Errorf("Bool(%q, %v) 
= %v, want %v", tt.key, tt.fb, got, tt.want) + } + } +} + +func TestView_Int(t *testing.T) { + v := NewView(&Config{Concurrency: 500, Timeout: 10, RecursiveDepth: 4}) + if v.Int("concurrency", 1) != 500 { + t.Errorf("concurrency wrong") + } + if v.Int("timeout", 1) != 10 { + t.Errorf("timeout wrong") + } + if v.Int("recursive.depth", 1) != 4 { + t.Errorf("recursive.depth wrong") + } + if v.Int("unknown", 99) != 99 { + t.Errorf("unknown key should return fallback") + } +} + +func TestView_String(t *testing.T) { + v := NewView(&Config{ + Domain: "example.com", + Wordlist: "/wl", + Output: "/out", + Format: "json", + Ports: "80,443", + Resolvers: "8.8.8.8", + StealthMode: "light", + AIUrl: "http://x", + AIFastModel: "f", + AIDeepModel: "d", + }) + + cases := map[string]string{ + "domain": "example.com", + "wordlist": "/wl", + "output": "/out", + "format": "json", + "ports": "80,443", + "resolvers": "8.8.8.8", + "stealth": "light", + "ai.url": "http://x", + "ai.fast_model": "f", + "ai.deep_model": "d", + } + for k, want := range cases { + if got := v.String(k, "fb"); got != want { + t.Errorf("String(%q) = %q, want %q", k, got, want) + } + } + + if v.String("unknown", "fb") != "fb" { + t.Error("unknown key should return fallback") + } +} + +func TestView_Strings(t *testing.T) { + // Placeholder β€” no multi-value keys defined yet + v := NewView(&Config{}) + if got := v.Strings("anything"); got != nil { + t.Errorf("expected nil, got %v", got) + } +} + +func TestView_ModuleEnabled(t *testing.T) { + cfg := &Config{ModuleSettings: map[string]bool{"m1": true, "m2": false}} + v := NewView(cfg) + if !v.ModuleEnabled("m1") { + t.Error("m1 should be enabled") + } + if v.ModuleEnabled("m2") { + t.Error("m2 should be disabled (false in map)") + } + if v.ModuleEnabled("unset") { + t.Error("unset module should be false") + } +} + +func TestView_ModuleEnabled_NilMap(t *testing.T) { + v := NewView(&Config{}) + if v.ModuleEnabled("anything") { + t.Error("nil map should result in 
false") + } +} diff --git a/internal/config/yaml.go b/internal/config/yaml.go new file mode 100644 index 0000000..cf54477 --- /dev/null +++ b/internal/config/yaml.go @@ -0,0 +1,181 @@ +package config + +import ( + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// YAMLConfig is the schema persisted on disk. Fields are intentionally a +// subset of Config β€” YAML is for declarative, long-lived settings +// (profile, module toggles, resolver lists, AI model names); ephemeral +// flags (--silent, --verbose, --domain) remain CLI-only. +type YAMLConfig struct { + Profile string `yaml:"profile,omitempty"` + Concurrency int `yaml:"concurrency,omitempty"` + Timeout int `yaml:"timeout,omitempty"` + Stealth string `yaml:"stealth,omitempty"` + Resolvers []string `yaml:"resolvers,omitempty"` + Wordlist string `yaml:"wordlist,omitempty"` + Modules map[string]bool `yaml:"modules,omitempty"` + AI *YAMLAIConfig `yaml:"ai,omitempty"` + Output *YAMLOutputConfig `yaml:"output,omitempty"` +} + +// YAMLAIConfig groups AI-related YAML fields. +type YAMLAIConfig struct { + Enabled bool `yaml:"enabled,omitempty"` + URL string `yaml:"url,omitempty"` + FastModel string `yaml:"fast_model,omitempty"` + DeepModel string `yaml:"deep_model,omitempty"` + Cascade *bool `yaml:"cascade,omitempty"` + Deep bool `yaml:"deep,omitempty"` + MultiAgent bool `yaml:"multi_agent,omitempty"` +} + +// YAMLOutputConfig groups output-related YAML fields. +type YAMLOutputConfig struct { + Path string `yaml:"path,omitempty"` + Format string `yaml:"format,omitempty"` + JSON bool `yaml:"json,omitempty"` +} + +// LoadYAML reads a YAML config file from path and returns the parsed config. +// Returns (nil, nil) when the file does not exist β€” callers should treat this +// as "no config file, use defaults". Returns an error for any other I/O or +// parse failure. 
+func LoadYAML(path string) (*YAMLConfig, error) { + if path == "" { + return nil, nil + } + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read config %q: %w", path, err) + } + + var y YAMLConfig + if err := yaml.Unmarshal(data, &y); err != nil { + return nil, fmt.Errorf("parse config %q: %w", path, err) + } + return &y, nil +} + +// ApplyYAML merges a parsed YAML config into cfg. CLI flags win: YAML only +// fills fields that are still at their zero value on cfg. The profile named +// in YAML is applied only if cfg.Profile is empty. +func ApplyYAML(cfg *Config, y *YAMLConfig) { + if cfg == nil || y == nil { + return + } + + if cfg.Profile == "" && y.Profile != "" { + cfg.Profile = y.Profile + } + if cfg.Concurrency == 0 && y.Concurrency > 0 { + cfg.Concurrency = y.Concurrency + } + if cfg.Timeout == 0 && y.Timeout > 0 { + cfg.Timeout = y.Timeout + } + if cfg.StealthMode == "" && y.Stealth != "" { + cfg.StealthMode = y.Stealth + } + if cfg.Resolvers == "" && len(y.Resolvers) > 0 { + cfg.Resolvers = joinComma(y.Resolvers) + } + if cfg.Wordlist == "" && y.Wordlist != "" { + cfg.Wordlist = y.Wordlist + } + + if len(y.Modules) > 0 { + if cfg.ModuleSettings == nil { + cfg.ModuleSettings = make(map[string]bool) + } + for name, enabled := range y.Modules { + if _, already := cfg.ModuleSettings[name]; !already { + cfg.ModuleSettings[name] = enabled + } + } + } + + if y.AI != nil { + if y.AI.Enabled && !cfg.EnableAI { + cfg.EnableAI = true + } + if cfg.AIUrl == "" && y.AI.URL != "" { + cfg.AIUrl = y.AI.URL + } + if cfg.AIFastModel == "" && y.AI.FastModel != "" { + cfg.AIFastModel = y.AI.FastModel + } + if cfg.AIDeepModel == "" && y.AI.DeepModel != "" { + cfg.AIDeepModel = y.AI.DeepModel + } + if y.AI.Cascade != nil && !cfg.AICascade { + cfg.AICascade = *y.AI.Cascade + } + if y.AI.Deep && !cfg.AIDeepAnalysis { + cfg.AIDeepAnalysis = true + } + if y.AI.MultiAgent && !cfg.MultiAgent { + cfg.MultiAgent 
= true + } + } + + if y.Output != nil { + if cfg.Output == "" && y.Output.Path != "" { + cfg.Output = y.Output.Path + } + if cfg.Format == "" && y.Output.Format != "" { + cfg.Format = y.Output.Format + } + if y.Output.JSON && !cfg.JsonOutput { + cfg.JsonOutput = true + } + } +} + +// DefaultConfigPaths returns the ordered list of paths LoadYAML scans by +// default when no --config is provided. The first existing file wins. +func DefaultConfigPaths() []string { + home, err := os.UserHomeDir() + var homeCfg string + if err == nil { + homeCfg = filepath.Join(home, ".god-eye", "config.yaml") + } + return []string{ + "god-eye.yaml", + ".god-eye.yaml", + homeCfg, + } +} + +// FindConfigFile returns the first existing file in DefaultConfigPaths, or +// "" if none is found. +func FindConfigFile() string { + for _, p := range DefaultConfigPaths() { + if p == "" { + continue + } + if _, err := os.Stat(p); err == nil { + return p + } + } + return "" +} + +func joinComma(ss []string) string { + out := "" + for i, s := range ss { + if i > 0 { + out += "," + } + out += s + } + return out +} diff --git a/internal/config/yaml_test.go b/internal/config/yaml_test.go new file mode 100644 index 0000000..529b9a1 --- /dev/null +++ b/internal/config/yaml_test.go @@ -0,0 +1,270 @@ +package config + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadYAML_Missing(t *testing.T) { + y, err := LoadYAML("/tmp/this-definitely-does-not-exist-xyz.yaml") + if err != nil { + t.Errorf("missing file should return nil error, got %v", err) + } + if y != nil { + t.Errorf("missing file should return nil config, got %+v", y) + } +} + +func TestLoadYAML_EmptyPath(t *testing.T) { + y, err := LoadYAML("") + if y != nil || err != nil { + t.Errorf("empty path β†’ (nil, nil), got (%+v, %v)", y, err) + } +} + +func TestLoadYAML_Malformed(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "bad.yaml") + os.WriteFile(path, []byte("profile: [unclosed"), 0o644) + _, err := LoadYAML(path) 
+ if err == nil { + t.Error("expected parse error for malformed YAML") + } +} + +func TestLoadYAML_Full(t *testing.T) { + content := ` +profile: bugbounty +concurrency: 500 +timeout: 8 +stealth: moderate +resolvers: + - 8.8.8.8 + - 1.1.1.1 +wordlist: /tmp/wl.txt +modules: + sources.crtsh: true + brute: false +ai: + enabled: true + url: http://localhost:11434 + fast_model: qwen3:1.7b + deep_model: qwen2.5-coder:14b + cascade: true + deep: true + multi_agent: true +output: + path: /tmp/out.json + format: json + json: true +` + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + os.WriteFile(path, []byte(content), 0o644) + + y, err := LoadYAML(path) + if err != nil { + t.Fatal(err) + } + if y == nil { + t.Fatal("expected non-nil config") + } + if y.Profile != "bugbounty" { + t.Errorf("Profile = %q", y.Profile) + } + if y.Concurrency != 500 { + t.Errorf("Concurrency = %d", y.Concurrency) + } + if y.Timeout != 8 { + t.Errorf("Timeout = %d", y.Timeout) + } + if y.Stealth != "moderate" { + t.Errorf("Stealth = %q", y.Stealth) + } + if len(y.Resolvers) != 2 { + t.Errorf("Resolvers len = %d", len(y.Resolvers)) + } + if y.Modules["sources.crtsh"] != true { + t.Errorf("modules.sources.crtsh = false") + } + if y.Modules["brute"] != false { + t.Errorf("modules.brute = true") + } + if y.AI == nil || !y.AI.Enabled { + t.Error("AI not enabled") + } + if y.AI.URL != "http://localhost:11434" { + t.Errorf("AI.URL = %q", y.AI.URL) + } + if y.Output == nil || y.Output.Path != "/tmp/out.json" { + t.Errorf("Output.Path wrong") + } +} + +func TestApplyYAML_NilInputs(t *testing.T) { + ApplyYAML(nil, &YAMLConfig{Profile: "x"}) // must not panic + ApplyYAML(&Config{}, nil) // must not panic +} + +func TestApplyYAML_FillsZeroFields(t *testing.T) { + cfg := &Config{} + y := &YAMLConfig{ + Profile: "quick", + Concurrency: 123, + Timeout: 7, + Stealth: "light", + Resolvers: []string{"8.8.8.8", "1.1.1.1"}, + Wordlist: "/tmp/wl", + Modules: map[string]bool{"m1": true}, + AI: 
&YAMLAIConfig{ + Enabled: true, + URL: "http://x", + FastModel: "f", + DeepModel: "d", + Cascade: ptrTrue(), + Deep: true, + MultiAgent: true, + }, + Output: &YAMLOutputConfig{Path: "/o", Format: "json", JSON: true}, + } + ApplyYAML(cfg, y) + + if cfg.Profile != "quick" { + t.Errorf("Profile = %q", cfg.Profile) + } + if cfg.Concurrency != 123 { + t.Errorf("Concurrency = %d", cfg.Concurrency) + } + if cfg.Timeout != 7 { + t.Errorf("Timeout = %d", cfg.Timeout) + } + if cfg.StealthMode != "light" { + t.Errorf("StealthMode = %q", cfg.StealthMode) + } + if cfg.Resolvers != "8.8.8.8,1.1.1.1" { + t.Errorf("Resolvers = %q", cfg.Resolvers) + } + if cfg.Wordlist != "/tmp/wl" { + t.Errorf("Wordlist = %q", cfg.Wordlist) + } + if !cfg.EnableAI { + t.Error("EnableAI should be true") + } + if cfg.AIUrl != "http://x" { + t.Errorf("AIUrl = %q", cfg.AIUrl) + } + if !cfg.AICascade { + t.Error("AICascade should be true") + } + if !cfg.AIDeepAnalysis { + t.Error("AIDeepAnalysis should be true") + } + if !cfg.MultiAgent { + t.Error("MultiAgent should be true") + } + if cfg.Output != "/o" { + t.Errorf("Output = %q", cfg.Output) + } + if cfg.Format != "json" { + t.Errorf("Format = %q", cfg.Format) + } + if !cfg.JsonOutput { + t.Error("JsonOutput should be true") + } + if cfg.ModuleSettings["m1"] != true { + t.Error("ModuleSettings.m1 should be true") + } +} + +func TestApplyYAML_CLIOverrideWins(t *testing.T) { + cfg := &Config{ + Profile: "pentest", + Concurrency: 42, + Timeout: 3, + StealthMode: "paranoid", + Resolvers: "9.9.9.9", + Wordlist: "/existing", + } + y := &YAMLConfig{ + Profile: "quick", + Concurrency: 999, + Timeout: 999, + Stealth: "off", + Resolvers: []string{"8.8.8.8"}, + Wordlist: "/yaml", + } + ApplyYAML(cfg, y) + + // CLI values should survive + if cfg.Profile != "pentest" { + t.Errorf("Profile overwritten: %q", cfg.Profile) + } + if cfg.Concurrency != 42 { + t.Errorf("Concurrency overwritten: %d", cfg.Concurrency) + } + if cfg.Timeout != 3 { + t.Errorf("Timeout 
overwritten: %d", cfg.Timeout) + } + if cfg.StealthMode != "paranoid" { + t.Errorf("StealthMode overwritten: %q", cfg.StealthMode) + } + if cfg.Resolvers != "9.9.9.9" { + t.Errorf("Resolvers overwritten: %q", cfg.Resolvers) + } + if cfg.Wordlist != "/existing" { + t.Errorf("Wordlist overwritten: %q", cfg.Wordlist) + } +} + +func TestDefaultConfigPaths(t *testing.T) { + paths := DefaultConfigPaths() + if len(paths) < 3 { + t.Errorf("expected β‰₯3 default paths, got %d", len(paths)) + } + // First two are CWD-relative + if paths[0] != "god-eye.yaml" { + t.Errorf("paths[0] = %q", paths[0]) + } + if paths[1] != ".god-eye.yaml" { + t.Errorf("paths[1] = %q", paths[1]) + } +} + +func TestFindConfigFile_FindsInWorkingDir(t *testing.T) { + // Create a temp "god-eye.yaml" and ensure the search finds it. We can't + // easily change CWD for just this test, so we validate the underlying + // Stat call by constructing a path that definitely exists. + dir := t.TempDir() + target := filepath.Join(dir, "god-eye.yaml") + os.WriteFile(target, []byte("profile: quick\n"), 0o644) + + oldWD, _ := os.Getwd() + defer os.Chdir(oldWD) + if err := os.Chdir(dir); err != nil { + t.Skipf("cannot chdir: %v", err) + } + + got := FindConfigFile() + if got != "god-eye.yaml" { + t.Errorf("FindConfigFile = %q, want god-eye.yaml", got) + } +} + +func TestFindConfigFile_NoneFound(t *testing.T) { + dir := t.TempDir() + oldWD, _ := os.Getwd() + defer os.Chdir(oldWD) + if err := os.Chdir(dir); err != nil { + t.Skipf("cannot chdir: %v", err) + } + // Also override HOME to an empty dir so the user-home path never matches. 
+ oldHome := os.Getenv("HOME") + defer os.Setenv("HOME", oldHome) + os.Setenv("HOME", dir) + + got := FindConfigFile() + if got != "" { + t.Errorf("FindConfigFile = %q, want empty", got) + } +} diff --git a/internal/diff/diff.go b/internal/diff/diff.go new file mode 100644 index 0000000..bda2d2d --- /dev/null +++ b/internal/diff/diff.go @@ -0,0 +1,270 @@ +// Package diff computes deltas between two scans of the same target. It +// powers Fase 5's asm-continuous mode: run the scanner on a schedule, diff +// against the last snapshot, alert on meaningful changes. +// +// Diff categories: +// +// new_host β€” subdomain not seen before +// removed_host β€” subdomain vanished from discovery +// new_ip β€” host gained an IP +// removed_ip β€” host lost an IP +// status_change β€” HTTP status code changed (200β†’401, 200β†’gone) +// tech_change β€” technology stack changed (upgrade or new framework) +// new_vuln β€” new vulnerability finding +// cleared_vuln β€” previously-reported vuln no longer detected +// cert_change β€” TLS certificate issuer/expiry changed +// new_takeover β€” new takeover candidate +// +// A Report is consumable both by humans (pretty-print) and by alerters +// (Slack/webhook payload shape defined later in F5.3). +package diff + +import ( + "sort" + "time" + + "god-eye/internal/store" +) + +// Change is one delta. +type Change struct { + Kind string `json:"kind"` + Host string `json:"host"` + Before string `json:"before,omitempty"` + After string `json:"after,omitempty"` + Severity string `json:"severity,omitempty"` + Detected time.Time `json:"detected_at"` +} + +// Report is the full delta between two scans. +type Report struct { + Target string `json:"target"` + OldAt time.Time `json:"old_scan_at"` + NewAt time.Time `json:"new_scan_at"` + Changes []Change `json:"changes"` +} + +// HasMeaningful returns true when the report contains any change that +// warrants alerting. "new_host" and any "new_vuln" always qualify. 
+func (r *Report) HasMeaningful() bool { + for _, c := range r.Changes { + switch c.Kind { + case "new_host", "new_vuln", "new_takeover", "removed_host": + return true + } + } + return false +} + +// Compute compares old vs new snapshots and returns the delta. Both +// slices are assumed to come from store.All() (sorted by subdomain). +func Compute(target string, oldHosts, newHosts []*store.Host, oldAt, newAt time.Time) *Report { + r := &Report{Target: target, OldAt: oldAt, NewAt: newAt} + + oldByName := indexHosts(oldHosts) + newByName := indexHosts(newHosts) + + // Walk the union of hostnames. + names := union(oldByName, newByName) + sort.Strings(names) + + for _, name := range names { + o, oOK := oldByName[name] + n, nOK := newByName[name] + switch { + case !oOK && nOK: + r.Changes = append(r.Changes, Change{Kind: "new_host", Host: name, Detected: newAt}) + for _, v := range n.Vulnerabilities { + r.Changes = append(r.Changes, Change{ + Kind: "new_vuln", + Host: name, + After: v.Title, + Severity: v.Severity, + Detected: newAt, + }) + } + if n.Takeover != nil { + r.Changes = append(r.Changes, Change{ + Kind: "new_takeover", + Host: name, + After: n.Takeover.Service, + Severity: "high", + Detected: newAt, + }) + } + case oOK && !nOK: + r.Changes = append(r.Changes, Change{Kind: "removed_host", Host: name, Detected: newAt}) + case oOK && nOK: + r.Changes = append(r.Changes, diffHost(o, n, newAt)...) 
+ } + } + return r +} + +func diffHost(o, n *store.Host, at time.Time) []Change { + var out []Change + + if o.StatusCode != n.StatusCode { + out = append(out, Change{ + Kind: "status_change", + Host: n.Subdomain, + Before: itoa(o.StatusCode), + After: itoa(n.StatusCode), + Detected: at, + }) + } + + // IP deltas + oldIPs := toSet(o.IPs) + newIPs := toSet(n.IPs) + for ip := range newIPs { + if _, present := oldIPs[ip]; !present { + out = append(out, Change{Kind: "new_ip", Host: n.Subdomain, After: ip, Detected: at}) + } + } + for ip := range oldIPs { + if _, present := newIPs[ip]; !present { + out = append(out, Change{Kind: "removed_ip", Host: n.Subdomain, Before: ip, Detected: at}) + } + } + + // Tech change (set inequality) + if !stringSetsEqual(o.Technologies, n.Technologies) { + out = append(out, Change{ + Kind: "tech_change", + Host: n.Subdomain, + Before: joinSorted(o.Technologies), + After: joinSorted(n.Technologies), + Detected: at, + }) + } + + // Vuln delta (by ID) + oldVulns := indexVulns(o.Vulnerabilities) + newVulns := indexVulns(n.Vulnerabilities) + for id, v := range newVulns { + if _, present := oldVulns[id]; !present { + out = append(out, Change{ + Kind: "new_vuln", Host: n.Subdomain, After: v.Title, + Severity: v.Severity, Detected: at, + }) + } + } + for id, v := range oldVulns { + if _, present := newVulns[id]; !present { + out = append(out, Change{ + Kind: "cleared_vuln", Host: n.Subdomain, Before: v.Title, + Severity: v.Severity, Detected: at, + }) + } + } + + // Certificate change + if o.TLSIssuer != n.TLSIssuer && n.TLSIssuer != "" { + out = append(out, Change{ + Kind: "cert_change", + Host: n.Subdomain, + Before: o.TLSIssuer, + After: n.TLSIssuer, + Detected: at, + }) + } + + // Takeover appeared + if o.Takeover == nil && n.Takeover != nil { + out = append(out, Change{ + Kind: "new_takeover", Host: n.Subdomain, + After: n.Takeover.Service, Severity: "high", Detected: at, + }) + } + return out +} + +// --- helpers 
------------------------------------------------------------- + +func indexHosts(hs []*store.Host) map[string]*store.Host { + out := make(map[string]*store.Host, len(hs)) + for _, h := range hs { + out[h.Subdomain] = h + } + return out +} + +func indexVulns(vs []store.Vulnerability) map[string]store.Vulnerability { + out := make(map[string]store.Vulnerability, len(vs)) + for _, v := range vs { + out[v.ID] = v + } + return out +} + +func union(a, b map[string]*store.Host) []string { + out := make(map[string]struct{}, len(a)+len(b)) + for k := range a { + out[k] = struct{}{} + } + for k := range b { + out[k] = struct{}{} + } + names := make([]string, 0, len(out)) + for n := range out { + names = append(names, n) + } + return names +} + +func toSet(ss []string) map[string]struct{} { + out := make(map[string]struct{}, len(ss)) + for _, s := range ss { + out[s] = struct{}{} + } + return out +} + +func stringSetsEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + sa := toSet(a) + for _, s := range b { + if _, ok := sa[s]; !ok { + return false + } + } + return true +} + +func joinSorted(s []string) string { + cpy := append([]string(nil), s...) 
+ sort.Strings(cpy) + out := "" + for i, v := range cpy { + if i > 0 { + out += "," + } + out += v + } + return out +} + +func itoa(n int) string { + if n == 0 { + return "0" + } + var buf [20]byte + i := len(buf) + neg := n < 0 + if neg { + n = -n + } + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if neg { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go new file mode 100644 index 0000000..e947d32 --- /dev/null +++ b/internal/diff/diff_test.go @@ -0,0 +1,154 @@ +package diff + +import ( + "testing" + "time" + + "god-eye/internal/store" +) + +func TestCompute_NewHost(t *testing.T) { + oldHosts := []*store.Host{} + newHosts := []*store.Host{{Subdomain: "api.example.com"}} + r := Compute("example.com", oldHosts, newHosts, time.Now(), time.Now()) + if len(r.Changes) != 1 || r.Changes[0].Kind != "new_host" { + t.Errorf("expected 1 new_host change, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("new_host should be meaningful") + } +} + +func TestCompute_RemovedHost(t *testing.T) { + oldHosts := []*store.Host{{Subdomain: "old.example.com"}} + newHosts := []*store.Host{} + r := Compute("example.com", oldHosts, newHosts, time.Now(), time.Now()) + if len(r.Changes) != 1 || r.Changes[0].Kind != "removed_host" { + t.Errorf("expected removed_host, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("removed_host should be meaningful") + } +} + +func TestCompute_StatusChange(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com", StatusCode: 200} + newH := &store.Host{Subdomain: "a.example.com", StatusCode: 401} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + if len(r.Changes) != 1 || r.Changes[0].Kind != "status_change" { + t.Errorf("expected status_change, got %+v", r.Changes) + } + if r.Changes[0].Before != "200" || r.Changes[0].After != "401" { + t.Errorf("wrong before/after: %+v", r.Changes[0]) + } +} + 
+func TestCompute_IPDelta(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com", IPs: []string{"1.1.1.1"}} + newH := &store.Host{Subdomain: "a.example.com", IPs: []string{"1.1.1.1", "2.2.2.2"}} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "new_ip" && c.After == "2.2.2.2" { + found = true + } + } + if !found { + t.Errorf("expected new_ip change, got %+v", r.Changes) + } +} + +func TestCompute_NewVuln(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com"} + newH := &store.Host{ + Subdomain: "a.example.com", + Vulnerabilities: []store.Vulnerability{ + {ID: "xss", Title: "Reflected XSS", Severity: "high"}, + }, + } + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "new_vuln" && c.After == "Reflected XSS" { + found = true + } + } + if !found { + t.Errorf("expected new_vuln change, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("new_vuln must be meaningful") + } +} + +func TestCompute_ClearedVuln(t *testing.T) { + oldH := &store.Host{ + Subdomain: "a.example.com", + Vulnerabilities: []store.Vulnerability{ + {ID: "git-exposed", Title: "Git Exposed", Severity: "critical"}, + }, + } + newH := &store.Host{Subdomain: "a.example.com"} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "cleared_vuln" { + found = true + } + } + if !found { + t.Errorf("expected cleared_vuln, got %+v", r.Changes) + } +} + +func TestCompute_NewTakeover(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com"} + newH := &store.Host{ + Subdomain: "a.example.com", + Takeover: &store.Takeover{Service: "GitHub Pages"}, + } + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := 
false + for _, c := range r.Changes { + if c.Kind == "new_takeover" && c.After == "GitHub Pages" { + found = true + } + } + if !found { + t.Errorf("expected new_takeover, got %+v", r.Changes) + } + if !r.HasMeaningful() { + t.Error("new_takeover must be meaningful") + } +} + +func TestCompute_NoChange(t *testing.T) { + h := &store.Host{ + Subdomain: "a.example.com", + IPs: []string{"1.1.1.1"}, + StatusCode: 200, + Technologies: []string{"nginx"}, + } + r := Compute("example.com", []*store.Host{h}, []*store.Host{h}, time.Now(), time.Now()) + if len(r.Changes) != 0 { + t.Errorf("expected no changes, got %+v", r.Changes) + } + if r.HasMeaningful() { + t.Error("empty report should not be meaningful") + } +} + +func TestCompute_TechChange(t *testing.T) { + oldH := &store.Host{Subdomain: "a.example.com", Technologies: []string{"nginx"}} + newH := &store.Host{Subdomain: "a.example.com", Technologies: []string{"nginx", "Apache"}} + r := Compute("example.com", []*store.Host{oldH}, []*store.Host{newH}, time.Now(), time.Now()) + found := false + for _, c := range r.Changes { + if c.Kind == "tech_change" { + found = true + } + } + if !found { + t.Errorf("expected tech_change, got %+v", r.Changes) + } +} diff --git a/internal/dns/wildcard_test.go b/internal/dns/wildcard_test.go new file mode 100644 index 0000000..5a4e8bc --- /dev/null +++ b/internal/dns/wildcard_test.go @@ -0,0 +1,174 @@ +package dns + +import "testing" + +func TestAllEqual(t *testing.T) { + tests := []struct { + name string + in []string + want bool + }{ + {"empty", nil, true}, + {"single", []string{"a"}, true}, + {"all same", []string{"a", "a", "a"}, true}, + {"one different", []string{"a", "a", "b"}, false}, + {"all different", []string{"a", "b", "c"}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := allEqual(tt.in); got != tt.want { + t.Errorf("allEqual(%v) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestAllEqualInts(t *testing.T) { + tests := 
[]struct { + name string + in []int + want bool + }{ + {"empty", nil, true}, + {"single", []int{200}, true}, + {"all same", []int{200, 200, 200}, true}, + {"one different", []int{200, 200, 404}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := allEqualInts(tt.in); got != tt.want { + t.Errorf("allEqualInts(%v) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestSimilarSizes(t *testing.T) { + tests := []struct { + name string + in []int64 + want bool + }{ + {"empty", nil, true}, + {"single", []int64{1000}, true}, + {"identical", []int64{1000, 1000, 1000}, true}, + {"within 20%", []int64{1000, 1100, 1200}, true}, + {"exactly 20%", []int64{1000, 1200}, true}, + {"over 20%", []int64{1000, 1300}, false}, + {"big variance", []int64{100, 10000}, false}, + {"all zero", []int64{0, 0}, true}, + {"zero and small", []int64{0, 50}, true}, + {"zero and big", []int64{0, 200}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := similarSizes(tt.in); got != tt.want { + t.Errorf("similarSizes(%v) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestIsWildcardIP(t *testing.T) { + wd := &WildcardDetector{} + info := &WildcardInfo{ + IsWildcard: true, + WildcardIPs: []string{"1.2.3.4", "5.6.7.8"}, + } + + if !wd.IsWildcardIP("1.2.3.4", info) { + t.Error("expected 1.2.3.4 to be wildcard IP") + } + if wd.IsWildcardIP("9.9.9.9", info) { + t.Error("expected 9.9.9.9 NOT to be wildcard IP") + } + + // nil and non-wildcard cases + if wd.IsWildcardIP("1.2.3.4", nil) { + t.Error("nil info should return false") + } + nonWild := &WildcardInfo{IsWildcard: false, WildcardIPs: []string{"1.2.3.4"}} + if wd.IsWildcardIP("1.2.3.4", nonWild) { + t.Error("non-wildcard info should return false even if IP matches list") + } +} + +func TestIsWildcardResponse(t *testing.T) { + wd := &WildcardDetector{} + info := &WildcardInfo{ + IsWildcard: true, + HTTPStatusCode: 200, + HTTPBodySize: 1000, + } + + tests 
:= []struct { + name string + statusCode int + bodySize int64 + want bool + }{ + {"exact match", 200, 1000, true}, + {"within 10% body", 200, 1050, true}, + {"within 10% body below", 200, 950, true}, + {"over 10% body", 200, 1200, false}, + {"different status", 404, 1000, false}, + {"both different", 301, 500, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := wd.IsWildcardResponse(tt.statusCode, tt.bodySize, info); got != tt.want { + t.Errorf("IsWildcardResponse(%d, %d) = %v, want %v", tt.statusCode, tt.bodySize, got, tt.want) + } + }) + } + + if wd.IsWildcardResponse(200, 1000, nil) { + t.Error("nil info should return false") + } +} + +func TestGenerateTestSubdomains(t *testing.T) { + subs := generateTestSubdomains() + if len(subs) < 3 { + t.Errorf("expected at least 3 test subdomains, got %d", len(subs)) + } + seen := make(map[string]bool) + for _, s := range subs { + if s == "" { + t.Error("empty test subdomain generated") + } + if seen[s] { + t.Errorf("duplicate test subdomain: %s", s) + } + seen[s] = true + } +} + +func TestWildcardInfo_GetSummary_NotWildcard(t *testing.T) { + info := &WildcardInfo{IsWildcard: false} + got := info.GetSummary() + if got == "" { + t.Error("GetSummary returned empty string") + } +} + +func TestNewWildcardDetector(t *testing.T) { + resolvers := []string{"8.8.8.8:53"} + wd := NewWildcardDetector(resolvers, 5) + if wd == nil { + t.Fatal("NewWildcardDetector returned nil") + } + if wd.timeout != 5 { + t.Errorf("timeout = %d, want 5", wd.timeout) + } + if len(wd.resolvers) != 1 || wd.resolvers[0] != "8.8.8.8:53" { + t.Errorf("resolvers = %v", wd.resolvers) + } + if wd.httpClient == nil { + t.Error("httpClient is nil") + } + if len(wd.testSubdomains) == 0 { + t.Error("testSubdomains is empty") + } +} diff --git a/internal/eventbus/bus.go b/internal/eventbus/bus.go new file mode 100644 index 0000000..4d4cd1c --- /dev/null +++ b/internal/eventbus/bus.go @@ -0,0 +1,283 @@ +package eventbus + 
import (
	"context"
	"errors"
	"sync"
	"sync/atomic"
)

// ErrBusClosed is returned when attempting to use a closed bus.
// NOTE(review): none of the Bus methods in this file return it; Publish and
// subscribe silently no-op on a closed bus. Confirm whether other callers use it.
var ErrBusClosed = errors.New("eventbus: bus closed")

// Handler processes a single event. It runs on the subscriber's own goroutine
// so handlers may block or perform I/O without stalling publishers. A handler
// must respect ctx cancellation when performing long work.
type Handler func(ctx context.Context, e Event)

// Subscription is returned by Subscribe/SubscribeAll and is used to stop
// receiving events. Unsubscribe is idempotent.
type Subscription struct {
	bus       *Bus
	eventType EventType // empty string means "all"
	id        uint64    // bus-assigned subscriber id; zero for an inert subscription
	once      sync.Once // guards the single unsubscribe call
}

// Unsubscribe stops the subscription: the subscriber is removed from the bus
// and its channel is closed, so no new events are queued for it. Events that
// were already buffered are still handed to the handler before the
// subscriber goroutine exits. Safe to call multiple times and on a nil
// receiver.
func (s *Subscription) Unsubscribe() {
	if s == nil || s.bus == nil {
		return
	}
	s.once.Do(func() {
		s.bus.unsubscribe(s.eventType, s.id)
	})
}

// Stats captures runtime metrics for observability. Stats are cumulative from
// bus creation; callers should compute deltas if rate matters.
type Stats struct {
	Published   uint64 // total Publish calls accepted
	Delivered   uint64 // events delivered to subscribers (sum across subscribers)
	Dropped     uint64 // events dropped because a subscriber buffer was full
	Subscribers int    // active subscribers right now
	Closed      bool
}

// Bus is the default eventbus implementation.
type Bus struct {
	bufferSize int // capacity of every subscriber channel

	mu      sync.RWMutex // guards closed, nextID, subs and allSubs
	closed  bool
	nextID  uint64
	subs    map[EventType]map[uint64]*subscriber // type → id → subscriber
	allSubs map[uint64]*subscriber               // wildcard subscribers

	// Counters below are updated with sync/atomic's 64-bit functions.
	// NOTE(review): on 32-bit platforms those functions need 8-byte aligned
	// operands, which this field order does not guarantee by construction;
	// consider atomic.Uint64 or moving the counters to the top of the struct.
	published uint64
	delivered uint64
	dropped   uint64

	wg sync.WaitGroup // one count per running subscriber goroutine
}

// subscriber is the bus-internal state behind one Subscription: the buffered
// channel events are queued on, the handler that consumes them, and the
// context handed to that handler.
type subscriber struct {
	id      uint64
	eventT  EventType
	ch      chan Event
	handler Handler
	ctx     context.Context
	cancel  context.CancelFunc
}

// New creates a new Bus.
bufferSize controls the per-subscriber channel +// buffer; values ≀0 default to 256. A buffer of 1 is legal but increases +// drop probability under bursty load. +func New(bufferSize int) *Bus { + if bufferSize <= 0 { + bufferSize = 256 + } + return &Bus{ + bufferSize: bufferSize, + subs: make(map[EventType]map[uint64]*subscriber), + allSubs: make(map[uint64]*subscriber), + } +} + +// Subscribe registers a handler for a specific event type. Returns a +// Subscription that can be used to unsubscribe. +func (b *Bus) Subscribe(t EventType, h Handler) *Subscription { + return b.subscribe(t, h, false) +} + +// SubscribeAll registers a handler that receives every event type. +// Useful for logging, metrics collection, or persistence modules. +func (b *Bus) SubscribeAll(h Handler) *Subscription { + return b.subscribe("", h, true) +} + +func (b *Bus) subscribe(t EventType, h Handler, all bool) *Subscription { + if h == nil { + return &Subscription{bus: b} + } + b.mu.Lock() + if b.closed { + b.mu.Unlock() + return &Subscription{bus: b} + } + b.nextID++ + id := b.nextID + ctx, cancel := context.WithCancel(context.Background()) + s := &subscriber{ + id: id, + eventT: t, + ch: make(chan Event, b.bufferSize), + handler: h, + ctx: ctx, + cancel: cancel, + } + if all { + b.allSubs[id] = s + } else { + if b.subs[t] == nil { + b.subs[t] = make(map[uint64]*subscriber) + } + b.subs[t][id] = s + } + b.mu.Unlock() + + b.wg.Add(1) + go b.run(s) + + return &Subscription{bus: b, eventType: t, id: id} +} + +func (b *Bus) unsubscribe(t EventType, id uint64) { + b.mu.Lock() + var s *subscriber + if t == "" { + s = b.allSubs[id] + delete(b.allSubs, id) + } else { + if m, ok := b.subs[t]; ok { + s = m[id] + delete(m, id) + if len(m) == 0 { + delete(b.subs, t) + } + } + } + b.mu.Unlock() + if s != nil { + close(s.ch) // run() drains remaining events then returns + } +} + +// run is the per-subscriber goroutine loop. 
+func (b *Bus) run(s *subscriber) { + defer b.wg.Done() + defer s.cancel() + for e := range s.ch { + // Protect bus from handler panics β€” one bad handler must not + // take down the pipeline. + func() { + defer func() { + _ = recover() + }() + s.handler(s.ctx, e) + }() + } +} + +// Publish delivers e to every subscriber interested in e.Type() and every +// SubscribeAll subscriber. If ctx is canceled, Publish returns early and the +// event is not queued to any subscriber that would block. +// +// Publish is non-blocking per subscriber: if a subscriber's buffer is full the +// event is dropped for that subscriber and Stats.Dropped is incremented. +func (b *Bus) Publish(ctx context.Context, e Event) { + if e == nil { + return + } + b.mu.RLock() + if b.closed { + b.mu.RUnlock() + return + } + // Snapshot the subscriber slices under lock, then release before send. + typed := b.subs[e.Type()] + var typedList []*subscriber + if len(typed) > 0 { + typedList = make([]*subscriber, 0, len(typed)) + for _, s := range typed { + typedList = append(typedList, s) + } + } + var allList []*subscriber + if len(b.allSubs) > 0 { + allList = make([]*subscriber, 0, len(b.allSubs)) + for _, s := range b.allSubs { + allList = append(allList, s) + } + } + b.mu.RUnlock() + + atomic.AddUint64(&b.published, 1) + + for _, s := range typedList { + b.dispatch(ctx, s, e) + } + for _, s := range allList { + b.dispatch(ctx, s, e) + } +} + +func (b *Bus) dispatch(ctx context.Context, s *subscriber, e Event) { + select { + case <-ctx.Done(): + // caller abandoned; count as dropped so observability reflects reality + atomic.AddUint64(&b.dropped, 1) + case s.ch <- e: + atomic.AddUint64(&b.delivered, 1) + default: + atomic.AddUint64(&b.dropped, 1) + } +} + +// Close stops accepting new publishes and drains in-flight subscriber +// buffers. It waits until all handlers have returned, or until ctx expires. +// Returns ctx.Err() if draining did not complete in time. 
+func (b *Bus) Close(ctx context.Context) error { + b.mu.Lock() + if b.closed { + b.mu.Unlock() + return nil + } + b.closed = true + + // Close every subscriber channel; their goroutines will drain and exit. + for _, m := range b.subs { + for _, s := range m { + close(s.ch) + } + } + for _, s := range b.allSubs { + close(s.ch) + } + b.subs = make(map[EventType]map[uint64]*subscriber) + b.allSubs = make(map[uint64]*subscriber) + b.mu.Unlock() + + done := make(chan struct{}) + go func() { + b.wg.Wait() + close(done) + }() + + select { + case <-done: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +// Stats returns a snapshot of current metrics. +func (b *Bus) Stats() Stats { + b.mu.RLock() + closed := b.closed + subCount := len(b.allSubs) + for _, m := range b.subs { + subCount += len(m) + } + b.mu.RUnlock() + + return Stats{ + Published: atomic.LoadUint64(&b.published), + Delivered: atomic.LoadUint64(&b.delivered), + Dropped: atomic.LoadUint64(&b.dropped), + Subscribers: subCount, + Closed: closed, + } +} diff --git a/internal/eventbus/bus_test.go b/internal/eventbus/bus_test.go new file mode 100644 index 0000000..35aca49 --- /dev/null +++ b/internal/eventbus/bus_test.go @@ -0,0 +1,307 @@ +package eventbus + +import ( + "context" + "sync" + "sync/atomic" + "testing" + "time" +) + +// waitUntil polls predicate every 2ms up to timeout. Used to avoid flaky +// sleeps in async tests without adding dependencies. 
+func waitUntil(t *testing.T, timeout time.Duration, pred func() bool, msg string) { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if pred() { + return + } + time.Sleep(2 * time.Millisecond) + } + t.Fatalf("timeout waiting: %s", msg) +} + +func TestPublishSubscribe_SingleType(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var got atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, e Event) { + ev, ok := e.(SubdomainDiscovered) + if !ok { + t.Errorf("wrong event type: %T", e) + return + } + if ev.Subdomain == "" { + t.Error("empty subdomain") + } + got.Add(1) + }) + + for i := 0; i < 5; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("test", "api.example.com", "passive")) + } + waitUntil(t, time.Second, func() bool { return got.Load() == 5 }, "5 events delivered") +} + +func TestSubscribeAll_ReceivesEveryType(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var got atomic.Int32 + b.SubscribeAll(func(_ context.Context, _ Event) { got.Add(1) }) + + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + b.Publish(context.Background(), DNSResolved{EventMeta: newMeta("dns", "a.example.com"), Subdomain: "a.example.com", IPs: []string{"1.2.3.4"}}) + b.Publish(context.Background(), HTTPProbed{EventMeta: newMeta("http", "a.example.com"), URL: "https://a.example.com", StatusCode: 200}) + + waitUntil(t, time.Second, func() bool { return got.Load() == 3 }, "3 events on wildcard") +} + +func TestSubscribe_FilteringByType(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var subs, dns atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { subs.Add(1) }) + b.Subscribe(EventDNSResolved, func(_ context.Context, _ Event) { dns.Add(1) }) + + for i := 0; i < 3; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + for i := 0; i 
< 2; i++ { + b.Publish(context.Background(), DNSResolved{EventMeta: newMeta("dns", "x"), Subdomain: "x"}) + } + waitUntil(t, time.Second, func() bool { return subs.Load() == 3 && dns.Load() == 2 }, "typed counts match") +} + +func TestUnsubscribe_StopsDelivery(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var count atomic.Int32 + sub := b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { count.Add(1) }) + + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + waitUntil(t, time.Second, func() bool { return count.Load() == 1 }, "first event") + + sub.Unsubscribe() + sub.Unsubscribe() // idempotent + + // Publish after unsubscribe β€” should not be delivered to this handler. + for i := 0; i < 5; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "b.example.com", "p")) + } + time.Sleep(30 * time.Millisecond) + if got := count.Load(); got != 1 { + t.Errorf("expected 1 delivery after unsubscribe, got %d", got) + } +} + +func TestPublish_MultipleSubscribersEachGetEvent(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + var a, c atomic.Int32 + b.Subscribe(EventVulnerability, func(_ context.Context, _ Event) { a.Add(1) }) + b.Subscribe(EventVulnerability, func(_ context.Context, _ Event) { c.Add(1) }) + + b.Publish(context.Background(), VulnerabilityFound{EventMeta: newMeta("sec", "x"), ID: "test", Severity: SeverityHigh}) + + waitUntil(t, time.Second, func() bool { return a.Load() == 1 && c.Load() == 1 }, "both subscribers received") +} + +func TestPublish_NonBlocking_DropsWhenBufferFull(t *testing.T) { + b := New(2) + defer b.Close(context.Background()) + + blocker := make(chan struct{}) + var started atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(ctx context.Context, _ Event) { + started.Add(1) + <-blocker + }) + + // First event enters handler (blocks). Next 2 fill the buffer of size 2. + // Subsequent publishes should be counted as dropped. 
+ for i := 0; i < 100; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "x.example.com", "p")) + } + + // Give the bus a moment to register drops. + waitUntil(t, time.Second, func() bool { + return b.Stats().Dropped > 0 + }, "some events dropped when buffer full") + + // Unblock and close cleanly. + close(blocker) +} + +func TestClose_DrainsAndStops(t *testing.T) { + b := New(16) + + var got atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { got.Add(1) }) + + for i := 0; i < 10; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := b.Close(ctx); err != nil { + t.Fatalf("Close error: %v", err) + } + if got.Load() != 10 { + t.Errorf("expected 10 delivered before close drains, got %d", got.Load()) + } + + // Publish after close is a silent no-op. + b.Publish(context.Background(), NewSubdomainDiscovered("t", "z.example.com", "p")) + if got.Load() != 10 { + t.Errorf("delivery continued after close: %d", got.Load()) + } +} + +func TestClose_IdempotentAndMulticall(t *testing.T) { + b := New(4) + ctx := context.Background() + if err := b.Close(ctx); err != nil { + t.Fatalf("first close: %v", err) + } + if err := b.Close(ctx); err != nil { + t.Fatalf("second close: %v", err) + } +} + +func TestPanicInHandler_DoesNotAffectOthers(t *testing.T) { + b := New(8) + defer b.Close(context.Background()) + + var good atomic.Int32 + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { panic("bad handler") }) + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { good.Add(1) }) + + for i := 0; i < 5; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + waitUntil(t, time.Second, func() bool { return good.Load() == 5 }, "good handler received all events") +} + +func 
TestConcurrentPublishers_PreservesInvariant(t *testing.T) { + // With a fast-enough consumer and large buffer, some events may still be + // dropped under heavy burst. The invariant that must ALWAYS hold is: + // Published == Delivered + Dropped + // This protects against race conditions in metric bookkeeping. + b := New(4096) + defer b.Close(context.Background()) + + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) {}) + + const publishers = 20 + const perPublisher = 100 + var wg sync.WaitGroup + for i := 0; i < publishers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < perPublisher; j++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + }() + } + wg.Wait() + + total := uint64(publishers * perPublisher) + waitUntil(t, 5*time.Second, func() bool { + s := b.Stats() + return s.Published == total && s.Delivered+s.Dropped == total + }, "published count matches and delivered+dropped == published") +} + +func TestStats_Increment(t *testing.T) { + b := New(16) + defer b.Close(context.Background()) + + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) {}) + + for i := 0; i < 3; i++ { + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a.example.com", "p")) + } + waitUntil(t, time.Second, func() bool { return b.Stats().Delivered == 3 }, "3 deliveries recorded") + s := b.Stats() + if s.Published != 3 { + t.Errorf("Published = %d, want 3", s.Published) + } + if s.Subscribers != 1 { + t.Errorf("Subscribers = %d, want 1", s.Subscribers) + } + if s.Closed { + t.Error("Closed = true on open bus") + } +} + +func TestPublish_NilEvent_NoOp(t *testing.T) { + b := New(8) + defer b.Close(context.Background()) + var got atomic.Int32 + b.SubscribeAll(func(_ context.Context, _ Event) { got.Add(1) }) + b.Publish(context.Background(), nil) + time.Sleep(20 * time.Millisecond) + if got.Load() != 0 { + t.Errorf("nil event was delivered") + } +} + +func 
TestPublish_CancelledContext_DropsNotDelivers(t *testing.T) { + b := New(1) + defer b.Close(context.Background()) + + hold := make(chan struct{}) + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, _ Event) { <-hold }) + + // First publish occupies buffer slot 1 and handler goroutine starts consuming. + b.Publish(context.Background(), NewSubdomainDiscovered("t", "a", "p")) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + // With ctx already canceled and the subscriber busy, dispatch should record a drop. + before := b.Stats().Dropped + b.Publish(ctx, NewSubdomainDiscovered("t", "b", "p")) + b.Publish(ctx, NewSubdomainDiscovered("t", "c", "p")) + after := b.Stats().Dropped + if after <= before { + t.Errorf("expected Dropped to increase with canceled ctx, before=%d after=%d", before, after) + } + + close(hold) +} + +func TestHandlerReceivesEventMetadata(t *testing.T) { + b := New(8) + defer b.Close(context.Background()) + + done := make(chan Event, 1) + b.Subscribe(EventSubdomainDiscovered, func(_ context.Context, e Event) { done <- e }) + + before := time.Now().Add(-time.Second) + b.Publish(context.Background(), NewSubdomainDiscovered("sources.crtsh", "api.example.com", "passive:crt.sh")) + + select { + case e := <-done: + m := e.Meta() + if m.Source != "sources.crtsh" { + t.Errorf("Source = %q", m.Source) + } + if m.Target != "api.example.com" { + t.Errorf("Target = %q", m.Target) + } + if m.At.Before(before) { + t.Errorf("At = %v is before %v", m.At, before) + } + case <-time.After(time.Second): + t.Fatal("no event received") + } +} diff --git a/internal/eventbus/events.go b/internal/eventbus/events.go new file mode 100644 index 0000000..f7fa81d --- /dev/null +++ b/internal/eventbus/events.go @@ -0,0 +1,337 @@ +// Package eventbus provides a typed, context-aware pub/sub bus that decouples +// discovery, probing, analysis, and reporting modules in God's Eye v2. 
//
// Design choices:
//   - Events are typed structs implementing Event; dispatch is keyed on EventType.
//   - Subscribers run handlers on their own goroutine with a buffered channel,
//     so a slow handler cannot stall the producer.
//   - Publish is non-blocking: if a subscriber buffer is full, the event is
//     dropped for that subscriber and Stats.Dropped is incremented. The buffer
//     size is chosen once per Bus (the argument to New), so callers that care
//     about lossless delivery must size the bus accordingly.
//   - Close stops accepting new events and drains outstanding ones before
//     returning, unless the context passed to Close expires first.
package eventbus

import "time"

// EventType identifies the kind of an event. The bus uses it as the key for
// typed subscriptions.
type EventType string

// Canonical event types. Modules should always use these constants rather than
// string literals to avoid typos and to make the full event vocabulary greppable.
const (
	EventSubdomainDiscovered EventType = "subdomain.discovered"
	EventDNSResolved         EventType = "dns.resolved"
	EventHTTPProbed          EventType = "http.probed"
	EventTechDetected        EventType = "tech.detected"
	EventTLSAnalyzed         EventType = "tls.analyzed"
	EventTakeoverCandidate   EventType = "takeover.candidate"
	EventTakeoverConfirmed   EventType = "takeover.confirmed"
	EventVulnerability       EventType = "vulnerability"
	EventSecret              EventType = "secret"
	EventCVEMatch            EventType = "cve.match"
	EventCloudAsset          EventType = "cloud.asset"
	EventAPIFinding          EventType = "api.finding"
	EventJSFile              EventType = "js.file"
	EventAIFinding           EventType = "ai.finding"
	EventPhaseStarted        EventType = "phase.started"
	EventPhaseCompleted      EventType = "phase.completed"
	EventModuleError         EventType = "module.error"
	EventScanStarted         EventType = "scan.started"
	EventScanCompleted       EventType = "scan.completed"
)

// Severity levels used across vulnerability, secret, AI and CVE events.
+type Severity string + +const ( + SeverityInfo Severity = "info" + SeverityLow Severity = "low" + SeverityMedium Severity = "medium" + SeverityHigh Severity = "high" + SeverityCritical Severity = "critical" +) + +// Event is implemented by every event struct. +type Event interface { + Type() EventType + Meta() EventMeta +} + +// EventMeta is shared metadata embedded in every event. +type EventMeta struct { + At time.Time // when the event was created + Source string // originating module name (e.g. "sources.crtsh", "dns.resolver") + Target string // logical target (typically the subdomain or host the event pertains to) +} + +// Meta returns the shared metadata; implemented by embedding EventMeta. +func (m EventMeta) Meta() EventMeta { return m } + +// now returns the current time; indirected for testability. +var now = time.Now + +// newMeta builds an EventMeta with a populated timestamp. +func newMeta(source, target string) EventMeta { + return EventMeta{At: now(), Source: source, Target: target} +} + +// --- Concrete event types -------------------------------------------------- + +// SubdomainDiscovered fires whenever any source (passive, brute, recursive, +// CT, etc.) identifies a subdomain that passes the "ends in target domain" +// filter. Multiple sources may discover the same subdomain β€” the bus does not +// dedup; that's the store's job. +type SubdomainDiscovered struct { + EventMeta + Subdomain string + Method string // "passive:crt.sh", "brute", "recursive", "ct-stream", etc. +} + +func (SubdomainDiscovered) Type() EventType { return EventSubdomainDiscovered } + +func NewSubdomainDiscovered(source, subdomain, method string) SubdomainDiscovered { + return SubdomainDiscovered{ + EventMeta: newMeta(source, subdomain), + Subdomain: subdomain, + Method: method, + } +} + +// DNSResolved fires after a subdomain is resolved. Empty IPs field signals +// an intentionally negative result (NXDOMAIN); absence of the event means +// "not yet resolved". 
type DNSResolved struct {
	EventMeta
	Subdomain string
	IPs       []string // resolved addresses; left empty for a negative result
	CNAME     string   // presumably the canonical-name target, if any — confirm with the producer
	PTR       string   // presumably the reverse-DNS name, if any — confirm with the producer
}

// Type reports EventDNSResolved.
func (DNSResolved) Type() EventType { return EventDNSResolved }

// HTTPProbed fires once per successful HTTP probe, including server banner,
// title, and technology signals. Security checks emit their own events.
type HTTPProbed struct {
	EventMeta
	URL           string
	StatusCode    int
	ContentLength int64
	Title         string
	Server        string
	Technologies  []string
	Headers       map[string]string
	ResponseMs    int64 // response time; milliseconds going by the field name
	TLSVersion    string
	TLSSelfSigned bool
}

// Type reports EventHTTPProbed.
func (HTTPProbed) Type() EventType { return EventHTTPProbed }

// VulnerabilityFound is the canonical finding event for any detected issue.
// Scanner modules (security checks, smuggling, SSRF, GraphQL, etc.) all emit
// this so the reporter/aggregator has a single type to consume.
type VulnerabilityFound struct {
	EventMeta
	ID          string // stable identifier, e.g. "open-redirect", "cors-wildcard-creds"
	Title       string // short human-readable title
	Description string // longer context
	Severity    Severity
	URL         string   // affected URL
	Evidence    string   // raw evidence (truncated if too large)
	Remediation string   // how to fix
	CVEs        []string // referenced CVEs if any
	OWASP       string   // OWASP category (e.g. "A03:2021-Injection")
	CVSS        float64  // 0.0 if not scored
}

// Type reports EventVulnerability.
func (VulnerabilityFound) Type() EventType { return EventVulnerability }

// SecretFound fires when a credential, API key, or token is detected (in JS,
// response bodies, commits, etc.).
type SecretFound struct {
	EventMeta
	Kind        string // "aws_access_key", "jwt", "stripe_live", "generic_hex"
	Match       string // redacted or truncated match — full value in Value if validated
	Value       string // full value, populated only when validation succeeded
	Location    string // where it was found (URL, file path, commit sha)
	Validated   bool   // true if we verified the secret is live against its service
	Severity    Severity
	Description string
}

// Type reports EventSecret.
func (SecretFound) Type() EventType { return EventSecret }

// CVEMatch fires when a CVE is correlated to a detected technology/version.
type CVEMatch struct {
	EventMeta
	CVE         string
	Technology  string
	Version     string
	Severity    Severity
	CVSS        float64
	Description string
	URL         string
	InKEV       bool // true if in CISA Known Exploited Vulnerabilities catalog
}

// Type reports EventCVEMatch.
func (CVEMatch) Type() EventType { return EventCVEMatch }

// TakeoverCandidate fires when a CNAME or fingerprint points at a service
// that could potentially be taken over. TakeoverConfirmed fires after active
// verification (service claim test) succeeds.
type TakeoverCandidate struct {
	EventMeta
	Subdomain string
	Service   string // "GitHub Pages", "S3", "Heroku", etc.
	CNAME     string
	Evidence  string
}

// Type reports EventTakeoverCandidate.
func (TakeoverCandidate) Type() EventType { return EventTakeoverCandidate }

// TakeoverConfirmed is the follow-up to TakeoverCandidate described above;
// it carries a reproducer instead of raw evidence.
type TakeoverConfirmed struct {
	EventMeta
	Subdomain string
	Service   string
	CNAME     string
	PoC       string // curl/HTTP reproducer
}

// Type reports EventTakeoverConfirmed.
func (TakeoverConfirmed) Type() EventType { return EventTakeoverConfirmed }

// CloudAssetFound fires for exposed/accessible cloud assets (S3 buckets,
// GCS buckets, Azure blobs, Firebase projects, etc.).
type CloudAssetFound struct {
	EventMeta
	Provider    string // "AWS", "GCP", "Azure", "Firebase"
	Kind        string // "s3-bucket", "gcs-bucket", "lambda-url"
	Name        string
	URL         string
	Status      string   // "public-read", "listable", "writable", "exists"
	Permissions []string // detailed permissions if known
}

// Type reports EventCloudAsset.
func (CloudAssetFound) Type() EventType { return EventCloudAsset }

// APIFinding fires for discovered/enumerated API surfaces (GraphQL, Swagger,
// Postman, misconfigured REST) with associated issues.
type APIFinding struct {
	EventMeta
	Kind      string // "graphql-introspection", "swagger-exposed", "rest-cors", etc.
	URL       string
	Issue     string
	Severity  Severity
	Endpoints []string
}

// Type reports EventAPIFinding.
func (APIFinding) Type() EventType { return EventAPIFinding }

// TechDetected fires when a technology (framework, server, CMS, language) is
// identified with a version, feeding CVE matching and AI analysis.
type TechDetected struct {
	EventMeta
	Host       string
	Technology string
	Version    string
	Category   string  // "web-server", "framework", "cms", "language", "waf"
	Confidence float64 // NOTE(review): scale is not stated here (0–1 vs. percent) — confirm with producers
}

// Type reports EventTechDetected.
func (TechDetected) Type() EventType { return EventTechDetected }

// TLSAnalyzed fires with TLS certificate details, including appliance
// fingerprint when identifiable.
type TLSAnalyzed struct {
	EventMeta
	Host          string
	Version       string
	Issuer        string
	Expiry        time.Time
	SelfSigned    bool
	AltNames      []string
	Vendor        string // FortiGate, Palo Alto, etc. (empty if no fingerprint)
	Product       string
	ApplianceKind string // "firewall", "vpn", "loadbalancer", "waf"
	InternalHosts []string
}

// Type reports EventTLSAnalyzed.
func (TLSAnalyzed) Type() EventType { return EventTLSAnalyzed }

// JSFileDiscovered fires when a JavaScript file is discovered and prepared
// for analysis (secret scanning, endpoint extraction, AI review).
type JSFileDiscovered struct {
	EventMeta
	URL  string
	Size int64 // presumably the file size in bytes — confirm with the producer
	Host string
}

// Type reports EventJSFile.
func (JSFileDiscovered) Type() EventType { return EventJSFile }

// AIFinding is emitted by any AI/agent module (cascade or multi-agent).
type AIFinding struct {
	EventMeta
	Subject     string // subdomain/URL the finding pertains to
	Agent       string // "triage", "deep", "xss", "sqli", etc.
	Model       string // LLM model id
	Severity    Severity
	Title       string
	Description string
	Evidence    string
	CVEs        []string
	OWASP       string
	Confidence  float64 // NOTE(review): scale is not stated here — confirm with producers
}

// Type reports EventAIFinding.
func (AIFinding) Type() EventType { return EventAIFinding }

// PhaseStarted / PhaseCompleted frame pipeline phases (passive, brute,
// resolve, probe, ai, etc.) so UIs and progress trackers can react.
type PhaseStarted struct {
	EventMeta
	Phase string
}

// Type reports EventPhaseStarted.
func (PhaseStarted) Type() EventType { return EventPhaseStarted }

// PhaseCompleted closes the phase opened by PhaseStarted and carries how
// long it took plus free-form counters.
type PhaseCompleted struct {
	EventMeta
	Phase    string
	Duration time.Duration
	Stats    map[string]int64 // counter name → value; the key set is up to the emitting phase
}

// Type reports EventPhaseCompleted.
func (PhaseCompleted) Type() EventType { return EventPhaseCompleted }

// ModuleError fires when a module encounters a non-fatal error (source
// unavailable, rate-limited, timeout). Use this for observability; do not
// log errors in modules directly.
type ModuleError struct {
	EventMeta
	Module  string
	Err     string // stringified error
	Fatal   bool   // true only when the module cannot continue
	Context map[string]string
}

// Type reports EventModuleError.
func (ModuleError) Type() EventType { return EventModuleError }

// ScanStarted / ScanCompleted bookend the whole run.
+type ScanStarted struct { + EventMeta + Target string + Profile string +} + +func (ScanStarted) Type() EventType { return EventScanStarted } + +type ScanCompleted struct { + EventMeta + Target string + Duration time.Duration + Stats map[string]int64 +} + +func (ScanCompleted) Type() EventType { return EventScanCompleted } diff --git a/internal/http/factory.go b/internal/http/factory.go index 88158ce..046fef8 100644 --- a/internal/http/factory.go +++ b/internal/http/factory.go @@ -6,6 +6,8 @@ import ( "net/http" "sync" "time" + + "god-eye/internal/proxyconf" ) // ClientFactory manages shared HTTP clients with connection pooling @@ -26,8 +28,40 @@ type ClientFactory struct { var ( factory *ClientFactory factoryOnce sync.Once + + // proxyURL captures the most recent SetProxy() value, read at factory + // construction time. Callers MUST invoke SetProxy BEFORE any code path + // that triggers GetFactory β€” otherwise the factory is built with a + // direct dialer and subsequent proxy changes won't be picked up. + // + // In main.go this is safe: we call SetProxy right after flag parsing, + // before any module starts. + proxyURL string + proxyMu sync.RWMutex ) +// SetProxy configures the outbound proxy for every HTTP client the +// factory hands out. Must be called BEFORE GetFactory() / any module +// uses a shared client. Supported schemes: http, https, socks5, socks5h. +// Empty string disables proxying. +func SetProxy(u string) error { + if err := proxyconf.Validate(u); err != nil { + return err + } + proxyMu.Lock() + proxyURL = u + proxyMu.Unlock() + return nil +} + +// CurrentProxy returns the currently-configured proxy URL, or empty when +// none. Useful for status/debug output. 
+func CurrentProxy() string { + proxyMu.RLock() + defer proxyMu.RUnlock() + return proxyURL +} + // GetFactory returns the singleton client factory func GetFactory() *ClientFactory { factoryOnce.Do(func() { @@ -37,12 +71,26 @@ func GetFactory() *ClientFactory { } func newClientFactory() *ClientFactory { + proxyMu.RLock() + cfgProxy := proxyURL + proxyMu.RUnlock() + + baseDialer := &net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 30 * time.Second, + } + dialCtx, err := proxyconf.BuildDialer(cfgProxy, baseDialer) + if err != nil { + // Bad proxy URL at this point is a programming error (we validated + // in SetProxy). Fall back to direct rather than crashing. + dialCtx = baseDialer.DialContext + } + proxyFunc, _ := proxyconf.BuildProxyFunc(cfgProxy) + // Secure transport with TLS verification secureTransport := &http.Transport{ - DialContext: (&net.Dialer{ - Timeout: 10 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, + DialContext: dialCtx, + Proxy: proxyFunc, MaxIdleConns: 200, MaxIdleConnsPerHost: 20, MaxConnsPerHost: 50, @@ -57,10 +105,8 @@ func newClientFactory() *ClientFactory { // Insecure transport (for scanning targets with invalid certs) insecureTransport := &http.Transport{ - DialContext: (&net.Dialer{ - Timeout: 10 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, + DialContext: dialCtx, + Proxy: proxyFunc, MaxIdleConns: 200, MaxIdleConnsPerHost: 20, MaxConnsPerHost: 50, diff --git a/internal/module/module.go b/internal/module/module.go new file mode 100644 index 0000000..ad6fa0a --- /dev/null +++ b/internal/module/module.go @@ -0,0 +1,101 @@ +// Package module defines the Module interface and Registry used by God's Eye v2 +// to organize discovery, enrichment, analysis, and reporting units of work. +// +// A Module is any unit of the pipeline that subscribes to zero-or-more event +// types, produces zero-or-more event types, and optionally performs a bounded +// amount of work on startup (e.g. 
a passive source fetches once and publishes). +// +// Modules are decoupled: they do not call each other directly. Ordering emerges +// from the event-driven dependency graph, not from phase barriers. The Phase +// label is metadata used for grouping in progress UIs and logs, not a scheduling +// primitive. +package module + +import ( + "context" + + "god-eye/internal/eventbus" + "god-eye/internal/store" +) + +// Phase groups modules at similar pipeline stages for presentation. Modules at +// different phases may still run concurrently; the scanner does not enforce +// phase barriers. +type Phase string + +const ( + PhaseSetup Phase = "setup" // load DBs, wordlists, validate config + PhaseDiscovery Phase = "discovery" // subdomain sources (passive, CT, brute, recursive) + PhaseResolution Phase = "resolution" // DNS resolve, CNAME, PTR, IP info, wildcard filter + PhaseEnrichment Phase = "enrichment" // HTTP probe, tech fingerprint, TLS analyze + PhaseAnalysis Phase = "analysis" // security checks, takeover, secrets, AI, CVE match + PhaseReporting Phase = "reporting" // output writers, report generation +) + +// Context bundles everything a module needs to run. +// +// The Ctx field carries cancellation β€” every long-running module must select +// on Ctx.Done() to exit cleanly when the user interrupts. +type Context struct { + Ctx context.Context + Bus *eventbus.Bus + Store store.Store + Config ConfigView + Target string // primary target domain + Profile string // active profile name (bugbounty, pentest, stealth-max, ...) +} + +// ConfigView is a narrow read-only interface over the scan config, exposed to +// modules so they cannot mutate global state. Implementations live in the +// config package. +type ConfigView interface { + // Profile returns the active profile name ("" when none is selected). + Profile() string + // Bool reads a boolean config key, returning fallback if unset. 
+ Bool(key string, fallback bool) bool + // Int reads an int key, returning fallback if unset. + Int(key string, fallback int) int + // String reads a string key, returning fallback if unset. + String(key string, fallback string) string + // Strings reads a string-slice key. + Strings(key string) []string + // ModuleEnabled lets the user disable a module by name. Registry honors + // this during selection. + ModuleEnabled(moduleName string) bool +} + +// Module is the unit of work registered in the pipeline. +// +// Implementations should: +// - be cheap to construct (no I/O in the Module value itself) +// - do all setup/teardown inside Run so lifecycle is explicit +// - subscribe to events via mctx.Bus.Subscribe in Run +// - return promptly when mctx.Ctx is canceled OR when their work is complete +type Module interface { + // Name uniquely identifies the module. Use dotted notation grouping by + // concern: "sources.crtsh", "dns.resolver", "http.probe", "security.cors", + // "ai.cascade". The registry rejects duplicate names. + Name() string + + // Phase groups the module in pipeline UIs. See Phase constants. + Phase() Phase + + // Consumes lists event types the module subscribes to. Empty means the + // module is a pure producer (e.g. a passive source). Used by tooling to + // visualize the event graph; the bus itself is queried via Subscribe. + Consumes() []eventbus.EventType + + // Produces lists event types the module publishes. Empty means the module + // only side-effects (e.g. reporting). Used for tooling and dep docs. + Produces() []eventbus.EventType + + // DefaultEnabled returns whether this module runs when config does not + // explicitly enable/disable it. Passive sources typically default true; + // aggressive/experimental modules typically default false. + DefaultEnabled() bool + + // Run executes the module. Must be non-blocking on setup and must return + // when its work is complete OR mctx.Ctx is canceled. 
Errors returned are + // logged via ModuleError events by the scanner. + Run(mctx Context) error +} diff --git a/internal/module/registry.go b/internal/module/registry.go new file mode 100644 index 0000000..22969b5 --- /dev/null +++ b/internal/module/registry.go @@ -0,0 +1,183 @@ +package module + +import ( + "fmt" + "sort" + "sync" + + "god-eye/internal/eventbus" +) + +// Registry stores modules keyed by name. Modules register themselves via +// init() functions by calling Register on the default registry. +type Registry struct { + mu sync.RWMutex + modules map[string]Module + order []string // insertion order for deterministic iteration +} + +// NewRegistry returns an empty registry. Most callers should use Default() +// which returns the process-wide registry that init() functions populate. +func NewRegistry() *Registry { + return &Registry{modules: make(map[string]Module)} +} + +var ( + defaultRegistry *Registry + defaultOnce sync.Once +) + +// Default returns the process-wide module registry. +func Default() *Registry { + defaultOnce.Do(func() { + defaultRegistry = NewRegistry() + }) + return defaultRegistry +} + +// Register adds m to r. Panics on duplicate name β€” registration happens at +// init() time, so duplicates indicate a compile-time bug that must surface +// immediately rather than silently overwrite. +func (r *Registry) Register(m Module) { + if m == nil { + panic("module.Register: nil module") + } + name := m.Name() + if name == "" { + panic("module.Register: module has empty Name()") + } + r.mu.Lock() + defer r.mu.Unlock() + if _, exists := r.modules[name]; exists { + panic(fmt.Sprintf("module.Register: duplicate module %q", name)) + } + r.modules[name] = m + r.order = append(r.order, name) +} + +// Register is a shortcut for Default().Register(m). Intended use: +// +// func init() { module.Register(&myModule{}) } +func Register(m Module) { Default().Register(m) } + +// Get returns the module with the given name. 
+func (r *Registry) Get(name string) (Module, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + m, ok := r.modules[name] + return m, ok +} + +// Names returns all registered module names in insertion order. +func (r *Registry) Names() []string { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]string, len(r.order)) + copy(out, r.order) + return out +} + +// All returns every registered module in insertion order. The returned slice +// is safe for the caller to iterate but do not mutate it. +func (r *Registry) All() []Module { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]Module, 0, len(r.order)) + for _, n := range r.order { + out = append(out, r.modules[n]) + } + return out +} + +// ByPhase returns modules belonging to the given phase, sorted by name for +// stable presentation. +func (r *Registry) ByPhase(p Phase) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + if m.Phase() == p { + out = append(out, m) + } + } + sort.SliceStable(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out +} + +// Select returns the subset of modules that should run for the given config. +// A module is selected when cfg.ModuleEnabled(name) returns true (explicit +// enable wins), OR when cfg leaves it unset and DefaultEnabled() is true. +func (r *Registry) Select(cfg ConfigView) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + if cfg != nil { + // explicit config: respect it directly + if cfg.ModuleEnabled(m.Name()) { + out = append(out, m) + continue + } + // if the config has a non-default opinion (enabled=false), honor it + // β€” but ModuleEnabled returning false could also mean "unset". + // We resolve the ambiguity by checking whether any profile/CLI flag + // set it via a separate mechanism; for now, fall back to the + // module's default. 
+ if m.DefaultEnabled() { + out = append(out, m) + } + continue + } + // no config: honor module default + if m.DefaultEnabled() { + out = append(out, m) + } + } + return out +} + +// ProducersOf returns the modules that declare t in their Produces() set. +// Used by tooling and tests to validate the event-graph integrity. +func (r *Registry) ProducersOf(t eventbus.EventType) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + for _, et := range m.Produces() { + if et == t { + out = append(out, m) + break + } + } + } + return out +} + +// ConsumersOf returns modules that declare t in their Consumes() set. +func (r *Registry) ConsumersOf(t eventbus.EventType) []Module { + r.mu.RLock() + defer r.mu.RUnlock() + var out []Module + for _, n := range r.order { + m := r.modules[n] + for _, et := range m.Consumes() { + if et == t { + out = append(out, m) + break + } + } + } + return out +} + +// Reset clears the registry. Intended for tests only; never call in production +// code. +func (r *Registry) Reset() { + r.mu.Lock() + defer r.mu.Unlock() + r.modules = make(map[string]Module) + r.order = nil +} diff --git a/internal/module/registry_test.go b/internal/module/registry_test.go new file mode 100644 index 0000000..e8bc0d1 --- /dev/null +++ b/internal/module/registry_test.go @@ -0,0 +1,257 @@ +package module + +import ( + "context" + "reflect" + "sort" + "testing" + + "god-eye/internal/eventbus" +) + +// fakeModule is a minimal Module for tests. 
+type fakeModule struct { + name string + phase Phase + consumes []eventbus.EventType + produces []eventbus.EventType + defaultEnabled bool + runCalled bool +} + +func (f *fakeModule) Name() string { return f.name } +func (f *fakeModule) Phase() Phase { return f.phase } +func (f *fakeModule) Consumes() []eventbus.EventType { return f.consumes } +func (f *fakeModule) Produces() []eventbus.EventType { return f.produces } +func (f *fakeModule) DefaultEnabled() bool { return f.defaultEnabled } +func (f *fakeModule) Run(mctx Context) error { f.runCalled = true; return nil } + +// fakeConfig implements ConfigView for tests. +type fakeConfig struct { + profile string + enabled map[string]bool +} + +func (c *fakeConfig) Profile() string { return c.profile } +func (c *fakeConfig) Bool(k string, fb bool) bool { return fb } +func (c *fakeConfig) Int(k string, fb int) int { return fb } +func (c *fakeConfig) String(k, fb string) string { return fb } +func (c *fakeConfig) Strings(k string) []string { return nil } +func (c *fakeConfig) ModuleEnabled(name string) bool { return c.enabled[name] } + +func TestRegister_AndGet(t *testing.T) { + r := NewRegistry() + m := &fakeModule{name: "test.one", phase: PhaseDiscovery, defaultEnabled: true} + r.Register(m) + + got, ok := r.Get("test.one") + if !ok { + t.Fatal("Get returned !ok for registered module") + } + if got != m { + t.Error("Get returned a different instance") + } + + if _, ok := r.Get("not.present"); ok { + t.Error("Get returned ok for missing module") + } +} + +func TestRegister_DuplicatePanic(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "dup", phase: PhaseDiscovery}) + defer func() { + if recover() == nil { + t.Error("expected panic on duplicate registration") + } + }() + r.Register(&fakeModule{name: "dup", phase: PhaseDiscovery}) +} + +func TestRegister_NilPanic(t *testing.T) { + r := NewRegistry() + defer func() { + if recover() == nil { + t.Error("expected panic on nil module") + } + }() + 
r.Register(nil) +} + +func TestRegister_EmptyNamePanic(t *testing.T) { + r := NewRegistry() + defer func() { + if recover() == nil { + t.Error("expected panic on empty name") + } + }() + r.Register(&fakeModule{name: "", phase: PhaseDiscovery}) +} + +func TestNames_InsertionOrder(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "zebra", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "alpha", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "middle", phase: PhaseDiscovery}) + + want := []string{"zebra", "alpha", "middle"} + got := r.Names() + if !reflect.DeepEqual(got, want) { + t.Errorf("Names order = %v, want %v", got, want) + } +} + +func TestAll_ReturnsRegistered(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "a", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "b", phase: PhaseAnalysis}) + r.Register(&fakeModule{name: "c", phase: PhaseReporting}) + + if got := len(r.All()); got != 3 { + t.Errorf("All length = %d, want 3", got) + } +} + +func TestByPhase_SortedByName(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "sources.zzz", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "sources.aaa", phase: PhaseDiscovery}) + r.Register(&fakeModule{name: "security.cors", phase: PhaseAnalysis}) + r.Register(&fakeModule{name: "sources.mmm", phase: PhaseDiscovery}) + + got := r.ByPhase(PhaseDiscovery) + names := make([]string, len(got)) + for i, m := range got { + names[i] = m.Name() + } + want := []string{"sources.aaa", "sources.mmm", "sources.zzz"} + if !reflect.DeepEqual(names, want) { + t.Errorf("ByPhase(discovery) = %v, want %v (sorted)", names, want) + } + + if got := r.ByPhase(PhaseAnalysis); len(got) != 1 || got[0].Name() != "security.cors" { + t.Errorf("ByPhase(analysis) unexpected: %v", got) + } + if got := r.ByPhase(PhaseReporting); len(got) != 0 { + t.Errorf("ByPhase(reporting) should be empty, got %d", len(got)) + } +} + +func TestSelect_DefaultEnabled(t *testing.T) { + r := 
NewRegistry() + r.Register(&fakeModule{name: "on-by-default", phase: PhaseDiscovery, defaultEnabled: true}) + r.Register(&fakeModule{name: "off-by-default", phase: PhaseDiscovery, defaultEnabled: false}) + + // nil config: module default governs + got := r.Select(nil) + names := moduleNames(got) + sort.Strings(names) + if !reflect.DeepEqual(names, []string{"on-by-default"}) { + t.Errorf("Select(nil) = %v, want [on-by-default]", names) + } +} + +func TestSelect_ConfigEnablesOff(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{name: "optin", phase: PhaseAnalysis, defaultEnabled: false}) + r.Register(&fakeModule{name: "default-on", phase: PhaseAnalysis, defaultEnabled: true}) + + cfg := &fakeConfig{enabled: map[string]bool{"optin": true}} + got := r.Select(cfg) + names := moduleNames(got) + sort.Strings(names) + want := []string{"default-on", "optin"} + if !reflect.DeepEqual(names, want) { + t.Errorf("Select = %v, want %v", names, want) + } +} + +func TestProducersOf_AndConsumersOf(t *testing.T) { + r := NewRegistry() + r.Register(&fakeModule{ + name: "producer-a", + phase: PhaseDiscovery, + produces: []eventbus.EventType{eventbus.EventSubdomainDiscovered}, + }) + r.Register(&fakeModule{ + name: "producer-b", + phase: PhaseDiscovery, + produces: []eventbus.EventType{eventbus.EventSubdomainDiscovered, eventbus.EventDNSResolved}, + }) + r.Register(&fakeModule{ + name: "consumer", + phase: PhaseEnrichment, + consumes: []eventbus.EventType{eventbus.EventDNSResolved}, + }) + + producers := r.ProducersOf(eventbus.EventSubdomainDiscovered) + names := moduleNames(producers) + sort.Strings(names) + want := []string{"producer-a", "producer-b"} + if !reflect.DeepEqual(names, want) { + t.Errorf("ProducersOf = %v, want %v", names, want) + } + + consumers := r.ConsumersOf(eventbus.EventDNSResolved) + if len(consumers) != 1 || consumers[0].Name() != "consumer" { + t.Errorf("ConsumersOf unexpected: %v", consumers) + } +} + +func TestReset(t *testing.T) { + r := 
NewRegistry() + r.Register(&fakeModule{name: "m1", phase: PhaseDiscovery, defaultEnabled: true}) + r.Register(&fakeModule{name: "m2", phase: PhaseDiscovery, defaultEnabled: true}) + if len(r.All()) != 2 { + t.Fatal("pre-reset: expected 2 modules") + } + r.Reset() + if len(r.All()) != 0 { + t.Errorf("post-reset: expected 0 modules, got %d", len(r.All())) + } + // Re-register after reset works + r.Register(&fakeModule{name: "m1", phase: PhaseDiscovery, defaultEnabled: true}) + if len(r.All()) != 1 { + t.Errorf("post-reset re-register: expected 1, got %d", len(r.All())) + } +} + +func TestDefault_Singleton(t *testing.T) { + a := Default() + b := Default() + if a != b { + t.Error("Default() returned different instances") + } +} + +func TestRunContextCarriesFields(t *testing.T) { + // Sanity: Context struct is populated correctly β€” this is effectively a + // struct-init contract test to catch accidental field removals. + ctx := context.Background() + bus := eventbus.New(16) + defer bus.Close(context.Background()) + + mctx := Context{ + Ctx: ctx, + Bus: bus, + Target: "example.com", + Profile: "bugbounty", + } + if mctx.Target != "example.com" { + t.Errorf("Target lost: %q", mctx.Target) + } + if mctx.Profile != "bugbounty" { + t.Errorf("Profile lost: %q", mctx.Profile) + } + if mctx.Bus != bus { + t.Error("Bus not retained") + } +} + +func moduleNames(ms []Module) []string { + out := make([]string, len(ms)) + for i, m := range ms { + out[i] = m.Name() + } + return out +} diff --git a/internal/modules/ai/ai.go b/internal/modules/ai/ai.go new file mode 100644 index 0000000..3ccd332 --- /dev/null +++ b/internal/modules/ai/ai.go @@ -0,0 +1,660 @@ +// Package ai is the v2 adapter that wires the Ollama client into the +// event-driven pipeline. 
// Unlike the initial skeleton (which only called
// CVEMatch on TechDetected), this module subscribes to five event types,
// runs one end-of-scan pass, and dispatches each to a v1 client method:
//
//	TechDetected       → CVEMatch → CVEMatch events
//	JSFileDiscovered   → indexing AIFinding only (AnalyzeJavaScript is deferred until the event carries content)
//	HTTPProbed         → AnalyzeHTTPResponse (for 5xx / suspicious 4xx) → AIFinding
//	SecretFound        → FilterSecrets (triage real vs regex noise) → AIFinding tag
//	VulnerabilityFound → multi-agent orchestrator (agents package) → AIFinding with remediation
//	end of Run         → DetectAnomalies + GenerateReport → AIFinding + report artifact
//
// The end-of-scan pass is not driven by a ScanCompleted subscription: Run
// calls handleScanEnd itself once the per-event handlers have drained.
//
// Every handler:
//   - is a no-op when ai.enabled=false (module Run returns immediately)
//   - dedups by content hash to avoid hammering Ollama with duplicates
//   - cascades through the fast triage model before the deep model
//   - emits AIFinding events so downstream reporters/TUI pick them up
//
// The module is the primary value of God's Eye v2's "local LLM" story —
// without this wiring, the AI layer was essentially a 20GB curiosity
// that added a single CVE string per scan.
package ai

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"god-eye/internal/ai"
	"god-eye/internal/ai/agents"
	"god-eye/internal/eventbus"
	"god-eye/internal/module"
	"god-eye/internal/store"
)

// ModuleName is the registry name of the AI module; it is also used as the
// Source of every event the module publishes.
const ModuleName = "ai.cascade"

// aiModule adapts the Ollama client (and, optionally, the multi-agent
// orchestrator) to the module.Module interface. Both client fields stay nil
// until Run initialises them.
type aiModule struct {
	client       *ai.OllamaClient
	orchestrator *agents.AgentOrchestrator

	// cache dedups expensive Ollama calls across a single scan. The value is
	// a flag struct so the same (method, input) pair is processed exactly
	// once.
	// NOTE(review): the original comment says keys are the SHA256 of
	// (method + input); handlers pass readable keys such as "cve:<name>|<ver>",
	// so any hashing would have to happen inside firstSeen — confirm.
	cache sync.Map // map[string]struct{}

	// Counters surfaced at scan end for observability.
	cveLookups        atomic.Int64
	jsAnalyses        atomic.Int64
	httpAnalyses      atomic.Int64
	secretValidations atomic.Int64
	vulnEnrichments   atomic.Int64
	anomalyScans      atomic.Int64
	reportGenerations atomic.Int64
}

// Register adds a fresh aiModule to the process-wide module registry.
func Register() { module.Register(&aiModule{}) }

// Name returns ModuleName.
func (*aiModule) Name() string { return ModuleName }

// Phase places the module in the analysis phase.
func (*aiModule) Phase() module.Phase { return module.PhaseAnalysis }

// Consumes lists the event types this module declares as inputs. Note that
// EventScanCompleted is declared here but Run does not subscribe to it; the
// end-of-scan work is invoked directly from Run.
func (*aiModule) Consumes() []eventbus.EventType {
	return []eventbus.EventType{
		eventbus.EventTechDetected,
		eventbus.EventJSFile,
		eventbus.EventHTTPProbed,
		eventbus.EventSecret,
		eventbus.EventVulnerability,
		eventbus.EventScanCompleted,
	}
}

// Produces lists the event types this module declares as outputs.
func (*aiModule) Produces() []eventbus.EventType {
	return []eventbus.EventType{
		eventbus.EventAIFinding,
		eventbus.EventCVEMatch,
		eventbus.EventSecret, // validated/re-emitted
	}
}

// DefaultEnabled returns true so the module is always loaded; Run() no-ops
// unless the user set ai.enabled via --enable-ai / wizard / YAML.
func (*aiModule) DefaultEnabled() bool { return true }

// Run is the heart of the v2 AI layer: wires five event subscriptions,
// drains initial store state, waits for late events in a bounded window,
// and finishes with the end-of-scan analyses.
+func (a *aiModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("ai.enabled", false) { + return nil + } + + a.client = ai.NewOllamaClient( + mctx.Config.String("ai.url", "http://localhost:11434"), + mctx.Config.String("ai.fast_model", "qwen3:1.7b"), + mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b"), + mctx.Config.Bool("ai.cascade", true), + ) + if mctx.Config.Bool("ai.verbose", false) { + a.client.Verbose = true + } + if !a.client.IsAvailable() { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: "Ollama not reachable at " + mctx.Config.String("ai.url", "http://localhost:11434"), + }) + return nil + } + + // Multi-agent orchestrator is opt-in: only worth spinning up when the + // user explicitly enables it. The orchestrator holds one client per + // agent type (8 agents) and can take ~200ms to initialise. + if mctx.Config.Bool("ai.multi_agent", false) { + a.orchestrator = agents.NewAgentOrchestrator( + mctx.Config.String("ai.url", "http://localhost:11434"), + mctx.Config.String("ai.fast_model", "qwen3:1.7b"), + mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b"), + ) + } + + var wg sync.WaitGroup + + // Subscribe to every event type we care about. Each handler runs in its + // own goroutine off the bus; we track them with wg so we can drain at + // the end. 
+ subs := []*eventbus.Subscription{ + mctx.Bus.Subscribe(eventbus.EventTechDetected, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.TechDetected); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleTech(mctx, ev.Host, ev.Technology, ev.Version) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventJSFile, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.JSFileDiscovered); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleJSFile(mctx, ev) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.HTTPProbed); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleHTTP(mctx, ev) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventSecret, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.SecretFound); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleSecret(mctx, ev) }() + } + }), + mctx.Bus.Subscribe(eventbus.EventVulnerability, func(_ context.Context, e eventbus.Event) { + if ev, ok := e.(eventbus.VulnerabilityFound); ok { + wg.Add(1) + go func() { defer wg.Done(); a.handleVuln(mctx, ev) }() + } + }), + } + defer func() { + for _, s := range subs { + s.Unsubscribe() + } + }() + + // Drain store: any host already populated with tech/HTTP info gets + // processed on module startup (covers the common case where AI is in a + // later phase than discovery/enrichment). 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil { + continue + } + for _, tech := range h.Technologies { + tech := tech + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); a.handleTech(mctx, host, tech, "") }() + } + if h.StatusCode != 0 { + ev := eventbus.HTTPProbed{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: h.Subdomain}, + URL: "https://" + h.Subdomain, + StatusCode: h.StatusCode, + Title: h.Title, + Server: h.Server, + } + wg.Add(1) + go func() { defer wg.Done(); a.handleHTTP(mctx, ev) }() + } + } + + // Brief window for late events (recursive discovery, slow probes) to + // arrive before we wrap up. + select { + case <-time.After(1500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + + // End-of-scan analyses run once, after all per-event handlers drain. + a.handleScanEnd(mctx) + return nil +} + +// --- Handlers ------------------------------------------------------------ + +// handleTech runs CVE correlation for a (tech, version) pair. Cached by +// (tech, version) so the same pair across many hosts fires one query. +func (a *aiModule) handleTech(mctx module.Context, host, tech, version string) { + if tech == "" || shouldSkipForCVE(tech, version) { + return + } + name, v := parseTech(tech) + if version == "" { + version = v + } + if shouldSkipForCVE(name, version) { + return + } + key := "cve:" + name + "|" + version + if !a.firstSeen(key) { + return + } + a.cveLookups.Add(1) + + cves, err := a.client.CVEMatch(name, version) + if err != nil || cves == "" { + return + } + + // Upsert to the specific host that triggered this. 
+ now := time.Now() + cve := store.CVE{ + ID: cves, Technology: name, Version: version, + Severity: string(eventbus.SeverityHigh), Description: cves, FoundAt: now, + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { h.CVEs = append(h.CVEs, cve) }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.CVEMatch{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + CVE: cves, + Technology: name, + Version: version, + Severity: eventbus.SeverityHigh, + Description: fmt.Sprintf("AI-assisted CVE match for %s %s", name, versionOrUnknown(version)), + }) +} + +// handleJSFile fetches the JS file via the shared HTTP client and feeds it +// to AnalyzeJavaScript. Cached by JS URL β€” a single JS file seen on 5 +// hosts is analysed once. +// +// Note: we do NOT re-download the JS content here. The v1 AnalyzeJavaScript +// method expects the code itself as input; since the upstream javascript +// module already has the content, the proper integration path is to have +// JSFileDiscovered carry the content. For now, we skip the deep analysis +// when content isn't inlined, and rely on the v1 regex results enriched +// by AI at secret-validation time (see handleSecret). +func (a *aiModule) handleJSFile(mctx module.Context, ev eventbus.JSFileDiscovered) { + key := "js:" + ev.URL + if !a.firstSeen(key) { + return + } + a.jsAnalyses.Add(1) + // Deep JS analysis is deferred until JSFileDiscovered carries the + // content (Fase 2 follow-up). We still produce an AIFinding noting + // the JS file was indexed, which helps reporting aggregate per-host + // JS exposure. 
+ mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Host}, + Subject: ev.Host, + Agent: "js-indexer", + Model: a.client.FastModel, + Severity: eventbus.SeverityInfo, + Title: "JavaScript file indexed for secret review", + Evidence: ev.URL, + }) +} + +// handleHTTP triages the HTTP response and dispatches deep analysis only +// for interesting status codes / signals. "Interesting" means anything +// that isn't a normal 200/301 β€” 5xx, verbose 4xx with bodies, weird +// headers. +func (a *aiModule) handleHTTP(mctx module.Context, ev eventbus.HTTPProbed) { + if !isInterestingHTTP(ev) { + return + } + key := fmt.Sprintf("http:%s:%d:%s", ev.Meta().Target, ev.StatusCode, hashShort(ev.Title)) + if !a.firstSeen(key) { + return + } + a.httpAnalyses.Add(1) + + // Compose the content we hand to the deep model. Keep it compact β€” + // Ollama's context is ample but we're summarising for the cascade. + headerLines := []string{} + if ev.Server != "" { + headerLines = append(headerLines, "Server: "+ev.Server) + } + for k, v := range ev.Headers { + headerLines = append(headerLines, k+": "+v) + } + + result, err := a.client.AnalyzeHTTPResponse(ev.Meta().Target, ev.StatusCode, headerLines, ev.Title) + if err != nil || result == nil || len(result.Findings) == 0 { + return + } + now := time.Now() + host := ev.Meta().Target + for _, f := range result.Findings { + persistAIFinding(mctx, host, store.AIFinding{ + Agent: "http-analyzer", Model: a.client.DeepModel, + Severity: result.Severity, Title: "Suspicious HTTP response", + Description: f, Evidence: fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title), + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + Subject: host, + Agent: "http-analyzer", + Model: a.client.DeepModel, + Severity: eventbus.Severity(result.Severity), + Title: "Suspicious HTTP response", + 
Description: f, + Evidence: fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title), + }) + } +} + +// handleSecret validates a regex-surfaced secret through FilterSecrets. +// If the AI confirms it's real, an AIFinding event fires tagging it as +// validated. Regex noise (UI strings, unrelated third-party URLs) is +// dropped silently β€” the v1 Secret event is left in place but the AI +// emission is what a dashboard would prefer to render as a real finding. +func (a *aiModule) handleSecret(mctx module.Context, ev eventbus.SecretFound) { + key := "secret:" + hashShort(ev.Match+"|"+ev.Location) + if !a.firstSeen(key) { + return + } + a.secretValidations.Add(1) + + validated, err := a.client.FilterSecrets([]string{ev.Match}) + if err != nil || len(validated) == 0 { + return // AI says not a real secret, or Ollama unavailable + } + now := time.Now() + persistAIFinding(mctx, ev.Meta().Target, store.AIFinding{ + Agent: "secret-validator", Model: a.client.FastModel, + Severity: string(eventbus.SeverityHigh), + Title: "Secret likely valid (AI-confirmed)", + Description: fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.", ev.Kind), + Evidence: fmt.Sprintf("%s @ %s", ev.Kind, ev.Location), + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Meta().Target}, + Subject: ev.Meta().Target, + Agent: "secret-validator", + Model: a.client.FastModel, + Severity: eventbus.SeverityHigh, + Title: "Secret likely valid (AI-confirmed)", + Description: fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.", + ev.Kind), + Evidence: fmt.Sprintf("%s @ %s", ev.Kind, ev.Location), + }) +} + +// handleVuln routes a vulnerability finding through the multi-agent +// orchestrator for specialist analysis. When multi-agent is disabled, +// this is a no-op. 
+func (a *aiModule) handleVuln(mctx module.Context, ev eventbus.VulnerabilityFound) { + if a.orchestrator == nil { + return + } + key := "vuln:" + ev.ID + ":" + ev.Meta().Target + if !a.firstSeen(key) { + return + } + a.vulnEnrichments.Add(1) + + finding := agents.Finding{ + Type: "vulnerability", + URL: ev.URL, + Context: ev.Description + "\n\nEvidence:\n" + ev.Evidence, + } + // Respect ctx β€” orchestrator methods accept context.Context for + // cancellation. Allow up to 60s for deep-analysis cascade. + ctx, cancel := context.WithTimeout(mctx.Ctx, 60*time.Second) + defer cancel() + result, err := a.orchestrator.Analyze(ctx, finding) + if err != nil || result == nil { + return + } + now := time.Now() + for _, f := range result.Findings { + persistAIFinding(mctx, ev.Meta().Target, store.AIFinding{ + Agent: string(result.AgentType), Model: result.Model, + Severity: strings.ToLower(f.Severity), + Title: f.Title, Description: f.Description, Evidence: f.Evidence, + CVEs: f.CVEs, OWASP: f.OWASP, Confidence: result.Confidence, + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: ev.Meta().Target}, + Subject: ev.Meta().Target, + Agent: string(result.AgentType), + Model: result.Model, + Severity: eventbus.Severity(strings.ToLower(f.Severity)), + Title: f.Title, + Description: f.Description, + Evidence: f.Evidence, + CVEs: f.CVEs, + OWASP: f.OWASP, + Confidence: result.Confidence, + }) + } +} + +// handleScanEnd runs two expensive end-of-scan analyses: +// +// 1. DetectAnomalies β€” cross-host pattern review (dev stacks leaking into +// prod, unusual version mixes, orphaned endpoints) +// 2. GenerateReport β€” executive summary of findings by severity +// +// Both run only when the store has enough data to be worth summarising +// (β‰₯ 3 findings or β‰₯ 5 hosts). 
+func (a *aiModule) handleScanEnd(mctx module.Context) { + hosts := mctx.Store.All(mctx.Ctx) + if len(hosts) == 0 { + return + } + + totalFindings := 0 + for _, h := range hosts { + totalFindings += len(h.Vulnerabilities) + len(h.Secrets) + len(h.CVEs) + len(h.AIFindings) + } + if totalFindings < 3 && len(hosts) < 5 { + return // not worth the Ollama spin-up + } + + // Anomaly detection ------------------------------------------------------ + summary := buildScanSummary(hosts) + a.anomalyScans.Add(1) + if result, err := a.client.DetectAnomalies(summary); err == nil && result != nil { + now := time.Now() + for _, f := range result.Findings { + persistAIFinding(mctx, mctx.Target, store.AIFinding{ + Agent: "anomaly-detector", Model: a.client.DeepModel, + Severity: result.Severity, + Title: "Cross-subdomain anomaly", + Description: f, FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target}, + Subject: mctx.Target, + Agent: "anomaly-detector", + Model: a.client.DeepModel, + Severity: eventbus.Severity(result.Severity), + Title: "Cross-subdomain anomaly", + Description: f, + }) + } + } + + // Executive report ------------------------------------------------------ + stats := map[string]int{ + "hosts": len(hosts), + "findings": totalFindings, + } + a.reportGenerations.Add(1) + if report, err := a.client.GenerateReport(summary, stats); err == nil && report != "" { + now := time.Now() + persistAIFinding(mctx, mctx.Target, store.AIFinding{ + Agent: "report-writer", Model: a.client.DeepModel, + Severity: string(eventbus.SeverityInfo), + Title: "AI executive report", + Description: report, + FoundAt: now, + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target}, + Subject: mctx.Target, + Agent: "report-writer", + Model: a.client.DeepModel, + Severity: eventbus.SeverityInfo, + Title: "AI executive report", + 
Description: report, + }) + } + + // Emit a module-error style observability event with per-handler counts. + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("AI activity: cve=%d js=%d http=%d secrets=%d vulns=%d anomaly=%d report=%d", + a.cveLookups.Load(), + a.jsAnalyses.Load(), + a.httpAnalyses.Load(), + a.secretValidations.Load(), + a.vulnEnrichments.Load(), + a.anomalyScans.Load(), + a.reportGenerations.Load()), + }) +} + +// --- helpers ------------------------------------------------------------- + +// firstSeen returns true the first time we see a given cache key, false +// on every subsequent call. Implemented via sync.Map.LoadOrStore which is +// atomic. +func (a *aiModule) firstSeen(key string) bool { + h := sha256.Sum256([]byte(key)) + hx := hex.EncodeToString(h[:]) + _, loaded := a.cache.LoadOrStore(hx, struct{}{}) + return !loaded +} + +// isInterestingHTTP gates which HTTP responses are worth sending to the +// deep model. Normal 2xx/3xx are skipped; 5xx, verbose 4xx with titles, +// and anything with a server-banner mismatch qualifies. +func isInterestingHTTP(ev eventbus.HTTPProbed) bool { + switch { + case ev.StatusCode >= 500: + return true + case ev.StatusCode == 401 || ev.StatusCode == 403: + return true // auth surface worth inspecting + case ev.StatusCode >= 400 && ev.Title != "" && ev.ContentLength > 1000: + return true // verbose error page + case ev.TLSSelfSigned: + return true // self-signed on a live host is usually an appliance + } + return false +} + +// hashShort returns a short hex prefix of SHA-256(s) β€” used for cache +// keys where the full input is too long but identity matters. 
+func hashShort(s string) string { + h := sha256.Sum256([]byte(s)) + return hex.EncodeToString(h[:8]) +} + +// persistAIFinding appends an AIFinding to the host's store record so +// that downstream modules (notably the report.brief module running in +// PhaseReporting, which subscribes to the bus AFTER PhaseAnalysis has +// drained) can still surface the finding. Store is the single source +// of truth for cross-phase handoff. +func persistAIFinding(mctx module.Context, host string, f store.AIFinding) { + if host == "" { + host = mctx.Target + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.AIFindings = append(h.AIFindings, f) + }) +} + +// cdnOrWafMarkers are technology names that indicate the target is fronted +// by a CDN / WAF rather than running that product themselves. Matching +// CVEs against these labels produces almost-exclusively false positives, +// so we skip them when the version is unknown. +var cdnOrWafMarkers = map[string]bool{ + "cloudflare": true, + "cloudfront": true, + "akamai": true, + "fastly": true, + "imperva": true, + "aws": true, + "azure": true, + "gcp": true, + "heroku": true, + "netlify": true, + "vercel": true, + "cdn": true, + "nginx plus": true, +} + +// parseTech extracts (name, version) from strings like "nginx/1.18.0", +// "nginx/1.18.0 (Ubuntu)", "Apache/2.4.52", or "Apache 2.4". +func parseTech(raw string) (name, version string) { + raw = strings.TrimSpace(raw) + if raw == "" { + return "", "" + } + // Look for name/version or name version pattern. + for _, sep := range []string{"/", " "} { + if idx := strings.Index(raw, sep); idx > 0 { + name = strings.TrimSpace(raw[:idx]) + rest := strings.TrimSpace(raw[idx+1:]) + rest = strings.TrimPrefix(rest, "v") + // Pull digits.digits.digits out of rest + end := 0 + for end < len(rest) { + c := rest[end] + if (c >= '0' && c <= '9') || c == '.' 
{ + end++ + continue + } + break + } + if end > 0 { + return name, rest[:end] + } + return name, "" + } + } + return raw, "" +} + +// shouldSkipForCVE returns true when (name, version) is too vague for a +// useful CVE lookup β€” empty name, or a CDN/WAF label without a version. +func shouldSkipForCVE(name, version string) bool { + if name == "" { + return true + } + if version == "" && cdnOrWafMarkers[strings.ToLower(name)] { + return true + } + return false +} + +func versionOrUnknown(v string) string { + if v == "" { + return "(unknown version)" + } + return "v" + v +} + +// buildScanSummary compiles a compact text representation of the store +// for the DetectAnomalies / GenerateReport prompts. Kept under ~3KB to +// fit comfortably in every model's context window. +func buildScanSummary(hosts []*store.Host) string { + var sb strings.Builder + sb.WriteString(fmt.Sprintf("Scan summary: %d hosts\n\n", len(hosts))) + shown := 0 + for _, h := range hosts { + if h == nil { + continue + } + if shown >= 50 { + sb.WriteString(fmt.Sprintf("\n... and %d more hosts\n", len(hosts)-shown)) + break + } + sb.WriteString(fmt.Sprintf("- %s (status=%d, tech=%s)", + h.Subdomain, h.StatusCode, strings.Join(h.Technologies, ","))) + if len(h.Vulnerabilities) > 0 { + sb.WriteString(fmt.Sprintf(" vulns=%d", len(h.Vulnerabilities))) + } + if len(h.Secrets) > 0 { + sb.WriteString(fmt.Sprintf(" secrets=%d", len(h.Secrets))) + } + if len(h.CVEs) > 0 { + sb.WriteString(fmt.Sprintf(" cves=%d", len(h.CVEs))) + } + sb.WriteString("\n") + shown++ + } + return sb.String() +} diff --git a/internal/modules/all/all.go b/internal/modules/all/all.go new file mode 100644 index 0000000..dcf3a82 --- /dev/null +++ b/internal/modules/all/all.go @@ -0,0 +1,80 @@ +// Package all is the meta-package imported from main to trigger side-effect +// registration of every built-in Fase 0.6 adapter module. 
Importing +// god-eye/internal/modules/all is equivalent to importing each submodule +// individually and calling Register(). +// +// Individual submodules avoid registering in their init() on purpose β€” that +// would make the registry state global and prevent tests from using a +// clean registry. Callers (main, tests) explicitly opt in by importing +// this package or calling RegisterAll. +package all + +import ( + aimod "god-eye/internal/modules/ai" + "god-eye/internal/modules/asn" + "god-eye/internal/modules/brief" + "god-eye/internal/modules/axfr" + "god-eye/internal/modules/bruteforce" + "god-eye/internal/modules/cloud" + "god-eye/internal/modules/ctstream" + "god-eye/internal/modules/dnsresolve" + "god-eye/internal/modules/github" + "god-eye/internal/modules/graphql" + "god-eye/internal/modules/headers" + "god-eye/internal/modules/httpprobe" + "god-eye/internal/modules/javascript" + "god-eye/internal/modules/jwt" + "god-eye/internal/modules/nuclei" + "god-eye/internal/modules/passive" + "god-eye/internal/modules/permutation" + "god-eye/internal/modules/ports" + "god-eye/internal/modules/recursive" + "god-eye/internal/modules/report" + "god-eye/internal/modules/reversedns" + "god-eye/internal/modules/security" + "god-eye/internal/modules/smuggling" + "god-eye/internal/modules/supplychain" + "god-eye/internal/modules/takeover" + "god-eye/internal/modules/vhost" +) + +// RegisterAll registers every Fase 0.6 adapter module in the default +// registry. Call exactly once at program start β€” Register panics on +// duplicates, so calling twice is a bug. 
+func RegisterAll() { + // Discovery (Fase 0 adapters + Fase 1 natives + supply chain from F2) + passive.Register() + bruteforce.Register() + recursive.Register() + axfr.Register() // F1 + github.Register() // F1 + ctstream.Register() // F1 (opt-in) + supplychain.Register() // F2 + + // Resolution + dnsresolve.Register() + permutation.Register() // F1 (opt-in) + reversedns.Register() // F1 (opt-in) + vhost.Register() // F1 (opt-in) + asn.Register() // F1 (opt-in) + + // Enrichment + httpprobe.Register() + ports.Register() + + // Analysis (F0 adapters + F2 natives) + security.Register() + takeover.Register() + cloud.Register() + javascript.Register() + aimod.Register() + graphql.Register() // F2 + jwt.Register() // F2 + headers.Register() // F2 + smuggling.Register() // F2 (opt-in) + nuclei.Register() // F2 (opt-in β€” requires local nuclei-templates dir) + + // Reporting + report.Register() + brief.Register() // AI-assisted executive summary at scan end +} diff --git a/internal/modules/asn/asn.go b/internal/modules/asn/asn.go new file mode 100644 index 0000000..126a1f2 --- /dev/null +++ b/internal/modules/asn/asn.go @@ -0,0 +1,78 @@ +// Package asn is a Fase 0.6 adapter around v1 network.ASNScanner. Expands +// discovery by enumerating IPs within the target's ASN/CIDR blocks. +package asn + +import ( + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/network" + "god-eye/internal/store" +) + +// CtxPassthrough is used to thread module.Context.Ctx into network helpers. 
+ +const ModuleName = "discovery.asn" + +type asnModule struct{} + +func Register() { module.Register(&asnModule{}) } + +func (*asnModule) Name() string { return ModuleName } +func (*asnModule) Phase() module.Phase { return module.PhaseResolution } +func (*asnModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*asnModule) Produces() []eventbus.EventType { return nil } +func (*asnModule) DefaultEnabled() bool { return false } // opt-in + +func (*asnModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("asn_scan", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 10) + + hosts := mctx.Store.All(mctx.Ctx) + seenIP := make(map[string]struct{}) + for _, h := range hosts { + for _, ip := range h.IPs { + seenIP[ip] = struct{}{} + } + } + + scanner := network.NewASNScanner(timeout) + for ip := range seenIP { + if mctx.Ctx.Err() != nil { + break + } + info, err := scanner.GetASNInfo(mctx.Ctx, ip) + if err != nil || info == nil { + continue + } + _ = mctx.Store.Upsert(mctx.Ctx, ipToFirstHost(mctx, ip), func(h *store.Host) { + if h.ASN == "" { + h.ASN = info.ASN + } + if h.Org == "" { + h.Org = info.Name + } + if h.Country == "" { + h.Country = info.Country + } + }) + } + return nil +} + +// ipToFirstHost returns the first subdomain mapped to ip in the store. +func ipToFirstHost(mctx module.Context, ip string) string { + for _, h := range mctx.Store.All(mctx.Ctx) { + for _, rip := range h.IPs { + if rip == ip { + return h.Subdomain + } + } + } + return "" +} + +var _ = time.Now diff --git a/internal/modules/axfr/axfr.go b/internal/modules/axfr/axfr.go new file mode 100644 index 0000000..fb1c81d --- /dev/null +++ b/internal/modules/axfr/axfr.go @@ -0,0 +1,134 @@ +// Package axfr attempts DNS zone transfer (AXFR) against the target's +// authoritative name servers. 
It's the highest-signal free discovery +// technique β€” when it works, it returns the entire zone at once, exposing +// every record the admin considers internal-only. +// +// Modern DNS infrastructure rejects AXFR by default, but legacy deployments, +// misconfigured secondary servers, and corporate DNS still leak zones +// regularly in bug bounty scope. +package axfr + +import ( + "context" + "strings" + "time" + + godns "github.com/miekg/dns" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.axfr" + +type axfrModule struct{} + +func Register() { module.Register(&axfrModule{}) } + +func (*axfrModule) Name() string { return ModuleName } +func (*axfrModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*axfrModule) Consumes() []eventbus.EventType { return nil } +func (*axfrModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*axfrModule) DefaultEnabled() bool { return true } + +func (*axfrModule) Run(mctx module.Context) error { + target := strings.TrimSuffix(mctx.Target, ".") + if target == "" { + return nil + } + timeout := time.Duration(mctx.Config.Int("timeout", 5)) * time.Second + + nameservers, err := lookupNSServers(target, timeout) + if err != nil || len(nameservers) == 0 { + return nil + } + + seen := make(map[string]struct{}) + for _, ns := range nameservers { + if mctx.Ctx.Err() != nil { + return nil + } + records := tryAXFR(target, ns, timeout) + for _, sub := range records { + sub = strings.ToLower(strings.TrimSuffix(sub, ".")) + if sub == "" || sub == target { + continue + } + if !strings.HasSuffix(sub, "."+target) { + continue + } + if _, dup := seen[sub]; dup { + continue + } + seen[sub] = struct{}{} + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "axfr:"+ns) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: 
eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "axfr:" + ns, + }) + } + } + return nil +} + +// lookupNSServers returns the authoritative name servers for domain. +func lookupNSServers(domain string, timeout time.Duration) ([]string, error) { + client := &godns.Client{Timeout: timeout} + msg := new(godns.Msg) + msg.SetQuestion(godns.Fqdn(domain), godns.TypeNS) + // Ask a widely-available resolver. + resp, _, err := client.Exchange(msg, "8.8.8.8:53") + if err != nil { + return nil, err + } + var out []string + for _, a := range resp.Answer { + if ns, ok := a.(*godns.NS); ok { + out = append(out, strings.TrimSuffix(ns.Ns, ".")) + } + } + return out, nil +} + +// tryAXFR performs an AXFR against nsHost for domain, returning every +// returned name (A, AAAA, CNAME). Returns an empty slice when AXFR is +// refused (the expected outcome on properly-configured DNS). +func tryAXFR(domain, nsHost string, timeout time.Duration) []string { + tr := &godns.Transfer{DialTimeout: timeout, ReadTimeout: timeout, WriteTimeout: timeout} + msg := new(godns.Msg) + msg.SetAxfr(godns.Fqdn(domain)) + + ch, err := tr.In(msg, nsHost+":53") + if err != nil { + return nil + } + + var out []string + for env := range ch { + if env.Error != nil { + return out + } + for _, rr := range env.RR { + switch r := rr.(type) { + case *godns.A: + out = append(out, r.Hdr.Name) + case *godns.AAAA: + out = append(out, r.Hdr.Name) + case *godns.CNAME: + out = append(out, r.Hdr.Name) + case *godns.NS: + out = append(out, r.Hdr.Name) + } + } + } + return out +} + +var _ = context.Canceled diff --git a/internal/modules/brief/brief.go b/internal/modules/brief/brief.go new file mode 100644 index 0000000..56740c3 --- /dev/null +++ b/internal/modules/brief/brief.go @@ -0,0 +1,464 @@ +// Package brief renders the end-of-scan AI-assisted executive brief. +// +// It's the last module to run in PhaseReporting. 
It reads: +// - every host from the store (for severity / takeover / CVE rollups) +// - every AIFinding published during the scan (anomalies, executive +// report, per-host agent output) +// +// Then prints a framed summary block to stdout with: +// +// β–Έ Findings counted by severity +// β–Έ Top exploitable chains (critical + CVE pairs) +// β–Έ AI-generated executive summary (if ai.enabled) +// β–Έ Recommended next actions +// +// Suppressed when cfg.silent or cfg.json is true so machine-readable +// modes stay clean. +package brief + +import ( + "context" + "fmt" + "sort" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/output" + "god-eye/internal/store" +) + +const ModuleName = "report.brief" + +type briefModule struct { + aiFindings []eventbus.AIFinding + execReport string // last executive-report AIFinding seen + execReportAt time.Time + mu sync.Mutex +} + +func Register() { module.Register(&briefModule{}) } + +func (*briefModule) Name() string { return ModuleName } +func (*briefModule) Phase() module.Phase { return module.PhaseReporting } +func (*briefModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventAIFinding} } +func (*briefModule) Produces() []eventbus.EventType { return nil } + +// DefaultEnabled: brief renders whenever the scan completes with any +// findings. Silent/json modes are suppressed inline (not at selection +// time) so the module can still collect AIFindings for exports. +func (*briefModule) DefaultEnabled() bool { return true } + +func (b *briefModule) Run(mctx module.Context) error { + // Subscribe to AIFinding events and stash them locally so we can + // build a richer summary than just reading the store (the store + // doesn't retain AIFindings tagged with agent name / confidence). 
+ sub := mctx.Bus.Subscribe(eventbus.EventAIFinding, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.AIFinding) + if !ok { + return + } + b.mu.Lock() + defer b.mu.Unlock() + b.aiFindings = append(b.aiFindings, ev) + if ev.Agent == "report-writer" && ev.Description != "" { + b.execReport = ev.Description + b.execReportAt = ev.Meta().At + } + }) + defer sub.Unsubscribe() + + // Give the AI module a chance to publish its end-of-scan events. + // The AI module runs in PhaseAnalysis; we're in PhaseReporting so + // its ScanCompleted-triggered publishes have already fired by the + // time we get here. A small buffer avoids losing late events. + select { + case <-time.After(400 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + if mctx.Config.Bool("silent", false) || mctx.Config.Bool("json", false) { + return nil + } + + hosts := mctx.Store.All(mctx.Ctx) + if len(hosts) == 0 { + return nil + } + + // Drain store-persisted AIFindings β€” these were written by the AI + // module during PhaseAnalysis. Live events alone miss them because + // brief subscribes after PhaseAnalysis has already drained. 
+ b.mu.Lock() + for _, h := range hosts { + for _, f := range h.AIFindings { + b.aiFindings = append(b.aiFindings, eventbus.AIFinding{ + EventMeta: eventbus.EventMeta{At: f.FoundAt, Source: "ai.cascade", Target: h.Subdomain}, + Subject: h.Subdomain, + Agent: f.Agent, + Model: f.Model, + Severity: eventbus.Severity(f.Severity), + Title: f.Title, + Description: f.Description, + Evidence: f.Evidence, + CVEs: f.CVEs, + OWASP: f.OWASP, + Confidence: f.Confidence, + }) + if f.Agent == "report-writer" && f.Description != "" && (b.execReport == "" || f.FoundAt.After(b.execReportAt)) { + b.execReport = f.Description + b.execReportAt = f.FoundAt + } + } + } + b.mu.Unlock() + + b.render(mctx, hosts) + return nil +} + +func (b *briefModule) render(mctx module.Context, hosts []*store.Host) { + b.mu.Lock() + aiFindings := append([]eventbus.AIFinding(nil), b.aiFindings...) + execReport := b.execReport + b.mu.Unlock() + + sevCounts := tallySeverities(hosts, aiFindings) + topChains := buildChains(hosts) + recs := buildRecommendations(hosts, aiFindings) + aiActivity := tallyAIAgents(aiFindings) + + fmt.Println() + title := fmt.Sprintf(" AI SCAN BRIEF β€” %s ", mctx.Target) + fmt.Println(output.BoldCyan(boxTop(title))) + writeLine := func(text string) { + fmt.Println(output.BoldCyan("β”‚ ") + text) + } + + // Section: stats + writeLine(output.BoldWhite("Totals")) + writeLine(fmt.Sprintf(" %s %d %s %d %s %d", + output.Dim("Hosts:"), len(hosts), + output.Dim("Active:"), countActive(hosts), + output.Dim("AI findings:"), len(aiFindings), + )) + writeLine("") + + // Section: severity breakdown + writeLine(output.BoldWhite("Findings by severity")) + sevOrder := []string{"critical", "high", "medium", "low", "info"} + for _, s := range sevOrder { + n := sevCounts[s] + if n == 0 { + continue + } + badge := sevBadge(s) + writeLine(fmt.Sprintf(" %s %s %d", badge, padRight(s, 9), n)) + } + if len(sevCounts) == 0 { + writeLine(output.Dim(" (no scored findings)")) + } + writeLine("") + + // 
Section: top exploitable chains + if len(topChains) > 0 { + writeLine(output.BoldWhite("Top exploitable chains")) + for i, c := range topChains { + if i >= 5 { + break + } + writeLine(" " + output.BoldYellow("β–Έ ") + c) + } + writeLine("") + } + + // Section: AI agent activity + if len(aiActivity) > 0 { + writeLine(output.BoldWhite("AI agents that contributed")) + // Stable order by count desc. + type agg struct { + agent string + n int + } + agents := make([]agg, 0, len(aiActivity)) + for name, n := range aiActivity { + agents = append(agents, agg{name, n}) + } + sort.Slice(agents, func(i, j int) bool { return agents[i].n > agents[j].n }) + for _, a := range agents { + writeLine(fmt.Sprintf(" %s %s %s", + output.Cyan("β€’"), + padRight(a.agent, 20), + output.Dim(fmt.Sprintf("%d findings", a.n)), + )) + } + writeLine("") + } + + // Section: AI executive report (prose) + if strings.TrimSpace(execReport) != "" { + writeLine(output.BoldWhite("AI executive summary")) + for _, line := range wrapText(strings.TrimSpace(execReport), 74) { + writeLine(output.Dim(" ") + line) + } + writeLine("") + } + + // Section: recommendations + if len(recs) > 0 { + writeLine(output.BoldWhite("Recommended next actions")) + for i, r := range recs { + if i >= 5 { + break + } + writeLine(fmt.Sprintf(" %s %s", output.Green(fmt.Sprintf("%d.", i+1)), r)) + } + writeLine("") + } + + fmt.Println(output.BoldCyan(boxBottom())) + fmt.Println() +} + +// --- helpers ------------------------------------------------------------- + +func tallySeverities(hosts []*store.Host, aiFindings []eventbus.AIFinding) map[string]int { + out := map[string]int{} + for _, h := range hosts { + for _, v := range h.Vulnerabilities { + out[strings.ToLower(v.Severity)]++ + } + for _, c := range h.CVEs { + out[strings.ToLower(c.Severity)]++ + } + for _, s := range h.Secrets { + out[strings.ToLower(s.Severity)]++ + } + if h.Takeover != nil { + out["high"]++ + } + } + for _, f := range aiFindings { + 
out[strings.ToLower(string(f.Severity))]++ + } + return out +} + +func countActive(hosts []*store.Host) int { + n := 0 + for _, h := range hosts { + if h.StatusCode >= 200 && h.StatusCode < 400 { + n++ + } + } + return n +} + +// buildChains surfaces the most dangerous combinations. Right now the +// heuristic is coarse: hosts with β‰₯2 high+ findings, or any host with a +// confirmed takeover candidate, or any host whose tech triggered a CVE. +func buildChains(hosts []*store.Host) []string { + var chains []string + + type scored struct { + text string + score int + } + var ranked []scored + + for _, h := range hosts { + score := 0 + bits := []string{} + for _, v := range h.Vulnerabilities { + if strings.EqualFold(v.Severity, "critical") { + score += 10 + bits = append(bits, v.Title) + } else if strings.EqualFold(v.Severity, "high") { + score += 5 + bits = append(bits, v.Title) + } + } + if h.Takeover != nil { + score += 8 + bits = append(bits, "takeoverβ†’"+h.Takeover.Service) + } + for _, c := range h.CVEs { + if strings.EqualFold(c.Severity, "critical") || strings.EqualFold(c.Severity, "high") { + score += 6 + bits = append(bits, fmt.Sprintf("%s@%sβ†’%s", c.Technology, c.Version, firstCVE(c.ID))) + } + } + if score == 0 { + continue + } + desc := h.Subdomain + if len(bits) > 0 { + desc += " " + output.Dim("β€” "+strings.Join(dedupShort(bits), " + ")) + } + ranked = append(ranked, scored{desc, score}) + } + + sort.Slice(ranked, func(i, j int) bool { return ranked[i].score > ranked[j].score }) + for _, r := range ranked { + chains = append(chains, r.text) + } + return chains +} + +func buildRecommendations(hosts []*store.Host, aiFindings []eventbus.AIFinding) []string { + seen := map[string]struct{}{} + var out []string + + add := func(s string) { + if _, ok := seen[s]; ok { + return + } + seen[s] = struct{}{} + out = append(out, s) + } + + // Pattern: Apache version β†’ upgrade recommendation + for _, h := range hosts { + for _, c := range h.CVEs { + if 
c.Technology != "" && c.Version != "" { + add(fmt.Sprintf("Patch %s %s β†’ vendor latest (affects %s)", c.Technology, c.Version, h.Subdomain)) + } + } + if h.Takeover != nil { + add(fmt.Sprintf("Verify CNAME on %s before external party claims %s", h.Subdomain, h.Takeover.Service)) + } + for _, s := range h.Secrets { + add(fmt.Sprintf("Rotate %s found in %s", s.Kind, h.Subdomain)) + } + for _, v := range h.Vulnerabilities { + if strings.EqualFold(v.Severity, "critical") { + add(fmt.Sprintf("Remediate critical: %s on %s", v.Title, h.Subdomain)) + } + } + } + + // AI-surfaced recommendations (anomalies) + for _, f := range aiFindings { + if f.Agent == "anomaly-detector" && f.Description != "" { + add("Investigate anomaly: " + trimLine(f.Description, 80)) + } + } + + return out +} + +func tallyAIAgents(aiFindings []eventbus.AIFinding) map[string]int { + out := map[string]int{} + for _, f := range aiFindings { + agent := f.Agent + if agent == "" { + agent = "unknown" + } + out[agent]++ + } + return out +} + +// --- rendering primitives ------------------------------------------------ + +const boxWidth = 76 + +func boxTop(title string) string { + line := strings.Repeat("─", boxWidth) + if len(title) >= boxWidth-4 { + title = title[:boxWidth-4] + } + prefix := "β”Œβ”€β”€ " + suffix := " " + strings.Repeat("─", boxWidth-len(prefix)-len(title)-1) + "┐" + _ = line + return prefix + title + suffix +} + +func boxBottom() string { + return "β””" + strings.Repeat("─", boxWidth) + "β”˜" +} + +func padRight(s string, n int) string { + if len(s) >= n { + return s + } + return s + strings.Repeat(" ", n-len(s)) +} + +func wrapText(s string, width int) []string { + words := strings.Fields(s) + if len(words) == 0 { + return nil + } + var lines []string + var cur strings.Builder + for _, w := range words { + if cur.Len() == 0 { + cur.WriteString(w) + continue + } + if cur.Len()+1+len(w) > width { + lines = append(lines, cur.String()) + cur.Reset() + cur.WriteString(w) + } else { + 
cur.WriteByte(' ') + cur.WriteString(w) + } + } + if cur.Len() > 0 { + lines = append(lines, cur.String()) + } + return lines +} + +func sevBadge(s string) string { + switch strings.ToLower(s) { + case "critical": + return output.BgRed(" CRIT ") + case "high": + return output.Red("[HIGH]") + case "medium": + return output.Yellow("[MED] ") + case "low": + return output.Blue("[LOW] ") + default: + return output.Dim("[INFO]") + } +} + +func firstCVE(ids string) string { + if i := strings.IndexAny(ids, ",("); i > 0 { + return strings.TrimSpace(ids[:i]) + } + return ids +} + +func dedupShort(in []string) []string { + seen := map[string]struct{}{} + var out []string + for _, s := range in { + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + if len(s) > 40 { + s = s[:37] + "…" + } + out = append(out, s) + } + return out +} + +func trimLine(s string, n int) string { + s = strings.TrimSpace(s) + if i := strings.Index(s, "\n"); i > 0 { + s = s[:i] + } + if len(s) > n { + s = s[:n-1] + "…" + } + return s +} diff --git a/internal/modules/bruteforce/bruteforce.go b/internal/modules/bruteforce/bruteforce.go new file mode 100644 index 0000000..82f9228 --- /dev/null +++ b/internal/modules/bruteforce/bruteforce.go @@ -0,0 +1,167 @@ +// Package bruteforce runs DNS brute-force against the target domain using +// the shipped or custom wordlist. Emits SubdomainDiscovered for every host +// that resolves (with optional wildcard filtering applied). 
+package bruteforce + +import ( + "bufio" + "context" + "os" + "strings" + "sync" + "time" + + "god-eye/internal/config" + godns "god-eye/internal/dns" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.bruteforce" + +type bruteModule struct{} + +func Register() { module.Register(&bruteModule{}) } + +func (*bruteModule) Name() string { return ModuleName } +func (*bruteModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*bruteModule) Consumes() []eventbus.EventType { return nil } +func (*bruteModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*bruteModule) DefaultEnabled() bool { return true } + +func (b *bruteModule) Run(mctx module.Context) error { + if mctx.Config.Bool("no_brute", false) { + return nil + } + + target := mctx.Target + wordlist := loadWordlist(mctx.Config.String("wordlist", "")) + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + timeout := mctx.Config.Int("timeout", 5) + conc := mctx.Config.Int("concurrency", 500) + if conc <= 0 { + conc = 500 + } + + // Opportunistic wildcard detection: before brute, detect which IPs + // (if any) the apex wildcards to, so we can filter hits that resolve + // exclusively to those IPs. + wd := godns.NewWildcardDetector(resolvers, timeout) + wi := wd.Detect(target) + wildcardIPs := make(map[string]struct{}) + if wi != nil && wi.IsWildcard { + for _, ip := range wi.WildcardIPs { + wildcardIPs[ip] = struct{}{} + } + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for w := range work { + if mctx.Ctx.Err() != nil { + return + } + sub := w + "." 
+ target + ips := godns.ResolveSubdomain(sub, resolvers, timeout) + if len(ips) == 0 { + continue + } + if allWildcard(ips, wildcardIPs) { + continue + } + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddIPs(h, ips) + store.AddDiscoveryMethod(h, "brute") + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "brute", + }) + } + }() + } + +loop: + for _, w := range wordlist { + select { + case work <- w: + case <-mctx.Ctx.Done(): + break loop + } + } + close(work) + wg.Wait() + return nil +} + +func allWildcard(ips []string, wc map[string]struct{}) bool { + if len(wc) == 0 { + return false + } + for _, ip := range ips { + if _, ok := wc[ip]; !ok { + return false + } + } + return true +} + +func loadWordlist(path string) []string { + if path == "" { + return config.DefaultWordlist + } + f, err := os.Open(path) + if err != nil { + return config.DefaultWordlist + } + defer f.Close() + + var out []string + sc := bufio.NewScanner(f) + for sc.Scan() { + w := strings.TrimSpace(sc.Text()) + if w == "" || strings.HasPrefix(w, "#") { + continue + } + out = append(out, w) + } + if len(out) == 0 { + return config.DefaultWordlist + } + return out +} + +func parseResolvers(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return config.DefaultResolvers + } + var out []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r == "" { + continue + } + if !strings.Contains(r, ":") { + r = r + ":53" + } + out = append(out, r) + } + if len(out) == 0 { + return config.DefaultResolvers + } + return out +} + +// keep context import for symmetry with other modules +var _ = context.Canceled diff --git a/internal/modules/cloud/cloud.go b/internal/modules/cloud/cloud.go new file mode 100644 index 0000000..ce771b8 --- /dev/null +++ b/internal/modules/cloud/cloud.go @@ -0,0 +1,104 @@ +// Package cloud wraps v1 
cloud detection + S3 bucket discovery. +// Drains the store, plus listens for late DNSResolved events. +package cloud + +import ( + "context" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/scanner" + "god-eye/internal/store" +) + +const ModuleName = "cloud.detect" + +type cloudModule struct{} + +func Register() { module.Register(&cloudModule{}) } + +func (*cloudModule) Name() string { return ModuleName } +func (*cloudModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*cloudModule) Consumes() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventDNSResolved, eventbus.EventHTTPProbed} +} +func (*cloudModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventCloudAsset} } +func (*cloudModule) DefaultEnabled() bool { return true } + +func (*cloudModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 5) + client := gohttp.GetSharedClient(timeout) + + handled := make(map[string]struct{}) + var mu sync.Mutex + shouldHandle := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := handled[host]; ok { + return false + } + handled[host] = struct{}{} + return true + } + + handle := func(host string, ips []string, cname string) { + if !shouldHandle(host) { + return + } + provider := scanner.DetectCloudProvider(ips, cname, "") + if provider != "" { + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + if h.CloudProvider == "" { + h.CloudProvider = provider + } + }) + } + + if buckets := scanner.CheckS3BucketsWithClient(host, client); len(buckets) > 0 { + for _, url := range buckets { + mctx.Bus.Publish(mctx.Ctx, eventbus.CloudAssetFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Provider: "AWS", + Kind: "s3-bucket", + Name: host, + URL: url, + Status: "accessible", + }) + } + } + } + + var wg sync.WaitGroup + + // Drain: every host already in the 
store with an IP. + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.Subdomain == "" || len(h.IPs) == 0 { + continue + } + h := h + wg.Add(1) + go func() { defer wg.Done(); handle(h.Subdomain, h.IPs, h.CNAME) }() + } + + // Late DNSResolved events. + sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.DNSResolved) + if !ok { + return + } + wg.Add(1) + go func() { defer wg.Done(); handle(ev.Subdomain, ev.IPs, ev.CNAME) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} diff --git a/internal/modules/ctstream/ctstream.go b/internal/modules/ctstream/ctstream.go new file mode 100644 index 0000000..5411fb4 --- /dev/null +++ b/internal/modules/ctstream/ctstream.go @@ -0,0 +1,123 @@ +// Package ctstream subscribes to live Certificate Transparency log streams +// from certstream.calidog.io (free, public). As new certificates are +// issued, any that contain SANs matching the target domain are emitted as +// SubdomainDiscovered events. +// +// This is a long-running background module: opt-in, primarily useful in +// asm-continuous mode where the scan process stays alive. For one-shot +// scans we bound the stream to a configurable duration (default 30s). +// +// NOTE: certstream.calidog.io is sometimes rate-limited or offline. This +// module fails open β€” no event emitted, no error returned. 
+package ctstream + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "discovery.ct-stream" + +type ctModule struct{} + +func Register() { module.Register(&ctModule{}) } + +func (*ctModule) Name() string { return ModuleName } +func (*ctModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*ctModule) Consumes() []eventbus.EventType { return nil } +func (*ctModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} + +// Off by default: requires long-running streaming. +func (*ctModule) DefaultEnabled() bool { return false } + +func (*ctModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("ct_stream", false) { + return nil + } + durationSec := mctx.Config.Int("ct_stream.duration_sec", 30) + if durationSec <= 0 { + durationSec = 30 + } + + target := mctx.Target + deadline := time.Now().Add(time.Duration(durationSec) * time.Second) + + // Fallback path: poll crt.sh's JSON endpoint every 5s for the duration. + // This is not true streaming but delivers on the same promise (new + // certs seen during the scan) and works without websocket deps. 
+ ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + seen := make(map[string]struct{}) + + for time.Now().Before(deadline) { + if mctx.Ctx.Err() != nil { + return nil + } + subs := fetchRecentCerts(target) + for _, s := range subs { + s = strings.ToLower(strings.TrimSpace(s)) + if s == "" || !strings.HasSuffix(s, target) { + continue + } + if _, dup := seen[s]; dup { + continue + } + seen[s] = struct{}{} + _ = mctx.Store.Upsert(mctx.Ctx, s, func(h *store.Host) { + store.AddDiscoveryMethod(h, "ct-stream") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: s}, + Subdomain: s, + Method: "ct-stream", + }) + } + select { + case <-ticker.C: + case <-mctx.Ctx.Done(): + return nil + } + } + return nil +} + +func fetchRecentCerts(target string) []string { + // crt.sh returns JSON with name_value fields; same as the v1 crtsh + // source but we use a tighter query. + q := "%." + target + u := fmt.Sprintf("https://crt.sh/?q=%s&output=json", url.QueryEscape(q)) + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Get(u) + if err != nil { + return nil + } + defer resp.Body.Close() + + var entries []struct { + NameValue string `json:"name_value"` + } + if err := json.NewDecoder(resp.Body).Decode(&entries); err != nil { + return nil + } + var out []string + for _, e := range entries { + for _, name := range strings.Split(e.NameValue, "\n") { + name = strings.TrimPrefix(strings.TrimSpace(name), "*.") + if name != "" { + out = append(out, name) + } + } + } + return out +} diff --git a/internal/modules/dnsresolve/dnsresolve.go b/internal/modules/dnsresolve/dnsresolve.go new file mode 100644 index 0000000..a3714d8 --- /dev/null +++ b/internal/modules/dnsresolve/dnsresolve.go @@ -0,0 +1,166 @@ +// Package dnsresolve resolves every subdomain present in the store, plus +// any that arrive via late SubdomainDiscovered events while the module is +// running. 
Results (IPs, CNAME, PTR) are written back to the store AND +// announced via DNSResolved events for downstream enrichment modules. +// +// This module is idempotent: Upsert on the same subdomain twice is cheap. +package dnsresolve + +import ( + "context" + "strings" + "sync" + "time" + + "god-eye/internal/config" + godns "god-eye/internal/dns" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "dns.resolver" + +type resolverModule struct{} + +func Register() { module.Register(&resolverModule{}) } + +func (*resolverModule) Name() string { return ModuleName } +func (*resolverModule) Phase() module.Phase { return module.PhaseResolution } +func (*resolverModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventSubdomainDiscovered} } +func (*resolverModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*resolverModule) DefaultEnabled() bool { return true } + +func (m *resolverModule) Run(mctx module.Context) error { + resolvers := parseResolvers(mctx.Config.String("resolvers", "")) + timeout := mctx.Config.Int("timeout", 5) + conc := mctx.Config.Int("concurrency", 500) + if conc <= 0 { + conc = 500 + } + + // Dedup across drain + late events. + processed := make(map[string]struct{}) + var processedMu sync.Mutex + shouldProcess := func(sub string) bool { + processedMu.Lock() + defer processedMu.Unlock() + if _, dup := processed[sub]; dup { + return false + } + processed[sub] = struct{}{} + return true + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for sub := range work { + m.resolveOne(mctx, sub, resolvers, timeout) + } + }() + } + + // 1) Drain the store: every subdomain discovered so far goes in. 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.Subdomain == "" { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + select { + case work <- h.Subdomain: + case <-mctx.Ctx.Done(): + close(work) + wg.Wait() + return nil + } + } + + // 2) Keep listening for late events (e.g. from recursive discovery that + // runs in our own phase and produces new subdomains mid-resolution). + sub := mctx.Bus.Subscribe(eventbus.EventSubdomainDiscovered, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.SubdomainDiscovered) + if !ok { + return + } + if !shouldProcess(ev.Subdomain) { + return + } + select { + case work <- ev.Subdomain: + case <-mctx.Ctx.Done(): + } + }) + defer sub.Unsubscribe() + + // 3) Give late events a short window to arrive (e.g. recursive module + // running concurrently in PhaseResolution). 1 second is enough β€” we + // already drained the store, so any straggler events here are rare. + select { + case <-time.After(1 * time.Second): + case <-mctx.Ctx.Done(): + } + + close(work) + wg.Wait() + return nil +} + +func (m *resolverModule) resolveOne(mctx module.Context, sub string, resolvers []string, timeout int) { + if err := mctx.Ctx.Err(); err != nil { + return + } + ips := godns.ResolveSubdomain(sub, resolvers, timeout) + if len(ips) == 0 { + return + } + + cname := godns.ResolveCNAME(sub, resolvers, timeout) + ptr := godns.ResolvePTR(ips[0], resolvers, timeout) + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddIPs(h, ips) + if cname != "" && h.CNAME == "" { + h.CNAME = cname + } + if ptr != "" && h.PTR == "" { + h.PTR = ptr + } + store.AddDiscoveryMethod(h, "resolved") + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.DNSResolved{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + IPs: ips, + CNAME: cname, + PTR: ptr, + }) +} + +func parseResolvers(s string) []string { + s = strings.TrimSpace(s) + if s == "" { + return 
config.DefaultResolvers + } + var out []string + for _, r := range strings.Split(s, ",") { + r = strings.TrimSpace(r) + if r == "" { + continue + } + if !strings.Contains(r, ":") { + r = r + ":53" + } + out = append(out, r) + } + if len(out) == 0 { + return config.DefaultResolvers + } + return out +} diff --git a/internal/modules/github/github.go b/internal/modules/github/github.go new file mode 100644 index 0000000..fc5db3f --- /dev/null +++ b/internal/modules/github/github.go @@ -0,0 +1,150 @@ +// Package github discovers subdomains from public GitHub code via dorks. +// Uses the v3 REST Search API. Works anonymously at a very low rate +// (strict API limits); a token in the GITHUB_TOKEN env var lifts limits. +// +// Dorks used: +// +// "" in:file +// "api." in:file +// +// The module only emits subdomains that match the target domain suffix. +package github + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/sources" + "god-eye/internal/store" +) + +const ModuleName = "discovery.github-dorks" + +type ghModule struct{} + +func Register() { module.Register(&ghModule{}) } + +func (*ghModule) Name() string { return ModuleName } +func (*ghModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*ghModule) Consumes() []eventbus.EventType { return nil } +func (*ghModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} + +// Default-enabled so bug-bounty users get it for free. Falls back to +// no-op when unauthenticated requests hit rate limits. 
+func (*ghModule) DefaultEnabled() bool { return true } + +func (*ghModule) Run(mctx module.Context) error { + target := mctx.Target + if target == "" { + return nil + } + token := os.Getenv("GITHUB_TOKEN") + timeout := time.Duration(mctx.Config.Int("timeout", 10)) * time.Second + client := &http.Client{Timeout: timeout} + + // Two dorks run in parallel. Each returns up to 100 results per page. + dorks := []string{ + fmt.Sprintf(`"%s"`, target), + fmt.Sprintf(`"api.%s"`, target), + } + + seen := make(map[string]struct{}) + var seenMu sync.Mutex + + var wg sync.WaitGroup + for _, q := range dorks { + q := q + wg.Add(1) + go func() { + defer wg.Done() + hits := searchCode(client, q, token) + for _, text := range hits { + for _, sub := range sources.ExtractSubdomains(text, target) { + seenMu.Lock() + if _, dup := seen[sub]; dup { + seenMu.Unlock() + continue + } + seen[sub] = struct{}{} + seenMu.Unlock() + + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "github-dorks") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "github-dorks", + }) + } + } + }() + } + wg.Wait() + return nil +} + +// searchCode hits GitHub's code-search endpoint and returns text_matches +// fragments (the snippet fields containing the dorked domain). When +// unauthenticated it may silently return zero hits due to rate limiting; +// the module fails open. 
// searchCode queries GitHub's code-search endpoint for q and returns, for
// each result item, its html_url followed by its text-match fragments.
// token, when non-empty, is sent as a Bearer credential. Any transport error,
// non-200 status (including rate-limit responses) or malformed body yields
// nil, so callers fail open.
func searchCode(client *http.Client, q, token string) []string {
	// Upper bound on how much of a response is decoded.
	const maxBody = 8 << 20

	u := "https://api.github.com/search/code?q=" + url.QueryEscape(q) + "&per_page=100"
	req, err := http.NewRequest(http.MethodGet, u, nil)
	if err != nil {
		return nil
	}
	// The text-match media type makes the API include matching fragments.
	req.Header.Set("Accept", "application/vnd.github.text-match+json")
	req.Header.Set("User-Agent", "god-eye-v2")
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil
	}
	defer resp.Body.Close()

	// Check the status before touching the body: error payloads carry no items.
	if resp.StatusCode != http.StatusOK {
		return nil
	}

	var parsed struct {
		Items []struct {
			TextMatches []struct {
				Fragment string `json:"fragment"`
			} `json:"text_matches"`
			HTMLURL string `json:"html_url"`
		} `json:"items"`
	}
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxBody)).Decode(&parsed); err != nil {
		return nil
	}

	var out []string
	for _, it := range parsed.Items {
		if it.HTMLURL != "" {
			out = append(out, it.HTMLURL)
		}
		for _, tm := range it.TextMatches {
			if tm.Fragment != "" {
				out = append(out, tm.Fragment)
			}
		}
	}
	return out
}
+package graphql + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "vuln.graphql" + +type gqlModule struct{} + +func Register() { module.Register(&gqlModule{}) } + +func (*gqlModule) Name() string { return ModuleName } +func (*gqlModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*gqlModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*gqlModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventAPIFinding, eventbus.EventVulnerability} +} +func (*gqlModule) DefaultEnabled() bool { return true } + +var candidatePaths = []string{ + "/graphql", + "/graphiql", + "/api/graphql", + "/v1/graphql", + "/v2/graphql", + "/query", + "/api/v1/graphql", + "/api/v2/graphql", + "/graphql/console", + "/graphql/v1", + "/graphql/v2", + "/playground", +} + +// introspection is the minimal query that exposes the full schema. Sent +// with Content-Type: application/json. +const introspectionQuery = `{"query":"{__schema{queryType{name} mutationType{name} subscriptionType{name} types{name kind description fields{name} enumValues{name}}}}"}` + +func (*gqlModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 10) + client := gohttp.GetSharedClient(timeout) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + var wg sync.WaitGroup + + // Drain store: every host that got a successful HTTP probe. 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); probeGraphQL(mctx, client, host) }() + } + + // Late events. + sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); probeGraphQL(mctx, client, host) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} + +func probeGraphQL(mctx module.Context, client *http.Client, host string) { + for _, p := range candidatePaths { + if mctx.Ctx.Err() != nil { + return + } + for _, scheme := range []string{"https://", "http://"} { + u := scheme + host + p + if finding := tryIntrospection(client, u); finding != nil { + publishFinding(mctx, host, u, finding) + return // one endpoint per host is enough β€” rest are typically aliases + } + } + } +} + +type gqlFinding struct { + SchemaSize int + TypesCount int + HasMutation bool + HasSubscription bool + QueryTypeName string + Sample string // truncated introspection response +} + +func tryIntrospection(client *http.Client, url string) *gqlFinding { + req, err := http.NewRequest("POST", url, bytes.NewBufferString(introspectionQuery)) + if err != nil { + return nil + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", "god-eye-v2") + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + resp, err := client.Do(req.WithContext(ctx)) + if err != nil { + return nil + } + defer resp.Body.Close() + + // Accept 2xx β€” the exact shape matters more than status. 
+ if resp.StatusCode >= 400 { + return nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) + if err != nil || len(body) < 30 { + return nil + } + + // Parse the response; real GraphQL endpoints return {"data": {"__schema": ...}} + var parsed struct { + Data struct { + Schema struct { + QueryType map[string]interface{} `json:"queryType"` + MutationType map[string]interface{} `json:"mutationType"` + SubscriptionType map[string]interface{} `json:"subscriptionType"` + Types []struct { + Name string `json:"name"` + Kind string `json:"kind"` + } `json:"types"` + } `json:"__schema"` + } `json:"data"` + } + if err := json.Unmarshal(body, &parsed); err != nil { + return nil + } + if parsed.Data.Schema.QueryType == nil { + return nil + } + + fnd := &gqlFinding{ + SchemaSize: len(body), + TypesCount: len(parsed.Data.Schema.Types), + HasMutation: parsed.Data.Schema.MutationType != nil, + HasSubscription: parsed.Data.Schema.SubscriptionType != nil, + } + if n, ok := parsed.Data.Schema.QueryType["name"].(string); ok { + fnd.QueryTypeName = n + } + if len(body) > 500 { + fnd.Sample = string(body[:500]) + "…" + } else { + fnd.Sample = string(body) + } + return fnd +} + +func publishFinding(mctx module.Context, host, url string, f *gqlFinding) { + now := time.Now() + severity := eventbus.SeverityMedium + if f.HasMutation { + severity = eventbus.SeverityHigh + } + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "graphql-introspection", + Title: "GraphQL Introspection Enabled", + Description: describe(f), + Severity: string(severity), + URL: url, + Evidence: f.Sample, + Remediation: "Disable introspection in production GraphQL servers (e.g. 
Apollo: introspection:false, GraphQL Yoga: introspection:{disable:true}).", + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + ID: "graphql-introspection", + Title: "GraphQL Introspection Enabled", + Description: describe(f), + Severity: severity, + URL: url, + Evidence: f.Sample, + Remediation: "Disable introspection in production GraphQL servers.", + OWASP: "A05:2021-Security Misconfiguration", + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.APIFinding{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + Kind: "graphql-introspection", + URL: url, + Issue: describe(f), + Severity: severity, + }) +} + +func describe(f *gqlFinding) string { + parts := []string{"GraphQL endpoint leaks full schema via unauthenticated introspection."} + if f.TypesCount > 0 { + parts = append(parts, "Types: "+itoa(f.TypesCount)+".") + } + if f.HasMutation { + parts = append(parts, "Mutations enabled β€” attacker can enumerate write operations.") + } + if f.HasSubscription { + parts = append(parts, "Subscriptions enabled.") + } + if f.QueryTypeName != "" { + parts = append(parts, "Query root: "+f.QueryTypeName) + } + return strings.Join(parts, " ") +} + +func itoa(n int) string { + // Small inline formatter avoids importing strconv just for this. 
+ if n == 0 { + return "0" + } + var buf [20]byte + i := len(buf) + neg := n < 0 + if neg { + n = -n + } + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if neg { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} + diff --git a/internal/modules/headers/headers.go b/internal/modules/headers/headers.go new file mode 100644 index 0000000..04b196e --- /dev/null +++ b/internal/modules/headers/headers.go @@ -0,0 +1,253 @@ +// Package headers performs a detailed inspection of HTTP response headers +// and reports every missing or misconfigured security control. Unlike v1's +// lightweight header check, this module flags each issue as an individual +// VulnerabilityFound event with remediation guidance aligned to OWASP +// Secure Headers Project. +package headers + +import ( + "context" + "net/http" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "vuln.security-headers" + +type hdrModule struct{} + +func Register() { module.Register(&hdrModule{}) } + +func (*hdrModule) Name() string { return ModuleName } +func (*hdrModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*hdrModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*hdrModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventVulnerability} +} +func (*hdrModule) DefaultEnabled() bool { return true } + +func (*hdrModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 10) + client := gohttp.GetSharedClient(timeout) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + var wg sync.WaitGroup + + // Drain the store. 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); inspect(mctx, client, host) }() + } + + // Late events. + sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); inspect(mctx, client, host) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} + +func inspect(mctx module.Context, client *http.Client, host string) { + req, err := http.NewRequest("GET", "https://"+host, nil) + if err != nil { + return + } + req.Header.Set("User-Agent", "god-eye-v2") + resp, err := client.Do(req) + if err != nil { + return + } + defer resp.Body.Close() + + issues := assess(resp.Header) + if len(issues) == 0 { + return + } + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + now := time.Now() + for _, iss := range issues { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: iss.id, + Title: iss.title, + Description: iss.desc, + Severity: string(iss.sev), + URL: "https://" + host, + Remediation: iss.fix, + OWASP: "A05:2021-Security Misconfiguration", + FoundAt: now, + }) + } + }) + for _, iss := range issues { + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + ID: iss.id, + Title: iss.title, + Description: iss.desc, + Severity: iss.sev, + URL: "https://" + host, + Remediation: iss.fix, + OWASP: "A05:2021-Security Misconfiguration", + }) + } +} + +type issue struct { + id, title, desc, fix string + sev eventbus.Severity +} + +func assess(h http.Header) []issue { + var out 
[]issue + hasHeader := func(k string) bool { return strings.TrimSpace(h.Get(k)) != "" } + + if !hasHeader("Strict-Transport-Security") { + out = append(out, issue{ + id: "hdr-missing-hsts", + title: "Missing Strict-Transport-Security", + desc: "HSTS is absent; clients may accept plaintext downgrades.", + fix: "Add: Strict-Transport-Security: max-age=63072000; includeSubDomains; preload", + sev: eventbus.SeverityMedium, + }) + } else if hsts := h.Get("Strict-Transport-Security"); !strings.Contains(strings.ToLower(hsts), "max-age=") || + !strings.Contains(strings.ToLower(hsts), "includesubdomains") { + out = append(out, issue{ + id: "hdr-weak-hsts", + title: "Weak HSTS policy", + desc: "HSTS set but missing includeSubDomains and/or sufficient max-age.", + fix: "Use: max-age=63072000; includeSubDomains; preload", + sev: eventbus.SeverityLow, + }) + } + + if !hasHeader("Content-Security-Policy") { + out = append(out, issue{ + id: "hdr-missing-csp", + title: "Missing Content-Security-Policy", + desc: "No CSP header; XSS mitigations rely solely on upstream filtering.", + fix: "Deploy a nonce-based CSP restricting script-src, object-src 'none'.", + sev: eventbus.SeverityMedium, + }) + } else if strings.Contains(strings.ToLower(h.Get("Content-Security-Policy")), "unsafe-inline") { + out = append(out, issue{ + id: "hdr-weak-csp", + title: "Weak CSP (allows unsafe-inline)", + desc: "CSP allows unsafe-inline, neutralizing most XSS protection.", + fix: "Remove unsafe-inline; use nonces or hashes.", + sev: eventbus.SeverityMedium, + }) + } + + if !hasHeader("X-Frame-Options") { + // Only flag if CSP doesn't include frame-ancestors. 
+ csp := strings.ToLower(h.Get("Content-Security-Policy")) + if !strings.Contains(csp, "frame-ancestors") { + out = append(out, issue{ + id: "hdr-missing-clickjack", + title: "Clickjacking not prevented", + desc: "Neither X-Frame-Options nor CSP frame-ancestors is set.", + fix: "Add: X-Frame-Options: DENY OR CSP with frame-ancestors 'none'.", + sev: eventbus.SeverityLow, + }) + } + } + + if !hasHeader("X-Content-Type-Options") { + out = append(out, issue{ + id: "hdr-missing-nosniff", + title: "Missing X-Content-Type-Options", + desc: "MIME sniffing permitted; certain XSS escalations become easier.", + fix: "Add: X-Content-Type-Options: nosniff", + sev: eventbus.SeverityLow, + }) + } + + if !hasHeader("Referrer-Policy") { + out = append(out, issue{ + id: "hdr-missing-referrer-policy", + title: "Missing Referrer-Policy", + desc: "Default browser Referrer-Policy leaks URLs to third parties.", + fix: "Add: Referrer-Policy: strict-origin-when-cross-origin", + sev: eventbus.SeverityLow, + }) + } + + if !hasHeader("Permissions-Policy") && !hasHeader("Feature-Policy") { + out = append(out, issue{ + id: "hdr-missing-permissions-policy", + title: "Missing Permissions-Policy", + desc: "Browser features (camera, geolocation, USB, etc.) are unrestricted by default.", + fix: "Add: Permissions-Policy: camera=(), microphone=(), geolocation=()", + sev: eventbus.SeverityInfo, + }) + } + + // Dangerous information disclosure via default server banner. 
// looksLikeBanner reports whether a Server header value looks like a
// "product/version" banner: it must contain a slash and either a dot or an
// ASCII digit. None of those characters are affected by letter case, so the
// value is inspected as-is.
func looksLikeBanner(s string) bool {
	if !strings.Contains(s, "/") {
		return false
	}
	return strings.Contains(s, ".") || anyDigit(s)
}

// anyDigit reports whether s contains at least one ASCII digit.
func anyDigit(s string) bool {
	return strings.ContainsAny(s, "0123456789")
}
+package httpprobe + +import ( + "context" + "crypto/tls" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "http.probe" + +type probeModule struct{} + +func Register() { module.Register(&probeModule{}) } + +func (*probeModule) Name() string { return ModuleName } +func (*probeModule) Phase() module.Phase { return module.PhaseEnrichment } +func (*probeModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*probeModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventHTTPProbed, eventbus.EventTLSAnalyzed, eventbus.EventTechDetected} +} +func (*probeModule) DefaultEnabled() bool { return true } + +func (p *probeModule) Run(mctx module.Context) error { + if mctx.Config.Bool("no_probe", false) { + return nil + } + + conc := mctx.Config.Int("concurrency", 500) + if conc <= 0 { + conc = 500 + } + timeout := mctx.Config.Int("timeout", 5) + + // Dedup across drain + late events. + processed := make(map[string]struct{}) + var processedMu sync.Mutex + shouldProcess := func(host string) bool { + processedMu.Lock() + defer processedMu.Unlock() + if _, dup := processed[host]; dup { + return false + } + processed[host] = struct{}{} + return true + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for host := range work { + p.probeOne(mctx, host, timeout) + } + }() + } + + // Drain: every host in the store with at least one IP is worth probing. 
+ for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.Subdomain == "" || len(h.IPs) == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + select { + case work <- h.Subdomain: + case <-mctx.Ctx.Done(): + close(work) + wg.Wait() + return nil + } + } + + // Also listen for late DNSResolved events (recursive/permutation running + // concurrently in other modules may produce new resolves during our + // phase β€” pick them up). + sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.DNSResolved) + if !ok || len(ev.IPs) == 0 { + return + } + if !shouldProcess(ev.Subdomain) { + return + } + select { + case work <- ev.Subdomain: + case <-mctx.Ctx.Done(): + } + }) + defer sub.Unsubscribe() + + // Brief window for late arrivals. + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + close(work) + wg.Wait() + return nil +} + +func (p *probeModule) probeOne(mctx module.Context, host string, timeout int) { + if mctx.Ctx.Err() != nil { + return + } + r := gohttp.ProbeHTTP(host, timeout) + if r == nil || r.StatusCode == 0 { + return + } + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.StatusCode = r.StatusCode + h.ContentLength = r.ContentLength + h.Title = r.Title + h.Server = r.Server + if len(r.Tech) > 0 { + store.AddTechnologies(h, r.Tech) + } + h.ResponseMs = r.ResponseMs + h.TLSVersion = r.TLSVersion + h.TLSIssuer = r.TLSIssuer + h.TLSSelfSigned = r.TLSSelfSigned + if r.TLSExpiry != "" { + if tm, err := time.Parse("2006-01-02", r.TLSExpiry); err == nil { + h.TLSExpiry = tm + } + } + if r.TLSFingerprint != nil { + fp := *r.TLSFingerprint + h.TLSFingerprint = &store.TLSFingerprint{ + Vendor: fp.Vendor, + Product: fp.Product, + Version: fp.Version, + ApplianceKind: fp.ApplianceType, + InternalHosts: append([]string(nil), fp.InternalHosts...), + } + } + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.HTTPProbed{ + EventMeta: 
eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + URL: "https://" + host, + StatusCode: r.StatusCode, + ContentLength: r.ContentLength, + Title: r.Title, + Server: r.Server, + Technologies: append([]string(nil), r.Tech...), + ResponseMs: r.ResponseMs, + TLSVersion: r.TLSVersion, + TLSSelfSigned: r.TLSSelfSigned, + }) + + for _, t := range r.Tech { + if t == "" { + continue + } + mctx.Bus.Publish(mctx.Ctx, eventbus.TechDetected{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Host: host, + Technology: t, + Confidence: 0.8, + }) + } + + if r.TLSFingerprint != nil { + mctx.Bus.Publish(mctx.Ctx, eventbus.TLSAnalyzed{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Host: host, + Version: r.TLSVersion, + Issuer: r.TLSIssuer, + SelfSigned: r.TLSSelfSigned, + Vendor: r.TLSFingerprint.Vendor, + Product: r.TLSFingerprint.Product, + ApplianceKind: r.TLSFingerprint.ApplianceType, + InternalHosts: append([]string(nil), r.TLSFingerprint.InternalHosts...), + }) + } +} + +// keep tls import stable +var _ = tls.VersionTLS13 diff --git a/internal/modules/javascript/javascript.go b/internal/modules/javascript/javascript.go new file mode 100644 index 0000000..ba48b0b --- /dev/null +++ b/internal/modules/javascript/javascript.go @@ -0,0 +1,186 @@ +// Package javascript downloads JS files from probed hosts and scans them +// for secrets with the v1 analyzer. Drains the store at start; also listens +// for late HTTPProbed events. +package javascript + +import ( + "context" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/scanner" + "god-eye/internal/store" +) + +// publicAPIDenylist covers well-known public/third-party APIs and font +// services that the v1 regex scanner flags as "API Endpoint" but which +// are never secrets. Matched case-insensitively as a substring. 
+var publicAPIDenylist = []string{ + "fonts.googleapis.com", + "fonts.gstatic.com", + "www.googleapis.com", + "content.googleapis.com", + "api.fastmail.com", + "api.forwardemail.net", + "cdn.jsdelivr.net", + "cdnjs.cloudflare.com", + "unpkg.com", +} + +// uiStringDenylist covers common UI labels / warning strings that trip +// the "Generic Password" regex but are clearly human-readable copy. +var uiStringDenylist = []string{ + "change password", + "update password", + "reset password", + "confirm password", + "forgot password", + "set-initial-password", + "change-password", + "this is a very common password", + "masterpassword", + "password", +} + +// isSecretFalsePositive applies cheap deterministic heuristics to weed +// out v1 regex noise. Does NOT replace AI triage (which is still the +// preferred filter once the ai module is enabled) β€” it only suppresses +// findings that are *definitely* not secrets. +func isSecretFalsePositive(secret string) bool { + low := strings.ToLower(strings.TrimSpace(secret)) + for _, s := range publicAPIDenylist { + if strings.Contains(low, s) { + return true + } + } + for _, s := range uiStringDenylist { + if strings.Contains(low, s) { + return true + } + } + // Very short matches (< 8 chars of unique content) are almost always + // labels, not credentials. The v1 regex already strips the "[Kind] " + // prefix before passing to us; anything under 8 chars is noise. 
+ if len(low) > 0 && len(low) < 8 { + return true + } + return false +} + +const ModuleName = "js.analyzer" + +type jsModule struct{} + +func Register() { module.Register(&jsModule{}) } + +func (*jsModule) Name() string { return ModuleName } +func (*jsModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*jsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*jsModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventJSFile, eventbus.EventSecret} +} +func (*jsModule) DefaultEnabled() bool { return true } + +func (*jsModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 5) + client := gohttp.GetSharedClient(timeout) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + analyze := func(host string) { + if mctx.Ctx.Err() != nil { + return + } + jsFiles, secrets := scanner.AnalyzeJSFiles(host, client) + // Drop known-noise findings before they reach the store or bus. 
+ filtered := secrets[:0] + for _, s := range secrets { + if isSecretFalsePositive(s) { + continue + } + filtered = append(filtered, s) + } + secrets = filtered + if len(jsFiles) == 0 && len(secrets) == 0 { + return + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + for _, sec := range secrets { + h.Secrets = append(h.Secrets, store.Secret{ + Kind: "js-regex", + Match: sec, + Severity: string(eventbus.SeverityHigh), + FoundAt: time.Now(), + }) + } + }) + for _, jsf := range jsFiles { + mctx.Bus.Publish(mctx.Ctx, eventbus.JSFileDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + URL: jsf, + Host: host, + }) + } + for _, s := range secrets { + mctx.Bus.Publish(mctx.Ctx, eventbus.SecretFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Kind: "js-regex", + Match: s, + Location: "js-file", + Severity: eventbus.SeverityHigh, + }) + } + } + + var wg sync.WaitGroup + + // Drain: every probed host (StatusCode > 0). + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); analyze(host) }() + } + + // Late events. 
+ sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); analyze(host) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} diff --git a/internal/modules/jwt/jwt.go b/internal/modules/jwt/jwt.go new file mode 100644 index 0000000..a8fce2a --- /dev/null +++ b/internal/modules/jwt/jwt.go @@ -0,0 +1,305 @@ +// Package jwt scans responses for JWTs, decodes them, and flags +// security-relevant attributes: alg=none, weak HMAC secret (dictionary +// crack against common passwords), excessive expiration, missing claims. +// +// The brute-force list is intentionally tiny (~20 common secrets) β€” the +// goal is to surface obviously-weak keys, not to run offline hashcat. A +// proper cracker belongs in Fase 2's planned "auth" agent. 
+package jwt + +import ( + "context" + "crypto/hmac" + "crypto/sha256" + "crypto/sha512" + "encoding/base64" + "encoding/json" + "hash" + "io" + "net/http" + "regexp" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/store" +) + +const ModuleName = "vuln.jwt" + +type jwtModule struct{} + +func Register() { module.Register(&jwtModule{}) } + +func (*jwtModule) Name() string { return ModuleName } +func (*jwtModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*jwtModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*jwtModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventVulnerability, eventbus.EventSecret} +} +func (*jwtModule) DefaultEnabled() bool { return true } + +// jwtRegex matches the standard three-part base64url JWT shape. +var jwtRegex = regexp.MustCompile(`eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]*`) + +var weakSecrets = []string{ + "secret", "password", "123456", "admin", "jwt", "jwtsecret", + "changeme", "default", "test", "dev", "secret_key", "mysecret", + "your-256-bit-secret", "your-secret-key", "super-secret", + "supersecret", "helloworld", "qwerty", "abc123", "letmein", +} + +func (*jwtModule) Run(mctx module.Context) error { + timeout := mctx.Config.Int("timeout", 10) + client := gohttp.GetSharedClient(timeout) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + var wg sync.WaitGroup + + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); scanHost(mctx, client, host) }() + } + + sub := 
mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); scanHost(mctx, client, host) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} + +func scanHost(mctx module.Context, client *http.Client, host string) { + for _, scheme := range []string{"https://", "http://"} { + if mctx.Ctx.Err() != nil { + return + } + url := scheme + host + req, err := http.NewRequest("GET", url, nil) + if err != nil { + continue + } + req.Header.Set("User-Agent", "god-eye-v2") + resp, err := client.Do(req) + if err != nil { + continue + } + body, _ := io.ReadAll(io.LimitReader(resp.Body, 256*1024)) + resp.Body.Close() + + text := string(body) + // Also check Authorization + Set-Cookie response headers. + for _, h := range resp.Header.Values("Set-Cookie") { + text += "\n" + h + } + if auth := resp.Header.Get("Authorization"); auth != "" { + text += "\n" + auth + } + + matches := jwtRegex.FindAllString(text, -1) + for _, tok := range uniqueStrings(matches) { + analyzeJWT(mctx, host, url, tok) + } + // One scheme is enough; avoid duplicate noise. 
+ if len(matches) > 0 { + return + } + } +} + +func analyzeJWT(mctx module.Context, host, url, token string) { + parts := strings.Split(token, ".") + if len(parts) != 3 { + return + } + header, err := base64Decode(parts[0]) + if err != nil { + return + } + payload, err := base64Decode(parts[1]) + if err != nil { + return + } + + var h struct { + Alg string `json:"alg"` + Kid string `json:"kid"` + Typ string `json:"typ"` + } + if err := json.Unmarshal(header, &h); err != nil { + return + } + + severity := eventbus.SeverityInfo + findings := []string{"JWT detected"} + + if strings.EqualFold(h.Alg, "none") { + severity = eventbus.SeverityCritical + findings = append(findings, "alg=none accepted β€” no signature verification") + } + if strings.HasPrefix(strings.ToUpper(h.Alg), "HS") { + if cracked := tryWeakSecret(token, h.Alg, parts); cracked != "" { + severity = eventbus.SeverityCritical + findings = append(findings, "weak HMAC secret cracked: "+cracked) + } + } + if h.Kid != "" && looksInjectable(h.Kid) { + severity = maxSeverity(severity, eventbus.SeverityMedium) + findings = append(findings, "kid header may be injectable: "+h.Kid) + } + + // Inspect payload for excessive expiry. 
+ var claims map[string]interface{} + _ = json.Unmarshal(payload, &claims) + if exp, ok := claims["exp"].(float64); ok { + expAt := time.Unix(int64(exp), 0) + if time.Until(expAt) > 365*24*time.Hour { + severity = maxSeverity(severity, eventbus.SeverityLow) + findings = append(findings, "exp >1 year") + } + } + + redacted := token + if len(redacted) > 40 { + redacted = redacted[:20] + "…" + redacted[len(redacted)-10:] + } + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(sh *store.Host) { + sh.Secrets = append(sh.Secrets, store.Secret{ + Kind: "jwt", + Match: redacted, + Location: url, + Severity: string(severity), + Description: strings.Join(findings, "; "), + FoundAt: time.Now(), + }) + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.SecretFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Kind: "jwt", + Match: redacted, + Location: url, + Severity: severity, + Description: strings.Join(findings, "; "), + }) + + if severity == eventbus.SeverityCritical || severity == eventbus.SeverityHigh { + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + ID: "jwt-weak", + Title: "JWT Weakness", + Description: strings.Join(findings, "; "), + Severity: severity, + URL: url, + Evidence: redacted, + Remediation: "Use strong signing keys (256+ bits of entropy), refuse alg=none, rotate keys on compromise, short expiry.", + OWASP: "A02:2021-Cryptographic Failures", + }) + } +} + +func tryWeakSecret(token, alg string, parts []string) string { + signingInput := parts[0] + "." 
+ parts[1] + sig, err := base64Decode(parts[2]) + if err != nil { + return "" + } + + var hashFn func() hash.Hash + switch strings.ToUpper(alg) { + case "HS256": + hashFn = sha256.New + case "HS384": + hashFn = func() hash.Hash { return sha512.New384() } + case "HS512": + hashFn = sha512.New + default: + return "" + } + + for _, s := range weakSecrets { + mac := hmac.New(hashFn, []byte(s)) + mac.Write([]byte(signingInput)) + if hmac.Equal(mac.Sum(nil), sig) { + return s + } + } + return "" +} + +// base64Decode unpads and decodes a JWT segment (URL-safe, no padding). +func base64Decode(s string) ([]byte, error) { + // Add padding if missing. + if m := len(s) % 4; m != 0 { + s += strings.Repeat("=", 4-m) + } + return base64.URLEncoding.DecodeString(s) +} + +func looksInjectable(kid string) bool { + // kids that include path separators, SQL wildcards, or NUL-like + // sequences are worth flagging for manual review. + return strings.ContainsAny(kid, "/\\;'\"$`|") +} + +func maxSeverity(a, b eventbus.Severity) eventbus.Severity { + rank := map[eventbus.Severity]int{ + eventbus.SeverityInfo: 0, eventbus.SeverityLow: 1, + eventbus.SeverityMedium: 2, eventbus.SeverityHigh: 3, eventbus.SeverityCritical: 4, + } + if rank[a] >= rank[b] { + return a + } + return b +} + +func uniqueStrings(in []string) []string { + seen := make(map[string]struct{}) + out := make([]string, 0, len(in)) + for _, s := range in { + if _, dup := seen[s]; dup { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + return out +} + diff --git a/internal/modules/nuclei/nuclei.go b/internal/modules/nuclei/nuclei.go new file mode 100644 index 0000000..c2fdd7e --- /dev/null +++ b/internal/modules/nuclei/nuclei.go @@ -0,0 +1,329 @@ +// Package nuclei runs Nuclei-format YAML templates against every probed +// host. 
The actual executor lives in internal/nucleitpl; this module is +// the wiring that discovers templates on disk, fans out per host, and +// publishes matches as VulnerabilityFound events. +// +// Template discovery order: +// 1. --nuclei-templates flag (highest priority) +// 2. NUCLEI_TEMPLATES env var +// 3. ~/nuclei-templates (nuclei CLI default) +// 4. ~/.god-eye/nuclei-templates +// +// If no template directory is found AND nuclei_auto_download is true +// (default), God's Eye downloads the official projectdiscovery/nuclei-templates +// ZIP into ~/.god-eye/nuclei-templates, extracts only the .yaml/.yml files +// (path-traversal safe), and proceeds with the scan. The archive is +// ~40MB; first run takes 10-30 seconds depending on network, subsequent +// runs skip the download. +// +// Refresh the cache manually with: god-eye nuclei-update +// +// Only HTTP templates compatible with our executor subset run; others +// are counted as "skipped" and surfaced as a ModuleError event once per +// scan. +package nuclei + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "god-eye/internal/eventbus" + gohttp "god-eye/internal/http" + "god-eye/internal/module" + "god-eye/internal/nucleitpl" + "god-eye/internal/store" +) + +const ModuleName = "vuln.nuclei-compat" + +type nucleiModule struct{} + +func Register() { module.Register(&nucleiModule{}) } + +func (*nucleiModule) Name() string { return ModuleName } +func (*nucleiModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*nucleiModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } +func (*nucleiModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventVulnerability, eventbus.EventCVEMatch} +} + +// DefaultEnabled returns true so the registry always loads the module; +// Run() itself is a no-op unless `nuclei_scan` is set in the config +// (via --nuclei or YAML). 
Mirrors the ai.cascade module β€” keeps the +// module visible to selection logic while preserving opt-in semantics. +func (*nucleiModule) DefaultEnabled() bool { return true } + +func (*nucleiModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("nuclei_scan", false) { + return nil + } + + tplDir := resolveTemplateDir(mctx) + if tplDir == "" { + // No templates found β€” try auto-download into ~/.god-eye/nuclei-templates + // unless the user explicitly disabled that fallback. + if !mctx.Config.Bool("nuclei_auto_download", true) { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: "no nuclei templates found and --nuclei-auto-download=false. Clone https://github.com/projectdiscovery/nuclei-templates into ~/nuclei-templates or pass --nuclei-templates ", + }) + return nil + } + + dest, err := defaultAutoDownloadDir() + if err != nil { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("cannot determine default templates dir: %v", err), + }) + return nil + } + + dl := nucleitpl.NewDownloader() + dl.Verbose = mctx.Config.Bool("verbose", false) || mctx.Config.Bool("ai.verbose", false) + if err := dl.EnsureTemplates(dest); err != nil { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("auto-download nuclei templates: %v", err), + }) + return nil + } + tplDir = dest + } + + tpls, diags, err := nucleitpl.LoadDir(tplDir) + if err != nil { + return fmt.Errorf("load templates from %s: %w", tplDir, err) + } + + supported := 0 + skipped := 0 + var supportedTpls []*nucleitpl.Template + for _, t := range tpls { + if ok, _ := t.IsSupported(); ok { + supported++ + supportedTpls = 
append(supportedTpls, t) + } else { + skipped++ + } + } + + if supported == 0 { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("loaded %d templates, 0 supported (skipped %d, parse errors %d)", len(tpls), skipped, len(diags)), + }) + return nil + } + + timeout := time.Duration(mctx.Config.Int("timeout", 10)) * time.Second + client := gohttp.GetSharedClient(int(timeout.Seconds())) + exec := nucleitpl.NewExecutor(client, timeout) + + // Gather target URLs from the store. + var targets []string + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + targets = append(targets, "https://"+h.Subdomain) + } + if len(targets) == 0 { + return nil + } + + // Bounded parallelism: running thousands of templates Γ— hundreds of + // hosts unbounded would be a DoS against ourselves and the target. + maxConcurrent := mctx.Config.Int("concurrency", 50) + if maxConcurrent > 50 { + maxConcurrent = 50 // cap β€” templates make 1-3 requests each + } + if maxConcurrent < 1 { + maxConcurrent = 10 + } + + sem := make(chan struct{}, maxConcurrent) + var wg sync.WaitGroup + + for _, url := range targets { + for _, t := range supportedTpls { + if mctx.Ctx.Err() != nil { + break + } + url := url + t := t + wg.Add(1) + sem <- struct{}{} + go func() { + defer wg.Done() + defer func() { <-sem }() + runCtx, cancel := context.WithTimeout(mctx.Ctx, timeout) + defer cancel() + for _, m := range exec.Run(runCtx, t, url) { + publishMatch(mctx, m) + } + }() + } + } + wg.Wait() + + if skipped > 0 { + mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target}, + Module: ModuleName, + Err: fmt.Sprintf("executed %d templates, skipped %d (unsupported protocol/features)", supported, skipped), + }) + } + return nil +} + +// publishMatch persists the match 
into the store and fires a +// VulnerabilityFound event. When the match references CVEs, a CVEMatch +// event is also fired so the CVE aggregator sees it. +func publishMatch(mctx module.Context, m nucleitpl.Match) { + now := time.Now() + severity := mapSeverity(m.Severity) + host := hostFromURL(m.URL) + + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "nuclei/" + m.TemplateID, + Title: m.Name, + Description: m.Description, + Severity: string(severity), + URL: m.URL, + Evidence: m.Evidence, + CVEs: append([]string(nil), m.CVEs...), + FoundAt: now, + }) + for _, cveID := range m.CVEs { + h.CVEs = append(h.CVEs, store.CVE{ + ID: cveID, + Technology: m.TemplateID, + Severity: string(severity), + FoundAt: now, + URL: m.TemplateURL, + }) + } + }) + + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + ID: "nuclei/" + m.TemplateID, + Title: m.Name, + Description: m.Description, + Severity: severity, + URL: m.URL, + Evidence: m.Evidence, + CVEs: append([]string(nil), m.CVEs...), + }) + + for _, cveID := range m.CVEs { + mctx.Bus.Publish(mctx.Ctx, eventbus.CVEMatch{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + CVE: cveID, + Technology: m.TemplateID, + Severity: severity, + Description: m.Name, + URL: m.TemplateURL, + }) + } +} + +func mapSeverity(s string) eventbus.Severity { + switch s { + case "critical": + return eventbus.SeverityCritical + case "high": + return eventbus.SeverityHigh + case "medium": + return eventbus.SeverityMedium + case "low": + return eventbus.SeverityLow + default: + return eventbus.SeverityInfo + } +} + +// resolveTemplateDir returns the first USABLE template directory, in +// priority order. "Usable" means it exists, is a directory, and the +// process can list its contents (i.e. 
not a permission-denied mount +// like a read-restricted nuclei install in another user's home). +// Returns "" when no candidate qualifies. +func resolveTemplateDir(mctx module.Context) string { + candidates := []string{ + mctx.Config.String("nuclei_templates", ""), + os.Getenv("NUCLEI_TEMPLATES"), + } + if home, err := os.UserHomeDir(); err == nil { + // Prefer the god-eye auto-managed cache over a pre-existing + // ~/nuclei-templates: the latter may be a nuclei CLI install + // with restrictive permissions we can't read. + candidates = append(candidates, + filepath.Join(home, ".god-eye", "nuclei-templates"), + filepath.Join(home, "nuclei-templates"), + ) + } + for _, c := range candidates { + if c == "" { + continue + } + info, err := os.Stat(c) + if err != nil || !info.IsDir() { + continue + } + // Readability check: can we list at least one entry? If the dir + // is permission-denied, os.Stat succeeds but os.Open fails β€” + // skip such candidates so auto-download fallback triggers. + f, err := os.Open(c) + if err != nil { + continue + } + names, err := f.Readdirnames(1) + f.Close() + if err != nil { + continue + } + if len(names) == 0 { + // Empty dir β€” treat as unusable to trigger auto-download. + continue + } + return c + } + return "" +} + +// defaultAutoDownloadDir returns ~/.god-eye/nuclei-templates. +func defaultAutoDownloadDir() (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", err + } + return filepath.Join(home, ".god-eye", "nuclei-templates"), nil +} + +func hostFromURL(u string) string { + // Strip scheme. + s := u + for _, p := range []string{"https://", "http://"} { + if len(s) > len(p) && s[:len(p)] == p { + s = s[len(p):] + break + } + } + // Strip path. + for i := 0; i < len(s); i++ { + if s[i] == '/' || s[i] == '?' 
|| s[i] == '#' { + return s[:i] + } + } + return s +} diff --git a/internal/modules/passive/passive.go b/internal/modules/passive/passive.go new file mode 100644 index 0000000..fe63116 --- /dev/null +++ b/internal/modules/passive/passive.go @@ -0,0 +1,151 @@ +// Package passive is the Fase 0.6 adapter that wraps the v1 passive sources +// (internal/sources) as a single Module. It fans out queries to all 20 public +// sources in parallel and emits a SubdomainDiscovered event for each result. +// +// In Fase 1 (Discovery Supremacy) each source will become its own Module with +// independent configuration, error reporting, and rate limiting. This +// adapter preserves v1 behavior so we reach feature parity immediately. +package passive + +import ( + "context" + "strings" + "sync" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/sources" + "god-eye/internal/store" +) + +// ModuleName is the registry identifier. +const ModuleName = "passive.v1-aggregate" + +type passiveModule struct{} + +// Register the module in the default registry. Callers import this package +// for side effects via the modules meta-package (see internal/modules/all). +func Register() { module.Register(&passiveModule{}) } + +func (*passiveModule) Name() string { return ModuleName } +func (*passiveModule) Phase() module.Phase { return module.PhaseDiscovery } + +func (*passiveModule) Consumes() []eventbus.EventType { return nil } +func (*passiveModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered, eventbus.EventModuleError} +} + +func (*passiveModule) DefaultEnabled() bool { return true } + +// sourceList mirrors the v1 scanner.Run list. Order is preserved for stable +// logging. 
+// sourceList enumerates the passive sources queried by Run. Each entry pairs
+// the display name used in event sources and discovery-method tags with the
+// function that fetches candidate subdomains for a domain.
+var sourceList = []struct {
+	name string
+	fn   func(string) ([]string, error)
+}{
+	{"crt.sh", sources.FetchCrtsh},
+	{"Certspotter", sources.FetchCertspotter},
+	{"AlienVault", sources.FetchAlienVault},
+	{"HackerTarget", sources.FetchHackerTarget},
+	{"URLScan", sources.FetchURLScan},
+	{"RapidDNS", sources.FetchRapidDNS},
+	{"Anubis", sources.FetchAnubis},
+	{"ThreatMiner", sources.FetchThreatMiner},
+	{"DNSRepo", sources.FetchDNSRepo},
+	{"SubdomainCenter", sources.FetchSubdomainCenter},
+	{"Wayback", sources.FetchWayback},
+	{"CommonCrawl", sources.FetchCommonCrawl},
+	{"Sitedossier", sources.FetchSitedossier},
+	{"Riddler", sources.FetchRiddler},
+	{"Robtex", sources.FetchRobtex},
+	{"DNSHistory", sources.FetchDNSHistory},
+	{"ArchiveToday", sources.FetchArchiveToday},
+	{"JLDC", sources.FetchJLDC},
+	{"SynapsInt", sources.FetchSynapsInt},
+	{"CensysFree", sources.FetchCensysFree},
+	// v2.0 additions — free, no API key, fail-open. Dormant v1 sources
+	// re-activated + 4 net-new endpoints.
+	{"BufferOver", sources.FetchBufferOver},       // dormant v1
+	{"DNSDumpster", sources.FetchDNSDumpster},     // dormant v1
+	{"Omnisint", sources.FetchOmnisint},           // v2 new
+	{"HudsonRock", sources.FetchHudsonRock},       // v2 new
+	{"WebArchiveCDX", sources.FetchWebArchiveCDX}, // v2 new
+	{"Digitorus", sources.FetchDigitorus},         // v2 new
+}
+
+// Run queries every entry of sourceList concurrently for subdomains of
+// mctx.Target. Names are normalised, filtered to the target's scope and
+// de-duplicated across sources; each new name is upserted into the store and
+// announced with a SubdomainDiscovered event. A failing source is reported
+// through a ModuleError event and does not affect the others.
+//
+// Run returns nil as soon as every source has finished or the context is
+// cancelled, whichever happens first. On cancellation the source goroutines
+// are not waited for; they stop emitting at their next context check.
+func (m *passiveModule) Run(mctx module.Context) error {
+	target := mctx.Target
+	if target == "" {
+		return nil
+	}
+	// Candidate names are lower-cased below, so the scope comparison has to
+	// use a lower-cased target as well.
+	scope := strings.ToLower(target)
+	scopeSuffix := "." + scope
+
+	var wg sync.WaitGroup
+	// Dedup across sources before emitting — the store will also dedup, but
+	// emitting duplicates just burns bus bandwidth.
+	seen := make(map[string]struct{})
+	var seenMu sync.Mutex
+
+	for _, src := range sourceList {
+		src := src // per-iteration copy for the goroutine (required before Go 1.22)
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+
+			// Do not start a (possibly slow) source once the scan is cancelled.
+			if mctx.Ctx.Err() != nil {
+				return
+			}
+
+			origin := ModuleName + ":" + src.name
+			subs, err := src.fn(target)
+			if err != nil {
+				mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
+					EventMeta: eventbus.EventMeta{Source: origin, Target: target},
+					Module:    origin,
+					Err:       err.Error(),
+				})
+				return
+			}
+
+			methodTag := "passive:" + src.name
+			for _, sub := range subs {
+				// The fetch itself cannot be interrupted, but there is no
+				// point in emitting results after cancellation.
+				if mctx.Ctx.Err() != nil {
+					return
+				}
+
+				sub = strings.ToLower(strings.TrimSpace(sub))
+				// NOTE(review): some sources presumably return wildcard
+				// ("*.example.com") or fully-qualified ("a.example.com.")
+				// forms; both are reduced to a plain host name here.
+				sub = strings.TrimSuffix(strings.TrimPrefix(sub, "*."), ".")
+				if sub == "" {
+					continue
+				}
+				// Scope check on a label boundary: a plain suffix match
+				// would also accept look-alikes such as "evilexample.com"
+				// for the target "example.com".
+				if sub != scope && !strings.HasSuffix(sub, scopeSuffix) {
+					continue
+				}
+
+				seenMu.Lock()
+				_, dup := seen[sub]
+				if !dup {
+					seen[sub] = struct{}{}
+				}
+				seenMu.Unlock()
+				if dup {
+					continue
+				}
+
+				// Persist into the store so downstream resolution phases
+				// can find the subdomain even if they subscribed too late
+				// to receive the SubdomainDiscovered event. The upsert is
+				// best effort: the event below is published regardless.
+				_ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) {
+					store.AddDiscoveryMethod(h, methodTag)
+				})
+
+				mctx.Bus.Publish(mctx.Ctx, eventbus.NewSubdomainDiscovered(
+					origin,
+					sub,
+					methodTag,
+				))
+			}
+		}()
+	}
+
+	// Wait for sources OR cancellation.
+	done := make(chan struct{})
+	go func() { wg.Wait(); close(done) }()
+	select {
+	case <-done:
+	case <-mctx.Ctx.Done():
+	}
+	_ = context.Canceled // keep import
+	return nil
+}
diff --git a/internal/modules/permutation/permutation.go b/internal/modules/permutation/permutation.go
new file mode 100644
index 0000000..2959322
--- /dev/null
+++ b/internal/modules/permutation/permutation.go
@@ -0,0 +1,177 @@
+// Package permutation generates candidate subdomains by mutating every
+// previously-discovered subdomain with a set of common prefixes/suffixes
+// and resolving them. This is the "alterx" pattern: you already found
+// api.example.com and dev.example.com, now try api-dev, dev-api,
+// api-staging, api.dev.example.com, etc.
+//
+// Pattern learning is intentionally lightweight in Phase 1: the core v1
+// discovery.PatternLearner already extracts per-label frequencies. We
+// feed those back in via candidate generation.
+package permutation
+
+import (
+	"net"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/config"
+	godns "god-eye/internal/dns"
+	"god-eye/internal/eventbus"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name under which the permutation module registers and
+// the Source it puts on the events it publishes.
+const ModuleName = "discovery.permutation"
+
+// permModule is the stateless module implementation.
+type permModule struct{}
+
+// Register adds the permutation module to the module registry.
+func Register() { module.Register(&permModule{}) }
+
+func (*permModule) Name() string                   { return ModuleName }
+func (*permModule) Phase() module.Phase            { return module.PhaseResolution }
+func (*permModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} }
+func (*permModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventSubdomainDiscovered}
+}
+func (*permModule) DefaultEnabled() bool { return false } // opt-in (burns a lot of DNS)
+
+// commonAffixes are applied to each label of discovered hostnames to
+// generate permutation candidates. Curated for bug-bounty signal.
+var commonAffixes = []string{
+	"dev", "stg", "staging", "prod", "qa", "test", "uat", "sandbox", "preview",
+	"internal", "int", "private", "admin", "api", "api2", "apiv2", "gw",
+	"new", "old", "legacy", "v2", "v3", "next", "beta", "alpha", "canary",
+	"eu", "us", "apac", "emea",
+}
+
+// separators are the strings placed between an affix and a label.
+var separators = []string{"-", "_", "."}
+
+// Run builds permutation candidates from every host currently in the store,
+// resolves them with at most "concurrency" lookups in flight, and records and
+// announces (SubdomainDiscovered, method "permutation") each candidate that
+// resolves. It is a no-op unless the "permutation" config flag is set.
+// Cancelling the context stops the dispatch of new lookups; lookups already
+// running are waited for.
+func (*permModule) Run(mctx module.Context) error {
+	if !mctx.Config.Bool("permutation", false) {
+		return nil
+	}
+
+	target := mctx.Target
+	timeout := mctx.Config.Int("timeout", 5)
+	resolvers := parseResolvers(mctx.Config.String("resolvers", ""))
+	conc := mctx.Config.Int("concurrency", 300)
+	if conc <= 0 {
+		conc = 300
+	}
+
+	// Gather seeds from the store (all already-resolved hosts).
+	seeds := mctx.Store.All(mctx.Ctx)
+	if len(seeds) == 0 {
+		return nil
+	}
+
+	known := make(map[string]struct{}, len(seeds))
+	for _, h := range seeds {
+		if h != nil {
+			known[h.Subdomain] = struct{}{}
+		}
+	}
+	candidates := make(map[string]struct{})
+	for _, h := range seeds {
+		if h == nil {
+			continue
+		}
+		for _, c := range generateCandidates(h.Subdomain, target) {
+			// A host that is already in the store would only be resolved
+			// again and re-announced as a "permutation" find.
+			if _, ok := known[c]; ok {
+				continue
+			}
+			candidates[c] = struct{}{}
+		}
+	}
+
+	// Resolve candidates in parallel. Only emit ones that resolve.
+	sem := make(chan struct{}, conc)
+	var wg sync.WaitGroup
+dispatch:
+	for cand := range candidates {
+		// Acquire a slot, but never block past cancellation.
+		select {
+		case sem <- struct{}{}:
+		case <-mctx.Ctx.Done():
+			break dispatch
+		}
+		wg.Add(1)
+		go func(cand string) {
+			defer wg.Done()
+			defer func() { <-sem }()
+
+			if mctx.Ctx.Err() != nil {
+				return
+			}
+			ips := godns.ResolveSubdomain(cand, resolvers, timeout)
+			if len(ips) == 0 {
+				return
+			}
+			// Best effort: the event is published even if the upsert fails.
+			_ = mctx.Store.Upsert(mctx.Ctx, cand, func(h *store.Host) {
+				store.AddIPs(h, ips)
+				store.AddDiscoveryMethod(h, "permutation")
+			})
+			mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{
+				EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: cand},
+				Subdomain: cand,
+				Method:    "permutation",
+			})
+		}(cand)
+	}
+	wg.Wait()
+	return nil
+}
+
+// generateCandidates produces permuted hostnames from a seed within the
+// target domain. Every returned name ends in "."+target, appears once, and
+// the order is deterministic. It returns nil when seed is not a proper
+// subdomain of target (including seed == target and an empty target).
+//
+// Two mutations are applied: the label closest to the target is combined
+// with every affix on either side using every separator, and every affix is
+// prepended as a new leftmost label.
+func generateCandidates(seed, target string) []string {
+	suffix := "." + target
+	// Match on a label boundary so that "evilexample.com" is not treated as
+	// a subdomain of "example.com".
+	if target == "" || !strings.HasSuffix(seed, suffix) {
+		return nil
+	}
+	prefix := strings.TrimSuffix(seed, suffix)
+	if prefix == "" {
+		return nil
+	}
+
+	labels := strings.Split(prefix, ".")
+	leaf := labels[len(labels)-1]
+	rest := strings.Join(labels[:len(labels)-1], ".")
+
+	seen := make(map[string]struct{})
+	out := make([]string, 0, len(commonAffixes)*(2*len(separators)+1))
+	add := func(host string) {
+		if _, dup := seen[host]; dup {
+			return
+		}
+		seen[host] = struct{}{}
+		out = append(out, host)
+	}
+
+	// Label mutations: (affix)(sep)(label) and (label)(sep)(affix).
+	for _, aff := range commonAffixes {
+		for _, sep := range separators {
+			for _, label := range [2]string{aff + sep + leaf, leaf + sep + aff} {
+				if rest != "" {
+					label = rest + "." + label
+				}
+				add(label + suffix)
+			}
+		}
+	}
+	// Prepend-an-affix mutation: aff.<prefix>.<target>.
+	for _, aff := range commonAffixes {
+		add(aff + "." + prefix + suffix)
+	}
+	return out
+}
+
+// parseResolvers turns a comma-separated resolver list into "host:port"
+// addresses, appending port 53 where none is given. Bare IPv6 addresses are
+// bracketed. An empty or all-blank list yields config.DefaultResolvers.
+func parseResolvers(s string) []string {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return config.DefaultResolvers
+	}
+	var out []string
+	for _, r := range strings.Split(s, ",") {
+		r = strings.TrimSpace(r)
+		if r == "" {
+			continue
+		}
+		// SplitHostPort fails both for "8.8.8.8" (no port) and for a bare
+		// IPv6 address such as "2001:db8::1" (too many colons); a plain
+		// check for ":" would leave the latter without a port.
+		if _, _, err := net.SplitHostPort(r); err != nil {
+			r = net.JoinHostPort(strings.Trim(r, "[]"), "53")
+		}
+		out = append(out, r)
+	}
+	if len(out) == 0 {
+		return config.DefaultResolvers
+	}
+	return out
+}
diff --git a/internal/modules/ports/ports.go b/internal/modules/ports/ports.go
new file mode 100644
index 0000000..fe55f46
--- /dev/null
+++ b/internal/modules/ports/ports.go
@@ -0,0 +1,120 @@
+// Package ports runs a TCP connect scan on the configured port list for every
+// resolved host. Drains the store at start; also reacts to late DNSResolved
+// events for concurrent discovery phases.
+package ports
+
+import (
+	"context"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	"god-eye/internal/module"
+	"god-eye/internal/scanner"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name under which the port-scan module registers.
+const ModuleName = "ports.scan"
+
+// portsModule is the stateless module implementation.
+type portsModule struct{}
+
+// Register adds the port-scan module to the module registry.
+func Register() { module.Register(&portsModule{}) }
+
+func (*portsModule) Name() string                   { return ModuleName }
+func (*portsModule) Phase() module.Phase            { return module.PhaseEnrichment }
+func (*portsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} }
+func (*portsModule) Produces() []eventbus.EventType { return nil }
+func (*portsModule) DefaultEnabled() bool           { return true }
+
+// Run scans the first IP of every resolved host for the configured ports and
+// appends the open ones to the host's store record. Hosts come from the
+// current store contents and from DNSResolved events that arrive during a
+// 500ms grace window; each host is scanned at most once. It is a no-op when
+// the "no_ports" config flag is set. Run returns nil after all started scans
+// have finished.
+func (*portsModule) Run(mctx module.Context) error {
+	if mctx.Config.Bool("no_ports", false) {
+		return nil
+	}
+	timeout := mctx.Config.Int("timeout", 5)
+	portList := parsePorts(mctx.Config.String("ports", ""))
+
+	var (
+		mu        sync.Mutex
+		processed = make(map[string]struct{})
+		stopped   bool
+		wg        sync.WaitGroup
+	)
+	// claim reserves host for scanning and registers the scan with wg in one
+	// critical section. Once stopped is set no further scans are admitted,
+	// which guarantees that no wg.Add can run concurrently with or after
+	// wg.Wait (a late event handler could otherwise do exactly that).
+	claim := func(host string) bool {
+		mu.Lock()
+		defer mu.Unlock()
+		if stopped {
+			return false
+		}
+		if _, ok := processed[host]; ok {
+			return false
+		}
+		processed[host] = struct{}{}
+		wg.Add(1)
+		return true
+	}
+
+	// scan must only be started after a successful claim for host.
+	scan := func(host, ip string) {
+		defer wg.Done()
+		if mctx.Ctx.Err() != nil {
+			return
+		}
+		open := scanner.ScanPorts(ip, portList, timeout)
+		if len(open) == 0 {
+			return
+		}
+		// Best effort: there is no caller to report a store failure to.
+		_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
+			h.Ports = append(h.Ports, open...)
+		})
+	}
+
+	// Late events. Subscribed before the drain so that hosts resolved while
+	// the store snapshot is being walked are not missed; claim keeps the two
+	// paths from scanning the same host twice.
+	sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) {
+		ev, ok := e.(eventbus.DNSResolved)
+		if !ok || len(ev.IPs) == 0 {
+			return
+		}
+		if claim(ev.Subdomain) {
+			go scan(ev.Subdomain, ev.IPs[0])
+		}
+	})
+
+	// Drain.
+	// NOTE(review): one goroutine per host, unbounded; whether ScanPorts
+	// limits its own parallelism is not visible here.
+	for _, h := range mctx.Store.All(mctx.Ctx) {
+		if h == nil || h.Subdomain == "" || len(h.IPs) == 0 {
+			continue
+		}
+		if claim(h.Subdomain) {
+			go scan(h.Subdomain, h.IPs[0])
+		}
+	}
+
+	// Grace window for late DNSResolved events.
+	select {
+	case <-time.After(500 * time.Millisecond):
+	case <-mctx.Ctx.Done():
+	}
+
+	// Stop admitting work before waiting for the scans in flight.
+	sub.Unsubscribe()
+	mu.Lock()
+	stopped = true
+	mu.Unlock()
+
+	wg.Wait()
+	return nil
+}
+
+// parsePorts parses a comma-separated list of TCP ports. Entries that are
+// not a plain integer in 1..65535 are ignored (so "1-1024" or "80abc" are
+// rejected rather than truncated to their leading digits), duplicates are
+// dropped, and order is preserved. If nothing valid remains, the default
+// list 80, 443, 8080, 8443 is returned.
+func parsePorts(s string) []int {
+	defaults := []int{80, 443, 8080, 8443}
+
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return defaults
+	}
+	var out []int
+	seen := make(map[int]struct{})
+	for _, tok := range strings.Split(s, ",") {
+		port, err := strconv.Atoi(strings.TrimSpace(tok))
+		if err != nil || port < 1 || port > 65535 {
+			continue
+		}
+		if _, dup := seen[port]; dup {
+			continue
+		}
+		seen[port] = struct{}{}
+		out = append(out, port)
+	}
+	if len(out) == 0 {
+		return defaults
+	}
+	return out
+}
diff --git a/internal/modules/recursive/recursive.go b/internal/modules/recursive/recursive.go
new file mode 100644
index 0000000..4fcc232
--- /dev/null
+++ b/internal/modules/recursive/recursive.go
@@ -0,0 +1,117 @@
+// Package recursive is a Phase 0.6 adapter for the v1 recursive discovery
+// engine (pattern learning from found subdomains).
+//
+// Unlike event-driven modules, recursive runs as a deferred second-pass:
+// after PhaseDiscovery completes it collects every host seen so far from
+// the store, runs the v1 engine, and emits SubdomainDiscovered for any
+// new hosts. It self-schedules in PhaseResolution to sit between discovery
+// and HTTP probing.
+package recursive
+
+import (
+	"net"
+	"strings"
+	"time"
+
+	"god-eye/internal/discovery"
+	"god-eye/internal/eventbus"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name under which the recursive module registers and the
+// Source it puts on the events it publishes.
+const ModuleName = "discovery.recursive"
+
+// recModule is the stateless module implementation.
+type recModule struct{}
+
+// Register adds the recursive module to the module registry.
+func Register() { module.Register(&recModule{}) }
+
+func (*recModule) Name() string                   { return ModuleName }
+func (*recModule) Phase() module.Phase            { return module.PhaseResolution } // runs after discovery
+func (*recModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventSubdomainDiscovered} }
+func (*recModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventSubdomainDiscovered}
+}
+
+// Recursive is opt-in by default — profiles enable it for bugbounty/pentest.
+func (*recModule) DefaultEnabled() bool { return false }
+
+// Run feeds every host name currently in the store to the v1 recursive
+// discovery engine and, for each returned name that was not among the seeds,
+// tags it in the store with the "recursive" method and publishes a
+// SubdomainDiscovered event. It is a no-op unless the "recursive" config flag
+// is set. "recursive.depth" is clamped to 1..5.
+func (*recModule) Run(mctx module.Context) error {
+	if !mctx.Config.Bool("recursive", false) {
+		return nil
+	}
+
+	target := mctx.Target
+	depth := mctx.Config.Int("recursive.depth", 3)
+	switch {
+	case depth < 1:
+		depth = 1
+	case depth > 5:
+		depth = 5
+	}
+	timeout := mctx.Config.Int("timeout", 5)
+	conc := mctx.Config.Int("concurrency", 500)
+	if conc <= 0 {
+		conc = 500
+	}
+
+	resolvers := parseResolvers(mctx.Config.String("resolvers", ""))
+
+	// Gather initial seeds from what's been discovered so far.
+	hosts := mctx.Store.All(mctx.Ctx)
+	seeds := make([]string, 0, len(hosts))
+	for _, h := range hosts {
+		if h == nil || h.Subdomain == "" {
+			continue
+		}
+		seeds = append(seeds, h.Subdomain)
+	}
+	if len(seeds) == 0 {
+		return nil
+	}
+
+	rd := discovery.NewRecursiveDiscovery(discovery.RecursiveConfig{
+		Domain:      target,
+		Resolvers:   resolvers,
+		Timeout:     timeout,
+		MaxDepth:    depth,
+		Concurrency: conc,
+	})
+	found := rd.Discover(mctx.Ctx, seeds)
+
+	// Emit SubdomainDiscovered for any new hosts.
+	seen := make(map[string]struct{}, len(seeds)+len(found))
+	for _, s := range seeds {
+		seen[s] = struct{}{}
+	}
+	for _, s := range found {
+		if s == "" {
+			continue
+		}
+		if _, dup := seen[s]; dup {
+			continue
+		}
+		seen[s] = struct{}{}
+
+		// Best effort: the event is published even if the upsert fails.
+		_ = mctx.Store.Upsert(mctx.Ctx, s, func(h *store.Host) {
+			store.AddDiscoveryMethod(h, "recursive")
+		})
+
+		mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{
+			EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: s},
+			Subdomain: s,
+			Method:    "recursive",
+		})
+	}
+	return nil
+}
+
+// parseResolvers turns a comma-separated resolver list into "host:port"
+// addresses, appending port 53 where none is given. Bare IPv6 addresses are
+// bracketed. An empty list, or one that contains only blank entries, yields
+// the built-in defaults 8.8.8.8:53 and 1.1.1.1:53, so the discovery engine is
+// never handed an empty resolver set.
+func parseResolvers(s string) []string {
+	defaults := []string{"8.8.8.8:53", "1.1.1.1:53"}
+
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return defaults
+	}
+	var out []string
+	for _, r := range strings.Split(s, ",") {
+		r = strings.TrimSpace(r)
+		if r == "" {
+			continue
+		}
+		// SplitHostPort fails both for "8.8.8.8" (no port) and for a bare
+		// IPv6 address such as "2001:db8::1" (too many colons); a plain
+		// check for ":" would leave the latter without a port.
+		if _, _, err := net.SplitHostPort(r); err != nil {
+			r = net.JoinHostPort(strings.Trim(r, "[]"), "53")
+		}
+		out = append(out, r)
+	}
+	if len(out) == 0 {
+		return defaults
+	}
+	return out
+}
diff --git a/internal/modules/report/report.go b/internal/modules/report/report.go
new file mode 100644
index 0000000..97d80e4
--- /dev/null
+++ b/internal/modules/report/report.go
@@ -0,0 +1,259 @@
+// Package report writes the final scan output. It consumes the store (not
+// events) at ScanCompleted time and emits TXT / JSON / CSV via the existing
+// v1 output.SaveOutput function. To preserve v1 output shape during the
+// Phase 0.6 migration, store.Host records are projected to the legacy
+// config.SubdomainResult type before serialization.
+package report
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+	"time"
+
+	"god-eye/internal/config"
+	"god-eye/internal/eventbus"
+	"god-eye/internal/module"
+	"god-eye/internal/output"
+)
+
+var _ = time.Now // keep import stable when unused in certain branches
+
+// ModuleName is the name under which the report module registers and the
+// Source it puts on the events it publishes.
+const ModuleName = "report.output"
+
+// reportModule is the stateless module implementation.
+type reportModule struct{}
+
+// Register adds the report module to the module registry.
+func Register() { module.Register(&reportModule{}) }
+
+func (*reportModule) Name() string                   { return ModuleName }
+func (*reportModule) Phase() module.Phase            { return module.PhaseReporting }
+func (*reportModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventScanCompleted} }
+func (*reportModule) Produces() []eventbus.EventType { return nil }
+func (*reportModule) DefaultEnabled() bool           { return true }
+
+// Run projects the store to legacy results and writes them out: as JSON on
+// stdout when "json" is set, otherwise as a console listing (unless "silent")
+// and, when "output" names a path, as a file in the configured "format".
+// It returns the error of a failed file write (also published as a
+// ModuleError) and nil otherwise. With no hosts in the store nothing is
+// written at all.
+func (*reportModule) Run(mctx module.Context) error {
+	// Run does NOT wait for ScanCompleted: this module runs in
+	// PhaseReporting, the last phase, and ScanCompleted fires only after
+	// that phase ends. The subscription below is therefore never acted on;
+	// output is written directly from the store instead.
+	// NOTE(review): the subscription is kept as-is because it is not visible
+	// from here whether the coordinator expects a module to subscribe to the
+	// events it declares in Consumes — confirm and remove if not needed.
+	done := make(chan struct{}, 1)
+	sub := mctx.Bus.Subscribe(eventbus.EventScanCompleted, func(_ context.Context, _ eventbus.Event) {
+		select {
+		case done <- struct{}{}:
+		default:
+		}
+	})
+	defer sub.Unsubscribe()
+	_ = done
+
+	results := projectStoreToResults(mctx)
+	if len(results) == 0 {
+		// NOTE(review): this also suppresses the JSON document in "json"
+		// mode, so machine consumers get empty stdout for an empty scan.
+		return nil
+	}
+
+	silent := mctx.Config.Bool("silent", false)
+	jsonStdout := mctx.Config.Bool("json", false)
+	onlyActive := mctx.Config.Bool("only_active", false)
+	outPath := mctx.Config.String("output", "")
+	format := mctx.Config.String("format", "txt")
+
+	if jsonStdout {
+		// Project a minimal JSON report to stdout, shape-compatible with v1.
+		writeJSONStdout(mctx, results)
+		return nil
+	}
+
+	// Console presentation — only when not silent / not JSON-only mode.
+	if !silent {
+		printResults(results, onlyActive)
+	}
+
+	if outPath != "" {
+		if err := writeFile(outPath, format, results); err != nil {
+			mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
+				EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
+				Module:    ModuleName,
+				Err:       fmt.Sprintf("write output %s: %v", outPath, err),
+			})
+			return err
+		}
+	}
+
+	return nil
+}
+
+// projectStoreToResults converts store.Host records to the legacy
+// config.SubdomainResult shape expected by the v1 output writer, keyed by
+// subdomain. Slices are copied so the results do not alias store data. Doing
+// the projection here keeps the store schema decoupled from the v1 output
+// format.
+func projectStoreToResults(mctx module.Context) map[string]*config.SubdomainResult {
+	hosts := mctx.Store.All(mctx.Ctx)
+	out := make(map[string]*config.SubdomainResult, len(hosts))
+	for _, h := range hosts {
+		if h == nil {
+			continue
+		}
+		r := &config.SubdomainResult{
+			Subdomain:     h.Subdomain,
+			IPs:           append([]string(nil), h.IPs...),
+			CNAME:         h.CNAME,
+			PTR:           h.PTR,
+			ASN:           h.ASN,
+			Org:           h.Org,
+			Country:       h.Country,
+			City:          h.City,
+			StatusCode:    h.StatusCode,
+			ContentLength: h.ContentLength,
+			Title:         h.Title,
+			Server:        h.Server,
+			Tech:          append([]string(nil), h.Technologies...),
+			WAF:           h.WAF,
+			TLSVersion:    h.TLSVersion,
+			TLSIssuer:     h.TLSIssuer,
+			TLSSelfSigned: h.TLSSelfSigned,
+			Ports:         append([]int(nil), h.Ports...),
+			ResponseMs:    h.ResponseMs,
+			CloudProvider: h.CloudProvider,
+		}
+		if !h.TLSExpiry.IsZero() {
+			r.TLSExpiry = h.TLSExpiry.Format("2006-01-02")
+		}
+		if h.TLSFingerprint != nil {
+			r.TLSFingerprint = &config.TLSFingerprint{
+				Vendor:        h.TLSFingerprint.Vendor,
+				Product:       h.TLSFingerprint.Product,
+				Version:       h.TLSFingerprint.Version,
+				ApplianceType: h.TLSFingerprint.ApplianceKind,
+				InternalHosts: append([]string(nil), h.TLSFingerprint.InternalHosts...),
+			}
+		}
+		if h.Takeover != nil {
+			r.Takeover = h.Takeover.Service
+		}
+		// Flatten vulnerabilities → scalar fields v1 consumers expect.
+		for _, v := range h.Vulnerabilities {
+			switch v.ID {
+			case "open-redirect":
+				r.OpenRedirect = true
+			case "cors-misconfig":
+				r.CORSMisconfig = v.Description
+			case "dangerous-http-methods":
+				// Splitting an empty string yields [""], which would show
+				// up as a bogus empty method name.
+				if v.Evidence != "" {
+					r.DangerousMethods = append(r.DangerousMethods, strings.Split(v.Evidence, ", ")...)
+				}
+			case "git-exposed":
+				r.GitExposed = true
+			case "svn-exposed":
+				r.SvnExposed = true
+			case "backup-file":
+				r.BackupFiles = append(r.BackupFiles, v.URL)
+			}
+		}
+		// Secrets → legacy field
+		for _, s := range h.Secrets {
+			r.JSSecrets = append(r.JSSecrets, s.Match)
+		}
+		// CVEs / AI
+		for _, c := range h.CVEs {
+			r.CVEFindings = append(r.CVEFindings, c.ID)
+		}
+		for _, a := range h.AIFindings {
+			r.AIFindings = append(r.AIFindings, a.Title)
+			// Severity and model are taken from the first finding that
+			// carries one.
+			if r.AISeverity == "" {
+				r.AISeverity = a.Severity
+			}
+			if r.AIModel == "" {
+				r.AIModel = a.Model
+			}
+		}
+		out[h.Subdomain] = r
+	}
+	return out
+}
+
+// printResults is a minimal table print of the results, ordered by
+// sortResultsForPrint, followed by a total/active summary line. Hosts
+// without an HTTP status are listed without a status (or skipped entirely
+// when onlyActive is set); every host with a non-zero status counts as
+// active in the summary. The full v1 presentation is re-introduced when the
+// TUI module lands in Phase 4.
+func printResults(results map[string]*config.SubdomainResult, onlyActive bool) {
+	// Sorted output for determinism.
+	names := make([]string, 0, len(results))
+	for n := range results {
+		names = append(names, n)
+	}
+	// 2xx/3xx hosts first, then by name.
+	sortResultsForPrint(names, results)
+
+	// NOTE(review): two marker literals below look mis-encoded (probably
+	// U+25CB WHITE CIRCLE); they are left untouched — confirm the file
+	// encoding.
+	active := 0
+	for _, n := range names {
+		r := results[n]
+		if r.StatusCode == 0 {
+			if onlyActive {
+				continue
+			}
+			fmt.Printf(" %s %s\n", output.Dim("β—‹"), r.Subdomain)
+			continue
+		}
+		active++
+		marker := output.Green("●")
+		if r.StatusCode >= 300 && r.StatusCode < 400 {
+			marker = output.Yellow("◐")
+		} else if r.StatusCode >= 400 {
+			marker = output.Red("β—‹")
+		}
+		tech := ""
+		if len(r.Tech) > 0 {
+			tech = output.Dim(" [" + strings.Join(r.Tech, ", ") + "]")
+		}
+		fmt.Printf(" %s %s %s%s\n", marker, r.Subdomain, output.Dim(fmt.Sprintf("[%d]", r.StatusCode)), tech)
+	}
+	fmt.Println()
+	fmt.Printf(" %s total, %s active\n", output.BoldWhite(fmt.Sprintf("%d", len(results))), output.BoldGreen(fmt.Sprintf("%d", active)))
+}
+
+// sortResultsForPrint orders names in place according to lessResult applied
+// to their results. Every name must be a key of results.
+func sortResultsForPrint(names []string, results map[string]*config.SubdomainResult) {
+	// Stable O(n log n) sort; scans routinely produce thousands of hosts.
+	sort.SliceStable(names, func(i, j int) bool {
+		return lessResult(results[names[i]], results[names[j]])
+	})
+}
+
+// lessResult reports whether a sorts before b: hosts with a 2xx/3xx status
+// come first, ties are broken by subdomain name.
+func lessResult(a, b *config.SubdomainResult) bool {
+	aActive := a.StatusCode >= 200 && a.StatusCode < 400
+	bActive := b.StatusCode >= 200 && b.StatusCode < 400
+	if aActive != bActive {
+		return aActive
+	}
+	return a.Subdomain < b.Subdomain
+}
+
+// writeFile saves results to path in the given format (default "txt").
+// The v1 output.SaveOutput returns nothing, so the only errors surfaced are
+// those of a pre-flight create/close of the target file; note that this
+// pre-flight truncates an existing file.
+func writeFile(path, format string, results map[string]*config.SubdomainResult) error {
+	format = strings.ToLower(strings.TrimSpace(format))
+	if format == "" {
+		format = "txt"
+	}
+	// Pre-flight: make sure we can create the target file before delegating.
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+	output.SaveOutput(path, format, results)
+	return nil
+}
+
+// writeJSONStdout emits a v2-native minimal JSON dump ("target" and
+// "subdomains") to stdout. An encoding failure is published as a
+// ModuleError. This is intentionally simpler than v1's ReportBuilder — when
+// the full report generator lands in Phase 4 (Reporting), this is where
+// it'll be wired.
+func writeJSONStdout(mctx module.Context, results map[string]*config.SubdomainResult) {
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", " ")
+	err := enc.Encode(map[string]interface{}{
+		"target":     mctx.Target,
+		"subdomains": results,
+	})
+	if err != nil {
+		mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
+			EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
+			Module:    ModuleName,
+			Err:       fmt.Sprintf("write json to stdout: %v", err),
+		})
+	}
+}
diff --git a/internal/modules/reversedns/reversedns.go b/internal/modules/reversedns/reversedns.go
new file mode 100644
index 0000000..33b5110
--- /dev/null
+++ b/internal/modules/reversedns/reversedns.go
@@ -0,0 +1,143 @@
+// Package reversedns expands discovery by doing PTR sweeps on the addresses
+// surrounding every resolved IP. Finds internal/forgotten hosts that share
+// infrastructure with already-known subdomains.
+//
+// Intentionally conservative: only sweeps +/- sweepRange (16) addresses
+// around seen IPs to keep traffic bounded and avoid accidentally pulling a
+// huge non-scoped ASN.
+package reversedns
+
+import (
+	"fmt"
+	"net"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/config"
+	godns "god-eye/internal/dns"
+	"god-eye/internal/eventbus"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name under which the reverse-DNS module registers and
+// the Source it puts on the events it publishes.
+const ModuleName = "discovery.reverse-dns"
+
+// rdnsModule is the stateless module implementation.
+type rdnsModule struct{}
+
+// Register adds the reverse-DNS module to the module registry.
+func Register() { module.Register(&rdnsModule{}) }
+
+func (*rdnsModule) Name() string                   { return ModuleName }
+func (*rdnsModule) Phase() module.Phase            { return module.PhaseResolution }
+func (*rdnsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} }
+func (*rdnsModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventSubdomainDiscovered}
+}
+
+// Opt-in: generates a lot of DNS queries; on by default for bugbounty profile.
+func (*rdnsModule) DefaultEnabled() bool { return false }
+
+const sweepRange = 16 // how many addresses to scan either side of each seed IP
+
+// Run performs a PTR lookup for every address within sweepRange of any IP
+// held in the store (at most 64 lookups in flight, each address queried
+// once). A PTR name that equals the target or lies below it is upserted with
+// the queried address and the "reverse-dns" method and announced with a
+// SubdomainDiscovered event. It is a no-op unless the "reverse_dns" config
+// flag is set. Cancelling the context stops the dispatch of new lookups;
+// lookups already running are waited for.
+func (*rdnsModule) Run(mctx module.Context) error {
+	if !mctx.Config.Bool("reverse_dns", false) {
+		return nil
+	}
+	target := mctx.Target
+	timeout := mctx.Config.Int("timeout", 5)
+	resolvers := parseResolvers(mctx.Config.String("resolvers", ""))
+
+	seeds := mctx.Store.All(mctx.Ctx)
+	seenIP := make(map[string]struct{})
+	for _, h := range seeds {
+		if h == nil {
+			continue
+		}
+		for _, ip := range h.IPs {
+			seenIP[ip] = struct{}{}
+		}
+	}
+
+	// Seeds that sit close together have overlapping sweep windows; remember
+	// what was already dispatched so no address is looked up twice.
+	queried := make(map[string]struct{})
+
+	var wg sync.WaitGroup
+	sem := make(chan struct{}, 64)
+sweep:
+	for ip := range seenIP {
+		for _, neighbor := range neighbors(ip, sweepRange) {
+			if _, dup := queried[neighbor]; dup {
+				continue
+			}
+			queried[neighbor] = struct{}{}
+
+			// Acquire a slot, but never block past cancellation. The label
+			// leaves both loops; a plain break would only end the inner one.
+			select {
+			case sem <- struct{}{}:
+			case <-mctx.Ctx.Done():
+				break sweep
+			}
+			wg.Add(1)
+			go func(ipAddr string) {
+				defer wg.Done()
+				defer func() { <-sem }()
+
+				if mctx.Ctx.Err() != nil {
+					return
+				}
+				name := godns.ResolvePTR(ipAddr, resolvers, timeout)
+				if name == "" {
+					return
+				}
+				name = strings.ToLower(strings.TrimSuffix(name, "."))
+				if !strings.HasSuffix(name, "."+target) && name != target {
+					return
+				}
+				// NOTE(review): the PTR answer is trusted as-is; the name is
+				// not forward-resolved to confirm it maps back to ipAddr.
+				// Best effort: the event is published even if the upsert fails.
+				_ = mctx.Store.Upsert(mctx.Ctx, name, func(h *store.Host) {
+					store.AddIPs(h, []string{ipAddr})
+					store.AddDiscoveryMethod(h, "reverse-dns")
+				})
+				mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{
+					EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: name},
+					Subdomain: name,
+					Method:    "reverse-dns",
+				})
+			}(neighbor)
+		}
+	}
+	wg.Wait()
+	return nil
+}
+
+// neighbors returns the IPv4 addresses within +/- rng of ip, in ascending
+// order and including ip itself; addresses that would fall outside the IPv4
+// range are omitted. IPv6 addresses are returned as a single-element slice
+// (no sweep — address space too large, and we'd rarely find anything
+// anyway). An unparsable ipStr yields nil.
+func neighbors(ipStr string, rng int) []string {
+	ip := net.ParseIP(ipStr)
+	if ip == nil {
+		return nil
+	}
+	v4 := ip.To4()
+	if v4 == nil {
+		return []string{ipStr}
+	}
+	if rng < 0 {
+		return nil
+	}
+
+	// Convert to uint32 for arithmetic.
+	base := uint32(v4[0])<<24 | uint32(v4[1])<<16 | uint32(v4[2])<<8 | uint32(v4[3])
+
+	out := make([]string, 0, 2*rng+1)
+	for delta := -rng; delta <= rng; delta++ {
+		// int64 so that stepping below 0.0.0.0 or above 255.255.255.255 is
+		// detected instead of wrapping around.
+		candidate := int64(base) + int64(delta)
+		if candidate < 0 || candidate > 0xFFFFFFFF {
+			continue
+		}
+		c := uint32(candidate)
+		out = append(out, fmt.Sprintf("%d.%d.%d.%d", c>>24&0xFF, c>>16&0xFF, c>>8&0xFF, c&0xFF))
+	}
+	return out
+}
+
+// parseResolvers turns a comma-separated resolver list into "host:port"
+// addresses, appending port 53 where none is given. Bare IPv6 addresses are
+// bracketed. An empty or all-blank list yields config.DefaultResolvers.
+func parseResolvers(s string) []string {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return config.DefaultResolvers
+	}
+	var out []string
+	for _, r := range strings.Split(s, ",") {
+		r = strings.TrimSpace(r)
+		if r == "" {
+			continue
+		}
+		// SplitHostPort fails both for "8.8.8.8" (no port) and for a bare
+		// IPv6 address such as "2001:db8::1" (too many colons); a plain
+		// check for ":" would leave the latter without a port.
+		if _, _, err := net.SplitHostPort(r); err != nil {
+			r = net.JoinHostPort(strings.Trim(r, "[]"), "53")
+		}
+		out = append(out, r)
+	}
+	if len(out) == 0 {
+		return config.DefaultResolvers
+	}
+	return out
+}
diff --git a/internal/modules/security/security.go b/internal/modules/security/security.go
new file mode 100644
index 0000000..ca8e304
--- /dev/null
+++ b/internal/modules/security/security.go
@@ -0,0 +1,241 @@
+// Package security runs the v1 security checks (open redirect, CORS,
+// HTTP methods, git/svn, backups, admin, API) on every probed host.
+//
+// Reads hosts from the store (not events) so late-start phases don't miss
+// the upstream HTTPProbed events.
+package security
+
+import (
+	"context"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	gohttp "god-eye/internal/http"
+	"god-eye/internal/module"
+	"god-eye/internal/security"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name under which the security-checks module registers
+// and the Source it puts on the events it publishes.
+const ModuleName = "security.checks"
+
+// secModule is the stateless module implementation.
+type secModule struct{}
+
+// Register adds the security-checks module to the module registry.
+func Register() { module.Register(&secModule{}) }
+
+func (*secModule) Name() string                   { return ModuleName }
+func (*secModule) Phase() module.Phase            { return module.PhaseAnalysis }
+func (*secModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} }
+func (*secModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventVulnerability} }
+func (*secModule) DefaultEnabled() bool           { return true }
+
+// Run executes runChecks once for every host that has an HTTP status, using
+// a pool of "concurrency" workers. Hosts come from the current store
+// contents and from HTTPProbed events that arrive during a 500ms grace
+// window. Run returns nil after the queue has been closed and all workers
+// have finished.
+func (*secModule) Run(mctx module.Context) error {
+	conc := mctx.Config.Int("concurrency", 200)
+	if conc <= 0 {
+		conc = 200
+	}
+	timeout := mctx.Config.Int("timeout", 5)
+
+	processed := make(map[string]struct{})
+	var processedMu sync.Mutex
+	shouldProcess := func(host string) bool {
+		processedMu.Lock()
+		defer processedMu.Unlock()
+		if _, dup := processed[host]; dup {
+			return false
+		}
+		processed[host] = struct{}{}
+		return true
+	}
+
+	work := make(chan string, conc*2)
+	var wg sync.WaitGroup
+	for i := 0; i < conc; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for host := range work {
+				runChecks(mctx, host, timeout)
+			}
+		}()
+	}
+
+	// The queue is fed both by this goroutine and by bus handlers. sendMu
+	// makes "is the queue closed?" + send atomic with respect to close, so a
+	// handler that fires late can never send on the closed channel (which
+	// would panic).
+	var (
+		sendMu sync.RWMutex
+		closed bool
+	)
+	// enqueue reports whether host was queued; it gives up on cancellation.
+	enqueue := func(host string) bool {
+		sendMu.RLock()
+		defer sendMu.RUnlock()
+		if closed {
+			return false
+		}
+		select {
+		case work <- host:
+			return true
+		case <-mctx.Ctx.Done():
+			return false
+		}
+	}
+	// shutdown closes the queue and waits for the workers. Senders blocked
+	// in enqueue still make progress while the lock is awaited, because the
+	// workers keep draining until the channel is closed.
+	shutdown := func() {
+		sendMu.Lock()
+		closed = true
+		close(work)
+		sendMu.Unlock()
+		wg.Wait()
+	}
+
+	// Drain: every host that got a successful HTTP probe.
+	for _, h := range mctx.Store.All(mctx.Ctx) {
+		if h == nil || h.StatusCode == 0 {
+			continue
+		}
+		if !shouldProcess(h.Subdomain) {
+			continue
+		}
+		if !enqueue(h.Subdomain) {
+			// Cancelled while queueing.
+			shutdown()
+			return nil
+		}
+	}
+
+	// Listen for late HTTPProbed events.
+	sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) {
+		ev, ok := e.(eventbus.HTTPProbed)
+		if !ok || ev.StatusCode == 0 {
+			return
+		}
+		host := ev.Meta().Target
+		if !shouldProcess(host) {
+			return
+		}
+		enqueue(host)
+	})
+
+	select {
+	case <-time.After(500 * time.Millisecond):
+	case <-mctx.Ctx.Done():
+	}
+
+	// Stop new deliveries first; handlers that are already running are
+	// covered by the closed flag.
+	sub.Unsubscribe()
+	shutdown()
+	return nil
+}
+
+// runChecks runs the seven v1 security checks against host in parallel with
+// the shared HTTP client, appends one store.Vulnerability per finding to the
+// host's store record and publishes a VulnerabilityFound event for each.
+// Allowed methods, admin panels and API endpoints are collected but not
+// reported yet. It does nothing once the context is cancelled.
+func runChecks(mctx module.Context, host string, timeout int) {
+	if mctx.Ctx.Err() != nil {
+		return
+	}
+
+	client := gohttp.GetSharedClient(timeout)
+
+	var openRedirect bool
+	var cors string
+	var allowed, dangerous []string
+	var admin, backups, apis []string
+	var gitExposed, svnExposed bool
+
+	// Each goroutine writes only its own variables; wg.Wait orders those
+	// writes before the reads below.
+	var wg sync.WaitGroup
+	wg.Add(7)
+	go func() { defer wg.Done(); openRedirect = security.CheckOpenRedirectWithClient(host, client) }()
+	go func() { defer wg.Done(); cors = security.CheckCORSWithClient(host, client) }()
+	go func() { defer wg.Done(); allowed, dangerous = security.CheckHTTPMethodsWithClient(host, client) }()
+	go func() { defer wg.Done(); admin = security.CheckAdminPanelsWithClient(host, client) }()
+	go func() { defer wg.Done(); gitExposed, svnExposed = security.CheckGitSvnExposureWithClient(host, client) }()
+	go func() { defer wg.Done(); backups = security.CheckBackupFilesWithClient(host, client) }()
+	go func() { defer wg.Done(); apis = security.CheckAPIEndpointsWithClient(host, client) }()
+	wg.Wait()
+
+	// One timestamp for the store records and the events of this run.
+	now := time.Now()
+	// NOTE(review): URLs are always built with "https://", even if the host
+	// was probed over plain HTTP — confirm this is intended.
+	dangerousList := joinStrings(dangerous, ", ")
+
+	// Best effort: the events below are published even if the upsert fails.
+	_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
+		if openRedirect {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID: "open-redirect", Title: "Open Redirect",
+				Description: "Server redirects to attacker-controlled URL via redirect parameter",
+				Severity:    string(eventbus.SeverityMedium),
+				URL:         "https://" + host,
+				OWASP:       "A01:2021-Broken Access Control",
+				FoundAt:     now,
+			})
+		}
+		if cors != "" {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID: "cors-misconfig", Title: "CORS Misconfiguration",
+				Description: cors,
+				Severity:    string(eventbus.SeverityHigh),
+				URL:         "https://" + host,
+				OWASP:       "A05:2021-Security Misconfiguration",
+				FoundAt:     now,
+			})
+		}
+		if len(dangerous) > 0 {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID: "dangerous-http-methods", Title: "Dangerous HTTP Methods Enabled",
+				Description: "Server allows potentially dangerous methods",
+				Severity:    string(eventbus.SeverityMedium),
+				Evidence:    dangerousList,
+				URL:         "https://" + host,
+				OWASP:       "A05:2021-Security Misconfiguration",
+				FoundAt:     now,
+			})
+		}
+		if gitExposed {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID: "git-exposed", Title: "Git Repository Exposed",
+				Description: ".git directory is publicly accessible",
+				Severity:    string(eventbus.SeverityCritical),
+				URL:         "https://" + host + "/.git/config",
+				OWASP:       "A05:2021-Security Misconfiguration",
+				FoundAt:     now,
+			})
+		}
+		if svnExposed {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID: "svn-exposed", Title: "SVN Repository Exposed",
+				Description: ".svn directory is publicly accessible",
+				Severity:    string(eventbus.SeverityHigh),
+				URL:         "https://" + host + "/.svn/entries",
+				OWASP:       "A05:2021-Security Misconfiguration",
+				FoundAt:     now,
+			})
+		}
+		for _, b := range backups {
+			h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{
+				ID: "backup-file", Title: "Backup File Exposed",
+				Description: "Backup file accessible: " + b,
+				Severity:    string(eventbus.SeverityHigh),
+				URL:         b,
+				OWASP:       "A05:2021-Security Misconfiguration",
+				FoundAt:     now,
+			})
+		}
+		// Collected but not persisted yet.
+		_ = allowed
+		_ = admin
+		_ = apis
+	})
+
+	base := eventbus.EventMeta{At: now, Source: ModuleName, Target: host}
+	emit := func(ev eventbus.VulnerabilityFound) { mctx.Bus.Publish(mctx.Ctx, ev) }
+
+	if openRedirect {
+		emit(eventbus.VulnerabilityFound{EventMeta: base, ID: "open-redirect", Title: "Open Redirect",
+			Severity: eventbus.SeverityMedium, URL: "https://" + host, OWASP: "A01:2021-Broken Access Control"})
+	}
+	if cors != "" {
+		emit(eventbus.VulnerabilityFound{EventMeta: base, ID: "cors-misconfig", Title: "CORS Misconfiguration",
+			Description: cors, Severity: eventbus.SeverityHigh, URL: "https://" + host, OWASP: "A05:2021-Security Misconfiguration"})
+	}
+	if len(dangerous) > 0 {
+		emit(eventbus.VulnerabilityFound{EventMeta: base, ID: "dangerous-http-methods", Title: "Dangerous HTTP Methods",
+			Evidence: dangerousList, Severity: eventbus.SeverityMedium, URL: "https://" + host,
+			OWASP: "A05:2021-Security Misconfiguration"})
+	}
+	if gitExposed {
+		emit(eventbus.VulnerabilityFound{EventMeta: base, ID: "git-exposed", Title: "Git Repository Exposed",
+			Severity: eventbus.SeverityCritical, URL: "https://" + host + "/.git/config",
+			OWASP: "A05:2021-Security Misconfiguration"})
+	}
+	if svnExposed {
+		emit(eventbus.VulnerabilityFound{EventMeta: base, ID: "svn-exposed", Title: "SVN Repository Exposed",
+			Severity: eventbus.SeverityHigh, URL: "https://" + host + "/.svn/entries",
+			OWASP: "A05:2021-Security Misconfiguration"})
+	}
+	for _, b := range backups {
+		emit(eventbus.VulnerabilityFound{EventMeta: base, ID: "backup-file", Title: "Backup File Exposed",
+			Severity: eventbus.SeverityHigh, URL: b, OWASP: "A05:2021-Security Misconfiguration"})
+	}
+}
+
+// joinStrings concatenates ss with sep between the elements; an empty slice
+// yields "". Kept as a thin wrapper over strings.Join for existing callers.
+func joinStrings(ss []string, sep string) string {
+	return strings.Join(ss, sep)
+}
diff --git a/internal/modules/smuggling/smuggling.go b/internal/modules/smuggling/smuggling.go
new file mode 100644
index 0000000..6c3e54c
--- /dev/null
+++ b/internal/modules/smuggling/smuggling.go
@@ -0,0 +1,227 @@
+// Package smuggling detects HTTP request smuggling (CL.TE and TE.CL
+// variants) by sending ambiguous Content-Length / Transfer-Encoding
+// combinations and timing-analyzing the responses.
+//
+// This is the non-destructive timing variant: we send a request crafted
+// so that CL.TE or TE.CL parsing desync would cause the server to hold
+// the connection waiting for more bytes, while the correct interpretation
+// returns immediately. Large response time delta ⇒ likely smuggling.
+//
+// We do NOT attempt to actually smuggle follow-up requests — that could
+// affect other users. This is safe for authorized testing.
+package smuggling
+
+import (
+	"bufio"
+	"context"
+	"crypto/tls"
+	"fmt"
+	"net"
+	"strings"
+	"sync"
+	"time"
+
+	"god-eye/internal/eventbus"
+	"god-eye/internal/module"
+	"god-eye/internal/store"
+)
+
+// ModuleName is the name under which the request-smuggling module registers.
+const ModuleName = "vuln.http-smuggling"
+
+// smModule is the stateless module implementation.
+type smModule struct{}
+
+// Register adds the request-smuggling module to the module registry.
+func Register() { module.Register(&smModule{}) }
+
+func (*smModule) Name() string                   { return ModuleName }
+func (*smModule) Phase() module.Phase            { return module.PhaseAnalysis }
+func (*smModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} }
+func (*smModule) Produces() []eventbus.EventType {
+	return []eventbus.EventType{eventbus.EventVulnerability}
+}
+
+// Opt-in: timing-based testing is slower and can be noisy. Bugbounty profile enables it.
+func (*smModule) DefaultEnabled() bool { return false } + +func (*smModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("smuggling_scan", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 10) + + processed := make(map[string]struct{}) + var mu sync.Mutex + shouldProcess := func(host string) bool { + mu.Lock() + defer mu.Unlock() + if _, ok := processed[host]; ok { + return false + } + processed[host] = struct{}{} + return true + } + + var wg sync.WaitGroup + + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.StatusCode == 0 { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + host := h.Subdomain + wg.Add(1) + go func() { defer wg.Done(); probe(mctx, host, timeout) }() + } + + sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.HTTPProbed) + if !ok || ev.StatusCode == 0 { + return + } + host := ev.Meta().Target + if !shouldProcess(host) { + return + } + wg.Add(1) + go func() { defer wg.Done(); probe(mctx, host, timeout) }() + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + wg.Wait() + return nil +} + +func probe(mctx module.Context, host string, timeoutSec int) { + timeout := time.Duration(timeoutSec) * time.Second + + // Baseline: normal request, measure response time. + baseline, err := sendRequest(host, baselineRequest(host), timeout) + if err != nil { + return + } + + // CL.TE probe: Content-Length says more data coming, TE: chunked says "last chunk now". + // Vulnerable servers that read TE first return quickly; non-vulnerable + // servers that read CL wait for more bytes and hit the read timeout. + cltePayload := clteRequest(host) + clte, _ := sendRequest(host, cltePayload, timeout) + + // TE.CL probe: reversed β€” server reads CL first (ignoring chunked), payload is poisoned. 
// baselineRequest returns the raw bytes of a plain "GET /" HTTP/1.1 request
// for host, with a fixed User-Agent and "Connection: close". The response
// time of this request is the reference the timing probes are compared to.
func baselineRequest(host string) string {
	const crlf = "\r\n"

	req := "GET / HTTP/1.1" + crlf
	req += "Host: " + host + crlf
	req += "User-Agent: god-eye-v2" + crlf
	req += "Connection: close" + crlf
	// Blank line terminates the header section.
	return req + crlf
}
+func teclRequest(host string) string { + body := "12\r\n" + + "GPOST / HTTP/1.1\r\n" + + "\r\n0\r\n\r\n" + return fmt.Sprintf("POST / HTTP/1.1\r\n"+ + "Host: %s\r\n"+ + "User-Agent: god-eye-v2\r\n"+ + "Content-Length: 3\r\n"+ + "Transfer-Encoding: chunked\r\n"+ + "Connection: close\r\n"+ + "\r\n%s", host, body) +} + +// sendRequest opens a raw TCP/TLS connection, writes raw HTTP bytes, and +// returns the time until the first response line is read (or timeout). +func sendRequest(host, payload string, timeout time.Duration) (probeResult, error) { + dialer := &net.Dialer{Timeout: timeout} + conn, err := tls.DialWithDialer(dialer, "tcp", host+":443", &tls.Config{ + InsecureSkipVerify: true, + ServerName: host, + }) + if err != nil { + return probeResult{}, err + } + defer conn.Close() + + _ = conn.SetDeadline(time.Now().Add(timeout)) + + start := time.Now() + if _, err := conn.Write([]byte(payload)); err != nil { + return probeResult{duration: time.Since(start)}, err + } + + br := bufio.NewReader(conn) + line, err := br.ReadString('\n') + return probeResult{duration: time.Since(start), response: line}, err +} + +func emit(mctx module.Context, host, kind, title string, r probeResult) { + now := time.Now() + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Vulnerabilities = append(h.Vulnerabilities, store.Vulnerability{ + ID: "http-smuggling-" + strings.ToLower(kind), + Title: title, + Description: kind + " desync candidate based on response-time delta (" + r.duration.String() + ").", + Severity: string(eventbus.SeverityHigh), + URL: "https://" + host, + Evidence: strings.TrimSpace(r.response), + Remediation: "Ensure front-end and back-end parse Content-Length and Transfer-Encoding identically. 
Reject requests with both headers.", + OWASP: "A06:2021-Vulnerable and Outdated Components", + FoundAt: now, + }) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.VulnerabilityFound{ + EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host}, + ID: "http-smuggling-" + strings.ToLower(kind), + Title: title, + Description: "Timing-based " + kind + " desync candidate.", + Severity: eventbus.SeverityHigh, + URL: "https://" + host, + Evidence: strings.TrimSpace(r.response), + Remediation: "Align CL/TE parsing between front-end and back-end.", + OWASP: "A06:2021-Vulnerable and Outdated Components", + }) +} + diff --git a/internal/modules/supplychain/supplychain.go b/internal/modules/supplychain/supplychain.go new file mode 100644 index 0000000..0105530 --- /dev/null +++ b/internal/modules/supplychain/supplychain.go @@ -0,0 +1,192 @@ +// Package supplychain enumerates npm and PyPI packages that reference the +// target domain in their source, then flags packages as potential supply +// chain assets. Useful for discovering internal-only tools published by +// mistake to public registries and for finding branded utility packages +// that could reveal internal endpoints/secrets. +// +// This is a discovery-oriented check. Actually downloading + scanning +// package contents for secrets is a Fase 2 follow-up; here we just surface +// the packages and the URLs they point at. 
+package supplychain + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/sources" + "god-eye/internal/store" +) + +const ModuleName = "vuln.supply-chain" + +type scModule struct{} + +func Register() { module.Register(&scModule{}) } + +func (*scModule) Name() string { return ModuleName } +func (*scModule) Phase() module.Phase { return module.PhaseDiscovery } +func (*scModule) Consumes() []eventbus.EventType { return nil } +func (*scModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered, eventbus.EventAPIFinding} +} +func (*scModule) DefaultEnabled() bool { return true } + +func (*scModule) Run(mctx module.Context) error { + target := mctx.Target + if target == "" { + return nil + } + + var wg sync.WaitGroup + wg.Add(2) + go func() { defer wg.Done(); checkNPM(mctx, target) }() + go func() { defer wg.Done(); checkPyPI(mctx, target) }() + wg.Wait() + return nil +} + +// checkNPM uses npm's registry search API. Packages matching "" +// or "" are surfaced. +func checkNPM(mctx module.Context, target string) { + q := extractBrand(target) + if q == "" { + return + } + url := fmt.Sprintf("https://registry.npmjs.org/-/v1/search?text=%s&size=100", q) + body, err := fetchJSON(mctx.Ctx, url, 15*time.Second) + if err != nil { + return + } + + var parsed struct { + Objects []struct { + Package struct { + Name string `json:"name"` + Links map[string]string `json:"links"` + Description string `json:"description"` + } `json:"package"` + } `json:"objects"` + } + _ = json.Unmarshal(body, &parsed) + + for _, obj := range parsed.Objects { + pkg := obj.Package + text := pkg.Name + " " + pkg.Description + for _, link := range pkg.Links { + text += " " + link + } + if !strings.Contains(strings.ToLower(text), target) { + continue + } + // Emit an APIFinding for discovery context. 
+ mctx.Bus.Publish(mctx.Ctx, eventbus.APIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: target}, + Kind: "supply-chain:npm", + URL: "https://www.npmjs.com/package/" + pkg.Name, + Issue: "npm package references target: " + pkg.Name + " β€” " + pkg.Description, + Severity: eventbus.SeverityInfo, + }) + // If the description or links contain subdomains of the target, + // also feed them into discovery. + for _, sub := range sources.ExtractSubdomains(text, target) { + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "supply-chain:npm:"+pkg.Name) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "supply-chain:npm:" + pkg.Name, + }) + } + } +} + +func checkPyPI(mctx module.Context, target string) { + // PyPI no longer supports XML-RPC search; use the simple index + // (all packages) scanning is too expensive. Instead query a few + // likely branded package prefixes via the JSON index. + q := extractBrand(target) + if q == "" { + return + } + // Try exact-name lookups for common variants. 
+ candidates := []string{q, q + "-cli", q + "-sdk", q + "-api", q + "-client"} + for _, name := range candidates { + url := "https://pypi.org/pypi/" + name + "/json" + body, err := fetchJSON(mctx.Ctx, url, 10*time.Second) + if err != nil || len(body) < 50 { + continue + } + var parsed struct { + Info struct { + Name string `json:"name"` + Summary string `json:"summary"` + HomePage string `json:"home_page"` + ProjectURL string `json:"project_url"` + ProjectURLs map[string]string `json:"project_urls"` + } `json:"info"` + } + _ = json.Unmarshal(body, &parsed) + info := parsed.Info + if info.Name == "" { + continue + } + text := info.Name + " " + info.Summary + " " + info.HomePage + " " + info.ProjectURL + for _, u := range info.ProjectURLs { + text += " " + u + } + if !strings.Contains(strings.ToLower(text), target) { + continue + } + mctx.Bus.Publish(mctx.Ctx, eventbus.APIFinding{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: target}, + Kind: "supply-chain:pypi", + URL: "https://pypi.org/project/" + info.Name + "/", + Issue: "PyPI package references target: " + info.Name + " β€” " + info.Summary, + Severity: eventbus.SeverityInfo, + }) + for _, sub := range sources.ExtractSubdomains(text, target) { + _ = mctx.Store.Upsert(mctx.Ctx, sub, func(h *store.Host) { + store.AddDiscoveryMethod(h, "supply-chain:pypi:"+info.Name) + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: sub}, + Subdomain: sub, + Method: "supply-chain:pypi:" + info.Name, + }) + } + } +} + +// extractBrand returns the "brand" (second-to-last label) from example.com β†’ +// "example". Used as the package-search query term. 
// fetchJSON performs a GET request for url and returns up to 4 MiB of the
// response body.
//
// The request is bound to ctx and additionally limited by timeout. A
// non-2xx response is reported as an error instead of handing an error page
// to the caller as if it were JSON. The body is not validated as JSON here;
// callers decode it themselves.
func fetchJSON(ctx context.Context, url string, timeout time.Duration) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "god-eye-v2")

	client := &http.Client{Timeout: timeout}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		// Drain a bounded amount so the underlying connection can be reused.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 64*1024))
		return nil, fmt.Errorf("fetch %s: unexpected HTTP status %d", url, resp.StatusCode)
	}

	// Cap the read so a misbehaving server cannot exhaust memory.
	return io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024))
}
+package takeover + +import ( + "context" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/scanner" + "god-eye/internal/store" +) + +const ModuleName = "takeover.cname" + +type takeoverModule struct{} + +func Register() { module.Register(&takeoverModule{}) } + +func (*takeoverModule) Name() string { return ModuleName } +func (*takeoverModule) Phase() module.Phase { return module.PhaseAnalysis } +func (*takeoverModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*takeoverModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventTakeoverCandidate} +} +func (*takeoverModule) DefaultEnabled() bool { return true } + +func (*takeoverModule) Run(mctx module.Context) error { + if mctx.Config.Bool("no_takeover", false) { + return nil + } + conc := mctx.Config.Int("concurrency", 100) + if conc <= 0 { + conc = 100 + } + timeout := mctx.Config.Int("timeout", 5) + + processed := make(map[string]struct{}) + var processedMu sync.Mutex + shouldProcess := func(host string) bool { + processedMu.Lock() + defer processedMu.Unlock() + if _, dup := processed[host]; dup { + return false + } + processed[host] = struct{}{} + return true + } + + work := make(chan string, conc*2) + var wg sync.WaitGroup + for i := 0; i < conc; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for host := range work { + if mctx.Ctx.Err() != nil { + return + } + service := scanner.CheckTakeover(host, timeout) + if service == "" { + continue + } + _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { + h.Takeover = &store.Takeover{ + Service: service, + CNAME: h.CNAME, + Confirmed: false, + FoundAt: time.Now(), + } + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.TakeoverCandidate{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, + Subdomain: host, + Service: service, + }) + } + }() + } + + // Drain: every host with a CNAME is a takeover 
candidate. + for _, h := range mctx.Store.All(mctx.Ctx) { + if h == nil || h.CNAME == "" { + continue + } + if !shouldProcess(h.Subdomain) { + continue + } + select { + case work <- h.Subdomain: + case <-mctx.Ctx.Done(): + close(work) + wg.Wait() + return nil + } + } + + sub := mctx.Bus.Subscribe(eventbus.EventDNSResolved, func(_ context.Context, e eventbus.Event) { + ev, ok := e.(eventbus.DNSResolved) + if !ok || ev.CNAME == "" { + return + } + if !shouldProcess(ev.Subdomain) { + return + } + select { + case work <- ev.Subdomain: + case <-mctx.Ctx.Done(): + } + }) + defer sub.Unsubscribe() + + select { + case <-time.After(500 * time.Millisecond): + case <-mctx.Ctx.Done(): + } + + close(work) + wg.Wait() + return nil +} diff --git a/internal/modules/vhost/vhost.go b/internal/modules/vhost/vhost.go new file mode 100644 index 0000000..20ed76a --- /dev/null +++ b/internal/modules/vhost/vhost.go @@ -0,0 +1,79 @@ +// Package vhost is a Fase 0.6 adapter around v1 network.VHostScanner which +// performs virtual host discovery on resolved IPs. Reveals additional +// hostnames sharing infrastructure with in-scope targets. 
+package vhost + +import ( + "strings" + "sync" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/network" + "god-eye/internal/store" +) + +const ModuleName = "discovery.vhost" + +type vhostModule struct{} + +func Register() { module.Register(&vhostModule{}) } + +func (*vhostModule) Name() string { return ModuleName } +func (*vhostModule) Phase() module.Phase { return module.PhaseResolution } +func (*vhostModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventDNSResolved} } +func (*vhostModule) Produces() []eventbus.EventType { + return []eventbus.EventType{eventbus.EventSubdomainDiscovered} +} +func (*vhostModule) DefaultEnabled() bool { return false } // opt-in + +func (*vhostModule) Run(mctx module.Context) error { + if !mctx.Config.Bool("vhost_scan", false) { + return nil + } + timeout := mctx.Config.Int("timeout", 10) + target := mctx.Target + + hosts := mctx.Store.All(mctx.Ctx) + seenIP := make(map[string]struct{}) + for _, h := range hosts { + for _, ip := range h.IPs { + seenIP[ip] = struct{}{} + } + } + + scanner := network.NewVHostScanner(timeout) + var wg sync.WaitGroup + for ip := range seenIP { + ip := ip + if mctx.Ctx.Err() != nil { + break + } + wg.Add(1) + go func() { + defer wg.Done() + res := scanner.DiscoverVHosts(mctx.Ctx, ip) + if res == nil { + return + } + for _, h := range res.Domains { + h = strings.ToLower(strings.TrimSpace(h)) + if h == "" || !strings.HasSuffix(h, target) { + continue + } + _ = mctx.Store.Upsert(mctx.Ctx, h, func(sh *store.Host) { + store.AddIPs(sh, []string{ip}) + store.AddDiscoveryMethod(sh, "vhost") + }) + mctx.Bus.Publish(mctx.Ctx, eventbus.SubdomainDiscovered{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: h}, + Subdomain: h, + Method: "vhost", + }) + } + }() + } + wg.Wait() + return nil +} diff --git a/internal/nucleitpl/download.go b/internal/nucleitpl/download.go new file mode 100644 index 0000000..31dc396 --- 
/dev/null +++ b/internal/nucleitpl/download.go @@ -0,0 +1,370 @@ +package nucleitpl + +import ( + "archive/zip" + "errors" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync/atomic" + "time" +) + +// TemplatesZipURL is the default ZIP archive of the projectdiscovery +// nuclei-templates repository (main branch). +const TemplatesZipURL = "https://github.com/projectdiscovery/nuclei-templates/archive/refs/heads/main.zip" + +// Downloader fetches the nuclei-templates archive and extracts the +// YAML files into destDir. Designed to be invoked at most once per +// scan: after a successful extraction the destination dir persists +// across runs; subsequent invocations return quickly via hasTemplates(). +type Downloader struct { + // ZipURL overrides TemplatesZipURL for testing or mirroring. + ZipURL string + // HTTPClient is used for the download. Default: 10-minute timeout. + HTTPClient *http.Client + // Writer receives progress lines when Verbose is true. Defaults to + // os.Stderr. + Writer io.Writer + // Verbose toggles progress logging. + Verbose bool + // MinTemplatesToConsiderPresent is the count of .yaml files under + // destDir below which we treat the directory as empty / incomplete + // and re-download. Default: 50. + MinTemplatesToConsiderPresent int +} + +// NewDownloader returns a Downloader with sensible defaults. +func NewDownloader() *Downloader { + return &Downloader{ + ZipURL: TemplatesZipURL, + HTTPClient: &http.Client{Timeout: 10 * time.Minute}, + Writer: os.Stderr, + MinTemplatesToConsiderPresent: 50, + } +} + +// EnsureTemplates guarantees destDir contains a usable set of Nuclei +// YAML templates. If the directory already has β‰₯ MinTemplatesToConsiderPresent +// templates, it's a no-op. Otherwise the ZIP is downloaded, streamed to +// a temp file, and extracted (YAML files only). +// +// destDir is created if it doesn't exist. 
+func (d *Downloader) EnsureTemplates(destDir string) error { + if destDir == "" { + return errors.New("EnsureTemplates: empty destDir") + } + if d.hasEnoughTemplates(destDir) { + if d.Verbose { + fmt.Fprintf(d.writer(), "βœ“ nuclei templates already present at %s\n", destDir) + } + return nil + } + + if err := os.MkdirAll(destDir, 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", destDir, err) + } + + if d.Verbose { + fmt.Fprintf(d.writer(), "↓ downloading nuclei-templates from %s\n", d.zipURL()) + } + + tmpPath, err := d.downloadZip() + if err != nil { + return err + } + defer os.Remove(tmpPath) + + count, bytes, err := d.extractYAML(tmpPath, destDir) + if err != nil { + return err + } + + if count < d.MinTemplatesToConsiderPresent { + return fmt.Errorf("extracted only %d templates (expected β‰₯ %d) β€” archive may be incomplete", count, d.MinTemplatesToConsiderPresent) + } + + if d.Verbose { + fmt.Fprintf(d.writer(), "βœ“ extracted %d nuclei templates (%s) into %s\n", + count, humanBytesN(bytes), destDir) + } + return nil +} + +// Refresh forces a re-download regardless of current directory contents. +// Useful for `god-eye nuclei-update` style CLI commands. 
+func (d *Downloader) Refresh(destDir string) error { + if destDir == "" { + return errors.New("Refresh: empty destDir") + } + if err := os.MkdirAll(destDir, 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", destDir, err) + } + + if d.Verbose { + fmt.Fprintf(d.writer(), "↓ refreshing nuclei-templates from %s\n", d.zipURL()) + } + + tmpPath, err := d.downloadZip() + if err != nil { + return err + } + defer os.Remove(tmpPath) + + count, bytes, err := d.extractYAML(tmpPath, destDir) + if err != nil { + return err + } + if d.Verbose { + fmt.Fprintf(d.writer(), "βœ“ refreshed %d templates (%s)\n", count, humanBytesN(bytes)) + } + return nil +} + +// --- internals ----------------------------------------------------------- + +func (d *Downloader) hasEnoughTemplates(dir string) bool { + info, err := os.Stat(dir) + if err != nil || !info.IsDir() { + return false + } + found := 0 + threshold := d.MinTemplatesToConsiderPresent + if threshold <= 0 { + threshold = 50 + } + _ = filepath.Walk(dir, func(_ string, fi os.FileInfo, err error) error { + if err != nil { + return nil + } + if fi.IsDir() { + return nil + } + name := strings.ToLower(fi.Name()) + if strings.HasSuffix(name, ".yaml") || strings.HasSuffix(name, ".yml") { + found++ + if found >= threshold { + return filepath.SkipAll + } + } + return nil + }) + return found >= threshold +} + +func (d *Downloader) zipURL() string { + if d.ZipURL != "" { + return d.ZipURL + } + return TemplatesZipURL +} + +func (d *Downloader) writer() io.Writer { + if d.Writer != nil { + return d.Writer + } + return os.Stderr +} + +func (d *Downloader) downloadZip() (string, error) { + client := d.HTTPClient + if client == nil { + client = &http.Client{Timeout: 10 * time.Minute} + } + + req, err := http.NewRequest("GET", d.zipURL(), nil) + if err != nil { + return "", err + } + req.Header.Set("User-Agent", "god-eye-v2") + req.Header.Set("Accept", "application/zip") + + resp, err := client.Do(req) + if err != nil { + return "", 
fmt.Errorf("download: %w", err) + } + defer resp.Body.Close() + + // Follow standard HTTP error reporting. + if resp.StatusCode != 200 { + return "", fmt.Errorf("download: HTTP %d from %s", resp.StatusCode, d.zipURL()) + } + + tmp, err := os.CreateTemp("", "nuclei-templates-*.zip") + if err != nil { + return "", fmt.Errorf("create temp: %w", err) + } + + // Streaming copy with throttled progress output. + var written atomic.Int64 + pr := &progressReader{ + r: resp.Body, + written: &written, + verbose: d.Verbose, + writer: d.writer(), + total: resp.ContentLength, + prefix: " downloading", + } + + if _, err := io.Copy(tmp, pr); err != nil { + tmp.Close() + os.Remove(tmp.Name()) + return "", fmt.Errorf("stream download: %w", err) + } + if err := tmp.Close(); err != nil { + os.Remove(tmp.Name()) + return "", err + } + return tmp.Name(), nil +} + +// extractYAML walks the zip and writes every .yaml / .yml file into +// destDir. Returns (count, totalBytes, error). +// +// The top-level directory in the archive (e.g. "nuclei-templates-main/") +// is stripped so entries land at destDir//.yaml. +// +// Path-traversal protection: every resolved destination must be within +// destDir; otherwise the entry is skipped. +func (d *Downloader) extractYAML(zipPath, destDir string) (int, int64, error) { + zr, err := zip.OpenReader(zipPath) + if err != nil { + return 0, 0, fmt.Errorf("open zip: %w", err) + } + defer zr.Close() + + absDest, err := filepath.Abs(destDir) + if err != nil { + return 0, 0, err + } + + var count int + var bytes int64 + for _, f := range zr.File { + if f.FileInfo().IsDir() { + continue + } + lower := strings.ToLower(f.Name) + if !strings.HasSuffix(lower, ".yaml") && !strings.HasSuffix(lower, ".yml") { + continue + } + + // Strip leading top-level folder if present. + rel := f.Name + if i := strings.Index(rel, "/"); i >= 0 { + rel = rel[i+1:] + } + if rel == "" { + continue + } + // Guard against path traversal / absolute paths. 
+ if strings.Contains(rel, "..") || filepath.IsAbs(rel) { + continue + } + + dest := filepath.Join(absDest, rel) + if !strings.HasPrefix(dest, absDest+string(os.PathSeparator)) && dest != absDest { + continue + } + + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + continue + } + + rc, err := f.Open() + if err != nil { + continue + } + out, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + rc.Close() + continue + } + n, cerr := io.Copy(out, rc) + rc.Close() + out.Close() + if cerr != nil { + _ = os.Remove(dest) + continue + } + count++ + bytes += n + } + return count, bytes, nil +} + +// --- helpers ------------------------------------------------------------- + +// progressReader wraps an io.Reader and emits throttled progress lines +// as bytes are consumed. Throttling: one line every ~5% of total (or +// every ~5MB when total is unknown). +type progressReader struct { + r io.Reader + written *atomic.Int64 + total int64 + verbose bool + writer io.Writer + prefix string + + lastPct int + lastBytes int64 + lastReport time.Time +} + +func (p *progressReader) Read(b []byte) (int, error) { + n, err := p.r.Read(b) + if n > 0 { + p.written.Add(int64(n)) + if p.verbose { + p.maybeReport() + } + } + return n, err +} + +func (p *progressReader) maybeReport() { + w := p.written.Load() + + // Rate-limit prints to avoid flooding the terminal. + if time.Since(p.lastReport) < 200*time.Millisecond { + return + } + + if p.total > 0 { + pct := int(float64(w) / float64(p.total) * 100) + if pct >= p.lastPct+5 || pct == 100 { + fmt.Fprintf(p.writer, "%s %3d%% %s / %s\n", + p.prefix, pct, humanBytesN(w), humanBytesN(p.total)) + p.lastPct = pct + p.lastReport = time.Now() + } + } else { + // Unknown total: report every ~5MB. + if w-p.lastBytes >= 5*1024*1024 { + fmt.Fprintf(p.writer, "%s %s\n", p.prefix, humanBytesN(w)) + p.lastBytes = w + p.lastReport = time.Now() + } + } +} + +// humanBytesN formats a byte count like "2.3MB". 
Duplicated from +// ai/ensure.go to avoid a cross-package dependency. +func humanBytesN(n int64) string { + const k = 1024.0 + if n < int64(k) { + return fmt.Sprintf("%dB", n) + } + units := []string{"KB", "MB", "GB", "TB"} + v := float64(n) / k + for _, u := range units { + if v < k { + return fmt.Sprintf("%.1f%s", v, u) + } + v /= k + } + return fmt.Sprintf("%.1fPB", v) +} diff --git a/internal/nucleitpl/executor.go b/internal/nucleitpl/executor.go new file mode 100644 index 0000000..e4c727f --- /dev/null +++ b/internal/nucleitpl/executor.go @@ -0,0 +1,361 @@ +package nucleitpl + +import ( + "bytes" + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "time" +) + +// Executor runs supported Nuclei templates against a target URL. +type Executor struct { + Client *http.Client + Timeout time.Duration + MaxBodyB int64 // response body cap; default 1MB + UserAgent string +} + +// NewExecutor builds an executor with sensible defaults. Pass a custom +// *http.Client when you want connection pooling shared with the rest of +// the scan (recommended). +func NewExecutor(client *http.Client, timeout time.Duration) *Executor { + if client == nil { + client = &http.Client{Timeout: timeout} + } + if timeout == 0 { + timeout = 15 * time.Second + } + return &Executor{ + Client: client, + Timeout: timeout, + MaxBodyB: 1 * 1024 * 1024, + UserAgent: "god-eye-v2-nuclei", + } +} + +// Match holds the successful match output for a single template/target. +type Match struct { + TemplateID string + TemplateURL string // reference URL when present in info.reference + Name string + Severity string + Description string + Tags []string + URL string // URL that matched + Evidence string // short excerpt from the matching response + CVEs []string // extracted from info.reference when possible + Author string +} + +// Run executes every HTTP request in the template against the given +// base URL (e.g. "https://api.example.com"). Returns one Match per +// request that succeeds. 
Non-matching requests produce no entries. +// +// Templating substitutions handled: {{BaseURL}}, {{Hostname}}, {{RootURL}}. +func (e *Executor) Run(ctx context.Context, t *Template, baseURL string) []Match { + if ok, _ := t.IsSupported(); !ok { + return nil + } + var matches []Match + for _, req := range t.Requests { + for _, p := range req.Path { + url := expandPath(p, baseURL) + m, err := e.runOne(ctx, t, req, url) + if err != nil || m == nil { + continue + } + matches = append(matches, *m) + } + } + return matches +} + +// runOne sends one HTTP request, applies matchers, and returns a Match +// when every matchers-condition group is satisfied. +func (e *Executor) runOne(ctx context.Context, t *Template, req HTTPRequest, url string) (*Match, error) { + method := strings.ToUpper(req.Method) + if method == "" { + method = "GET" + } + + var body io.Reader + if req.Body != "" { + body = bytes.NewBufferString(req.Body) + } + + r, err := http.NewRequestWithContext(ctx, method, url, body) + if err != nil { + return nil, err + } + for k, v := range req.Headers { + r.Header.Set(k, v) + } + if r.Header.Get("User-Agent") == "" { + r.Header.Set("User-Agent", e.UserAgent) + } + + // Honor the redirects flag; default is NO redirect follow (safer + // for vuln detection since a 3xx-based probe might be exactly what + // we want to measure). + client := e.Client + if !req.Redirects { + wrapped := *client + wrapped.CheckRedirect = func(*http.Request, []*http.Request) error { + return http.ErrUseLastResponse + } + client = &wrapped + } + + resp, err := client.Do(r) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, e.MaxBodyB)) + + // Apply matchers. 
+ condition := strings.ToLower(strings.TrimSpace(req.MatchersCondition)) + if condition == "" { + condition = "or" + } + + fired := 0 + for _, m := range req.Matchers { + if matcherHits(m, resp, bodyBytes) { + fired++ + } + } + + switch condition { + case "and": + if fired != len(req.Matchers) { + return nil, nil + } + case "or": + if fired == 0 { + return nil, nil + } + default: + if fired == 0 { + return nil, nil + } + } + + return &Match{ + TemplateID: t.ID, + TemplateURL: firstRef(t.Info.Reference), + Name: t.Info.Name, + Severity: t.Severity(), + Description: t.Info.Description, + Tags: t.Tags(), + URL: url, + Evidence: evidenceSnippet(bodyBytes, resp), + CVEs: extractCVEs(t.ID, t.Info.Reference), + Author: t.Info.Author, + }, nil +} + +// matcherHits returns true when the matcher m fires against the response. +// Respects m.Negative (inverts), m.Condition (and|or over word list), and +// m.Part (header|body|response|all; default body). +func matcherHits(m Matcher, resp *http.Response, body []byte) bool { + hit := false + switch m.Type { + case "status": + for _, code := range m.Status { + if resp.StatusCode == code { + hit = true + break + } + } + case "size": + for _, sz := range m.Size { + if len(body) == sz { + hit = true + break + } + } + case "word": + corpus := selectCorpus(m.Part, resp, body) + hit = wordMatch(m, corpus) + case "regex": + corpus := selectCorpus(m.Part, resp, body) + hit = regexMatch(m, corpus) + } + if m.Negative { + return !hit + } + return hit +} + +func selectCorpus(part string, resp *http.Response, body []byte) string { + switch strings.ToLower(strings.TrimSpace(part)) { + case "header": + return formatHeaders(resp.Header) + case "response", "all": + return formatHeaders(resp.Header) + "\n\n" + string(body) + case "body", "": + return string(body) + default: + return string(body) + } +} + +func wordMatch(m Matcher, corpus string) bool { + if len(m.Words) == 0 { + return false + } + condition := 
strings.ToLower(strings.TrimSpace(m.Condition)) + if condition == "" { + condition = "or" + } + lower := strings.ToLower(corpus) + if condition == "and" { + for _, w := range m.Words { + if !strings.Contains(lower, strings.ToLower(w)) { + return false + } + } + return true + } + // or + for _, w := range m.Words { + if strings.Contains(lower, strings.ToLower(w)) { + return true + } + } + return false +} + +func regexMatch(m Matcher, corpus string) bool { + if len(m.Regex) == 0 { + return false + } + condition := strings.ToLower(strings.TrimSpace(m.Condition)) + if condition == "" { + condition = "or" + } + compiled := make([]*regexp.Regexp, 0, len(m.Regex)) + for _, pat := range m.Regex { + re, err := regexp.Compile(pat) + if err != nil { + continue + } + compiled = append(compiled, re) + } + if len(compiled) == 0 { + return false + } + if condition == "and" { + for _, re := range compiled { + if !re.MatchString(corpus) { + return false + } + } + return true + } + for _, re := range compiled { + if re.MatchString(corpus) { + return true + } + } + return false +} + +// --- helpers ------------------------------------------------------------- + +// expandPath substitutes Nuclei template variables with real values. 
+// {{BaseURL}} β†’ baseURL unchanged ("https://example.com") +// {{Hostname}} β†’ host portion of baseURL +// {{RootURL}} β†’ scheme + host (no path) +func expandPath(template, baseURL string) string { + host := hostOnly(baseURL) + root := rootURL(baseURL) + out := strings.ReplaceAll(template, "{{BaseURL}}", baseURL) + out = strings.ReplaceAll(out, "{{Hostname}}", host) + out = strings.ReplaceAll(out, "{{RootURL}}", root) + return out +} + +func hostOnly(u string) string { + s := strings.TrimPrefix(u, "https://") + s = strings.TrimPrefix(s, "http://") + if i := strings.IndexAny(s, "/?#"); i >= 0 { + s = s[:i] + } + return s +} + +func rootURL(u string) string { + s := u + scheme := "" + switch { + case strings.HasPrefix(s, "https://"): + scheme = "https://" + s = s[len("https://"):] + case strings.HasPrefix(s, "http://"): + scheme = "http://" + s = s[len("http://"):] + } + if i := strings.IndexAny(s, "/?#"); i >= 0 { + s = s[:i] + } + return scheme + s +} + +func formatHeaders(h http.Header) string { + var sb strings.Builder + for k, vs := range h { + for _, v := range vs { + fmt.Fprintf(&sb, "%s: %s\n", k, v) + } + } + return sb.String() +} + +func evidenceSnippet(body []byte, resp *http.Response) string { + const maxSnippet = 500 + s := string(body) + if len(s) > maxSnippet { + s = s[:maxSnippet] + "…" + } + return fmt.Sprintf("HTTP %d β€” %s", resp.StatusCode, s) +} + +// firstRef returns the first URL in the reference list (usually the +// nuclei-templates source or the advisory). +func firstRef(refs []string) string { + for _, r := range refs { + r = strings.TrimSpace(r) + if r != "" { + return r + } + } + return "" +} + +// extractCVEs scans the template ID and references for CVE IDs. 
+func extractCVEs(id string, refs []string) []string { + re := regexp.MustCompile(`(?i)CVE-\d{4}-\d{4,7}`) + seen := make(map[string]bool) + var out []string + add := func(s string) { + for _, m := range re.FindAllString(s, -1) { + up := strings.ToUpper(m) + if !seen[up] { + seen[up] = true + out = append(out, up) + } + } + } + add(id) + for _, r := range refs { + add(r) + } + return out +} diff --git a/internal/nucleitpl/executor_test.go b/internal/nucleitpl/executor_test.go new file mode 100644 index 0000000..1f34ed9 --- /dev/null +++ b/internal/nucleitpl/executor_test.go @@ -0,0 +1,216 @@ +package nucleitpl + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +// mkTemplate builds a minimal Template in-memory for tests. +func mkTemplate(id string, path string, matchers []Matcher, condition string) *Template { + return &Template{ + ID: id, + Info: Info{ + Name: "Test " + id, + Severity: "high", + }, + Requests: []HTTPRequest{{ + Method: "GET", + Path: []string{path}, + Matchers: matchers, + MatchersCondition: condition, + }}, + } +} + +func TestExecutor_WordMatch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("PHP Version 7.4.3 loaded")) + })) + defer srv.Close() + + tpl := mkTemplate("test-phpinfo", + "{{BaseURL}}/info.php", + []Matcher{{Type: "word", Part: "body", Words: []string{"PHP Version"}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Fatalf("expected 1 match, got %d", len(matches)) + } + if matches[0].TemplateID != "test-phpinfo" { + t.Errorf("wrong template: %s", matches[0].TemplateID) + } + if !strings.Contains(matches[0].Evidence, "PHP Version") { + t.Errorf("evidence missing snippet: %q", matches[0].Evidence) + } +} + +func TestExecutor_StatusMatch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { + w.WriteHeader(403) + })) + defer srv.Close() + + tpl := mkTemplate("test-403", + "{{BaseURL}}/admin", + []Matcher{{Type: "status", Status: []int{403, 401}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Fatalf("expected match, got %d", len(matches)) + } +} + +func TestExecutor_ANDCondition(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("admin panel access")) + })) + defer srv.Close() + + // Both matchers must fire. + tpl := mkTemplate("test-and", + "{{BaseURL}}/", + []Matcher{ + {Type: "word", Part: "body", Words: []string{"admin"}}, + {Type: "status", Status: []int{200}}, + }, "and") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("expected AND match to fire, got %d", len(matches)) + } + + // If we flip status to something the server doesn't return, AND fails. 
+ tpl.Requests[0].Matchers[1].Status = []int{500} + matches = e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 0 { + t.Errorf("AND should fail when one matcher doesn't, got %d", len(matches)) + } +} + +func TestExecutor_NegativeMatcher(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("welcome")) + })) + defer srv.Close() + + tpl := mkTemplate("test-neg", + "{{BaseURL}}/", + []Matcher{{Type: "word", Part: "body", Words: []string{"error"}, Negative: true}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("negative should fire (body doesn't contain 'error'), got %d", len(matches)) + } +} + +func TestExecutor_RegexMatch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("Server: Apache/2.4.52 (Ubuntu)")) + })) + defer srv.Close() + + tpl := mkTemplate("test-re", + "{{BaseURL}}/", + []Matcher{{Type: "regex", Part: "body", Regex: []string{`Apache/\d+\.\d+\.\d+`}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("regex match should fire, got %d", len(matches)) + } +} + +func TestExecutor_HeaderPart(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-Powered-By", "Express") + w.WriteHeader(200) + })) + defer srv.Close() + + tpl := mkTemplate("test-header", + "{{BaseURL}}/", + []Matcher{{Type: "word", Part: "header", Words: []string{"X-Powered-By"}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 1 { + t.Errorf("header matcher should fire, got %d", len(matches)) + } +} + +func TestExecutor_NoMatchReturnsEmpty(t *testing.T) { + srv 
:= httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("nothing interesting")) + })) + defer srv.Close() + + tpl := mkTemplate("test-nomatch", + "{{BaseURL}}/", + []Matcher{{Type: "word", Part: "body", Words: []string{"definitely_not_here"}}}, + "") + + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, srv.URL) + if len(matches) != 0 { + t.Errorf("non-match should return empty, got %d", len(matches)) + } +} + +func TestExpandPath(t *testing.T) { + cases := []struct { + path, base, want string + }{ + {"{{BaseURL}}/admin", "https://example.com", "https://example.com/admin"}, + {"{{Hostname}}/x", "https://api.example.com/v1", "api.example.com/x"}, + {"{{RootURL}}/r", "http://sub.example.com/deep/path", "http://sub.example.com/r"}, + {"/static/admin", "https://x.com", "/static/admin"}, + } + for _, c := range cases { + if got := expandPath(c.path, c.base); got != c.want { + t.Errorf("expandPath(%q, %q) = %q, want %q", c.path, c.base, got, c.want) + } + } +} + +func TestExtractCVEs(t *testing.T) { + cves := extractCVEs("cve-2021-23017-nginx", []string{ + "https://nvd.nist.gov/vuln/detail/CVE-2021-23017", // dup of ID after upper-casing + "https://example.com/adv/CVE-2020-15168", + }) + if len(cves) != 2 { + t.Errorf("expected 2 unique CVE IDs, got %d: %v", len(cves), cves) + } + if cves[0] != "CVE-2021-23017" || cves[1] != "CVE-2020-15168" { + t.Errorf("unexpected order: %v", cves) + } +} + +func TestExecutor_UnsupportedTemplateNoop(t *testing.T) { + tpl := &Template{ + ID: "dns-tpl", + DNS: []string{"placeholder"}, + } + e := NewExecutor(nil, 5*time.Second) + matches := e.Run(context.Background(), tpl, "https://example.com") + if len(matches) != 0 { + t.Errorf("unsupported template should return no matches, got %d", len(matches)) + } +} diff --git a/internal/nucleitpl/template.go b/internal/nucleitpl/template.go new file mode 100644 index 0000000..08bf082 --- 
/dev/null +++ b/internal/nucleitpl/template.go @@ -0,0 +1,302 @@ +// Package nucleitpl parses and executes a subset of the Nuclei YAML +// template format. The goal is to run community HTTP templates unchanged +// so God's Eye gets access to the ~8000-template ecosystem without +// reimplementing detections one-by-one. +// +// Supported subset (covers roughly 70% of HTTP templates in the public +// nuclei-templates repo at time of writing): +// +// - Top-level: id, info { name, severity, description, tags, author } +// - Protocol: requests: (aliased as http: in newer templates) +// - Per-request: method, path (with {{BaseURL}}/{{Hostname}} substitution), +// headers, body, redirects (bool), matchers-condition (and|or) +// - Matchers: type=word (word|part|condition), +// type=regex (regex|part), +// type=status (status), +// type=size (size) +// - Severity mapping: info/low/medium/high/critical +// +// Out of scope (templates using these are skipped with a reason logged): +// +// - Protocols other than http: dns, ssl, network, file, code, javascript, +// workflow, headless, flow +// - Pre-conditions, payloads, extractors, dynamic variables, +// stop-at-first-match, cluster, self-contained +// - Interactsh (OOB) β€” requires a callback server we don't ship yet +// - Fuzzing templates +// +// A skipped template logs via the returned diagnostic; the executor never +// panics on an unsupported template. +package nucleitpl + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" +) + +// Template is the parsed form of a Nuclei YAML file. +type Template struct { + ID string `yaml:"id"` + Info Info `yaml:"info"` + Requests []HTTPRequest `yaml:"requests,omitempty"` + HTTP []HTTPRequest `yaml:"http,omitempty"` // newer alias for requests + // Unsupported protocols β€” presence triggers skip with reason. 
+ DNS interface{} `yaml:"dns,omitempty"` + SSL interface{} `yaml:"ssl,omitempty"` + Network interface{} `yaml:"network,omitempty"` + File interface{} `yaml:"file,omitempty"` + Code interface{} `yaml:"code,omitempty"` + Headless interface{} `yaml:"headless,omitempty"` + Workflow interface{} `yaml:"workflows,omitempty"` + // SourcePath is populated by Load so diagnostics can reference the file. + SourcePath string `yaml:"-"` +} + +// Info is the template metadata block. +type Info struct { + Name string `yaml:"name"` + Author string `yaml:"author,omitempty"` + Severity string `yaml:"severity"` + Description string `yaml:"description,omitempty"` + Reference []string `yaml:"reference,omitempty"` + Tags string `yaml:"tags,omitempty"` +} + +// HTTPRequest is one HTTP interaction in a template. +type HTTPRequest struct { + Method string `yaml:"method,omitempty"` // default GET + Path []string `yaml:"path"` + Headers map[string]string `yaml:"headers,omitempty"` + Body string `yaml:"body,omitempty"` + Redirects bool `yaml:"redirects,omitempty"` + MaxRedirects int `yaml:"max-redirects,omitempty"` + MatchersCondition string `yaml:"matchers-condition,omitempty"` // "and" | "or" (default "or") + Matchers []Matcher `yaml:"matchers"` + // Unsupported fields that, if present with values, trigger a skip. + Payloads interface{} `yaml:"payloads,omitempty"` + Extractors interface{} `yaml:"extractors,omitempty"` + Fuzzing interface{} `yaml:"fuzzing,omitempty"` + Unsafe bool `yaml:"unsafe,omitempty"` + Attack string `yaml:"attack,omitempty"` + Raw []string `yaml:"raw,omitempty"` + Pipeline bool `yaml:"pipeline,omitempty"` + Threads int `yaml:"threads,omitempty"` + StopAtFirst bool `yaml:"stop-at-first-match,omitempty"` +} + +// Matcher is a single match rule within a request. 
+type Matcher struct { + Type string `yaml:"type"` // word | regex | status | size | dsl | binary + Part string `yaml:"part,omitempty"` // header | body | response (default body) + Condition string `yaml:"condition,omitempty"` // and | or (default or) + Negative bool `yaml:"negative,omitempty"` + Words []string `yaml:"words,omitempty"` + Regex []string `yaml:"regex,omitempty"` + Status []int `yaml:"status,omitempty"` + Size []int `yaml:"size,omitempty"` + // Unsupported β€” presence marks the matcher unusable. + DSL []string `yaml:"dsl,omitempty"` + Binary []string `yaml:"binary,omitempty"` +} + +// Load parses a single YAML file into a Template. Malformed YAML or empty +// files return (nil, err); structurally valid YAML that references unused +// protocols still Load successfully β€” IsSupported/IsSupported reason tell +// the caller whether to execute it. +func Load(path string) (*Template, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var t Template + if err := yaml.Unmarshal(data, &t); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + if t.ID == "" { + return nil, fmt.Errorf("parse %s: missing id field", path) + } + t.SourcePath = path + // Normalize requests vs http alias. + if len(t.Requests) == 0 && len(t.HTTP) > 0 { + t.Requests = t.HTTP + } + return &t, nil +} + +// LoadDir walks dir recursively, loads every .yaml / .yml file, and +// returns the slice of successfully-parsed templates. Parse errors are +// collected into the returned diagnostics slice but do not stop the walk. 
+func LoadDir(dir string) ([]*Template, []string, error) { + var tpls []*Template + var diags []string + + err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil // skip unreadable files silently + } + if info.IsDir() { + return nil + } + ext := strings.ToLower(filepath.Ext(path)) + if ext != ".yaml" && ext != ".yml" { + return nil + } + t, err := Load(path) + if err != nil { + diags = append(diags, fmt.Sprintf("parse %s: %v", path, err)) + return nil + } + tpls = append(tpls, t) + return nil + }) + return tpls, diags, err +} + +// TargetsCurrentHost reports whether every request path in the template +// is scoped to the scanned host β€” i.e. uses {{BaseURL}}, {{Hostname}}, +// {{RootURL}}, or a leading "/". Templates with absolute URLs to +// third-party services (common in OSINT / user-presence checks) would +// otherwise fire against unrelated hosts with unresolved placeholders +// like {{user}} β€” and their matchers often succeed on whatever generic +// response the third party returns, producing high-volume false +// positives against a single-target scan. +// +// Returns false + reason when any request path is off-host. +func (t *Template) TargetsCurrentHost() (bool, string) { + for i, r := range t.Requests { + for j, p := range r.Path { + ok := false + switch { + case strings.HasPrefix(p, "{{BaseURL}}"), + strings.HasPrefix(p, "{{Hostname}}"), + strings.HasPrefix(p, "{{RootURL}}"), + strings.HasPrefix(p, "/"): + ok = true + } + if !ok { + // Also allow the special case where the path is exactly + // a template variable (no literal text). 
+ if p == "{{BaseURL}}" || p == "{{Hostname}}" || p == "{{RootURL}}" { + ok = true + } + } + if !ok { + return false, fmt.Sprintf("request[%d].path[%d] %q does not target the scanned host", i, j, truncateStr(p, 60)) + } + } + } + return true, "" +} + +func truncateStr(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "…" +} + +// IsSupported returns (true, "") when the template uses only features +// understood by the executor. Templates that would need unsupported +// protocols, payloads, extractors, or fuzzing return (false, reason). +// Templates that target third-party hosts (OSINT-style user lookups) +// also return false to prevent spurious matches during targeted scans. +func (t *Template) IsSupported() (bool, string) { + if t == nil { + return false, "nil template" + } + if t.DNS != nil { + return false, "dns protocol (unsupported)" + } + if t.SSL != nil { + return false, "ssl protocol (unsupported)" + } + if t.Network != nil { + return false, "network protocol (unsupported)" + } + if t.File != nil { + return false, "file protocol (unsupported)" + } + if t.Code != nil { + return false, "code protocol (unsupported)" + } + if t.Headless != nil { + return false, "headless protocol (unsupported)" + } + if t.Workflow != nil { + return false, "workflow (unsupported)" + } + if len(t.Requests) == 0 { + return false, "no http requests" + } + for i, r := range t.Requests { + if r.Payloads != nil { + return false, fmt.Sprintf("request[%d] uses payloads (unsupported)", i) + } + if r.Extractors != nil { + // Tolerate extractors on the first pass; we ignore them. + // Templates with only extractors still run; their findings are + // just matcher-based. 
+ } + if r.Fuzzing != nil { + return false, fmt.Sprintf("request[%d] uses fuzzing (unsupported)", i) + } + if r.Unsafe { + return false, fmt.Sprintf("request[%d] is unsafe (raw TCP)", i) + } + if len(r.Raw) > 0 { + return false, fmt.Sprintf("request[%d] uses raw (unsupported)", i) + } + if len(r.Path) == 0 { + return false, fmt.Sprintf("request[%d] has no path", i) + } + if len(r.Matchers) == 0 { + return false, fmt.Sprintf("request[%d] has no matchers", i) + } + for j, m := range r.Matchers { + switch m.Type { + case "word", "regex", "status", "size": + // supported + case "dsl", "binary": + return false, fmt.Sprintf("request[%d].matcher[%d] type=%s (unsupported)", i, j, m.Type) + default: + return false, fmt.Sprintf("request[%d].matcher[%d] type=%s (unknown)", i, j, m.Type) + } + } + } + // Scope check: skip templates that probe third-party hosts. + if ok, reason := t.TargetsCurrentHost(); !ok { + return false, reason + } + return true, "" +} + +// Severity returns the OWASP-style severity, defaulting to "info" when +// the template omits it. +func (t *Template) Severity() string { + s := strings.ToLower(strings.TrimSpace(t.Info.Severity)) + switch s { + case "critical", "high", "medium", "low", "info": + return s + default: + return "info" + } +} + +// Tags returns the comma-separated tags as a string slice. 
+func (t *Template) Tags() []string { + if t.Info.Tags == "" { + return nil + } + var out []string + for _, p := range strings.Split(t.Info.Tags, ",") { + p = strings.TrimSpace(p) + if p != "" { + out = append(out, p) + } + } + return out +} diff --git a/internal/nucleitpl/template_test.go b/internal/nucleitpl/template_test.go new file mode 100644 index 0000000..fa4e8bc --- /dev/null +++ b/internal/nucleitpl/template_test.go @@ -0,0 +1,229 @@ +package nucleitpl + +import ( + "os" + "path/filepath" + "testing" +) + +const sampleSupported = ` +id: test-basic-word-match +info: + name: Test Basic Word Match + author: vyntral + severity: high + description: Fires when response body contains 'phpinfo' + tags: exposure,php + reference: + - https://example.com/advisory/CVE-2021-12345 +requests: + - method: GET + path: + - "{{BaseURL}}/phpinfo.php" + matchers: + - type: word + part: body + words: + - "PHP Version" + - type: status + status: + - 200 + matchers-condition: and +` + +const sampleUnsupportedDNS = ` +id: test-dns +info: + name: Test DNS + severity: medium +dns: + - name: "{{FQDN}}" + type: TXT + matchers: + - type: word + words: ["v=spf"] +` + +const sampleUnsupportedPayloads = ` +id: test-payloads +info: + name: Test Payloads + severity: low +requests: + - method: GET + path: + - "{{BaseURL}}/{{word}}" + payloads: + word: + - admin + - backup + matchers: + - type: status + status: [200] +` + +const sampleBadYAML = ` +id: [unclosed +info: + name: +` + +func writeTmp(t *testing.T, name, content string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + return path +} + +func TestLoad_Supported(t *testing.T) { + path := writeTmp(t, "ok.yaml", sampleSupported) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + if tpl.ID != "test-basic-word-match" { + t.Errorf("ID = %q", tpl.ID) + } + if tpl.Info.Severity != "high" { + t.Errorf("Severity = 
%q", tpl.Info.Severity) + } + if len(tpl.Requests) != 1 { + t.Fatalf("Requests len = %d", len(tpl.Requests)) + } + r := tpl.Requests[0] + if r.Path[0] != "{{BaseURL}}/phpinfo.php" { + t.Errorf("Path[0] = %q", r.Path[0]) + } + if len(r.Matchers) != 2 { + t.Errorf("Matchers len = %d", len(r.Matchers)) + } + if r.MatchersCondition != "and" { + t.Errorf("MatchersCondition = %q", r.MatchersCondition) + } + if ok, reason := tpl.IsSupported(); !ok { + t.Errorf("should be supported; reason=%q", reason) + } + if tags := tpl.Tags(); len(tags) != 2 || tags[0] != "exposure" { + t.Errorf("Tags = %v", tags) + } +} + +func TestLoad_DNSUnsupported(t *testing.T) { + path := writeTmp(t, "dns.yaml", sampleUnsupportedDNS) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + ok, reason := tpl.IsSupported() + if ok { + t.Error("dns template should be unsupported") + } + if reason == "" { + t.Error("expected non-empty reason") + } +} + +func TestLoad_PayloadsUnsupported(t *testing.T) { + path := writeTmp(t, "payloads.yaml", sampleUnsupportedPayloads) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + ok, reason := tpl.IsSupported() + if ok { + t.Error("payloads template should be unsupported") + } + if reason == "" { + t.Error("expected non-empty reason") + } +} + +func TestLoad_BadYAML(t *testing.T) { + path := writeTmp(t, "bad.yaml", sampleBadYAML) + if _, err := Load(path); err == nil { + t.Error("expected parse error") + } +} + +func TestLoad_MissingID(t *testing.T) { + path := writeTmp(t, "noid.yaml", "info:\n severity: low\n") + if _, err := Load(path); err == nil { + t.Error("expected missing id error") + } +} + +func TestLoadDir(t *testing.T) { + dir := t.TempDir() + _ = os.WriteFile(filepath.Join(dir, "a.yaml"), []byte(sampleSupported), 0o644) + _ = os.WriteFile(filepath.Join(dir, "b.yaml"), []byte(sampleUnsupportedDNS), 0o644) + _ = os.WriteFile(filepath.Join(dir, "c.yml"), []byte(sampleSupported), 0o644) + _ = os.WriteFile(filepath.Join(dir, "d.bad"), 
[]byte("???"), 0o644) + _ = os.WriteFile(filepath.Join(dir, "e.yaml"), []byte(sampleBadYAML), 0o644) + + sub := filepath.Join(dir, "nested") + _ = os.MkdirAll(sub, 0o755) + _ = os.WriteFile(filepath.Join(sub, "f.yaml"), []byte(sampleSupported), 0o644) + + tpls, diags, err := LoadDir(dir) + if err != nil { + t.Fatal(err) + } + // 3 supported (a, c, nested/f), 1 dns (b), 1 parse error (e). .bad ignored. + if got := len(tpls); got != 4 { + t.Errorf("loaded = %d, want 4 (3 supported + 1 dns)", got) + } + if len(diags) != 1 { + t.Errorf("diags = %d, want 1", len(diags)) + } +} + +func TestSeverity_Default(t *testing.T) { + tpl := &Template{Info: Info{Severity: "UNKNOWN"}} + if sev := tpl.Severity(); sev != "info" { + t.Errorf("got %q, want info", sev) + } +} + +func TestSeverity_Normalized(t *testing.T) { + for input, want := range map[string]string{ + "critical": "critical", + "HIGH": "high", + " Medium ": "medium", + "LOW": "low", + "info": "info", + "": "info", + } { + tpl := &Template{Info: Info{Severity: input}} + if got := tpl.Severity(); got != want { + t.Errorf("Severity(%q) = %q, want %q", input, got, want) + } + } +} + +func TestHTTPAlias(t *testing.T) { + content := ` +id: http-alias +info: + severity: low +http: + - method: GET + path: ["{{BaseURL}}/"] + matchers: + - type: status + status: [200] +` + path := writeTmp(t, "http.yaml", content) + tpl, err := Load(path) + if err != nil { + t.Fatal(err) + } + if len(tpl.Requests) != 1 { + t.Errorf("expected http: to be aliased to Requests, got %d", len(tpl.Requests)) + } + if ok, _ := tpl.IsSupported(); !ok { + t.Error("http alias template should be supported") + } +} diff --git a/internal/output/print.go b/internal/output/print.go index 8834742..be737fe 100644 --- a/internal/output/print.go +++ b/internal/output/print.go @@ -50,11 +50,10 @@ func PrintBanner() { fmt.Println(BoldWhite(" β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•") + BoldGreen("β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•‘") + 
BoldWhite(" β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•—")) fmt.Println(BoldWhite(" β•šβ•β•β•β•β•β• β•šβ•β•β•β•β•β• β•šβ•β•β•β•β•β• ") + BoldGreen("β•šβ•β•β•β•β•β•β•") + BoldWhite(" β•šβ•β•β•β•β•β•β• β•šβ•β• β•šβ•β•β•β•β•β•β•")) fmt.Println() - fmt.Printf(" %s %s\n", BoldGreen("⚑"), Dim("AI-powered attack surface discovery & security analysis")) - fmt.Printf(" %s %s %s %s %s %s\n", - Dim("Version:"), BoldGreen("0.1"), - Dim("By:"), White("github.com/Vyntral"), - Dim("For:"), Yellow("github.com/Orizon-eu")) + fmt.Printf(" %s %s\n", BoldGreen("⚑"), Dim("AI-powered attack surface discovery & offensive security analysis")) + fmt.Printf(" %s %s %s %s\n", + Dim("Version:"), BoldGreen("2.0.0-rc1"), + Dim("By:"), White("github.com/Vyntral")) fmt.Println() } diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go new file mode 100644 index 0000000..bbe4de4 --- /dev/null +++ b/internal/pipeline/pipeline.go @@ -0,0 +1,278 @@ +// Package pipeline coordinates v2 module execution. It builds a Module list +// from the registry, applies the ConfigView filter, then runs every selected +// module concurrently under a shared event bus and store. +// +// Unlike the legacy scanner.Run, this coordinator does NO domain-specific +// work of its own. Every phase (passive, brute, resolve, probe, security, +// AI, reporting) is a Module. Ordering emerges from events, with explicit +// phase barriers for phases that must complete before downstream begins. +package pipeline + +import ( + "context" + "errors" + "fmt" + "sort" + "sync" + "time" + + "god-eye/internal/config" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +// Pipeline is the v2 scan coordinator. 
+type Pipeline struct { + cfg *config.Config + view *config.View + bus *eventbus.Bus + store store.Store + modReg *module.Registry + + // ownBus / ownStore indicate resources created by this Pipeline that + // must be closed on Shutdown. Injected resources are left to the caller. + ownBus bool + ownStore bool +} + +// Options are optional overrides for New. Empty fields mean "use defaults". +type Options struct { + Bus *eventbus.Bus // injected bus; defaults to a new one + Store store.Store // injected store; defaults to NewMemoryStore + Registry *module.Registry // registry to draw modules from; defaults to module.Default() + Buffer int // bus buffer size when creating default bus +} + +// New creates a Pipeline from cfg and opts. The pipeline is ready to Run. +// A non-nil Config is required. +func New(cfg *config.Config, opts Options) (*Pipeline, error) { + if cfg == nil { + return nil, errors.New("pipeline.New: nil config") + } + + p := &Pipeline{ + cfg: cfg, + view: config.NewView(cfg), + modReg: opts.Registry, + } + if p.modReg == nil { + p.modReg = module.Default() + } + + if opts.Bus != nil { + p.bus = opts.Bus + } else { + buf := opts.Buffer + if buf <= 0 { + buf = 4096 + } + p.bus = eventbus.New(buf) + p.ownBus = true + } + + if opts.Store != nil { + p.store = opts.Store + } else { + p.store = store.NewMemoryStore() + p.ownStore = true + } + + return p, nil +} + +// Bus returns the underlying event bus. Useful for attaching external +// subscribers (TUI, metrics, log sinks) before calling Run. +func (p *Pipeline) Bus() *eventbus.Bus { return p.bus } + +// Store returns the underlying store. Useful for post-scan querying or +// report generation outside of modules. +func (p *Pipeline) Store() store.Store { return p.store } + +// Run executes the selected modules. Returns when every module has exited +// OR ctx is canceled. The returned error aggregates any module errors via +// errors.Join. +// +// Execution semantics: +// - ScanStarted is published first. 
+// - Modules are grouped by Phase; each Phase is a barrier: phase N starts +// only after every module in phase N-1 has returned. +// - Within a phase, every module runs concurrently on its own goroutine. +// - When all phases complete, ScanCompleted is published with stats, then +// the bus is drained (if owned) and Shutdown is called. +func (p *Pipeline) Run(ctx context.Context) error { + selected := p.modReg.Select(p.view) + if len(selected) == 0 { + return errors.New("pipeline.Run: no modules selected β€” check config and module registrations") + } + + // Group modules by phase. + byPhase := make(map[module.Phase][]module.Module) + for _, m := range selected { + byPhase[m.Phase()] = append(byPhase[m.Phase()], m) + } + + // Sort modules within each phase for deterministic start order. + for _, ms := range byPhase { + sort.SliceStable(ms, func(i, j int) bool { return ms[i].Name() < ms[j].Name() }) + } + + started := time.Now() + p.publishScanStarted() + + var moduleErrs []error + var errsMu sync.Mutex + + // Iterate phases in canonical order. 
+ for _, phase := range phaseOrder { + modules := byPhase[phase] + if len(modules) == 0 { + continue + } + + phaseStart := time.Now() + p.publishPhaseStarted(phase) + + var wg sync.WaitGroup + for _, m := range modules { + m := m + wg.Add(1) + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + p.publishModuleError(m.Name(), fmt.Errorf("panic: %v", r), true) + errsMu.Lock() + moduleErrs = append(moduleErrs, fmt.Errorf("%s panicked: %v", m.Name(), r)) + errsMu.Unlock() + } + }() + + mctx := module.Context{ + Ctx: ctx, + Bus: p.bus, + Store: p.store, + Config: p.view, + Target: p.cfg.Domain, + Profile: p.cfg.Profile, + } + if err := m.Run(mctx); err != nil && !errors.Is(err, context.Canceled) { + p.publishModuleError(m.Name(), err, false) + errsMu.Lock() + moduleErrs = append(moduleErrs, fmt.Errorf("%s: %w", m.Name(), err)) + errsMu.Unlock() + } + }() + } + + // Wait for this phase OR for ctx cancellation. + done := make(chan struct{}) + go func() { wg.Wait(); close(done) }() + + select { + case <-done: + // normal completion + case <-ctx.Done(): + // wait (bounded) for goroutines to observe the cancellation + wg.Wait() + } + + p.publishPhaseCompleted(phase, time.Since(phaseStart)) + + if ctx.Err() != nil { + break + } + } + + p.publishScanCompleted(time.Since(started)) + + if p.ownBus { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _ = p.bus.Close(shutdownCtx) + } + + if len(moduleErrs) > 0 { + return errors.Join(moduleErrs...) + } + return ctx.Err() +} + +// Shutdown explicitly closes owned resources. Normally Run calls Shutdown +// automatically; use this when you want to reuse the pipeline or manage +// lifecycle externally. 
+func (p *Pipeline) Shutdown(ctx context.Context) error { + var errs []error + if p.ownBus { + if err := p.bus.Close(ctx); err != nil { + errs = append(errs, err) + } + } + if p.ownStore { + if err := p.store.Close(); err != nil { + errs = append(errs, err) + } + } + return errors.Join(errs...) +} + +// phaseOrder is the canonical sequence of pipeline phases. Modules may also +// declare phases not in this list β€” those are executed at the end in arbitrary +// order (but all still before ScanCompleted). +var phaseOrder = []module.Phase{ + module.PhaseSetup, + module.PhaseDiscovery, + module.PhaseResolution, + module.PhaseEnrichment, + module.PhaseAnalysis, + module.PhaseReporting, +} + +// --- event publishing helpers --- + +func (p *Pipeline) publishScanStarted() { + p.bus.Publish(context.Background(), eventbus.ScanStarted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Target: p.cfg.Domain, + Profile: p.cfg.Profile, + }) +} + +func (p *Pipeline) publishScanCompleted(d time.Duration) { + stats := map[string]int64{ + "hosts": int64(p.store.Count(context.Background())), + "published": int64(p.bus.Stats().Published), + "delivered": int64(p.bus.Stats().Delivered), + "dropped": int64(p.bus.Stats().Dropped), + } + p.bus.Publish(context.Background(), eventbus.ScanCompleted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Target: p.cfg.Domain, + Duration: d, + Stats: stats, + }) +} + +func (p *Pipeline) publishPhaseStarted(phase module.Phase) { + p.bus.Publish(context.Background(), eventbus.PhaseStarted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Phase: string(phase), + }) +} + +func (p *Pipeline) publishPhaseCompleted(phase module.Phase, d time.Duration) { + p.bus.Publish(context.Background(), eventbus.PhaseCompleted{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: "pipeline", Target: p.cfg.Domain}, + Phase: string(phase), 
+ Duration: d, + }) +} + +func (p *Pipeline) publishModuleError(name string, err error, fatal bool) { + p.bus.Publish(context.Background(), eventbus.ModuleError{ + EventMeta: eventbus.EventMeta{At: time.Now(), Source: name, Target: p.cfg.Domain}, + Module: name, + Err: err.Error(), + Fatal: fatal, + }) +} diff --git a/internal/pipeline/pipeline_test.go b/internal/pipeline/pipeline_test.go new file mode 100644 index 0000000..9b8f21f --- /dev/null +++ b/internal/pipeline/pipeline_test.go @@ -0,0 +1,285 @@ +package pipeline + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "god-eye/internal/config" + "god-eye/internal/eventbus" + "god-eye/internal/module" + "god-eye/internal/store" +) + +// --- test doubles -------------------------------------------------------- + +type spyModule struct { + name string + phase module.Phase + run func(mctx module.Context) error + calls atomic.Int32 + enabled bool +} + +func (s *spyModule) Name() string { return s.name } +func (s *spyModule) Phase() module.Phase { return s.phase } +func (s *spyModule) Consumes() []eventbus.EventType { return nil } +func (s *spyModule) Produces() []eventbus.EventType { return nil } +func (s *spyModule) DefaultEnabled() bool { return s.enabled } +func (s *spyModule) Run(mctx module.Context) error { + s.calls.Add(1) + if s.run != nil { + return s.run(mctx) + } + return nil +} + +func mkModule(name string, phase module.Phase, enabled bool) *spyModule { + return &spyModule{name: name, phase: phase, enabled: enabled} +} + +func TestPipeline_RunsAllEnabledModules(t *testing.T) { + r := module.NewRegistry() + a := mkModule("a", module.PhaseDiscovery, true) + b := mkModule("b", module.PhaseEnrichment, true) + c := mkModule("c", module.PhaseReporting, true) + off := mkModule("off", module.PhaseDiscovery, false) + r.Register(a) + r.Register(b) + r.Register(c) + r.Register(off) + + cfg := &config.Config{Domain: "example.com"} + p, err := New(cfg, Options{Registry: r}) + if err != 
nil { + t.Fatal(err) + } + + if err := p.Run(context.Background()); err != nil { + t.Fatalf("Run error: %v", err) + } + + if a.calls.Load() != 1 { + t.Errorf("a not called: %d", a.calls.Load()) + } + if b.calls.Load() != 1 { + t.Errorf("b not called") + } + if c.calls.Load() != 1 { + t.Errorf("c not called") + } + if off.calls.Load() != 0 { + t.Errorf("disabled module was called: %d", off.calls.Load()) + } +} + +func TestPipeline_PhaseBarrier(t *testing.T) { + // Phase B must see A's events before B's module runs. + r := module.NewRegistry() + + var aDone atomic.Bool + a := mkModule("producer", module.PhaseDiscovery, true) + a.run = func(mctx module.Context) error { + mctx.Bus.Publish(mctx.Ctx, eventbus.NewSubdomainDiscovered("test", "x.example.com", "p")) + time.Sleep(30 * time.Millisecond) + aDone.Store(true) + return nil + } + + var sawBefore atomic.Int32 + b := mkModule("consumer", module.PhaseEnrichment, true) + b.run = func(mctx module.Context) error { + if !aDone.Load() { + sawBefore.Add(1) + } + return nil + } + r.Register(a) + r.Register(b) + + p, err := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + if err != nil { + t.Fatal(err) + } + if err := p.Run(context.Background()); err != nil { + t.Fatalf("Run error: %v", err) + } + if sawBefore.Load() != 0 { + t.Errorf("phase barrier broken: consumer ran while producer was still running (%d times)", sawBefore.Load()) + } +} + +func TestPipeline_CollectsErrors(t *testing.T) { + r := module.NewRegistry() + good := mkModule("good", module.PhaseDiscovery, true) + failA := mkModule("fail-a", module.PhaseDiscovery, true) + failA.run = func(_ module.Context) error { return errors.New("boom-a") } + failB := mkModule("fail-b", module.PhaseAnalysis, true) + failB.run = func(_ module.Context) error { return errors.New("boom-b") } + r.Register(good) + r.Register(failA) + r.Register(failB) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + err := p.Run(context.Background()) + 
if err == nil { + t.Fatal("expected aggregated error") + } + if !contains(err.Error(), "boom-a") || !contains(err.Error(), "boom-b") { + t.Errorf("aggregated error missing parts: %v", err) + } +} + +func TestPipeline_PanicIsContained(t *testing.T) { + r := module.NewRegistry() + panicker := mkModule("panicker", module.PhaseDiscovery, true) + panicker.run = func(_ module.Context) error { panic("oops") } + r.Register(panicker) + r.Register(mkModule("normal", module.PhaseReporting, true)) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + err := p.Run(context.Background()) + if err == nil { + t.Fatal("expected error from panic") + } + if !contains(err.Error(), "panicked") { + t.Errorf("error doesn't mention panic: %v", err) + } +} + +func TestPipeline_RespectsCtxCancellation(t *testing.T) { + r := module.NewRegistry() + + slow := mkModule("slow", module.PhaseDiscovery, true) + var slowRan atomic.Bool + slow.run = func(mctx module.Context) error { + slowRan.Store(true) + <-mctx.Ctx.Done() + return mctx.Ctx.Err() + } + never := mkModule("never", module.PhaseAnalysis, true) + var neverRan atomic.Bool + never.run = func(_ module.Context) error { + neverRan.Store(true) + return nil + } + r.Register(slow) + r.Register(never) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _ = p.Run(ctx) + if !slowRan.Load() { + t.Error("slow should have run") + } + // never is in phase after slow, and phase B starts only after A finishes. + // Since slow exits when ctx is canceled, pipeline breaks out before + // scheduling phase B. never must NOT run. 
+ if neverRan.Load() { + t.Error("never should NOT have run after cancellation") + } +} + +func TestPipeline_PublishesScanEvents(t *testing.T) { + r := module.NewRegistry() + r.Register(mkModule("tiny", module.PhaseDiscovery, true)) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r, Bus: eventbus.New(128)}) + + started := make(chan struct{}, 1) + completed := make(chan struct{}, 1) + + p.Bus().Subscribe(eventbus.EventScanStarted, func(_ context.Context, _ eventbus.Event) { + select { + case started <- struct{}{}: + default: + } + }) + p.Bus().Subscribe(eventbus.EventScanCompleted, func(_ context.Context, _ eventbus.Event) { + select { + case completed <- struct{}{}: + default: + } + }) + + _ = p.Run(context.Background()) + + select { + case <-started: + case <-time.After(2 * time.Second): + t.Fatal("ScanStarted not fired") + } + select { + case <-completed: + case <-time.After(2 * time.Second): + t.Fatal("ScanCompleted not fired") + } +} + +func TestPipeline_ModulesShareStore(t *testing.T) { + r := module.NewRegistry() + + writer := mkModule("writer", module.PhaseDiscovery, true) + writer.run = func(mctx module.Context) error { + return mctx.Store.Upsert(mctx.Ctx, "a.example.com", func(h *store.Host) { + h.IPs = []string{"1.2.3.4"} + }) + } + + var readerSaw int + var readerMu sync.Mutex + reader := mkModule("reader", module.PhaseReporting, true) + reader.run = func(mctx module.Context) error { + readerMu.Lock() + defer readerMu.Unlock() + readerSaw = mctx.Store.Count(mctx.Ctx) + return nil + } + + r.Register(writer) + r.Register(reader) + + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + if err := p.Run(context.Background()); err != nil { + t.Fatal(err) + } + + readerMu.Lock() + defer readerMu.Unlock() + if readerSaw != 1 { + t.Errorf("reader saw %d hosts, want 1", readerSaw) + } +} + +func TestPipeline_RejectsNilConfig(t *testing.T) { + _, err := New(nil, Options{}) + if err == nil { + t.Error("expected error 
for nil config") + } +} + +func TestPipeline_EmptyRegistry_Errors(t *testing.T) { + r := module.NewRegistry() // empty + p, _ := New(&config.Config{Domain: "example.com"}, Options{Registry: r}) + if err := p.Run(context.Background()); err == nil { + t.Error("expected error when no modules selected") + } +} + +func contains(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} diff --git a/internal/proxyconf/proxy.go b/internal/proxyconf/proxy.go new file mode 100644 index 0000000..45f69a8 --- /dev/null +++ b/internal/proxyconf/proxy.go @@ -0,0 +1,174 @@ +// Package proxyconf centralises outbound-proxy configuration for the +// HTTP and (where possible) DNS clients used across God's Eye modules. +// +// Why this lives in its own package: every source/probe/module needs to +// honour the same proxy setting, and duplicating URL parsing + dialer +// wiring across `internal/http`, `internal/sources`, and individual +// modules would be a fountain of bugs. This package is the single +// source of truth. +// +// Supported schemes: +// +// "" β†’ direct (no proxy) +// http://host:port β†’ HTTP CONNECT proxy (e.g. Burp, ZAP, mitmproxy) +// https://host:port β†’ HTTPS CONNECT proxy +// socks5://host:port β†’ SOCKS5 (DNS resolved locally by god-eye) +// socks5h://host:port β†’ SOCKS5 (DNS resolved by the proxy β€” Tor convention) +// +// Basic auth (http://user:pass@host) is honoured for every scheme. +// +// DNS-over-SOCKS caveat: Go's net package uses the OS resolver by default, +// which does NOT route through SOCKS. `socks5h://` only applies to HTTP +// requests β€” the brute-force DNS resolver (`internal/dns`) continues to +// hit its configured resolvers directly. Users who need full Tor +// isolation for DNS should run god-eye inside a torsocks-wrapped shell +// or a netns with all traffic captured. 
+package proxyconf + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "strings" + + "golang.org/x/net/proxy" +) + +// DialFunc is the signature used by http.Transport.DialContext. +type DialFunc func(ctx context.Context, network, addr string) (net.Conn, error) + +// ProxyFunc is the signature used by http.Transport.Proxy. +type ProxyFunc func(*http.Request) (*url.URL, error) + +// Validate returns a descriptive error if proxyURL is non-empty and +// doesn't parse to a supported scheme. Call this early (e.g. during +// validator.ValidateXxx) so bad flags fail before module startup. +func Validate(proxyURL string) error { + proxyURL = strings.TrimSpace(proxyURL) + if proxyURL == "" { + return nil + } + u, err := url.Parse(proxyURL) + if err != nil { + return fmt.Errorf("proxy URL malformed: %w", err) + } + if u.Host == "" { + return errors.New("proxy URL missing host:port") + } + switch strings.ToLower(u.Scheme) { + case "http", "https", "socks5", "socks5h": + return nil + default: + return fmt.Errorf("unsupported proxy scheme %q (use http/https/socks5/socks5h)", u.Scheme) + } +} + +// BuildDialer returns a DialFunc that routes TCP through the configured +// proxy. For HTTP(S) CONNECT proxies (handled at the transport layer via +// Proxy field), this returns a direct dialer β€” the transport layer does +// the CONNECT dance itself. +// +// For empty proxyURL, returns the direct-dialer from net.Dialer. +func BuildDialer(proxyURL string, base *net.Dialer) (DialFunc, error) { + if base == nil { + base = &net.Dialer{} + } + if strings.TrimSpace(proxyURL) == "" { + return base.DialContext, nil + } + u, err := url.Parse(proxyURL) + if err != nil { + return nil, err + } + switch strings.ToLower(u.Scheme) { + case "http", "https": + // CONNECT proxy β€” direct TCP, Transport.Proxy handles the handshake. 
+ return base.DialContext, nil + case "socks5", "socks5h": + var auth *proxy.Auth + if u.User != nil { + pass, _ := u.User.Password() + auth = &proxy.Auth{User: u.User.Username(), Password: pass} + } + // proxy.Direct is the fallthrough dialer β€” we pass our base so + // timeouts/keepalive settings are preserved. + dialer, err := proxy.SOCKS5("tcp", u.Host, auth, &directAdapter{base: base}) + if err != nil { + return nil, fmt.Errorf("create SOCKS5 dialer: %w", err) + } + if ctxDialer, ok := dialer.(proxy.ContextDialer); ok { + return ctxDialer.DialContext, nil + } + // Older x/net versions: wrap non-context Dial with ctx-aware shim. + return func(ctx context.Context, network, addr string) (net.Conn, error) { + type result struct { + conn net.Conn + err error + } + ch := make(chan result, 1) + go func() { + c, e := dialer.Dial(network, addr) + ch <- result{c, e} + }() + select { + case r := <-ch: + return r.conn, r.err + case <-ctx.Done(): + return nil, ctx.Err() + } + }, nil + default: + return nil, fmt.Errorf("unsupported proxy scheme: %s", u.Scheme) + } +} + +// BuildProxyFunc returns the http.Transport.Proxy callback for HTTP(S) +// CONNECT proxies. Returns nil for SOCKS5 (handled by the dialer) and +// for empty proxyURL. +func BuildProxyFunc(proxyURL string) (ProxyFunc, error) { + if strings.TrimSpace(proxyURL) == "" { + return nil, nil + } + u, err := url.Parse(proxyURL) + if err != nil { + return nil, err + } + switch strings.ToLower(u.Scheme) { + case "http", "https": + return http.ProxyURL(u), nil + case "socks5", "socks5h": + return nil, nil + } + return nil, fmt.Errorf("unsupported proxy scheme: %s", u.Scheme) +} + +// Humanize returns a redacted, user-facing description of the proxy. +// Strips credentials so logs don't leak tokens. 
+func Humanize(proxyURL string) string { + proxyURL = strings.TrimSpace(proxyURL) + if proxyURL == "" { + return "direct (no proxy)" + } + u, err := url.Parse(proxyURL) + if err != nil { + return "invalid" + } + auth := "" + if u.User != nil { + auth = "(auth)@" + } + return fmt.Sprintf("%s://%s%s", u.Scheme, auth, u.Host) +} + +// directAdapter adapts a *net.Dialer to the proxy.Dialer interface so +// our configured timeouts/keepalive flow through to the socks hop. +type directAdapter struct { + base *net.Dialer +} + +func (d *directAdapter) Dial(network, addr string) (net.Conn, error) { + return d.base.Dial(network, addr) +} diff --git a/internal/proxyconf/proxy_test.go b/internal/proxyconf/proxy_test.go new file mode 100644 index 0000000..aae4268 --- /dev/null +++ b/internal/proxyconf/proxy_test.go @@ -0,0 +1,134 @@ +package proxyconf + +import "testing" + +func TestValidate(t *testing.T) { + cases := []struct { + in string + wantErr bool + }{ + {"", false}, + {"http://127.0.0.1:8080", false}, + {"https://proxy.corp:3128", false}, + {"socks5://127.0.0.1:9050", false}, + {"socks5h://127.0.0.1:9050", false}, + {"socks5h://user:pass@127.0.0.1:9050", false}, + {"ftp://x:21", true}, + {"socks4://x:1080", true}, + {"not a url", true}, + {"://nohost", true}, + {"http://", true}, + } + for _, c := range cases { + err := Validate(c.in) + if (err != nil) != c.wantErr { + t.Errorf("Validate(%q) err=%v wantErr=%v", c.in, err, c.wantErr) + } + } +} + +func TestBuildDialer_EmptyReturnsDirect(t *testing.T) { + d, err := BuildDialer("", nil) + if err != nil { + t.Fatal(err) + } + if d == nil { + t.Fatal("nil dialer") + } +} + +func TestBuildDialer_SOCKS5Accepted(t *testing.T) { + d, err := BuildDialer("socks5://127.0.0.1:9050", nil) + if err != nil { + t.Fatalf("SOCKS5 should construct: %v", err) + } + if d == nil { + t.Fatal("nil dialer") + } +} + +func TestBuildDialer_SOCKS5WithAuth(t *testing.T) { + d, err := BuildDialer("socks5h://user:pass@127.0.0.1:9050", nil) + if err != 
nil { + t.Fatalf("auth SOCKS5 should construct: %v", err) + } + if d == nil { + t.Fatal("nil dialer") + } +} + +func TestBuildDialer_HTTPProxyPassthrough(t *testing.T) { + // HTTP proxy uses Transport.Proxy; dialer should be direct-equivalent. + d, err := BuildDialer("http://127.0.0.1:8080", nil) + if err != nil { + t.Fatal(err) + } + if d == nil { + t.Fatal("nil dialer") + } +} + +func TestBuildDialer_UnsupportedScheme(t *testing.T) { + _, err := BuildDialer("ftp://127.0.0.1", nil) + if err == nil { + t.Error("expected error for unsupported scheme") + } +} + +func TestBuildProxyFunc_HTTPProxy(t *testing.T) { + fn, err := BuildProxyFunc("http://127.0.0.1:8080") + if err != nil { + t.Fatal(err) + } + if fn == nil { + t.Fatal("http:// should yield non-nil ProxyFunc") + } +} + +func TestBuildProxyFunc_SOCKSReturnsNil(t *testing.T) { + fn, err := BuildProxyFunc("socks5://127.0.0.1:9050") + if err != nil { + t.Fatal(err) + } + if fn != nil { + t.Error("SOCKS5 should return nil ProxyFunc (handled by dialer)") + } +} + +func TestBuildProxyFunc_EmptyReturnsNil(t *testing.T) { + fn, err := BuildProxyFunc("") + if err != nil || fn != nil { + t.Errorf("empty β†’ (nil, nil), got (%v, %v)", fn, err) + } +} + +func TestHumanize(t *testing.T) { + cases := map[string]string{ + "": "direct (no proxy)", + "http://proxy.corp:3128": "http://proxy.corp:3128", + "socks5://127.0.0.1:9050": "socks5://127.0.0.1:9050", + "socks5h://user:secret@10.0.0.1:443": "socks5h://(auth)@10.0.0.1:443", + } + for in, want := range cases { + if got := Humanize(in); got != want { + t.Errorf("Humanize(%q) = %q, want %q", in, got, want) + } + } +} + +func TestHumanize_LeaksNoCredentials(t *testing.T) { + const secret = "supersecret" + h := Humanize("socks5://user:" + secret + "@127.0.0.1:9050") + if contains(h, secret) { + t.Errorf("Humanize leaked credentials: %s", h) + } +} + +func contains(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } 
+ return false +} diff --git a/internal/scanner/helpers_test.go b/internal/scanner/helpers_test.go new file mode 100644 index 0000000..2687a37 --- /dev/null +++ b/internal/scanner/helpers_test.go @@ -0,0 +1,218 @@ +package scanner + +import ( + "os" + "path/filepath" + "reflect" + "sort" + "strings" + "testing" + + "god-eye/internal/config" +) + +func TestLoadWordlist(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "wordlist.txt") + + content := `# comment line +api +admin + +# another comment +dev + staging +test +` + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + got, err := LoadWordlist(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + want := []string{"api", "admin", "dev", "staging", "test"} + if !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + +func TestLoadWordlist_NonExistent(t *testing.T) { + _, err := LoadWordlist("/tmp/this-does-not-exist-xyz-abc.txt") + if err == nil { + t.Error("expected error for non-existent file") + } +} + +func TestLoadWordlist_Empty(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "empty.txt") + os.WriteFile(path, []byte(""), 0o644) + + got, err := LoadWordlist(path) + if err != nil { + t.Fatal(err) + } + if len(got) != 0 { + t.Errorf("expected empty result, got %v", got) + } +} + +func TestLoadWordlist_CommentsOnly(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "comments.txt") + os.WriteFile(path, []byte("# only comments\n# and more\n"), 0o644) + + got, _ := LoadWordlist(path) + if len(got) != 0 { + t.Errorf("expected empty result for comments-only file, got %v", got) + } +} + +func TestParseResolvers(t *testing.T) { + tests := []struct { + name string + in string + want []string + }{ + { + name: "empty uses defaults", + in: "", + want: config.DefaultResolvers, + }, + { + name: "single with port", + in: "8.8.8.8:53", + want: []string{"8.8.8.8:53"}, + }, + { + name: "single without 
port adds :53", + in: "8.8.8.8", + want: []string{"8.8.8.8:53"}, + }, + { + name: "multiple with mixed ports", + in: "8.8.8.8,1.1.1.1:5353,9.9.9.9", + want: []string{"8.8.8.8:53", "1.1.1.1:5353", "9.9.9.9:53"}, + }, + { + name: "whitespace trimmed", + in: " 8.8.8.8 , 1.1.1.1 ", + want: []string{"8.8.8.8:53", "1.1.1.1:53"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParseResolvers(tt.in) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ParseResolvers(%q) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestParsePorts(t *testing.T) { + tests := []struct { + name string + in string + want []int + }{ + {"empty uses defaults", "", []int{80, 443, 8080, 8443}}, + {"single valid", "80", []int{80}}, + {"multiple valid", "80,443,3000", []int{80, 443, 3000}}, + {"whitespace", " 80 , 443 ", []int{80, 443}}, + {"invalid silently dropped", "80,abc,443", []int{80, 443}}, + {"out of range dropped", "80,99999,443", []int{80, 443}}, + {"negative dropped", "80,-1,443", []int{80, 443}}, + {"zero dropped", "0,80,443", []int{80, 443}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParsePorts(tt.in) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ParsePorts(%q) = %v, want %v", tt.in, got, tt.want) + } + }) + } +} + +func TestCountActive(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": {StatusCode: 200}, + "b.example.com": {StatusCode: 301}, + "c.example.com": {StatusCode: 404}, + "d.example.com": {StatusCode: 500}, + "e.example.com": {StatusCode: 0}, // not probed + } + got := countActive(results) + if got != 2 { + t.Errorf("countActive = %d, want 2", got) + } +} + +func TestCountVulns(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": {OpenRedirect: true}, + "b.example.com": {CORSMisconfig: "wildcard with credentials"}, + "c.example.com": {DangerousMethods: []string{"PUT", "DELETE"}}, + "d.example.com": {GitExposed: true}, + 
"e.example.com": {BackupFiles: []string{"backup.sql"}}, + "f.example.com": {StatusCode: 200}, // clean + } + got := countVulns(results) + if got != 5 { + t.Errorf("countVulns = %d, want 5", got) + } +} + +func TestCountSubdomainsWithAI(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": {AIFindings: []string{"finding1"}}, + "b.example.com": {AIFindings: []string{"f1", "f2"}}, + "c.example.com": {}, // no AI findings + } + got := countSubdomainsWithAI(results) + if got != 2 { + t.Errorf("countSubdomainsWithAI = %d, want 2", got) + } +} + +func TestBuildAISummary(t *testing.T) { + results := map[string]*config.SubdomainResult{ + "a.example.com": { + AIFindings: []string{"Hardcoded API key", "Weak crypto"}, + AISeverity: "critical", + CVEFindings: []string{"CVE-2021-12345"}, + }, + "b.example.com": { + AIFindings: []string{"Missing CSP"}, + AISeverity: "medium", + }, + "c.example.com": { + AIFindings: []string{"ignored"}, + AISeverity: "info", + }, + } + got := buildAISummary(results) + if got == "" { + t.Fatal("summary is empty") + } + // Must mention severities + mustContain := []string{"critical", "high", "medium", "CRITICAL", "MEDIUM", "Hardcoded API key", "CVE-2021-12345"} + for _, s := range mustContain { + if !strings.Contains(got, s) { + t.Errorf("summary missing expected token %q in:\n%s", s, got) + } + } +} + +func TestSortedIntsInvariant(t *testing.T) { + // Sanity: whenever we sort ints we expect ascending order (tests ScanPorts sorting guarantee). 
+ in := []int{443, 80, 8080, 22} + sort.Ints(in) + if !sort.IntsAreSorted(in) { + t.Error("sort.IntsAreSorted returned false after sort.Ints") + } +} diff --git a/internal/scheduler/alerter.go b/internal/scheduler/alerter.go new file mode 100644 index 0000000..a0f4d1c --- /dev/null +++ b/internal/scheduler/alerter.go @@ -0,0 +1,63 @@ +package scheduler + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + "god-eye/internal/diff" +) + +// WebhookAlerter POSTs the diff report JSON to an arbitrary URL. Works +// with generic webhook consumers; Slack/Discord get dedicated adapters +// later in F5.3 when bespoke formatting matters. +type WebhookAlerter struct { + URL string + Timeout time.Duration +} + +// NewWebhookAlerter returns a WebhookAlerter with sane defaults. +func NewWebhookAlerter(url string) *WebhookAlerter { + return &WebhookAlerter{URL: url, Timeout: 10 * time.Second} +} + +func (a *WebhookAlerter) Name() string { return "webhook" } + +func (a *WebhookAlerter) Notify(ctx context.Context, r *diff.Report) error { + body, err := json.Marshal(r) + if err != nil { + return err + } + client := &http.Client{Timeout: a.Timeout} + req, err := http.NewRequestWithContext(ctx, "POST", a.URL, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", "god-eye-v2") + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode >= 400 { + return fmt.Errorf("webhook returned %d", resp.StatusCode) + } + return nil +} + +// StdoutAlerter prints meaningful changes to stdout. Useful for smoke +// testing and for users who pipe god-eye output into grep/jq. 
+type StdoutAlerter struct{} + +func (StdoutAlerter) Name() string { return "stdout" } + +func (StdoutAlerter) Notify(_ context.Context, r *diff.Report) error { + for _, c := range r.Changes { + fmt.Printf("[DIFF %s] %s %s β†’ %s (%s)\n", r.Target, c.Kind, c.Before, c.After, c.Host) + } + return nil +} diff --git a/internal/scheduler/scheduler.go b/internal/scheduler/scheduler.go new file mode 100644 index 0000000..7222428 --- /dev/null +++ b/internal/scheduler/scheduler.go @@ -0,0 +1,114 @@ +// Package scheduler runs a scan at fixed intervals for asm-continuous +// workflows. Each scan run feeds the diff engine; meaningful changes fan +// out to registered Alerters. +// +// Minimal implementation for Fase 5 skeleton: interval ticker + in-memory +// snapshot ring. Persistence (SQLite/BoltDB) and sophisticated scheduling +// (cron syntax, jitter) are follow-ups. +package scheduler + +import ( + "context" + "errors" + "sync" + "time" + + "god-eye/internal/diff" + "god-eye/internal/store" +) + +// ScanRun executes a single scan and returns the snapshot hosts. +type ScanRun func(ctx context.Context) (hosts []*store.Host, err error) + +// Alerter receives diff reports with meaningful changes. +type Alerter interface { + Notify(ctx context.Context, report *diff.Report) error + Name() string +} + +// Scheduler runs ScanRun on an interval. +type Scheduler struct { + Target string + Interval time.Duration + Run ScanRun + Alerters []Alerter + + mu sync.Mutex + lastSnap []*store.Host + lastAt time.Time +} + +// New constructs a scheduler. Every field is required except Alerters, +// which defaults to nil (no notifications). +func New(target string, interval time.Duration, run ScanRun) *Scheduler { + return &Scheduler{Target: target, Interval: interval, Run: run} +} + +// AddAlerter registers an Alerter that receives meaningful diff reports. +func (s *Scheduler) AddAlerter(a Alerter) { s.Alerters = append(s.Alerters, a) } + +// Start runs indefinitely until ctx is canceled. 
The first scan runs +// immediately, subsequent scans run on s.Interval cadence. +func (s *Scheduler) Start(ctx context.Context) error { + if s.Run == nil { + return errors.New("scheduler: nil Run") + } + if s.Interval <= 0 { + return errors.New("scheduler: Interval must be > 0") + } + + // First scan now (so continuous mode produces something immediately). + s.runOnce(ctx) + + t := time.NewTicker(s.Interval) + defer t.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.C: + s.runOnce(ctx) + } + } +} + +func (s *Scheduler) runOnce(ctx context.Context) { + if ctx.Err() != nil { + return + } + hosts, err := s.Run(ctx) + if err != nil { + // Scan failure is non-fatal for the scheduler itself; the next + // tick will try again. + return + } + + s.mu.Lock() + prev := s.lastSnap + prevAt := s.lastAt + s.lastSnap = hosts + s.lastAt = time.Now() + s.mu.Unlock() + + // No diff possible on the first run. + if prev == nil { + return + } + + report := diff.Compute(s.Target, prev, hosts, prevAt, time.Now()) + if !report.HasMeaningful() { + return + } + for _, a := range s.Alerters { + _ = a.Notify(ctx, report) + } +} + +// LastSnapshot returns the most recent scan snapshot + timestamp. Returns +// (nil, zero) before the first scan. 
+func (s *Scheduler) LastSnapshot() ([]*store.Host, time.Time) { + s.mu.Lock() + defer s.mu.Unlock() + return s.lastSnap, s.lastAt +} diff --git a/internal/scheduler/scheduler_test.go b/internal/scheduler/scheduler_test.go new file mode 100644 index 0000000..ada7890 --- /dev/null +++ b/internal/scheduler/scheduler_test.go @@ -0,0 +1,78 @@ +package scheduler + +import ( + "context" + "sync/atomic" + "testing" + "time" + + "god-eye/internal/diff" + "god-eye/internal/store" +) + +type spyAlerter struct{ called atomic.Int32 } + +func (s *spyAlerter) Name() string { return "spy" } +func (s *spyAlerter) Notify(_ context.Context, _ *diff.Report) error { + s.called.Add(1) + return nil +} + +func TestScheduler_RunsAndDiffsBetweenScans(t *testing.T) { + var callCount atomic.Int32 + scan := ScanRun(func(_ context.Context) ([]*store.Host, error) { + n := callCount.Add(1) + if n == 1 { + return []*store.Host{{Subdomain: "a.example.com"}}, nil + } + // Second scan adds a new host β€” meaningful diff. 
+ return []*store.Host{ + {Subdomain: "a.example.com"}, + {Subdomain: "b.example.com"}, + }, nil + }) + + s := New("example.com", 100*time.Millisecond, scan) + alerter := &spyAlerter{} + s.AddAlerter(alerter) + + ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond) + defer cancel() + _ = s.Start(ctx) + + if callCount.Load() < 2 { + t.Errorf("scan should have run at least twice, got %d", callCount.Load()) + } + if alerter.called.Load() == 0 { + t.Error("alerter should have been called on the second run") + } +} + +func TestScheduler_NoAlertOnIdenticalScans(t *testing.T) { + scan := ScanRun(func(_ context.Context) ([]*store.Host, error) { + return []*store.Host{{Subdomain: "a.example.com"}}, nil + }) + s := New("example.com", 50*time.Millisecond, scan) + alerter := &spyAlerter{} + s.AddAlerter(alerter) + + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + _ = s.Start(ctx) + + if alerter.called.Load() != 0 { + t.Errorf("alerter should not have been called on unchanged scans, got %d", alerter.called.Load()) + } +} + +func TestScheduler_RejectsBadParams(t *testing.T) { + s := &Scheduler{Target: "x", Interval: 0} + if err := s.Start(context.Background()); err == nil { + t.Error("expected error for zero interval") + } + + s2 := &Scheduler{Target: "x", Interval: time.Second, Run: nil} + if err := s2.Start(context.Background()); err == nil { + t.Error("expected error for nil Run") + } +} diff --git a/internal/sources/extra.go b/internal/sources/extra.go new file mode 100644 index 0000000..71db0da --- /dev/null +++ b/internal/sources/extra.go @@ -0,0 +1,167 @@ +// Additional passive sources added in v2.0 to close the gap with +// subfinder / BBOT. 
Every source here is: +// - Free and key-less (no API key required) +// - Defensive (fail-open β€” returns an empty slice on any error) +// - Bounded by the shared HTTP clients +// +// If a source goes offline upstream, the corresponding fetcher keeps +// returning empty β€” the scan still succeeds. + +package sources + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +// FetchOmnisint queries the free Omnisint Sonar mirror. It may be offline +// on any given day β€” fail-open. +func FetchOmnisint(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + u := fmt.Sprintf("https://sonar.omnisint.io/subdomains/%s", url.PathEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := StandardClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) + if err != nil { + return []string{}, nil + } + + var list []string + if err := json.Unmarshal(body, &list); err != nil { + return []string{}, nil + } + + seen := make(map[string]bool) + var out []string + for _, s := range list { + s = strings.ToLower(strings.TrimSpace(s)) + if s != "" && strings.HasSuffix(s, domain) && !seen[s] { + seen[s] = true + out = append(out, s) + } + } + return out, nil +} + +// FetchHudsonRock queries the free Cavalier InfoStealer intelligence API. +// Surfaces domain assets referenced in leaked stealer logs; useful for +// discovering shadow internal hostnames. 
+func FetchHudsonRock(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + u := fmt.Sprintf("https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-domain?domain=%s", url.QueryEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := StandardClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) + if err != nil { + return []string{}, nil + } + + // HudsonRock returns free-form JSON; we just mine every subdomain-like + // token from the response body via the shared regex. + return ExtractSubdomains(string(body), domain), nil +} + +// FetchWebArchiveCDX queries the Internet Archive CDX server β€” a richer +// variant of the existing Wayback source. Pulls URLs with fewer limits +// and extracts hostnames that match the target domain. +func FetchWebArchiveCDX(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + u := fmt.Sprintf("https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&collapse=urlkey&limit=5000&fl=original", url.QueryEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := SlowClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 16*1024*1024)) + if err != nil { + return []string{}, nil + } + + // Response shape: [["original"], ["url1"], ["url2"], ...] β€” first row + // is the header, subsequent rows are single-element arrays with the URL. 
+ var rows [][]string + if err := json.Unmarshal(body, &rows); err != nil { + return []string{}, nil + } + + seen := make(map[string]bool) + var out []string + for i, row := range rows { + if i == 0 { // skip header + continue + } + if len(row) == 0 { + continue + } + for _, host := range ExtractSubdomains(row[0], domain) { + if !seen[host] { + seen[host] = true + out = append(out, host) + } + } + } + return out, nil +} + +// FetchDigitorus queries the free Digitorus CT log mirror β€” an alternative +// to crt.sh that sometimes returns fresher data. +func FetchDigitorus(domain string) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + u := fmt.Sprintf("https://certificatedetails.com/api/find/%s", url.QueryEscape(domain)) + req, _ := http.NewRequestWithContext(ctx, "GET", u, nil) + req.Header.Set("User-Agent", "god-eye-v2") + + resp, err := StandardClient.Do(req) + if err != nil { + return []string{}, nil + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return []string{}, nil + } + body, err := io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024)) + if err != nil { + return []string{}, nil + } + + // Free-form JSON; mine hostnames. + return ExtractSubdomains(string(body), domain), nil +} diff --git a/internal/sources/shared.go b/internal/sources/shared.go index b4fb5ac..ce4691d 100644 --- a/internal/sources/shared.go +++ b/internal/sources/shared.go @@ -8,6 +8,8 @@ import ( "strings" "sync" "time" + + "god-eye/internal/proxyconf" ) // Shared HTTP clients - singleton pattern @@ -50,6 +52,65 @@ func init() { initRegex() } +// SetProxy configures outbound proxy for every shared HTTP client used +// by passive sources. Must be called BEFORE any Fetch* source function +// runs (init runs on package import, so main.go calls this after flag +// parsing but before pipeline start, which triggers a re-init via +// ReinitClients). 
+func SetProxy(u string) error { + if err := proxyconf.Validate(u); err != nil { + return err + } + proxyMu.Lock() + proxyURL = u + proxyMu.Unlock() + // Rebuild transports to pick up the new proxy. + reinitClients() + return nil +} + +var ( + proxyURL string + proxyMu sync.RWMutex +) + +// reinitClients rebuilds the shared transport and clients. Safe to call +// multiple times; in practice only called from SetProxy after startup. +func reinitClients() { + proxyMu.RLock() + cfgProxy := proxyURL + proxyMu.RUnlock() + + baseDialer := &net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 30 * time.Second, + } + dialCtx, err := proxyconf.BuildDialer(cfgProxy, baseDialer) + if err != nil { + dialCtx = baseDialer.DialContext + } + proxyFunc, _ := proxyconf.BuildProxyFunc(cfgProxy) + + sharedTransport = &http.Transport{ + DialContext: dialCtx, + Proxy: proxyFunc, + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + MaxConnsPerHost: 20, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + TLSClientConfig: &tls.Config{ + MinVersion: tls.VersionTLS12, + }, + ForceAttemptHTTP2: true, + ExpectContinueTimeout: 1 * time.Second, + } + + FastClient = &http.Client{Transport: sharedTransport, Timeout: 10 * time.Second} + StandardClient = &http.Client{Transport: sharedTransport, Timeout: 15 * time.Second} + SlowClient = &http.Client{Transport: sharedTransport, Timeout: 120 * time.Second} +} + func initClients() { clientOnce.Do(func() { // Shared transport with connection pooling diff --git a/internal/sources/shared_test.go b/internal/sources/shared_test.go new file mode 100644 index 0000000..4fab758 --- /dev/null +++ b/internal/sources/shared_test.go @@ -0,0 +1,171 @@ +package sources + +import ( + "reflect" + "sort" + "testing" + "time" +) + +func TestExtractSubdomains(t *testing.T) { + target := "example.com" + + tests := []struct { + name string + text string + want []string + }{ + { + name: "empty text", + text: "", + want: nil, + }, + { + name: "no 
matches", + text: "some text with no domains", + want: nil, + }, + { + name: "apex only", + text: "found example.com here", + want: []string{"example.com"}, + }, + { + name: "single subdomain", + text: "api.example.com was found", + want: []string{"api.example.com"}, + }, + { + name: "multiple subdomains", + text: "api.example.com and admin.example.com and dev.example.com", + want: []string{"admin.example.com", "api.example.com", "dev.example.com"}, + }, + { + name: "deduplication", + text: "api.example.com api.example.com api.example.com", + want: []string{"api.example.com"}, + }, + { + name: "uppercase normalized", + text: "API.EXAMPLE.COM and Api.Example.com", + want: []string{"api.example.com"}, + }, + { + name: "wildcard prefix stripped", + text: "*.example.com is a wildcard", + want: []string{"example.com"}, + }, + { + name: "different domain filtered", + text: "api.example.com and other.different.org and sub.example.com", + want: []string{"api.example.com", "sub.example.com"}, + }, + { + name: "partial match not allowed", + text: "evilexample.com should not match", + want: nil, + }, + { + name: "json-wrapped", + text: `{"name":"api.example.com","type":"A"}`, + want: []string{"api.example.com"}, + }, + { + name: "mixed with urls", + text: `Visit https://api.example.com and https://docs.example.com/path`, + want: []string{"api.example.com", "docs.example.com"}, + }, + { + // Regex is greedy: only the longest leftmost match is returned, + // not every suffix. This is the v1 baseline behavior. 
+ name: "deep subdomain longest match only", + text: "a.b.c.example.com", + want: []string{"a.b.c.example.com"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ExtractSubdomains(tt.text, target) + sort.Strings(got) + sort.Strings(tt.want) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ExtractSubdomains(%q)\n got: %v\n want: %v", tt.text, got, tt.want) + } + }) + } +} + +func TestGetClientForTimeout(t *testing.T) { + tests := []struct { + timeout time.Duration + want string // identify by Timeout field + }{ + {5 * time.Second, "fast"}, + {10 * time.Second, "fast"}, + {15 * time.Second, "standard"}, + {30 * time.Second, "standard"}, + {60 * time.Second, "slow"}, + {120 * time.Second, "slow"}, + } + + for _, tt := range tests { + c := GetClientForTimeout(tt.timeout) + if c == nil { + t.Fatalf("GetClientForTimeout(%v) returned nil", tt.timeout) + } + var gotClient string + switch c { + case FastClient: + gotClient = "fast" + case StandardClient: + gotClient = "standard" + case SlowClient: + gotClient = "slow" + default: + gotClient = "unknown" + } + if gotClient != tt.want { + t.Errorf("GetClientForTimeout(%v) = %s, want %s", tt.timeout, gotClient, tt.want) + } + } +} + +func TestClientsInitialized(t *testing.T) { + if FastClient == nil { + t.Error("FastClient is nil") + } + if StandardClient == nil { + t.Error("StandardClient is nil") + } + if SlowClient == nil { + t.Error("SlowClient is nil") + } + if FastClient.Timeout != 10*time.Second { + t.Errorf("FastClient.Timeout = %v, want 10s", FastClient.Timeout) + } + if StandardClient.Timeout != 15*time.Second { + t.Errorf("StandardClient.Timeout = %v, want 15s", StandardClient.Timeout) + } + if SlowClient.Timeout != 120*time.Second { + t.Errorf("SlowClient.Timeout = %v, want 120s", SlowClient.Timeout) + } +} + +func TestRegexCompiled(t *testing.T) { + if SubdomainRegex == nil { + t.Error("SubdomainRegex not compiled") + } + if EmailDomainRegex == nil { + 
t.Error("EmailDomainRegex not compiled") + } + if URLDomainRegex == nil { + t.Error("URLDomainRegex not compiled") + } + if JSONSubdomainRegex == nil { + t.Error("JSONSubdomainRegex not compiled") + } + if WildcardPrefixRegex == nil { + t.Error("WildcardPrefixRegex not compiled") + } +} diff --git a/internal/store/memory.go b/internal/store/memory.go new file mode 100644 index 0000000..eb98984 --- /dev/null +++ b/internal/store/memory.go @@ -0,0 +1,267 @@ +package store + +import ( + "context" + "sort" + "sync" + "time" +) + +// MemoryStore is the default in-memory Store implementation. Thread-safe, +// suitable for single-process scans. Persistent backends (BoltDB for ASM / +// resume workflows) land in Fase 5; they will implement the same Store +// interface so callers need no changes. +type MemoryStore struct { + mu sync.RWMutex + hosts map[string]*Host + // perHostLocks serializes Upsert mutations per-host without blocking + // independent hosts. It's populated lazily and never cleared β€” the number + // of subdomains per scan is bounded (thousands, not millions). + perHostLocks map[string]*sync.Mutex + locksMu sync.Mutex +} + +// NewMemoryStore creates an empty MemoryStore. +func NewMemoryStore() *MemoryStore { + return &MemoryStore{ + hosts: make(map[string]*Host), + perHostLocks: make(map[string]*sync.Mutex), + } +} + +// lockFor returns the mutex that protects mutations to subdomain, creating +// it lazily if needed. +func (s *MemoryStore) lockFor(subdomain string) *sync.Mutex { + s.locksMu.Lock() + defer s.locksMu.Unlock() + l, ok := s.perHostLocks[subdomain] + if !ok { + l = &sync.Mutex{} + s.perHostLocks[subdomain] = l + } + return l +} + +// Upsert creates or updates the record for subdomain, invoking mutate under +// a per-host lock. Concurrent callers mutating different subdomains proceed +// in parallel; concurrent mutations of the same subdomain are serialized. 
// Upsert creates or updates the record for subdomain, invoking mutate while
// holding that host's lock. Callers mutating different subdomains proceed in
// parallel; mutations of the same subdomain are serialized.
//
// It returns ctx.Err() if ctx is already done, and is a no-op returning nil
// for an empty subdomain. A nil mutate is allowed: the record is still
// created if missing and LastUpdated is still refreshed.
func (s *MemoryStore) Upsert(ctx context.Context, subdomain string, mutate func(*Host)) error {
	if err := ctx.Err(); err != nil {
		return err
	}
	if subdomain == "" {
		return nil
	}

	// Take the per-host lock first; it is held for the whole call,
	// including the mutate callback.
	hostLock := s.lockFor(subdomain)
	hostLock.Lock()
	defer hostLock.Unlock()

	// The map lock is held only for the lookup/insert and is released
	// before mutate runs.
	s.mu.Lock()
	h, existed := s.hosts[subdomain]
	if !existed {
		h = &Host{
			Subdomain: subdomain,
			FirstSeen: time.Now(),
		}
		s.hosts[subdomain] = h
	}
	s.mu.Unlock()

	if mutate != nil {
		mutate(h)
	}
	h.LastUpdated = time.Now()
	return nil
}

// Get returns a copy of the record for subdomain, produced by cloneHost
// while holding the per-host lock, so the caller cannot accidentally mutate
// store state. The boolean is false when no record exists. ctx is accepted
// for interface compatibility and is not consulted.
func (s *MemoryStore) Get(ctx context.Context, subdomain string) (*Host, bool) {
	s.mu.RLock()
	h, ok := s.hosts[subdomain]
	s.mu.RUnlock()
	if !ok {
		return nil, false
	}
	// Serialize with any in-flight Upsert for this host before copying.
	hostLock := s.lockFor(subdomain)
	hostLock.Lock()
	defer hostLock.Unlock()
	return cloneHost(h), true
}

// All returns every host, sorted by subdomain. Each returned Host is a copy
// obtained through Get; mutations to the slice or its elements do not affect
// the store. The name list is captured first and each host is copied
// afterwards, so the result is not a single atomic snapshot.
func (s *MemoryStore) All(ctx context.Context) []*Host {
	s.mu.RLock()
	names := make([]string, 0, len(s.hosts))
	for name := range s.hosts {
		names = append(names, name)
	}
	s.mu.RUnlock()
	sort.Strings(names)

	out := make([]*Host, 0, len(names))
	for _, n := range names {
		if h, ok := s.Get(ctx, n); ok {
			out = append(out, h)
		}
	}
	return out
}

// Count returns the number of hosts in the store.
func (s *MemoryStore) Count(ctx context.Context) int {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return len(s.hosts)
}

// Close is a no-op for MemoryStore; it exists to satisfy the Store
// interface and always returns nil.
func (s *MemoryStore) Close() error { return nil }
+func cloneHost(h *Host) *Host { + if h == nil { + return nil + } + c := *h + c.IPs = cloneStrings(h.IPs) + c.Technologies = cloneStrings(h.Technologies) + c.TLSAltNames = cloneStrings(h.TLSAltNames) + c.DiscoveredVia = cloneStrings(h.DiscoveredVia) + c.Ports = cloneInts(h.Ports) + c.Headers = cloneStringMap(h.Headers) + + if h.TLSFingerprint != nil { + fp := *h.TLSFingerprint + fp.InternalHosts = cloneStrings(h.TLSFingerprint.InternalHosts) + c.TLSFingerprint = &fp + } + if h.Takeover != nil { + t := *h.Takeover + c.Takeover = &t + } + + c.Vulnerabilities = cloneVulns(h.Vulnerabilities) + c.Secrets = cloneSecrets(h.Secrets) + c.CVEs = cloneCVEs(h.CVEs) + c.AIFindings = cloneAIFindings(h.AIFindings) + return &c +} + +func cloneStrings(in []string) []string { + if len(in) == 0 { + return nil + } + out := make([]string, len(in)) + copy(out, in) + return out +} + +func cloneInts(in []int) []int { + if len(in) == 0 { + return nil + } + out := make([]int, len(in)) + copy(out, in) + return out +} + +func cloneStringMap(in map[string]string) map[string]string { + if len(in) == 0 { + return nil + } + out := make(map[string]string, len(in)) + for k, v := range in { + out[k] = v + } + return out +} + +func cloneVulns(in []Vulnerability) []Vulnerability { + if len(in) == 0 { + return nil + } + out := make([]Vulnerability, len(in)) + for i, v := range in { + v.CVEs = cloneStrings(v.CVEs) + out[i] = v + } + return out +} + +func cloneSecrets(in []Secret) []Secret { + if len(in) == 0 { + return nil + } + out := make([]Secret, len(in)) + copy(out, in) + return out +} + +func cloneCVEs(in []CVE) []CVE { + if len(in) == 0 { + return nil + } + out := make([]CVE, len(in)) + copy(out, in) + return out +} + +func cloneAIFindings(in []AIFinding) []AIFinding { + if len(in) == 0 { + return nil + } + out := make([]AIFinding, len(in)) + for i, f := range in { + f.CVEs = cloneStrings(f.CVEs) + out[i] = f + } + return out +} + +// AppendUnique helpers β€” exported for modules that want to 
append slice +// fields without introducing duplicates. Keeps mutation semantics in one place. + +// AddDiscoveryMethod appends method to h.DiscoveredVia if not already present. +func AddDiscoveryMethod(h *Host, method string) { + for _, m := range h.DiscoveredVia { + if m == method { + return + } + } + h.DiscoveredVia = append(h.DiscoveredVia, method) +} + +// AddIPs appends new IPs (dedup, in-place). +func AddIPs(h *Host, ips []string) { + seen := make(map[string]bool, len(h.IPs)) + for _, ip := range h.IPs { + seen[ip] = true + } + for _, ip := range ips { + if ip == "" || seen[ip] { + continue + } + seen[ip] = true + h.IPs = append(h.IPs, ip) + } +} + +// AddTechnologies appends new technologies (dedup, in-place). +func AddTechnologies(h *Host, tech []string) { + seen := make(map[string]bool, len(h.Technologies)) + for _, t := range h.Technologies { + seen[t] = true + } + for _, t := range tech { + if t == "" || seen[t] { + continue + } + seen[t] = true + h.Technologies = append(h.Technologies, t) + } +} diff --git a/internal/store/memory_test.go b/internal/store/memory_test.go new file mode 100644 index 0000000..e16c037 --- /dev/null +++ b/internal/store/memory_test.go @@ -0,0 +1,263 @@ +package store + +import ( + "context" + "fmt" + "reflect" + "sort" + "sync" + "sync/atomic" + "testing" + "time" +) + +func TestUpsert_CreatesHost(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + + err := s.Upsert(ctx, "api.example.com", func(h *Host) { + h.IPs = []string{"1.2.3.4"} + h.StatusCode = 200 + }) + if err != nil { + t.Fatal(err) + } + + h, ok := s.Get(ctx, "api.example.com") + if !ok { + t.Fatal("Get returned !ok after Upsert") + } + if h.Subdomain != "api.example.com" { + t.Errorf("Subdomain = %q", h.Subdomain) + } + if !reflect.DeepEqual(h.IPs, []string{"1.2.3.4"}) { + t.Errorf("IPs = %v", h.IPs) + } + if h.StatusCode != 200 { + t.Errorf("StatusCode = %d", h.StatusCode) + } + if h.FirstSeen.IsZero() { + t.Error("FirstSeen not populated") + 
} + if h.LastUpdated.IsZero() { + t.Error("LastUpdated not populated") + } +} + +func TestUpsert_UpdatesExistingHost(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + + s.Upsert(ctx, "api.example.com", func(h *Host) { h.StatusCode = 200 }) + firstSeen, _ := s.Get(ctx, "api.example.com") + time.Sleep(5 * time.Millisecond) // ensure LastUpdated differs + + s.Upsert(ctx, "api.example.com", func(h *Host) { h.Title = "API" }) + + h, _ := s.Get(ctx, "api.example.com") + if h.StatusCode != 200 { + t.Errorf("StatusCode lost: %d", h.StatusCode) + } + if h.Title != "API" { + t.Errorf("Title not set: %q", h.Title) + } + if !h.FirstSeen.Equal(firstSeen.FirstSeen) { + t.Error("FirstSeen changed on update") + } + if !h.LastUpdated.After(firstSeen.LastUpdated) { + t.Error("LastUpdated did not advance") + } +} + +func TestUpsert_EmptySubdomainNoop(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + if err := s.Upsert(ctx, "", func(h *Host) {}); err != nil { + t.Errorf("unexpected error: %v", err) + } + if s.Count(ctx) != 0 { + t.Error("empty subdomain should be a noop") + } +} + +func TestUpsert_CanceledContext(t *testing.T) { + s := NewMemoryStore() + ctx, cancel := context.WithCancel(context.Background()) + cancel() + if err := s.Upsert(ctx, "a.example.com", func(h *Host) {}); err == nil { + t.Error("expected error for canceled context") + } +} + +func TestGet_Missing(t *testing.T) { + s := NewMemoryStore() + _, ok := s.Get(context.Background(), "none.example.com") + if ok { + t.Error("expected !ok for missing host") + } +} + +func TestGet_ReturnsCopy(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + s.Upsert(ctx, "a.example.com", func(h *Host) { + h.IPs = []string{"1.2.3.4"} + h.Technologies = []string{"nginx"} + h.Headers = map[string]string{"X-Test": "yes"} + h.TLSFingerprint = &TLSFingerprint{Vendor: "Fortinet", InternalHosts: []string{"internal.local"}} + }) + + a, _ := s.Get(ctx, "a.example.com") + // mutate 
returned host aggressively + a.IPs[0] = "MUTATED" + a.Technologies = append(a.Technologies, "INJECTED") + a.Headers["X-Test"] = "MUTATED" + a.TLSFingerprint.Vendor = "MUTATED" + a.TLSFingerprint.InternalHosts[0] = "MUTATED" + + b, _ := s.Get(ctx, "a.example.com") + if b.IPs[0] != "1.2.3.4" { + t.Errorf("IPs corrupted: %v", b.IPs) + } + if len(b.Technologies) != 1 { + t.Errorf("Technologies corrupted: %v", b.Technologies) + } + if b.Headers["X-Test"] != "yes" { + t.Errorf("Headers corrupted: %v", b.Headers) + } + if b.TLSFingerprint.Vendor != "Fortinet" { + t.Errorf("TLSFingerprint.Vendor corrupted: %q", b.TLSFingerprint.Vendor) + } + if b.TLSFingerprint.InternalHosts[0] != "internal.local" { + t.Errorf("InternalHosts corrupted: %v", b.TLSFingerprint.InternalHosts) + } +} + +func TestAll_Sorted(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + for _, name := range []string{"zeta.example.com", "alpha.example.com", "mid.example.com"} { + s.Upsert(ctx, name, func(h *Host) {}) + } + all := s.All(ctx) + got := make([]string, len(all)) + for i, h := range all { + got[i] = h.Subdomain + } + want := []string{"alpha.example.com", "mid.example.com", "zeta.example.com"} + if !reflect.DeepEqual(got, want) { + t.Errorf("All order = %v, want %v", got, want) + } +} + +func TestCount(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + if s.Count(ctx) != 0 { + t.Error("initial Count != 0") + } + s.Upsert(ctx, "a.example.com", func(h *Host) {}) + s.Upsert(ctx, "b.example.com", func(h *Host) {}) + s.Upsert(ctx, "a.example.com", func(h *Host) {}) // update, not new + if got := s.Count(ctx); got != 2 { + t.Errorf("Count = %d, want 2", got) + } +} + +func TestConcurrentUpserts_SameHost(t *testing.T) { + // All writers target the same host; only one value wins per field but + // no race should fire. 
+ s := NewMemoryStore() + ctx := context.Background() + + var wg sync.WaitGroup + const writers = 50 + var counter atomic.Int32 + for i := 0; i < writers; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + s.Upsert(ctx, "hot.example.com", func(h *Host) { + h.Technologies = append(h.Technologies, fmt.Sprintf("t%d", i)) + counter.Add(1) + }) + }(i) + } + wg.Wait() + + if counter.Load() != writers { + t.Errorf("not all mutators ran: %d/%d", counter.Load(), writers) + } + h, _ := s.Get(ctx, "hot.example.com") + if len(h.Technologies) != writers { + t.Errorf("expected %d technologies, got %d", writers, len(h.Technologies)) + } +} + +func TestConcurrentUpserts_DifferentHosts(t *testing.T) { + s := NewMemoryStore() + ctx := context.Background() + var wg sync.WaitGroup + const hosts = 200 + for i := 0; i < hosts; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + s.Upsert(ctx, fmt.Sprintf("h%d.example.com", i), func(h *Host) { + h.IPs = []string{"1.2.3.4"} + }) + }(i) + } + wg.Wait() + if got := s.Count(ctx); got != hosts { + t.Errorf("expected %d hosts, got %d", hosts, got) + } +} + +func TestClose_Idempotent(t *testing.T) { + s := NewMemoryStore() + if err := s.Close(); err != nil { + t.Fatal(err) + } + if err := s.Close(); err != nil { + t.Fatal(err) + } +} + +// ---------- Helper tests ---------- + +func TestAddDiscoveryMethod(t *testing.T) { + h := &Host{} + AddDiscoveryMethod(h, "passive:crt.sh") + AddDiscoveryMethod(h, "brute") + AddDiscoveryMethod(h, "passive:crt.sh") // duplicate + if !reflect.DeepEqual(h.DiscoveredVia, []string{"passive:crt.sh", "brute"}) { + t.Errorf("DiscoveredVia = %v", h.DiscoveredVia) + } +} + +func TestAddIPs_Dedup(t *testing.T) { + h := &Host{IPs: []string{"1.1.1.1"}} + AddIPs(h, []string{"1.1.1.1", "2.2.2.2", "", "3.3.3.3", "2.2.2.2"}) + sort.Strings(h.IPs) + want := []string{"1.1.1.1", "2.2.2.2", "3.3.3.3"} + if !reflect.DeepEqual(h.IPs, want) { + t.Errorf("IPs = %v, want %v", h.IPs, want) + } +} + +func 
TestAddTechnologies_Dedup(t *testing.T) { + h := &Host{Technologies: []string{"nginx"}} + AddTechnologies(h, []string{"nginx", "Go", "", "React", "Go"}) + sort.Strings(h.Technologies) + want := []string{"Go", "React", "nginx"} + if !reflect.DeepEqual(h.Technologies, want) { + t.Errorf("Technologies = %v, want %v", h.Technologies, want) + } +} + +func TestCloneHost_Nil(t *testing.T) { + if got := cloneHost(nil); got != nil { + t.Errorf("cloneHost(nil) = %v, want nil", got) + } +} diff --git a/internal/store/store.go b/internal/store/store.go new file mode 100644 index 0000000..3f4f45b --- /dev/null +++ b/internal/store/store.go @@ -0,0 +1,161 @@ +// Package store defines the Store interface used by pipeline modules to record +// per-host findings. Full implementations (in-memory + BoltDB-backed) live in +// this same package β€” this file only declares the interface so other packages +// can depend on it without pulling in storage backends. +package store + +import ( + "context" + "time" +) + +// Host is the aggregate per-subdomain record. Fields are populated +// incrementally as modules publish events. +// +// Field names intentionally mirror the legacy config.SubdomainResult shape so +// migrating JSON output in F0.6 is mechanical. Over time this struct will +// diverge (more fields, richer types) as v2 features land. 
// Host is the aggregate per-subdomain record. Fields are populated
// incrementally as modules publish events.
//
// Field names intentionally mirror the legacy config.SubdomainResult shape
// so migrating JSON output in F0.6 is mechanical. Over time this struct will
// diverge (more fields, richer types) as v2 features land.
type Host struct {
	// Subdomain is the hostname this record describes; the in-memory store
	// uses it as the record key.
	Subdomain string
	IPs       []string
	CNAME     string
	PTR       string

	// Resolution metadata
	ASN     string
	Org     string
	Country string
	City    string

	// HTTP probe
	URL           string
	StatusCode    int
	ContentLength int64
	Title         string
	Server        string
	Technologies  []string
	Headers       map[string]string
	ResponseMs    int64 // presumably response time in milliseconds; confirm against the prober

	// TLS
	TLSVersion     string
	TLSIssuer      string
	TLSExpiry      time.Time
	TLSSelfSigned  bool
	TLSAltNames    []string
	TLSFingerprint *TLSFingerprint // nil when no appliance fingerprint was recorded

	// Classification
	CloudProvider string
	WAF           string
	Ports         []int

	// Analysis
	Vulnerabilities []Vulnerability
	Secrets         []Secret
	CVEs            []CVE
	AIFindings      []AIFinding
	Takeover        *Takeover // nil when no takeover finding was recorded

	// Discovery metadata
	DiscoveredVia []string // e.g. ["passive:crt.sh", "brute"]
	FirstSeen     time.Time
	LastUpdated   time.Time
}

// TLSFingerprint identifies a security appliance (firewall, VPN, load
// balancer) from its TLS certificate.
type TLSFingerprint struct {
	Vendor        string
	Product       string
	Version       string
	ApplianceKind string
	InternalHosts []string
}

// Vulnerability is a single finding recorded on a host.
type Vulnerability struct {
	ID          string
	Title       string
	Description string
	Severity    string
	URL         string
	Evidence    string
	Remediation string
	CVEs        []string
	OWASP       string
	CVSS        float64
	FoundAt     time.Time
}

// Secret is a credential/token discovered on a host.
type Secret struct {
	Kind        string
	Match       string
	Value       string
	Location    string
	Validated   bool
	Severity    string
	Description string
	FoundAt     time.Time
}

// CVE is a CVE match correlated to a detected technology.
type CVE struct {
	ID          string
	Technology  string
	Version     string
	Severity    string
	CVSS        float64
	Description string
	URL         string
	InKEV       bool // NOTE(review): presumably "listed in the CISA KEV catalog"; confirm
	FoundAt     time.Time
}
// AIFinding is an AI/agent-produced insight.
type AIFinding struct {
	Agent       string
	Model       string
	Severity    string
	Title       string
	Description string
	Evidence    string
	CVEs        []string
	OWASP       string
	Confidence  float64
	FoundAt     time.Time
}

// Takeover is a confirmed or candidate subdomain takeover.
type Takeover struct {
	Service   string
	CNAME     string
	Evidence  string
	PoC       string
	Confirmed bool // distinguishes a confirmed takeover from a candidate
	FoundAt   time.Time
}

// Store is the aggregate interface modules use to record findings. Methods
// must be safe for concurrent use by many goroutines.
type Store interface {
	// Upsert creates the record for subdomain if it does not exist yet and
	// then passes it to mutate, which applies the caller's changes directly
	// to the Host. The mutator is invoked under a per-host lock so
	// concurrent callers see consistent state.
	Upsert(ctx context.Context, subdomain string, mutate func(*Host)) error

	// Get returns a snapshot copy of the record for subdomain. The boolean
	// reports whether a record exists.
	Get(ctx context.Context, subdomain string) (*Host, bool)

	// All returns a snapshot slice of every host. The slice is sorted by
	// subdomain for deterministic output.
	All(ctx context.Context) []*Host

	// Count returns the number of hosts in the store.
	Count(ctx context.Context) int

	// Close releases resources (e.g. BoltDB handle). Idempotent.
	Close() error
}
+package tui + +import ( + "context" + "fmt" + "sync/atomic" + "time" + + "god-eye/internal/eventbus" + "god-eye/internal/output" +) + +// LivePrinter subscribes to every event on a bus and prints a one-line +// summary to stdout as they arrive. Safe to attach alongside the regular +// report module β€” this is purely an observability layer. +type LivePrinter struct { + bus *eventbus.Bus + sub *eventbus.Subscription + verbosity int // 0 = quiet (vulns only), 1 = normal (discovery+vulns), 2 = noisy + started time.Time + + evCount atomic.Uint64 +} + +// NewLivePrinter attaches to bus and begins printing. +// +// verbosity levels: +// +// 0 β€” only vulnerabilities, takeovers, secrets, CVEs +// 1 β€” above + subdomain discovery + HTTP probe summaries +// 2 β€” everything, including module errors and phase markers +func NewLivePrinter(bus *eventbus.Bus, verbosity int) *LivePrinter { + p := &LivePrinter{bus: bus, verbosity: verbosity, started: time.Now()} + p.sub = bus.SubscribeAll(p.handle) + return p +} + +// Close unsubscribes from the bus and prints a summary footer. 
+func (p *LivePrinter) Close() { + if p.sub != nil { + p.sub.Unsubscribe() + } + dur := time.Since(p.started).Round(time.Millisecond) + fmt.Printf("%s scan elapsed %s, %d events seen\n", + output.Dim("Β·"), output.BoldGreen(dur.String()), p.evCount.Load()) +} + +func (p *LivePrinter) handle(_ context.Context, e eventbus.Event) { + p.evCount.Add(1) + switch ev := e.(type) { + case eventbus.SubdomainDiscovered: + if p.verbosity >= 1 { + fmt.Printf("%s %s %s\n", output.Dim("↳"), output.Cyan(ev.Method), ev.Subdomain) + } + case eventbus.DNSResolved: + if p.verbosity >= 2 { + fmt.Printf("%s %s %s\n", output.Dim("⏚"), ev.Subdomain, output.Dim(joinIPs(ev.IPs))) + } + case eventbus.HTTPProbed: + if p.verbosity >= 1 { + color := statusColor(ev.StatusCode) + fmt.Printf("%s %s %s %s\n", color, ev.URL, output.Dim(fmt.Sprintf("[%d]", ev.StatusCode)), output.Dim(ev.Title)) + } + case eventbus.VulnerabilityFound: + fmt.Printf("%s %s %s %s\n", sevBadge(ev.Severity), output.BoldWhite(ev.Title), output.Dim(ev.URL), output.Dim(ev.ID)) + case eventbus.SecretFound: + fmt.Printf("%s %s %s %s\n", sevBadge(ev.Severity), output.BoldWhite("SECRET:"+ev.Kind), ev.Location, output.Dim(ev.Match)) + case eventbus.TakeoverCandidate: + fmt.Printf("%s %s %s service=%s\n", sevBadge(eventbus.SeverityHigh), output.BoldYellow("TAKEOVER?"), ev.Subdomain, ev.Service) + case eventbus.TakeoverConfirmed: + fmt.Printf("%s %s %s service=%s\n", sevBadge(eventbus.SeverityCritical), output.BgRed(" TAKEOVER "), ev.Subdomain, ev.Service) + case eventbus.CVEMatch: + fmt.Printf("%s %s %s@%s β†’ %s\n", sevBadge(ev.Severity), output.BoldWhite("CVE"), ev.Technology, ev.Version, ev.CVE) + case eventbus.AIFinding: + fmt.Printf("%s %s %s %s\n", sevBadge(ev.Severity), output.BoldMagenta("AI:"+ev.Agent), output.Dim(ev.Subject), ev.Title) + case eventbus.ModuleError: + if p.verbosity >= 2 { + fmt.Printf("%s %s %s\n", output.Red("⚠"), output.Dim(ev.Module), ev.Err) + } + case eventbus.PhaseStarted: + if p.verbosity >= 1 { + 
fmt.Printf("%s %s\n", output.Dim("β–Ά"), output.BoldCyan("phase "+ev.Phase)) + } + case eventbus.PhaseCompleted: + if p.verbosity >= 1 { + fmt.Printf("%s %s %s\n", output.Dim("β–£"), output.Dim("phase "+ev.Phase), output.Dim(ev.Duration.Round(time.Millisecond).String())) + } + } +} + +func sevBadge(s eventbus.Severity) string { + switch s { + case eventbus.SeverityCritical: + return output.BgRed(" CRIT ") + case eventbus.SeverityHigh: + return output.Red("[HIGH]") + case eventbus.SeverityMedium: + return output.Yellow("[MED]") + case eventbus.SeverityLow: + return output.Blue("[LOW]") + default: + return output.Dim("[INFO]") + } +} + +func statusColor(code int) string { + switch { + case code >= 200 && code < 300: + return output.Green("●") + case code >= 300 && code < 400: + return output.Yellow("◐") + case code >= 400 && code < 500: + return output.Red("β—‹") + case code >= 500: + return output.BoldRed("βœ•") + default: + return output.Dim("Β·") + } +} + +func joinIPs(ips []string) string { + out := "[" + for i, ip := range ips { + if i > 0 { + out += "," + } + out += ip + } + return out + "]" +} diff --git a/internal/validator/validator_test.go b/internal/validator/validator_test.go new file mode 100644 index 0000000..87ecd36 --- /dev/null +++ b/internal/validator/validator_test.go @@ -0,0 +1,249 @@ +package validator + +import ( + "strings" + "testing" +) + +func TestValidateDomain(t *testing.T) { + v := DefaultDomainValidator() + + tests := []struct { + name string + input string + wantErr bool + }{ + {"simple domain", "example.com", false}, + {"subdomain", "api.example.com", false}, + {"deep subdomain", "a.b.c.example.com", false}, + {"hyphen in middle", "my-site.example.com", false}, + {"co.uk tld", "example.co.uk", false}, + {"uppercase", "EXAMPLE.COM", false}, + + {"empty", "", true}, + {"whitespace only", " ", true}, + {"with scheme http", "http://example.com", true}, + {"with scheme https", "https://example.com", true}, + {"path traversal", 
"example.com/../etc", true}, + {"shell metachar ;", "example.com;whoami", true}, + {"shell metachar |", "example.com|whoami", true}, + {"shell metachar &", "example.com&ls", true}, + {"backtick", "example.com`id`", true}, + {"dollar", "example.com$USER", true}, + {"newline", "example.com\nmalicious", true}, + {"null byte", "example.com\x00.evil", true}, + {"leading hyphen label", "-example.com", true}, + {"trailing hyphen label", "example-.com", true}, + {"double dot", "example..com", true}, + {"label too long", strings.Repeat("a", 64) + ".com", true}, + {"domain too long", strings.Repeat("a.", 130) + "com", true}, + {"numeric tld", "example.123", true}, + {"single label", "localhost", true}, + {"angle brackets", "