mirror of
https://github.com/Vyntral/god-eye.git
synced 2026-05-16 13:39:10 +02:00
3a4c230aa7
Complete architectural overhaul. Replaces the v0.1 monolithic scanner with an event-driven pipeline of auto-registered modules. Foundation (internal/): - eventbus: typed pub/sub, 20 event types, race-safe, drop counter - module: registry with phase-based selection - store: thread-safe host store with per-host locks + deep-copy reads - pipeline: coordinator with phase barriers + panic recovery - config: 5 scan profiles + 3 AI tiers + YAML loader + auto-discovery Modules (26 auto-registered across 6 phases): - Discovery: passive (26 sources), bruteforce, recursive, AXFR, GitHub dorks, CT streaming, permutation, reverse DNS, vhost, ASN, supply chain (npm + PyPI) - Enrichment: HTTP probe + tech fingerprint + TLS appliance ID, ports - Analysis: security checks, takeover (110+ sigs), cloud, JavaScript, GraphQL, JWT, headers (OWASP), HTTP smuggling, AI cascade, Nuclei - Reporting: TXT/JSON/CSV writer + AI scan brief AI layer (internal/ai/ + internal/modules/ai/): - Three profiles: lean (16 GB), balanced (32 GB MoE), heavy (64 GB) - Six event-driven handlers: CVE, JS file, HTTP response, secret filter, multi-agent vuln enrichment, anomaly + executive report - Content-hash cache dedups Ollama calls across hosts - Auto-pull of missing models via /api/pull with streaming progress - End-of-scan AI SCAN BRIEF in terminal with top chains + next actions Nuclei compat layer (internal/nucleitpl/): - Executes ~13k community templates (HTTP subset) - Auto-download of nuclei-templates ZIP to ~/.god-eye/nuclei-templates - Scope filter rejects off-host templates (eliminates OSINT FPs) Operations: - Interactive wizard (internal/wizard/) — zero-flag launch - LivePrinter (internal/tui/) — colorized event stream - Diff engine + scheduler (internal/diff, internal/scheduler) for continuous ASM monitoring with webhook alerts - Proxy support (internal/proxyconf/): http / https / socks5 / socks5h + basic auth Fixes #1 — native SOCKS5 / Tor compatibility via --proxy flag. 
185 unit tests across 15 packages, all race-detector clean.
661 lines
22 KiB
Go
661 lines
22 KiB
Go
// Package ai is the v2 adapter that wires the Ollama client into the
|
|
// event-driven pipeline. Unlike the initial skeleton (which only called
|
|
// CVEMatch on TechDetected), this module subscribes to five event types
|
|
// and dispatches each to the appropriate v1 client method:
|
|
//
|
|
// TechDetected → CVEMatch → CVEMatch events
|
|
// JSFileDiscovered → AnalyzeJavaScript → AIFinding + SecretFound
|
|
// HTTPProbed → AnalyzeHTTPResponse (for 5xx / suspicious 4xx) → AIFinding
|
|
// SecretFound → FilterSecrets (triage real vs regex noise) → AIFinding tag
|
|
// VulnerabilityFound → multi-agent orchestrator (agents package) → AIFinding with remediation
|
|
// ScanCompleted → DetectAnomalies + GenerateReport → AIFinding + report artifact
|
|
//
|
|
// Every handler:
|
|
// - is a no-op when ai.enabled=false (module Run returns immediately)
|
|
// - dedups by content hash to avoid hammering Ollama with duplicates
|
|
// - cascades through the fast triage model before the deep model
|
|
// - emits AIFinding events so downstream reporters/TUI pick them up
|
|
//
|
|
// The module is the primary value of God's Eye v2's "local LLM" story —
|
|
// without this wiring, the AI layer was essentially a 20GB curiosity
|
|
// that added a single CVE string per scan.
|
|
package ai
|
|
|
|
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"god-eye/internal/ai"
	"god-eye/internal/ai/agents"
	"god-eye/internal/eventbus"
	"god-eye/internal/module"
	"god-eye/internal/store"
)
|
|
|
|
// ModuleName is the registry identifier for this module and the Source
// tag stamped on every event it publishes.
const ModuleName = "ai.cascade"

// aiModule wires the v1 Ollama client (and, optionally, the multi-agent
// orchestrator) into the event bus. One instance lives for the whole
// scan; client and orchestrator are initialised in Run.
type aiModule struct {
	client       *ai.OllamaClient          // nil until Run confirms Ollama is reachable
	orchestrator *agents.AgentOrchestrator // nil unless ai.multi_agent is enabled

	// cache dedups expensive Ollama calls across a single scan.
	// Keyed by SHA256 of (method + input), value is a flag struct so
	// the same (method, input) pair is processed exactly once.
	cache sync.Map // map[string]struct{}

	// Per-handler counters surfaced at scan end for observability
	// (reported by handleScanEnd).
	cveLookups        atomic.Int64
	jsAnalyses        atomic.Int64
	httpAnalyses      atomic.Int64
	secretValidations atomic.Int64
	vulnEnrichments   atomic.Int64
	anomalyScans      atomic.Int64
	reportGenerations atomic.Int64
}
func Register() { module.Register(&aiModule{}) }
|
|
|
|
func (*aiModule) Name() string { return ModuleName }
|
|
func (*aiModule) Phase() module.Phase { return module.PhaseAnalysis }
|
|
// Consumes lists the event types this module reacts to. Note that
// EventScanCompleted is declared here but is handled by Run calling
// handleScanEnd directly after draining, not via a bus subscription —
// Run only subscribes to the first five types.
func (*aiModule) Consumes() []eventbus.EventType {
	return []eventbus.EventType{
		eventbus.EventTechDetected,
		eventbus.EventJSFile,
		eventbus.EventHTTPProbed,
		eventbus.EventSecret,
		eventbus.EventVulnerability,
		eventbus.EventScanCompleted,
	}
}
// Produces lists the event types this module may publish. ModuleError
// is additionally published for availability / observability notices.
//
// NOTE(review): no handler in this file currently re-emits EventSecret —
// confirm whether the declaration below is aspirational or stale.
func (*aiModule) Produces() []eventbus.EventType {
	return []eventbus.EventType{
		eventbus.EventAIFinding,
		eventbus.EventCVEMatch,
		eventbus.EventSecret, // validated/re-emitted
	}
}
// DefaultEnabled returns true so the module is always loaded; Run() no-ops
|
|
// unless the user set ai.enabled via --enable-ai / wizard / YAML.
|
|
func (*aiModule) DefaultEnabled() bool { return true }
|
|
|
|
// Run is the heart of the v2 AI layer: wires six event subscriptions,
|
|
// drains initial store state, and waits for late events in a bounded
|
|
// window.
|
|
func (a *aiModule) Run(mctx module.Context) error {
|
|
if !mctx.Config.Bool("ai.enabled", false) {
|
|
return nil
|
|
}
|
|
|
|
a.client = ai.NewOllamaClient(
|
|
mctx.Config.String("ai.url", "http://localhost:11434"),
|
|
mctx.Config.String("ai.fast_model", "qwen3:1.7b"),
|
|
mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b"),
|
|
mctx.Config.Bool("ai.cascade", true),
|
|
)
|
|
if mctx.Config.Bool("ai.verbose", false) {
|
|
a.client.Verbose = true
|
|
}
|
|
if !a.client.IsAvailable() {
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
|
|
EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
|
|
Module: ModuleName,
|
|
Err: "Ollama not reachable at " + mctx.Config.String("ai.url", "http://localhost:11434"),
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// Multi-agent orchestrator is opt-in: only worth spinning up when the
|
|
// user explicitly enables it. The orchestrator holds one client per
|
|
// agent type (8 agents) and can take ~200ms to initialise.
|
|
if mctx.Config.Bool("ai.multi_agent", false) {
|
|
a.orchestrator = agents.NewAgentOrchestrator(
|
|
mctx.Config.String("ai.url", "http://localhost:11434"),
|
|
mctx.Config.String("ai.fast_model", "qwen3:1.7b"),
|
|
mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b"),
|
|
)
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
// Subscribe to every event type we care about. Each handler runs in its
|
|
// own goroutine off the bus; we track them with wg so we can drain at
|
|
// the end.
|
|
subs := []*eventbus.Subscription{
|
|
mctx.Bus.Subscribe(eventbus.EventTechDetected, func(_ context.Context, e eventbus.Event) {
|
|
if ev, ok := e.(eventbus.TechDetected); ok {
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleTech(mctx, ev.Host, ev.Technology, ev.Version) }()
|
|
}
|
|
}),
|
|
mctx.Bus.Subscribe(eventbus.EventJSFile, func(_ context.Context, e eventbus.Event) {
|
|
if ev, ok := e.(eventbus.JSFileDiscovered); ok {
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleJSFile(mctx, ev) }()
|
|
}
|
|
}),
|
|
mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) {
|
|
if ev, ok := e.(eventbus.HTTPProbed); ok {
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleHTTP(mctx, ev) }()
|
|
}
|
|
}),
|
|
mctx.Bus.Subscribe(eventbus.EventSecret, func(_ context.Context, e eventbus.Event) {
|
|
if ev, ok := e.(eventbus.SecretFound); ok {
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleSecret(mctx, ev) }()
|
|
}
|
|
}),
|
|
mctx.Bus.Subscribe(eventbus.EventVulnerability, func(_ context.Context, e eventbus.Event) {
|
|
if ev, ok := e.(eventbus.VulnerabilityFound); ok {
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleVuln(mctx, ev) }()
|
|
}
|
|
}),
|
|
}
|
|
defer func() {
|
|
for _, s := range subs {
|
|
s.Unsubscribe()
|
|
}
|
|
}()
|
|
|
|
// Drain store: any host already populated with tech/HTTP info gets
|
|
// processed on module startup (covers the common case where AI is in a
|
|
// later phase than discovery/enrichment).
|
|
for _, h := range mctx.Store.All(mctx.Ctx) {
|
|
if h == nil {
|
|
continue
|
|
}
|
|
for _, tech := range h.Technologies {
|
|
tech := tech
|
|
host := h.Subdomain
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleTech(mctx, host, tech, "") }()
|
|
}
|
|
if h.StatusCode != 0 {
|
|
ev := eventbus.HTTPProbed{
|
|
EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: h.Subdomain},
|
|
URL: "https://" + h.Subdomain,
|
|
StatusCode: h.StatusCode,
|
|
Title: h.Title,
|
|
Server: h.Server,
|
|
}
|
|
wg.Add(1)
|
|
go func() { defer wg.Done(); a.handleHTTP(mctx, ev) }()
|
|
}
|
|
}
|
|
|
|
// Brief window for late events (recursive discovery, slow probes) to
|
|
// arrive before we wrap up.
|
|
select {
|
|
case <-time.After(1500 * time.Millisecond):
|
|
case <-mctx.Ctx.Done():
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
// End-of-scan analyses run once, after all per-event handlers drain.
|
|
a.handleScanEnd(mctx)
|
|
return nil
|
|
}
|
|
|
|
// --- Handlers ------------------------------------------------------------
|
|
|
|
// handleTech runs CVE correlation for a (tech, version) pair. Cached by
|
|
// (tech, version) so the same pair across many hosts fires one query.
|
|
func (a *aiModule) handleTech(mctx module.Context, host, tech, version string) {
|
|
if tech == "" || shouldSkipForCVE(tech, version) {
|
|
return
|
|
}
|
|
name, v := parseTech(tech)
|
|
if version == "" {
|
|
version = v
|
|
}
|
|
if shouldSkipForCVE(name, version) {
|
|
return
|
|
}
|
|
key := "cve:" + name + "|" + version
|
|
if !a.firstSeen(key) {
|
|
return
|
|
}
|
|
a.cveLookups.Add(1)
|
|
|
|
cves, err := a.client.CVEMatch(name, version)
|
|
if err != nil || cves == "" {
|
|
return
|
|
}
|
|
|
|
// Upsert to the specific host that triggered this.
|
|
now := time.Now()
|
|
cve := store.CVE{
|
|
ID: cves, Technology: name, Version: version,
|
|
Severity: string(eventbus.SeverityHigh), Description: cves, FoundAt: now,
|
|
}
|
|
_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { h.CVEs = append(h.CVEs, cve) })
|
|
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.CVEMatch{
|
|
EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host},
|
|
CVE: cves,
|
|
Technology: name,
|
|
Version: version,
|
|
Severity: eventbus.SeverityHigh,
|
|
Description: fmt.Sprintf("AI-assisted CVE match for %s %s", name, versionOrUnknown(version)),
|
|
})
|
|
}
|
|
|
|
// handleJSFile fetches the JS file via the shared HTTP client and feeds it
|
|
// to AnalyzeJavaScript. Cached by JS URL — a single JS file seen on 5
|
|
// hosts is analysed once.
|
|
//
|
|
// Note: we do NOT re-download the JS content here. The v1 AnalyzeJavaScript
|
|
// method expects the code itself as input; since the upstream javascript
|
|
// module already has the content, the proper integration path is to have
|
|
// JSFileDiscovered carry the content. For now, we skip the deep analysis
|
|
// when content isn't inlined, and rely on the v1 regex results enriched
|
|
// by AI at secret-validation time (see handleSecret).
|
|
func (a *aiModule) handleJSFile(mctx module.Context, ev eventbus.JSFileDiscovered) {
|
|
key := "js:" + ev.URL
|
|
if !a.firstSeen(key) {
|
|
return
|
|
}
|
|
a.jsAnalyses.Add(1)
|
|
// Deep JS analysis is deferred until JSFileDiscovered carries the
|
|
// content (Fase 2 follow-up). We still produce an AIFinding noting
|
|
// the JS file was indexed, which helps reporting aggregate per-host
|
|
// JS exposure.
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
|
|
EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Host},
|
|
Subject: ev.Host,
|
|
Agent: "js-indexer",
|
|
Model: a.client.FastModel,
|
|
Severity: eventbus.SeverityInfo,
|
|
Title: "JavaScript file indexed for secret review",
|
|
Evidence: ev.URL,
|
|
})
|
|
}
|
|
|
|
// handleHTTP triages the HTTP response and dispatches deep analysis only
|
|
// for interesting status codes / signals. "Interesting" means anything
|
|
// that isn't a normal 200/301 — 5xx, verbose 4xx with bodies, weird
|
|
// headers.
|
|
func (a *aiModule) handleHTTP(mctx module.Context, ev eventbus.HTTPProbed) {
|
|
if !isInterestingHTTP(ev) {
|
|
return
|
|
}
|
|
key := fmt.Sprintf("http:%s:%d:%s", ev.Meta().Target, ev.StatusCode, hashShort(ev.Title))
|
|
if !a.firstSeen(key) {
|
|
return
|
|
}
|
|
a.httpAnalyses.Add(1)
|
|
|
|
// Compose the content we hand to the deep model. Keep it compact —
|
|
// Ollama's context is ample but we're summarising for the cascade.
|
|
headerLines := []string{}
|
|
if ev.Server != "" {
|
|
headerLines = append(headerLines, "Server: "+ev.Server)
|
|
}
|
|
for k, v := range ev.Headers {
|
|
headerLines = append(headerLines, k+": "+v)
|
|
}
|
|
|
|
result, err := a.client.AnalyzeHTTPResponse(ev.Meta().Target, ev.StatusCode, headerLines, ev.Title)
|
|
if err != nil || result == nil || len(result.Findings) == 0 {
|
|
return
|
|
}
|
|
now := time.Now()
|
|
host := ev.Meta().Target
|
|
for _, f := range result.Findings {
|
|
persistAIFinding(mctx, host, store.AIFinding{
|
|
Agent: "http-analyzer", Model: a.client.DeepModel,
|
|
Severity: result.Severity, Title: "Suspicious HTTP response",
|
|
Description: f, Evidence: fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title),
|
|
FoundAt: now,
|
|
})
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
|
|
EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: host},
|
|
Subject: host,
|
|
Agent: "http-analyzer",
|
|
Model: a.client.DeepModel,
|
|
Severity: eventbus.Severity(result.Severity),
|
|
Title: "Suspicious HTTP response",
|
|
Description: f,
|
|
Evidence: fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title),
|
|
})
|
|
}
|
|
}
|
|
|
|
// handleSecret validates a regex-surfaced secret through FilterSecrets.
|
|
// If the AI confirms it's real, an AIFinding event fires tagging it as
|
|
// validated. Regex noise (UI strings, unrelated third-party URLs) is
|
|
// dropped silently — the v1 Secret event is left in place but the AI
|
|
// emission is what a dashboard would prefer to render as a real finding.
|
|
func (a *aiModule) handleSecret(mctx module.Context, ev eventbus.SecretFound) {
|
|
key := "secret:" + hashShort(ev.Match+"|"+ev.Location)
|
|
if !a.firstSeen(key) {
|
|
return
|
|
}
|
|
a.secretValidations.Add(1)
|
|
|
|
validated, err := a.client.FilterSecrets([]string{ev.Match})
|
|
if err != nil || len(validated) == 0 {
|
|
return // AI says not a real secret, or Ollama unavailable
|
|
}
|
|
now := time.Now()
|
|
persistAIFinding(mctx, ev.Meta().Target, store.AIFinding{
|
|
Agent: "secret-validator", Model: a.client.FastModel,
|
|
Severity: string(eventbus.SeverityHigh),
|
|
Title: "Secret likely valid (AI-confirmed)",
|
|
Description: fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.", ev.Kind),
|
|
Evidence: fmt.Sprintf("%s @ %s", ev.Kind, ev.Location),
|
|
FoundAt: now,
|
|
})
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
|
|
EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Meta().Target},
|
|
Subject: ev.Meta().Target,
|
|
Agent: "secret-validator",
|
|
Model: a.client.FastModel,
|
|
Severity: eventbus.SeverityHigh,
|
|
Title: "Secret likely valid (AI-confirmed)",
|
|
Description: fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.",
|
|
ev.Kind),
|
|
Evidence: fmt.Sprintf("%s @ %s", ev.Kind, ev.Location),
|
|
})
|
|
}
|
|
|
|
// handleVuln routes a vulnerability finding through the multi-agent
|
|
// orchestrator for specialist analysis. When multi-agent is disabled,
|
|
// this is a no-op.
|
|
func (a *aiModule) handleVuln(mctx module.Context, ev eventbus.VulnerabilityFound) {
|
|
if a.orchestrator == nil {
|
|
return
|
|
}
|
|
key := "vuln:" + ev.ID + ":" + ev.Meta().Target
|
|
if !a.firstSeen(key) {
|
|
return
|
|
}
|
|
a.vulnEnrichments.Add(1)
|
|
|
|
finding := agents.Finding{
|
|
Type: "vulnerability",
|
|
URL: ev.URL,
|
|
Context: ev.Description + "\n\nEvidence:\n" + ev.Evidence,
|
|
}
|
|
// Respect ctx — orchestrator methods accept context.Context for
|
|
// cancellation. Allow up to 60s for deep-analysis cascade.
|
|
ctx, cancel := context.WithTimeout(mctx.Ctx, 60*time.Second)
|
|
defer cancel()
|
|
result, err := a.orchestrator.Analyze(ctx, finding)
|
|
if err != nil || result == nil {
|
|
return
|
|
}
|
|
now := time.Now()
|
|
for _, f := range result.Findings {
|
|
persistAIFinding(mctx, ev.Meta().Target, store.AIFinding{
|
|
Agent: string(result.AgentType), Model: result.Model,
|
|
Severity: strings.ToLower(f.Severity),
|
|
Title: f.Title, Description: f.Description, Evidence: f.Evidence,
|
|
CVEs: f.CVEs, OWASP: f.OWASP, Confidence: result.Confidence,
|
|
FoundAt: now,
|
|
})
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
|
|
EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: ev.Meta().Target},
|
|
Subject: ev.Meta().Target,
|
|
Agent: string(result.AgentType),
|
|
Model: result.Model,
|
|
Severity: eventbus.Severity(strings.ToLower(f.Severity)),
|
|
Title: f.Title,
|
|
Description: f.Description,
|
|
Evidence: f.Evidence,
|
|
CVEs: f.CVEs,
|
|
OWASP: f.OWASP,
|
|
Confidence: result.Confidence,
|
|
})
|
|
}
|
|
}
|
|
|
|
// handleScanEnd runs two expensive end-of-scan analyses:
|
|
//
|
|
// 1. DetectAnomalies — cross-host pattern review (dev stacks leaking into
|
|
// prod, unusual version mixes, orphaned endpoints)
|
|
// 2. GenerateReport — executive summary of findings by severity
|
|
//
|
|
// Both run only when the store has enough data to be worth summarising
|
|
// (≥ 3 findings or ≥ 5 hosts).
|
|
func (a *aiModule) handleScanEnd(mctx module.Context) {
|
|
hosts := mctx.Store.All(mctx.Ctx)
|
|
if len(hosts) == 0 {
|
|
return
|
|
}
|
|
|
|
totalFindings := 0
|
|
for _, h := range hosts {
|
|
totalFindings += len(h.Vulnerabilities) + len(h.Secrets) + len(h.CVEs) + len(h.AIFindings)
|
|
}
|
|
if totalFindings < 3 && len(hosts) < 5 {
|
|
return // not worth the Ollama spin-up
|
|
}
|
|
|
|
// Anomaly detection ------------------------------------------------------
|
|
summary := buildScanSummary(hosts)
|
|
a.anomalyScans.Add(1)
|
|
if result, err := a.client.DetectAnomalies(summary); err == nil && result != nil {
|
|
now := time.Now()
|
|
for _, f := range result.Findings {
|
|
persistAIFinding(mctx, mctx.Target, store.AIFinding{
|
|
Agent: "anomaly-detector", Model: a.client.DeepModel,
|
|
Severity: result.Severity,
|
|
Title: "Cross-subdomain anomaly",
|
|
Description: f, FoundAt: now,
|
|
})
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
|
|
EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target},
|
|
Subject: mctx.Target,
|
|
Agent: "anomaly-detector",
|
|
Model: a.client.DeepModel,
|
|
Severity: eventbus.Severity(result.Severity),
|
|
Title: "Cross-subdomain anomaly",
|
|
Description: f,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Executive report ------------------------------------------------------
|
|
stats := map[string]int{
|
|
"hosts": len(hosts),
|
|
"findings": totalFindings,
|
|
}
|
|
a.reportGenerations.Add(1)
|
|
if report, err := a.client.GenerateReport(summary, stats); err == nil && report != "" {
|
|
now := time.Now()
|
|
persistAIFinding(mctx, mctx.Target, store.AIFinding{
|
|
Agent: "report-writer", Model: a.client.DeepModel,
|
|
Severity: string(eventbus.SeverityInfo),
|
|
Title: "AI executive report",
|
|
Description: report,
|
|
FoundAt: now,
|
|
})
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
|
|
EventMeta: eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target},
|
|
Subject: mctx.Target,
|
|
Agent: "report-writer",
|
|
Model: a.client.DeepModel,
|
|
Severity: eventbus.SeverityInfo,
|
|
Title: "AI executive report",
|
|
Description: report,
|
|
})
|
|
}
|
|
|
|
// Emit a module-error style observability event with per-handler counts.
|
|
mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
|
|
EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
|
|
Module: ModuleName,
|
|
Err: fmt.Sprintf("AI activity: cve=%d js=%d http=%d secrets=%d vulns=%d anomaly=%d report=%d",
|
|
a.cveLookups.Load(),
|
|
a.jsAnalyses.Load(),
|
|
a.httpAnalyses.Load(),
|
|
a.secretValidations.Load(),
|
|
a.vulnEnrichments.Load(),
|
|
a.anomalyScans.Load(),
|
|
a.reportGenerations.Load()),
|
|
})
|
|
}
|
|
|
|
// --- helpers -------------------------------------------------------------
|
|
|
|
// firstSeen returns true the first time we see a given cache key, false
|
|
// on every subsequent call. Implemented via sync.Map.LoadOrStore which is
|
|
// atomic.
|
|
func (a *aiModule) firstSeen(key string) bool {
|
|
h := sha256.Sum256([]byte(key))
|
|
hx := hex.EncodeToString(h[:])
|
|
_, loaded := a.cache.LoadOrStore(hx, struct{}{})
|
|
return !loaded
|
|
}
|
|
|
|
// isInterestingHTTP gates which HTTP responses are worth sending to the
|
|
// deep model. Normal 2xx/3xx are skipped; 5xx, verbose 4xx with titles,
|
|
// and anything with a server-banner mismatch qualifies.
|
|
func isInterestingHTTP(ev eventbus.HTTPProbed) bool {
|
|
switch {
|
|
case ev.StatusCode >= 500:
|
|
return true
|
|
case ev.StatusCode == 401 || ev.StatusCode == 403:
|
|
return true // auth surface worth inspecting
|
|
case ev.StatusCode >= 400 && ev.Title != "" && ev.ContentLength > 1000:
|
|
return true // verbose error page
|
|
case ev.TLSSelfSigned:
|
|
return true // self-signed on a live host is usually an appliance
|
|
}
|
|
return false
|
|
}
|
|
|
|
// hashShort returns the first 8 bytes of SHA-256(s) as 16 lowercase hex
// characters — a compact, stable identity for cache keys whose full
// input would be too long to store.
func hashShort(s string) string {
	digest := sha256.Sum256([]byte(s))
	return fmt.Sprintf("%x", digest[:8])
}
// persistAIFinding appends an AIFinding to the host's store record so
|
|
// that downstream modules (notably the report.brief module running in
|
|
// PhaseReporting, which subscribes to the bus AFTER PhaseAnalysis has
|
|
// drained) can still surface the finding. Store is the single source
|
|
// of truth for cross-phase handoff.
|
|
func persistAIFinding(mctx module.Context, host string, f store.AIFinding) {
|
|
if host == "" {
|
|
host = mctx.Target
|
|
}
|
|
_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
|
|
h.AIFindings = append(h.AIFindings, f)
|
|
})
|
|
}
|
|
|
|
// cdnOrWafMarkers are technology names (compared lowercase) that indicate
// the target is fronted by a CDN / WAF / PaaS rather than running that
// product themselves. Matching CVEs against these labels produces
// almost-exclusively false positives, so shouldSkipForCVE drops them
// when no version is known; a concrete version overrides the skip.
var cdnOrWafMarkers = map[string]bool{
	"cloudflare": true,
	"cloudfront": true,
	"akamai":     true,
	"fastly":     true,
	"imperva":    true,
	"aws":        true,
	"azure":      true,
	"gcp":        true,
	"heroku":     true,
	"netlify":    true,
	"vercel":     true,
	"cdn":        true,
	"nginx plus": true,
}
// parseTech splits a raw technology banner such as "nginx/1.18.0",
// "nginx/1.18.0 (Ubuntu)", "Apache/2.4.52", or "Apache 2.4" into a
// product name and a bare dotted-numeric version. The version is empty
// when none can be extracted; an unsplittable banner is returned whole
// as the name.
func parseTech(raw string) (name, version string) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return "", ""
	}
	// Try "name/version" first, then "name version"; the separator must
	// not be the first character.
	for _, sep := range []string{"/", " "} {
		head, tail, found := strings.Cut(raw, sep)
		if !found || head == "" {
			continue
		}
		name = strings.TrimSpace(head)
		rest := strings.TrimPrefix(strings.TrimSpace(tail), "v")
		// Keep only the leading run of digits and dots, dropping suffixes
		// like " (Ubuntu)".
		stop := strings.IndexFunc(rest, func(r rune) bool {
			return (r < '0' || r > '9') && r != '.'
		})
		if stop < 0 {
			stop = len(rest)
		}
		return name, rest[:stop]
	}
	return raw, ""
}
// shouldSkipForCVE returns true when (name, version) is too vague for a
|
|
// useful CVE lookup — empty name, or a CDN/WAF label without a version.
|
|
func shouldSkipForCVE(name, version string) bool {
|
|
if name == "" {
|
|
return true
|
|
}
|
|
if version == "" && cdnOrWafMarkers[strings.ToLower(name)] {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// versionOrUnknown renders v as "vX.Y" for display, or a placeholder
// when the version is empty.
func versionOrUnknown(v string) string {
	if v != "" {
		return "v" + v
	}
	return "(unknown version)"
}
// buildScanSummary compiles a compact text representation of the store
|
|
// for the DetectAnomalies / GenerateReport prompts. Kept under ~3KB to
|
|
// fit comfortably in every model's context window.
|
|
func buildScanSummary(hosts []*store.Host) string {
|
|
var sb strings.Builder
|
|
sb.WriteString(fmt.Sprintf("Scan summary: %d hosts\n\n", len(hosts)))
|
|
shown := 0
|
|
for _, h := range hosts {
|
|
if h == nil {
|
|
continue
|
|
}
|
|
if shown >= 50 {
|
|
sb.WriteString(fmt.Sprintf("\n... and %d more hosts\n", len(hosts)-shown))
|
|
break
|
|
}
|
|
sb.WriteString(fmt.Sprintf("- %s (status=%d, tech=%s)",
|
|
h.Subdomain, h.StatusCode, strings.Join(h.Technologies, ",")))
|
|
if len(h.Vulnerabilities) > 0 {
|
|
sb.WriteString(fmt.Sprintf(" vulns=%d", len(h.Vulnerabilities)))
|
|
}
|
|
if len(h.Secrets) > 0 {
|
|
sb.WriteString(fmt.Sprintf(" secrets=%d", len(h.Secrets)))
|
|
}
|
|
if len(h.CVEs) > 0 {
|
|
sb.WriteString(fmt.Sprintf(" cves=%d", len(h.CVEs)))
|
|
}
|
|
sb.WriteString("\n")
|
|
shown++
|
|
}
|
|
return sb.String()
|
|
}
|