Files
Vyntral 3a4c230aa7 feat: v2.0 full rewrite — event-driven pipeline, AI + Nuclei + proxy
Complete architectural overhaul. Replaces the v0.1 monolithic scanner
with an event-driven pipeline of auto-registered modules.

Foundation (internal/):
- eventbus: typed pub/sub, 20 event types, race-safe, drop counter
- module: registry with phase-based selection
- store: thread-safe host store with per-host locks + deep-copy reads
- pipeline: coordinator with phase barriers + panic recovery
- config: 5 scan profiles + 3 AI tiers + YAML loader + auto-discovery

Modules (26 auto-registered across 6 phases):
- Discovery: passive (26 sources), bruteforce, recursive, AXFR, GitHub
  dorks, CT streaming, permutation, reverse DNS, vhost, ASN, supply
  chain (npm + PyPI)
- Enrichment: HTTP probe + tech fingerprint + TLS appliance ID, ports
- Analysis: security checks, takeover (110+ sigs), cloud, JavaScript,
  GraphQL, JWT, headers (OWASP), HTTP smuggling, AI cascade, Nuclei
- Reporting: TXT/JSON/CSV writer + AI scan brief

AI layer (internal/ai/ + internal/modules/ai/):
- Three profiles: lean (16 GB), balanced (32 GB MoE), heavy (64 GB)
- Six event-driven handlers: CVE, JS file, HTTP response, secret
  filter, multi-agent vuln enrichment, anomaly + executive report
- Content-hash cache dedups Ollama calls across hosts
- Auto-pull of missing models via /api/pull with streaming progress
- End-of-scan AI SCAN BRIEF in terminal with top chains + next actions

Nuclei compat layer (internal/nucleitpl/):
- Executes ~13k community templates (HTTP subset)
- Auto-download of nuclei-templates ZIP to ~/.god-eye/nuclei-templates
- Scope filter rejects off-host templates (eliminates OSINT FPs)

Operations:
- Interactive wizard (internal/wizard/) — zero-flag launch
- LivePrinter (internal/tui/) — colorized event stream
- Diff engine + scheduler (internal/diff, internal/scheduler) for
  continuous ASM monitoring with webhook alerts
- Proxy support (internal/proxyconf/): http / https / socks5 / socks5h
  + basic auth

Fixes #1 — native SOCKS5 / Tor compatibility via --proxy flag.

185 unit tests across 15 packages, all race-detector clean.
2026-04-18 16:48:41 +02:00

661 lines
22 KiB
Go

// Package ai is the v2 adapter that wires the Ollama client into the
// event-driven pipeline. Unlike the initial skeleton (which only called
// CVEMatch on TechDetected), this module subscribes to five event types,
// dispatches each to the appropriate v1 client method, and finishes with
// one end-of-scan pass:
//
// TechDetected → CVEMatch → CVEMatch events
// JSFileDiscovered → AnalyzeJavaScript → AIFinding + SecretFound
//   (deep analysis is deferred until the event carries the file content;
//   for now only an indexing AIFinding is emitted)
// HTTPProbed → AnalyzeHTTPResponse (for 5xx / suspicious 4xx) → AIFinding
// SecretFound → FilterSecrets (triage real vs regex noise) → AIFinding tag
// VulnerabilityFound → multi-agent orchestrator (agents package) → AIFinding with remediation
// ScanCompleted → DetectAnomalies + GenerateReport → AIFinding + report artifact
//   (run at the end of Run, after the per-event handlers have drained,
//   rather than through a bus subscription)
//
// Every handler:
// - is a no-op when ai.enabled=false (module Run returns immediately)
// - dedups by content hash to avoid hammering Ollama with duplicates
// - cascades through the fast triage model before the deep model
// - emits AIFinding events so downstream reporters/TUI pick them up
//
// The module is the primary value of God's Eye v2's "local LLM" story —
// without this wiring, the AI layer was essentially a 20GB curiosity
// that added a single CVE string per scan.
package ai
import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"god-eye/internal/ai"
	"god-eye/internal/ai/agents"
	"god-eye/internal/eventbus"
	"god-eye/internal/module"
	"god-eye/internal/store"
)
// ModuleName is the identifier this module reports from Name and stamps
// as the Source (and ModuleError.Module) of every event it publishes.
const ModuleName = "ai.cascade"
// aiModule adapts the Ollama client (and, optionally, the multi-agent
// orchestrator) to the event-driven pipeline. Both client fields are
// assigned inside Run; orchestrator stays nil unless ai.multi_agent is set.
type aiModule struct {
	client       *ai.OllamaClient
	orchestrator *agents.AgentOrchestrator

	// cache is the dedup set behind firstSeen. Keys are the hex SHA-256
	// of a handler-specific string (method prefix + input) and values are
	// empty structs, so each distinct key is acted on only once.
	cache sync.Map // map[string]struct{}

	// Per-handler activity counters, reported in one event at scan end.
	cveLookups, jsAnalyses, httpAnalyses, secretValidations atomic.Int64
	vulnEnrichments, anomalyScans, reportGenerations        atomic.Int64
}
// Register adds a zero-value aiModule to the module registry.
func Register() {
	module.Register(new(aiModule))
}
// Name reports the module identifier, ModuleName.
func (*aiModule) Name() string {
	return ModuleName
}
// Phase places the module in the analysis phase of the pipeline.
func (*aiModule) Phase() module.Phase {
	return module.PhaseAnalysis
}
// Consumes lists the event types this module declares as inputs.
//
// NOTE(review): EventScanCompleted is declared here, but Run does not
// subscribe to it; the end-of-scan work is invoked directly from Run.
func (*aiModule) Consumes() []eventbus.EventType {
	consumed := [...]eventbus.EventType{
		eventbus.EventTechDetected,
		eventbus.EventJSFile,
		eventbus.EventHTTPProbed,
		eventbus.EventSecret,
		eventbus.EventVulnerability,
		eventbus.EventScanCompleted,
	}
	return consumed[:]
}
// Produces lists the event types this module declares as outputs.
//
// NOTE(review): EventSecret is declared as "validated/re-emitted", but the
// handlers in this file only publish AIFinding, CVEMatch and ModuleError
// events; confirm whether a SecretFound re-emission is still planned.
func (*aiModule) Produces() []eventbus.EventType {
	produced := [...]eventbus.EventType{
		eventbus.EventAIFinding,
		eventbus.EventCVEMatch,
		eventbus.EventSecret, // validated/re-emitted
	}
	return produced[:]
}
// DefaultEnabled always reports true, so the module is loaded on every
// scan. The real switch is the ai.enabled setting, which Run checks first
// and returns immediately when it is false.
func (*aiModule) DefaultEnabled() bool {
	const alwaysLoaded = true
	return alwaysLoaded
}
// Run is the heart of the v2 AI layer. It wires five event subscriptions
// (tech, JS file, HTTP probe, secret, vulnerability), replays hosts that
// are already in the store, leaves a bounded window for late events, and
// then runs the end-of-scan analyses directly via handleScanEnd.
//
// Run returns nil in every case: it does nothing when ai.enabled is false,
// and when Ollama is unreachable it publishes a ModuleError and stops.
func (a *aiModule) Run(mctx module.Context) error {
	if !mctx.Config.Bool("ai.enabled", false) {
		return nil
	}

	// Read the connection settings once so the client, the orchestrator
	// and the error message all agree on the same values.
	ollamaURL := mctx.Config.String("ai.url", "http://localhost:11434")
	fastModel := mctx.Config.String("ai.fast_model", "qwen3:1.7b")
	deepModel := mctx.Config.String("ai.deep_model", "qwen2.5-coder:14b")

	a.client = ai.NewOllamaClient(ollamaURL, fastModel, deepModel, mctx.Config.Bool("ai.cascade", true))
	if mctx.Config.Bool("ai.verbose", false) {
		a.client.Verbose = true
	}
	if !a.client.IsAvailable() {
		mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
			EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
			Module:    ModuleName,
			Err:       "Ollama not reachable at " + ollamaURL,
		})
		return nil
	}

	// Multi-agent orchestrator is opt-in: only worth spinning up when the
	// user explicitly enables it. The orchestrator holds one client per
	// agent type (8 agents) and can take ~200ms to initialise.
	if mctx.Config.Bool("ai.multi_agent", false) {
		a.orchestrator = agents.NewAgentOrchestrator(ollamaURL, fastModel, deepModel)
	}

	var wg sync.WaitGroup
	// spawn runs work on its own goroutine and tracks it in wg so Run can
	// drain every handler before the end-of-scan analyses start.
	spawn := func(work func()) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			work()
		}()
	}

	// Subscribe to every event type we care about. Each callback only
	// type-checks the event and hands the real work to spawn.
	subs := []*eventbus.Subscription{
		mctx.Bus.Subscribe(eventbus.EventTechDetected, func(_ context.Context, e eventbus.Event) {
			if ev, ok := e.(eventbus.TechDetected); ok {
				spawn(func() { a.handleTech(mctx, ev.Host, ev.Technology, ev.Version) })
			}
		}),
		mctx.Bus.Subscribe(eventbus.EventJSFile, func(_ context.Context, e eventbus.Event) {
			if ev, ok := e.(eventbus.JSFileDiscovered); ok {
				spawn(func() { a.handleJSFile(mctx, ev) })
			}
		}),
		mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) {
			if ev, ok := e.(eventbus.HTTPProbed); ok {
				spawn(func() { a.handleHTTP(mctx, ev) })
			}
		}),
		mctx.Bus.Subscribe(eventbus.EventSecret, func(_ context.Context, e eventbus.Event) {
			if ev, ok := e.(eventbus.SecretFound); ok {
				spawn(func() { a.handleSecret(mctx, ev) })
			}
		}),
		mctx.Bus.Subscribe(eventbus.EventVulnerability, func(_ context.Context, e eventbus.Event) {
			if ev, ok := e.(eventbus.VulnerabilityFound); ok {
				spawn(func() { a.handleVuln(mctx, ev) })
			}
		}),
	}

	// unsubscribe detaches every subscription exactly once. It is called
	// explicitly before the drain below and also deferred, so the
	// subscriptions are released even if the store replay panics.
	var unsubOnce sync.Once
	unsubscribe := func() {
		unsubOnce.Do(func() {
			for _, s := range subs {
				s.Unsubscribe()
			}
		})
	}
	defer unsubscribe()

	// Drain store: any host already populated with tech/HTTP info gets
	// processed on module startup (covers the common case where AI is in a
	// later phase than discovery/enrichment).
	for _, h := range mctx.Store.All(mctx.Ctx) {
		if h == nil {
			continue
		}
		host := h.Subdomain
		for _, tech := range h.Technologies {
			tech := tech // per-iteration copy for the goroutine (pre-Go 1.22 semantics)
			spawn(func() { a.handleTech(mctx, host, tech, "") })
		}
		if h.StatusCode != 0 {
			// The store does not record the scheme here, so the replayed
			// event assumes https.
			ev := eventbus.HTTPProbed{
				EventMeta:  eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host},
				URL:        "https://" + host,
				StatusCode: h.StatusCode,
				Title:      h.Title,
				Server:     h.Server,
			}
			spawn(func() { a.handleHTTP(mctx, ev) })
		}
	}

	// Brief window for late events (recursive discovery, slow probes) to
	// arrive before we wrap up.
	select {
	case <-time.After(1500 * time.Millisecond):
	case <-mctx.Ctx.Done():
	}

	// Stop accepting events BEFORE waiting. Otherwise a callback could call
	// wg.Add while Wait is in progress (or after it returned), which the
	// WaitGroup contract forbids and which would leave handler goroutines
	// running concurrently with, or after, the end-of-scan analyses.
	// NOTE(review): assumes Unsubscribe stops further callback delivery —
	// confirm against the eventbus implementation.
	unsubscribe()
	wg.Wait()

	// End-of-scan analyses run once, after all per-event handlers drain.
	a.handleScanEnd(mctx)
	return nil
}
// --- Handlers ------------------------------------------------------------
// handleTech runs CVE correlation for one detected technology on host.
// When version is empty, the version parsed out of tech is used instead.
//
// Lookups are deduplicated on (name, version), so a pair seen on many
// hosts triggers a single CVEMatch query. Note that, as a consequence,
// only the host that triggered the first lookup gets the CVE upserted and
// published; later hosts with the same pair return at the dedup check.
//
// Errors and empty results from CVEMatch end the handler silently.
func (a *aiModule) handleTech(mctx module.Context, host, tech, version string) {
	if tech == "" {
		return
	}
	if shouldSkipForCVE(tech, version) {
		return
	}

	product, parsedVersion := parseTech(tech)
	if version == "" {
		version = parsedVersion
	}
	if shouldSkipForCVE(product, version) {
		return
	}

	if fresh := a.firstSeen("cve:" + product + "|" + version); !fresh {
		return
	}
	a.cveLookups.Add(1)

	matched, err := a.client.CVEMatch(product, version)
	if err != nil {
		return
	}
	if matched == "" {
		return
	}

	// Attach the match to the specific host that triggered the lookup.
	foundAt := time.Now()
	record := store.CVE{
		ID:          matched,
		Technology:  product,
		Version:     version,
		Severity:    string(eventbus.SeverityHigh),
		Description: matched,
		FoundAt:     foundAt,
	}
	// Best effort: the store error is deliberately ignored; the bus event
	// below is still published.
	_ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) {
		h.CVEs = append(h.CVEs, record)
	})

	mctx.Bus.Publish(mctx.Ctx, eventbus.CVEMatch{
		EventMeta:   eventbus.EventMeta{At: foundAt, Source: ModuleName, Target: host},
		CVE:         matched,
		Technology:  product,
		Version:     version,
		Severity:    eventbus.SeverityHigh,
		Description: fmt.Sprintf("AI-assisted CVE match for %s %s", product, versionOrUnknown(version)),
	})
}
// handleJSFile records that a JavaScript file was discovered. It is
// deduplicated by URL, so a file referenced from several hosts is handled
// once.
//
// The handler does NOT download or analyse the file. The v1
// AnalyzeJavaScript method expects the code itself as input, and
// JSFileDiscovered does not carry it yet; deep analysis is deferred until
// it does (Phase 2 follow-up). Until then the handler bumps the jsAnalyses
// counter and publishes an info-level AIFinding noting the file was
// indexed, which lets reporting aggregate per-host JS exposure. Secrets
// found by the v1 regexes are triaged separately in handleSecret.
func (a *aiModule) handleJSFile(mctx module.Context, ev eventbus.JSFileDiscovered) {
	if alreadyIndexed := !a.firstSeen("js:" + ev.URL); alreadyIndexed {
		return
	}
	a.jsAnalyses.Add(1)

	indexed := eventbus.AIFinding{
		EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: ev.Host},
		Subject:   ev.Host,
		Agent:     "js-indexer",
		Model:     a.client.FastModel,
		Severity:  eventbus.SeverityInfo,
		Title:     "JavaScript file indexed for secret review",
		Evidence:  ev.URL,
	}
	mctx.Bus.Publish(mctx.Ctx, indexed)
}
// handleHTTP sends an HTTP probe result to AnalyzeHTTPResponse when
// isInterestingHTTP accepts it, then persists and publishes one AIFinding
// per finding string the client returns.
//
// Analyses are deduplicated on (target, status code, title hash). Header
// lines are passed to the client in sorted order: Go map iteration order
// is random, so without sorting the same response would produce a
// different prompt on every run.
//
// Client errors and empty results end the handler silently.
func (a *aiModule) handleHTTP(mctx module.Context, ev eventbus.HTTPProbed) {
	if !isInterestingHTTP(ev) {
		return
	}
	host := ev.Meta().Target
	key := fmt.Sprintf("http:%s:%d:%s", host, ev.StatusCode, hashShort(ev.Title))
	if !a.firstSeen(key) {
		return
	}
	a.httpAnalyses.Add(1)

	// Compose the content we hand to the deep model. Keep it compact:
	// the Server banner first, then every header in name order.
	names := make([]string, 0, len(ev.Headers))
	for k := range ev.Headers {
		names = append(names, k)
	}
	sort.Strings(names)

	headerLines := make([]string, 0, len(names)+1)
	if ev.Server != "" {
		headerLines = append(headerLines, "Server: "+ev.Server)
	}
	for _, k := range names {
		headerLines = append(headerLines, k+": "+ev.Headers[k])
	}

	result, err := a.client.AnalyzeHTTPResponse(host, ev.StatusCode, headerLines, ev.Title)
	if err != nil || result == nil || len(result.Findings) == 0 {
		return
	}

	now := time.Now()
	evidence := fmt.Sprintf("status=%d title=%q", ev.StatusCode, ev.Title)
	for _, f := range result.Findings {
		persistAIFinding(mctx, host, store.AIFinding{
			Agent: "http-analyzer", Model: a.client.DeepModel,
			Severity: result.Severity, Title: "Suspicious HTTP response",
			Description: f, Evidence: evidence,
			FoundAt: now,
		})
		mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
			EventMeta:   eventbus.EventMeta{At: now, Source: ModuleName, Target: host},
			Subject:     host,
			Agent:       "http-analyzer",
			Model:       a.client.DeepModel,
			Severity:    eventbus.Severity(result.Severity),
			Title:       "Suspicious HTTP response",
			Description: f,
			Evidence:    evidence,
		})
	}
}
// handleSecret passes a regex-surfaced secret through FilterSecrets. If the
// client returns it as validated, the handler persists an AIFinding on the
// host and publishes the matching AIFinding event. Both carry the same
// timestamp, so the stored record and the event can be correlated.
//
// Secrets are deduplicated on a hash of (match, location). When
// FilterSecrets errors or returns nothing, the handler emits nothing; the
// original SecretFound event is left untouched either way. The emitted
// text names only the secret's kind and location, not the matched value.
func (a *aiModule) handleSecret(mctx module.Context, ev eventbus.SecretFound) {
	key := "secret:" + hashShort(ev.Match+"|"+ev.Location)
	if !a.firstSeen(key) {
		return
	}
	a.secretValidations.Add(1)

	validated, err := a.client.FilterSecrets([]string{ev.Match})
	if err != nil || len(validated) == 0 {
		return // AI says not a real secret, or Ollama unavailable
	}

	const title = "Secret likely valid (AI-confirmed)"
	now := time.Now()
	target := ev.Meta().Target
	description := fmt.Sprintf("FilterSecrets confirmed '%s' is a real secret, not regex noise.", ev.Kind)
	evidence := fmt.Sprintf("%s @ %s", ev.Kind, ev.Location)

	persistAIFinding(mctx, target, store.AIFinding{
		Agent: "secret-validator", Model: a.client.FastModel,
		Severity:    string(eventbus.SeverityHigh),
		Title:       title,
		Description: description,
		Evidence:    evidence,
		FoundAt:     now,
	})
	mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
		EventMeta:   eventbus.EventMeta{At: now, Source: ModuleName, Target: target},
		Subject:     target,
		Agent:       "secret-validator",
		Model:       a.client.FastModel,
		Severity:    eventbus.SeverityHigh,
		Title:       title,
		Description: description,
		Evidence:    evidence,
	})
}
// handleVuln hands a vulnerability finding to the multi-agent orchestrator
// and persists and publishes one AIFinding per finding it returns. It is a
// no-op when the orchestrator was not created (multi-agent disabled).
//
// Enrichment is deduplicated on (vulnerability ID, target). The
// orchestrator call is bounded by a 60s timeout derived from the module
// context; an error or nil result ends the handler silently.
func (a *aiModule) handleVuln(mctx module.Context, ev eventbus.VulnerabilityFound) {
	if a.orchestrator == nil {
		return
	}
	target := ev.Meta().Target
	if !a.firstSeen("vuln:" + ev.ID + ":" + target) {
		return
	}
	a.vulnEnrichments.Add(1)

	// Allow up to 60s for the deep-analysis cascade; cancellation of the
	// module context still cuts it short.
	ctx, cancel := context.WithTimeout(mctx.Ctx, 60*time.Second)
	defer cancel()

	result, err := a.orchestrator.Analyze(ctx, agents.Finding{
		Type:    "vulnerability",
		URL:     ev.URL,
		Context: ev.Description + "\n\nEvidence:\n" + ev.Evidence,
	})
	if err != nil {
		return
	}
	if result == nil {
		return
	}

	foundAt := time.Now()
	agent := string(result.AgentType)
	for _, item := range result.Findings {
		// Severity is lower-cased before it is stored or published.
		severity := strings.ToLower(item.Severity)

		persistAIFinding(mctx, target, store.AIFinding{
			Agent:       agent,
			Model:       result.Model,
			Severity:    severity,
			Title:       item.Title,
			Description: item.Description,
			Evidence:    item.Evidence,
			CVEs:        item.CVEs,
			OWASP:       item.OWASP,
			Confidence:  result.Confidence,
			FoundAt:     foundAt,
		})
		mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
			EventMeta:   eventbus.EventMeta{At: foundAt, Source: ModuleName, Target: target},
			Subject:     target,
			Agent:       agent,
			Model:       result.Model,
			Severity:    eventbus.Severity(severity),
			Title:       item.Title,
			Description: item.Description,
			Evidence:    item.Evidence,
			CVEs:        item.CVEs,
			OWASP:       item.OWASP,
			Confidence:  result.Confidence,
		})
	}
}
// handleScanEnd runs two expensive end-of-scan analyses:
//
//  1. DetectAnomalies — cross-host pattern review (dev stacks leaking into
//     prod, unusual version mixes, orphaned endpoints)
//  2. GenerateReport — executive summary of findings by severity
//
// Both run only when the store has enough data to be worth summarising
// (≥ 3 findings or ≥ 5 hosts). Their results are persisted against
// mctx.Target and published as AIFinding events. Finally the per-handler
// activity counters are published in a ModuleError-shaped event.
//
// Note that the early returns (empty store, too little data) also skip the
// counter event.
func (a *aiModule) handleScanEnd(mctx module.Context) {
	hosts := mctx.Store.All(mctx.Ctx)
	if len(hosts) == 0 {
		return
	}
	totalFindings := 0
	for _, h := range hosts {
		// Run and buildScanSummary both tolerate nil entries from the
		// store; do the same here instead of dereferencing blindly.
		if h == nil {
			continue
		}
		totalFindings += len(h.Vulnerabilities) + len(h.Secrets) + len(h.CVEs) + len(h.AIFindings)
	}
	if totalFindings < 3 && len(hosts) < 5 {
		return // not worth the Ollama spin-up
	}

	// Anomaly detection ------------------------------------------------------
	summary := buildScanSummary(hosts)
	a.anomalyScans.Add(1)
	if result, err := a.client.DetectAnomalies(summary); err == nil && result != nil {
		now := time.Now()
		for _, f := range result.Findings {
			persistAIFinding(mctx, mctx.Target, store.AIFinding{
				Agent: "anomaly-detector", Model: a.client.DeepModel,
				Severity:    result.Severity,
				Title:       "Cross-subdomain anomaly",
				Description: f, FoundAt: now,
			})
			mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
				EventMeta:   eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target},
				Subject:     mctx.Target,
				Agent:       "anomaly-detector",
				Model:       a.client.DeepModel,
				Severity:    eventbus.Severity(result.Severity),
				Title:       "Cross-subdomain anomaly",
				Description: f,
			})
		}
	}

	// Executive report ------------------------------------------------------
	stats := map[string]int{
		"hosts":    len(hosts),
		"findings": totalFindings,
	}
	a.reportGenerations.Add(1)
	if report, err := a.client.GenerateReport(summary, stats); err == nil && report != "" {
		now := time.Now()
		persistAIFinding(mctx, mctx.Target, store.AIFinding{
			Agent: "report-writer", Model: a.client.DeepModel,
			Severity:    string(eventbus.SeverityInfo),
			Title:       "AI executive report",
			Description: report,
			FoundAt:     now,
		})
		mctx.Bus.Publish(mctx.Ctx, eventbus.AIFinding{
			EventMeta:   eventbus.EventMeta{At: now, Source: ModuleName, Target: mctx.Target},
			Subject:     mctx.Target,
			Agent:       "report-writer",
			Model:       a.client.DeepModel,
			Severity:    eventbus.SeverityInfo,
			Title:       "AI executive report",
			Description: report,
		})
	}

	// Emit a module-error style observability event with per-handler counts.
	// It is not an error: ModuleError is reused as the carrier for the
	// activity summary.
	mctx.Bus.Publish(mctx.Ctx, eventbus.ModuleError{
		EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: mctx.Target},
		Module:    ModuleName,
		Err: fmt.Sprintf("AI activity: cve=%d js=%d http=%d secrets=%d vulns=%d anomaly=%d report=%d",
			a.cveLookups.Load(),
			a.jsAnalyses.Load(),
			a.httpAnalyses.Load(),
			a.secretValidations.Load(),
			a.vulnEnrichments.Load(),
			a.anomalyScans.Load(),
			a.reportGenerations.Load()),
	})
}
// --- helpers -------------------------------------------------------------
// firstSeen reports whether key is being presented for the first time.
// The key is hashed with SHA-256 and the hex digest is inserted into the
// cache with sync.Map.LoadOrStore, so concurrent callers racing on the
// same key see exactly one true result.
func (a *aiModule) firstSeen(key string) bool {
	digest := sha256.Sum256([]byte(key))
	if _, seen := a.cache.LoadOrStore(hex.EncodeToString(digest[:]), struct{}{}); seen {
		return false
	}
	return true
}
// isInterestingHTTP decides which HTTP probe results are worth sending to
// the deep model. A response qualifies when any of these holds:
//
//   - status is 5xx or higher
//   - status is 401 or 403 (auth surface worth inspecting)
//   - status is 4xx with a title and more than 1000 bytes of content
//     (verbose error page)
//   - the TLS certificate is self-signed (usually an appliance)
//
// Everything else, including ordinary 2xx/3xx responses, is skipped.
func isInterestingHTTP(ev eventbus.HTTPProbed) bool {
	code := ev.StatusCode
	if code >= 500 {
		return true
	}
	if code == 401 || code == 403 {
		return true
	}
	if code >= 400 && ev.Title != "" && ev.ContentLength > 1000 {
		return true
	}
	if ev.TLSSelfSigned {
		return true
	}
	return false
}
// hashShort returns the first 8 bytes of SHA-256(s) as 16 lowercase hex
// characters. It is used in cache keys where the full input would be too
// long but its identity still matters.
func hashShort(s string) string {
	digest := sha256.Sum256([]byte(s))
	full := hex.EncodeToString(digest[:])
	return full[:16]
}
// persistAIFinding appends f to the AIFindings of host's store record,
// falling back to mctx.Target when host is empty. The store is the
// cross-phase handoff: per the original design note, the report.brief
// module in PhaseReporting subscribes to the bus only after PhaseAnalysis
// has drained, so it can only surface findings that were written here.
//
// The Upsert error is deliberately ignored (best effort).
func persistAIFinding(mctx module.Context, host string, f store.AIFinding) {
	target := host
	if target == "" {
		target = mctx.Target
	}
	appendFinding := func(h *store.Host) {
		h.AIFindings = append(h.AIFindings, f)
	}
	_ = mctx.Store.Upsert(mctx.Ctx, target, appendFinding)
}
// cdnOrWafMarkers is the set of lower-case technology labels that mean the
// target sits behind a CDN, WAF or hosting platform rather than running
// that product itself. CVE matching against these labels yields almost
// exclusively false positives, so shouldSkipForCVE drops them whenever no
// version is known. Keys are kept in alphabetical order.
var cdnOrWafMarkers = map[string]bool{
	"akamai":     true,
	"aws":        true,
	"azure":      true,
	"cdn":        true,
	"cloudflare": true,
	"cloudfront": true,
	"fastly":     true,
	"gcp":        true,
	"heroku":     true,
	"imperva":    true,
	"netlify":    true,
	"nginx plus": true,
	"vercel":     true,
}
// parseTech extracts (name, version) from a technology label such as
// "nginx/1.18.0", "nginx/1.18.0 (Ubuntu)", "Apache/2.4.52", "Apache 2.4"
// or "Apache Tomcat 9.0.1".
//
// Two shapes are recognised, in this order:
//
//   - "name/version…": everything before the first slash is the name, and
//     the version is the numeric prefix of what follows (empty when the
//     remainder does not start with a version).
//   - "name words version…": the first whitespace-separated token after
//     the first word that looks like a version ends the name, so
//     multi-word product names are kept intact.
//
// When no version is found, the whole trimmed input is returned as the
// name, so "Google Tag Manager" or "nginx plus" are not cut at the first
// space. Empty or all-whitespace input yields ("", "").
func parseTech(raw string) (name, version string) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return "", ""
	}

	// idx > 0 keeps a leading slash from producing an empty name.
	if idx := strings.Index(raw, "/"); idx > 0 {
		return strings.TrimSpace(raw[:idx]), versionPrefix(strings.TrimSpace(raw[idx+1:]))
	}

	// Start at the second token so the name is never empty.
	fields := strings.Fields(raw)
	for i := 1; i < len(fields); i++ {
		if v := versionPrefix(fields[i]); v != "" {
			return strings.Join(fields[:i], " "), v
		}
	}
	return raw, ""
}

// versionPrefix returns the leading dotted-number run of s ("1.18.0" from
// "1.18.0 (Ubuntu)"), accepting an optional "v" directly before the first
// digit. It returns "" when s does not start with a version; trailing dots
// are dropped so "2." yields "2".
func versionPrefix(s string) string {
	isDigit := func(c byte) bool { return c >= '0' && c <= '9' }

	// Only strip "v" when a digit follows, so words like "via" are left alone.
	if len(s) > 1 && s[0] == 'v' && isDigit(s[1]) {
		s = s[1:]
	}
	if s == "" || !isDigit(s[0]) {
		return ""
	}
	end := 0
	for end < len(s) && (isDigit(s[end]) || s[end] == '.') {
		end++
	}
	return strings.TrimRight(s[:end], ".")
}
// shouldSkipForCVE reports whether (name, version) is too vague for a
// useful CVE lookup. That is the case when name is empty, or when no
// version is known and name (compared case-insensitively) is one of the
// cdnOrWafMarkers labels. Any pair with a non-empty name and a version is
// kept.
func shouldSkipForCVE(name, version string) bool {
	switch {
	case name == "":
		return true
	case version != "":
		return false
	default:
		return cdnOrWafMarkers[strings.ToLower(name)]
	}
}
// versionOrUnknown formats a version for human-readable messages: a
// non-empty version is prefixed with "v", an empty one becomes the
// placeholder "(unknown version)".
func versionOrUnknown(v string) string {
	if v != "" {
		return "v" + v
	}
	return "(unknown version)"
}
// buildScanSummary compiles a compact text representation of the store
// for the DetectAnomalies / GenerateReport prompts.
//
// The output starts with a header giving len(hosts), followed by one line
// per host with its subdomain, status code and technologies, plus
// vulnerability / secret / CVE counts when they are non-zero. At most 50
// hosts are listed to keep the prompt small; if more remain, a trailing
// "... and N more hosts" line reports how many were left out. Nil entries
// are skipped and are not counted in N.
func buildScanSummary(hosts []*store.Host) string {
	const maxListed = 50

	var sb strings.Builder
	fmt.Fprintf(&sb, "Scan summary: %d hosts\n\n", len(hosts))

	listed, present := 0, 0
	for _, h := range hosts {
		if h == nil {
			continue
		}
		present++
		if listed >= maxListed {
			continue // still counted in present for the trailer below
		}
		fmt.Fprintf(&sb, "- %s (status=%d, tech=%s)",
			h.Subdomain, h.StatusCode, strings.Join(h.Technologies, ","))
		if n := len(h.Vulnerabilities); n > 0 {
			fmt.Fprintf(&sb, " vulns=%d", n)
		}
		if n := len(h.Secrets); n > 0 {
			fmt.Fprintf(&sb, " secrets=%d", n)
		}
		if n := len(h.CVEs); n > 0 {
			fmt.Fprintf(&sb, " cves=%d", n)
		}
		sb.WriteByte('\n')
		listed++
	}
	if omitted := present - listed; omitted > 0 {
		fmt.Fprintf(&sb, "\n... and %d more hosts\n", omitted)
	}
	return sb.String()
}