// Package javascript downloads JS files from probed hosts and scans them // for secrets with the v1 analyzer. Drains the store at start; also listens // for late HTTPProbed events. package javascript import ( "context" "strings" "sync" "time" "god-eye/internal/eventbus" gohttp "god-eye/internal/http" "god-eye/internal/module" "god-eye/internal/scanner" "god-eye/internal/store" ) // publicAPIDenylist covers well-known public/third-party APIs and font // services that the v1 regex scanner flags as "API Endpoint" but which // are never secrets. Matched case-insensitively as a substring. var publicAPIDenylist = []string{ "fonts.googleapis.com", "fonts.gstatic.com", "www.googleapis.com", "content.googleapis.com", "api.fastmail.com", "api.forwardemail.net", "cdn.jsdelivr.net", "cdnjs.cloudflare.com", "unpkg.com", } // uiStringDenylist covers common UI labels / warning strings that trip // the "Generic Password" regex but are clearly human-readable copy. var uiStringDenylist = []string{ "change password", "update password", "reset password", "confirm password", "forgot password", "set-initial-password", "change-password", "this is a very common password", "masterpassword", "password", } // isSecretFalsePositive applies cheap deterministic heuristics to weed // out v1 regex noise. Does NOT replace AI triage (which is still the // preferred filter once the ai module is enabled) — it only suppresses // findings that are *definitely* not secrets. func isSecretFalsePositive(secret string) bool { low := strings.ToLower(strings.TrimSpace(secret)) for _, s := range publicAPIDenylist { if strings.Contains(low, s) { return true } } for _, s := range uiStringDenylist { if strings.Contains(low, s) { return true } } // Very short matches (< 8 chars of unique content) are almost always // labels, not credentials. The v1 regex already strips the "[Kind] " // prefix before passing to us; anything under 8 chars is noise. if len(low) > 0 && len(low) < 8 { return true } return false } const ModuleName = "js.analyzer" type jsModule struct{} func Register() { module.Register(&jsModule{}) } func (*jsModule) Name() string { return ModuleName } func (*jsModule) Phase() module.Phase { return module.PhaseAnalysis } func (*jsModule) Consumes() []eventbus.EventType { return []eventbus.EventType{eventbus.EventHTTPProbed} } func (*jsModule) Produces() []eventbus.EventType { return []eventbus.EventType{eventbus.EventJSFile, eventbus.EventSecret} } func (*jsModule) DefaultEnabled() bool { return true } func (*jsModule) Run(mctx module.Context) error { timeout := mctx.Config.Int("timeout", 5) client := gohttp.GetSharedClient(timeout) processed := make(map[string]struct{}) var mu sync.Mutex shouldProcess := func(host string) bool { mu.Lock() defer mu.Unlock() if _, ok := processed[host]; ok { return false } processed[host] = struct{}{} return true } analyze := func(host string) { if mctx.Ctx.Err() != nil { return } jsFiles, secrets := scanner.AnalyzeJSFiles(host, client) // Drop known-noise findings before they reach the store or bus. filtered := secrets[:0] for _, s := range secrets { if isSecretFalsePositive(s) { continue } filtered = append(filtered, s) } secrets = filtered if len(jsFiles) == 0 && len(secrets) == 0 { return } _ = mctx.Store.Upsert(mctx.Ctx, host, func(h *store.Host) { for _, sec := range secrets { h.Secrets = append(h.Secrets, store.Secret{ Kind: "js-regex", Match: sec, Severity: string(eventbus.SeverityHigh), FoundAt: time.Now(), }) } }) for _, jsf := range jsFiles { mctx.Bus.Publish(mctx.Ctx, eventbus.JSFileDiscovered{ EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, URL: jsf, Host: host, }) } for _, s := range secrets { mctx.Bus.Publish(mctx.Ctx, eventbus.SecretFound{ EventMeta: eventbus.EventMeta{At: time.Now(), Source: ModuleName, Target: host}, Kind: "js-regex", Match: s, Location: "js-file", Severity: eventbus.SeverityHigh, }) } } var wg sync.WaitGroup // Drain: every probed host (StatusCode > 0). for _, h := range mctx.Store.All(mctx.Ctx) { if h == nil || h.StatusCode == 0 { continue } if !shouldProcess(h.Subdomain) { continue } host := h.Subdomain wg.Add(1) go func() { defer wg.Done(); analyze(host) }() } // Late events. sub := mctx.Bus.Subscribe(eventbus.EventHTTPProbed, func(_ context.Context, e eventbus.Event) { ev, ok := e.(eventbus.HTTPProbed) if !ok || ev.StatusCode == 0 { return } host := ev.Meta().Target if !shouldProcess(host) { return } wg.Add(1) go func() { defer wg.Done(); analyze(host) }() }) defer sub.Unsubscribe() select { case <-time.After(500 * time.Millisecond): case <-mctx.Ctx.Done(): } wg.Wait() return nil }