diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go
index 5740b41..7b80ca0 100644
--- a/cmd/cli/dns_intercept_darwin.go
+++ b/cmd/cli/dns_intercept_darwin.go
@@ -1742,3 +1742,7 @@ func (p *prog) forceReloadPFMainRuleset() {
 
 	mainLog.Load().Info().Msg("DNS intercept: force reload — pf ruleset and anchor reloaded successfully")
 }
+
+// osHealthcheckSuppressed always returns false on darwin — WFP loopback
+// protect (the trigger for suppression) is Windows-only.
+func (p *prog) osHealthcheckSuppressed() bool { return false }
diff --git a/cmd/cli/dns_intercept_others.go b/cmd/cli/dns_intercept_others.go
index 9f3c903..50c7fd0 100644
--- a/cmd/cli/dns_intercept_others.go
+++ b/cmd/cli/dns_intercept_others.go
@@ -37,3 +37,7 @@ func (p *prog) scheduleDelayedRechecks() {}
 
 // pfInterceptMonitor is a no-op on unsupported platforms.
 func (p *prog) pfInterceptMonitor() {}
+
+// osHealthcheckSuppressed always returns false on non-Windows platforms —
+// WFP loopback protect (the trigger for suppression) is Windows-only.
+func (p *prog) osHealthcheckSuppressed() bool { return false }
diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go
index 063d4f9..208f0e2 100644
--- a/cmd/cli/dns_intercept_windows.go
+++ b/cmd/cli/dns_intercept_windows.go
@@ -1260,6 +1260,26 @@ func (p *prog) activateLoopbackWFPProtect(state *wfpState) error {
 	return nil
 }
 
+// osHealthcheckSuppressed reports whether the upstream.os healthcheck should
+// be skipped because DNS intercept mode is active and the WFP loopback protect
+// has been engaged. Loopback protect is only activated when an external WFP
+// block filter (e.g. OpenVPN's block-outside-dns) is interfering with DNS,
+// which is the same condition that makes the OS resolver healthcheck fail
+// every 2s with i/o timeout — so suppressing the check avoids the log spam
+// described in issue #526.
+func (p *prog) osHealthcheckSuppressed() bool {
+	if !dnsIntercept || p.dnsInterceptState == nil {
+		return false
+	}
+	state, ok := p.dnsInterceptState.(*wfpState)
+	if !ok || state == nil {
+		return false
+	}
+	state.mu.Lock()
+	defer state.mu.Unlock()
+	return state.loopbackProtectActive
+}
+
 // deactivateLoopbackWFPProtectLocked is the lock-free inner implementation.
 // Caller must hold state.mu.
 func (p *prog) deactivateLoopbackWFPProtectLocked(state *wfpState) {
diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go
index c2c248e..c8a2b74 100644
--- a/cmd/cli/dns_proxy.go
+++ b/cmd/cli/dns_proxy.go
@@ -1809,6 +1809,8 @@ func interfaceIPsEqual(a, b []netip.Prefix) bool {
 	return true
 }
 
+var errOsHealthcheckSuppressed = errors.New("upstream os health check suppressed")
+
 // checkUpstreamOnce sends a test query to the specified upstream.
 // Returns nil if the upstream responds successfully.
 func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) error {
@@ -1838,11 +1840,19 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro
 	duration := time.Since(start)
 
 	if err != nil {
+		// Demote upstream.os check failures to debug while WFP loopback
+		// protect is active: an external WFP block filter is interfering
+		// with plain DNS so repeated failures here are expected. Other
+		// upstreams keep error level so real outages stay visible.
+		if upstream == upstreamOS && p.osHealthcheckSuppressed() {
+			p.Debug().Err(err).Msgf("Upstream %s check failed after %v (WFP loopback protect active)", upstream, duration)
+			return errOsHealthcheckSuppressed
+		}
 		p.Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration)
-	} else {
-		p.Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
+		return err
 	}
-	return err
+	p.Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
+	return nil
 }
 
 // handleRecovery orchestrates the recovery process by coordinating multiple smaller methods.
@@ -2121,7 +2131,8 @@ func (p *prog) waitForUpstreamRecovery(ctx context.Context, upstreams map[string
 				default:
 					attempts++
 					// checkUpstreamOnce will reset any failure counters on success.
-					if err := p.checkUpstreamOnce(name, uc); err == nil {
+					err := p.checkUpstreamOnce(name, uc)
+					if err == nil || errors.Is(err, errOsHealthcheckSuppressed) {
 						p.Debug().Msgf("Upstream %s recovered successfully", name)
 						select {
 						case recoveredCh <- name: