diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go index 62fc73f..88d7310 100644 --- a/cmd/cli/dns_intercept_darwin.go +++ b/cmd/cli/dns_intercept_darwin.go @@ -1816,3 +1816,7 @@ func (p *prog) forceReloadPFMainRuleset() { mainLog.Load().Info().Msg("DNS intercept: force reload — pf ruleset and anchor reloaded successfully") } + +// osHealthcheckSuppressed always returns false on darwin — WFP loopback +// protect (the trigger for suppression) is Windows-only. +func (p *prog) osHealthcheckSuppressed() bool { return false } diff --git a/cmd/cli/dns_intercept_others.go b/cmd/cli/dns_intercept_others.go index 9f3c903..50c7fd0 100644 --- a/cmd/cli/dns_intercept_others.go +++ b/cmd/cli/dns_intercept_others.go @@ -37,3 +37,7 @@ func (p *prog) scheduleDelayedRechecks() {} // pfInterceptMonitor is a no-op on unsupported platforms. func (p *prog) pfInterceptMonitor() {} + +// osHealthcheckSuppressed always returns false on non-Windows platforms — +// WFP loopback protect (the trigger for suppression) is Windows-only. +func (p *prog) osHealthcheckSuppressed() bool { return false } diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go index bfe07e1..a79fa0f 100644 --- a/cmd/cli/dns_intercept_windows.go +++ b/cmd/cli/dns_intercept_windows.go @@ -1184,6 +1184,26 @@ func (p *prog) activateLoopbackWFPProtect(state *wfpState) error { return nil } +// osHealthcheckSuppressed reports whether upstream.os healthcheck failures +// should be suppressed: true only when DNS intercept mode is active and the WFP +// loopback protect is active (read under state.mu). Loopback protect is only +// activated when an external WFP block filter (e.g. OpenVPN's +// block-outside-dns) is interfering with DNS, which is the same condition that +// makes the OS resolver healthcheck fail every 2s with i/o timeout — so +// suppressing those failures avoids the log spam described in issue #526.
+func (p *prog) osHealthcheckSuppressed() bool { + if !dnsIntercept || p.dnsInterceptState == nil { + return false + } + state, ok := p.dnsInterceptState.(*wfpState) + if !ok || state == nil { + return false + } + state.mu.Lock() + defer state.mu.Unlock() + return state.loopbackProtectActive +} + // deactivateLoopbackWFPProtectLocked is the lock-free inner implementation. // Caller must hold state.mu. func (p *prog) deactivateLoopbackWFPProtectLocked(state *wfpState) { diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index d12ab35..d3ddb04 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -744,6 +744,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { var reason RecoveryReason if upstreams[0] == upstreamOS { reason = RecoveryReasonOSFailure + } else { reason = RecoveryReasonRegularFailure } @@ -1657,6 +1658,8 @@ func interfaceIPsEqual(a, b []netip.Prefix) bool { return true } +var errOsHealthcheckSuppressed = errors.New("upstream os health check suppressed") + // checkUpstreamOnce sends a test query to the specified upstream. // Returns nil if the upstream responds successfully. func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) error { @@ -1686,11 +1689,19 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro duration := time.Since(start) if err != nil { + // While WFP loopback protect is active, log upstream.os check failures + // at debug and return errOsHealthcheckSuppressed instead of err: an + // external WFP block filter is interfering with plain DNS, so repeated + // failures are expected. Other upstreams still log at error level.
+ if upstream == upstreamOS && p.osHealthcheckSuppressed() { + mainLog.Load().Debug().Err(err).Msgf("Upstream %s check failed after %v (WFP loopback protect active)", upstream, duration) + return errOsHealthcheckSuppressed + } mainLog.Load().Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration) - } else { - mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration) + return err } - return err + mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration) + return nil } // recoveryDebounceWindow is the time to wait after the last network change @@ -1909,7 +1920,8 @@ func (p *prog) waitForUpstreamRecovery(ctx context.Context, upstreams map[string default: attempts++ // checkUpstreamOnce will reset any failure counters on success. - if err := p.checkUpstreamOnce(name, uc); err == nil { + err := p.checkUpstreamOnce(name, uc) + if err == nil || errors.Is(err, errOsHealthcheckSuppressed) { mainLog.Load().Debug().Msgf("Upstream %s recovered successfully", name) select { case recoveredCh <- name: