From 1735d3d55b6664d352c2182ab2f63640b71ed423 Mon Sep 17 00:00:00 2001
From: Codescribe <codescribe@windscribe.com>
Date: Wed, 6 May 2026 04:47:34 -0400
Subject: [PATCH] cmd/cli: suppress upstream.os healthcheck failures when WFP
 loopback protect is active

When WFP loopback protect is active, the upstream.os healthcheck will
always fail because an external WFP block filter is interfering with
plain DNS. The check still runs, but this change demotes those expected
failures to debug level and returns errOsHealthcheckSuppressed, which
the recovery loop treats the same as a successful check, eliminating
the log spam described in #526.
---
 cmd/cli/dns_intercept_darwin.go  |  4 ++++
 cmd/cli/dns_intercept_others.go  |  4 ++++
 cmd/cli/dns_intercept_windows.go | 20 ++++++++++++++++++++
 cmd/cli/dns_proxy.go             | 19 +++++++++++++++----
 4 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go
index 5740b41..7b80ca0 100644
--- a/cmd/cli/dns_intercept_darwin.go
+++ b/cmd/cli/dns_intercept_darwin.go
@@ -1742,3 +1742,7 @@ func (p *prog) forceReloadPFMainRuleset() {
 
 	mainLog.Load().Info().Msg("DNS intercept: force reload — pf ruleset and anchor reloaded successfully")
 }
+
+// osHealthcheckSuppressed reports whether upstream.os healthcheck failures
+// should be suppressed. Always false on darwin: WFP loopback protect is Windows-only.
+func (p *prog) osHealthcheckSuppressed() bool { return false }
diff --git a/cmd/cli/dns_intercept_others.go b/cmd/cli/dns_intercept_others.go
index 9f3c903..50c7fd0 100644
--- a/cmd/cli/dns_intercept_others.go
+++ b/cmd/cli/dns_intercept_others.go
@@ -37,3 +37,7 @@ func (p *prog) scheduleDelayedRechecks() {}
 
 // pfInterceptMonitor is a no-op on unsupported platforms.
 func (p *prog) pfInterceptMonitor() {}
+
+// osHealthcheckSuppressed reports whether upstream.os healthcheck failures should
+// be suppressed. Always false on non-Windows platforms: WFP loopback protect is Windows-only.
+func (p *prog) osHealthcheckSuppressed() bool { return false }
diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go
index 063d4f9..208f0e2 100644
--- a/cmd/cli/dns_intercept_windows.go
+++ b/cmd/cli/dns_intercept_windows.go
@@ -1260,6 +1260,26 @@ func (p *prog) activateLoopbackWFPProtect(state *wfpState) error {
 	return nil
 }
 
+// osHealthcheckSuppressed reports whether upstream.os healthcheck failures
+// should be suppressed because DNS intercept mode is active and the WFP
+// loopback protect has been engaged. Loopback protect is only activated when
+// an external WFP block filter (e.g. OpenVPN's block-outside-dns) is
+// interfering with DNS, which is the same condition that makes the OS resolver
+// healthcheck fail with i/o timeout. The check itself still runs; the caller
+// only demotes its failure (issue #526). The flag is read under state.mu.
+func (p *prog) osHealthcheckSuppressed() bool {
+	if !dnsIntercept || p.dnsInterceptState == nil {
+		return false
+	}
+	state, ok := p.dnsInterceptState.(*wfpState)
+	if !ok || state == nil {
+		return false
+	}
+	state.mu.Lock()
+	defer state.mu.Unlock()
+	return state.loopbackProtectActive
+}
+
 // deactivateLoopbackWFPProtectLocked is the lock-free inner implementation.
 // Caller must hold state.mu.
 func (p *prog) deactivateLoopbackWFPProtectLocked(state *wfpState) {
diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go
index c2c248e..c8a2b74 100644
--- a/cmd/cli/dns_proxy.go
+++ b/cmd/cli/dns_proxy.go
@@ -1809,6 +1809,8 @@ func interfaceIPsEqual(a, b []netip.Prefix) bool {
 	return true
 }
 
+var errOsHealthcheckSuppressed = errors.New("upstream os health check suppressed") // sentinel: upstream.os check failed while suppression is active
+
 // checkUpstreamOnce sends a test query to the specified upstream.
 // Returns nil if the upstream responds successfully.
 func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) error {
@@ -1838,11 +1840,19 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro
 	duration := time.Since(start)
 
 	if err != nil {
+		// Demote upstream.os check failures to debug while WFP loopback
+		// protect is active: an external WFP block filter is interfering
+		// with plain DNS so repeated failures here are expected. Other
+		// upstreams keep error level so real outages stay visible.
+		if upstream == upstreamOS && p.osHealthcheckSuppressed() {
+			p.Debug().Err(err).Msgf("Upstream %s check failed after %v (WFP loopback protect active)", upstream, duration)
+			return errOsHealthcheckSuppressed
+		}
 		p.Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration)
-	} else {
-		p.Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
+		return err
 	}
-	return err
+	p.Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
+	return nil
 }
 
 // handleRecovery orchestrates the recovery process by coordinating multiple smaller methods.
@@ -2121,7 +2131,8 @@ func (p *prog) waitForUpstreamRecovery(ctx context.Context, upstreams map[string
 				default:
 					attempts++
 					// checkUpstreamOnce will reset any failure counters on success.
-					if err := p.checkUpstreamOnce(name, uc); err == nil {
+					err := p.checkUpstreamOnce(name, uc)
+					if err == nil || errors.Is(err, errOsHealthcheckSuppressed) {
 						p.Debug().Msgf("Upstream %s recovered successfully", name)
 						select {
 						case recoveredCh <- name: