mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-05-15 00:50:25 +02:00
cmd/cli: skip upstream.os healthcheck when WFP loopback protect enabled
The check always fails in this case, which causes unnecessary log spam.
This commit is contained in:
committed by
Cuong Manh Le
parent
2b27c148be
commit
5dd5846cca
@@ -1816,3 +1816,7 @@ func (p *prog) forceReloadPFMainRuleset() {
|
||||
|
||||
mainLog.Load().Info().Msg("DNS intercept: force reload — pf ruleset and anchor reloaded successfully")
|
||||
}
|
||||
|
||||
// osHealthcheckSuppressed always returns false on darwin — WFP loopback
|
||||
// protect (the trigger for suppression) is Windows-only.
|
||||
func (p *prog) osHealthcheckSuppressed() bool {
	// Suppression is triggered by WFP loopback protect, which only exists
	// on Windows, so there is never anything to suppress here.
	return false
}
|
||||
|
||||
@@ -37,3 +37,7 @@ func (p *prog) scheduleDelayedRechecks() {}
|
||||
|
||||
// pfInterceptMonitor is a no-op on unsupported platforms.
|
||||
func (p *prog) pfInterceptMonitor() {
	// Intentionally empty: there is nothing to monitor on this platform.
}
|
||||
|
||||
// osHealthcheckSuppressed always returns false on non-Windows platforms —
|
||||
// WFP loopback protect (the trigger for suppression) is Windows-only.
|
||||
func (p *prog) osHealthcheckSuppressed() bool {
	// WFP loopback protect is Windows-only, so the healthcheck is never
	// suppressed on this platform.
	return false
}
|
||||
|
||||
@@ -1184,6 +1184,26 @@ func (p *prog) activateLoopbackWFPProtect(state *wfpState) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// osHealthcheckSuppressed reports whether the upstream.os healthcheck should
|
||||
// be skipped because DNS intercept mode is active and the WFP loopback protect
|
||||
// has been engaged. Loopback protect is only activated when an external WFP
|
||||
// block filter (e.g. OpenVPN's block-outside-dns) is interfering with DNS,
|
||||
// which is the same condition that makes the OS resolver healthcheck fail
|
||||
// every 2s with i/o timeout — so suppressing the check avoids the log spam
|
||||
// described in issue #526.
|
||||
func (p *prog) osHealthcheckSuppressed() bool {
|
||||
if !dnsIntercept || p.dnsInterceptState == nil {
|
||||
return false
|
||||
}
|
||||
state, ok := p.dnsInterceptState.(*wfpState)
|
||||
if !ok || state == nil {
|
||||
return false
|
||||
}
|
||||
state.mu.Lock()
|
||||
defer state.mu.Unlock()
|
||||
return state.loopbackProtectActive
|
||||
}
|
||||
|
||||
// deactivateLoopbackWFPProtectLocked is the inner implementation that does not acquire state.mu itself.
|
||||
// Caller must hold state.mu.
|
||||
func (p *prog) deactivateLoopbackWFPProtectLocked(state *wfpState) {
|
||||
|
||||
+16
-4
@@ -744,6 +744,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
var reason RecoveryReason
|
||||
if upstreams[0] == upstreamOS {
|
||||
reason = RecoveryReasonOSFailure
|
||||
|
||||
} else {
|
||||
reason = RecoveryReasonRegularFailure
|
||||
}
|
||||
@@ -1657,6 +1658,8 @@ func interfaceIPsEqual(a, b []netip.Prefix) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// errOsHealthcheckSuppressed is the sentinel returned by checkUpstreamOnce
// when an upstream.os check fails while osHealthcheckSuppressed reports true.
// It lets callers tell this expected failure apart from a real one via
// errors.Is.
var errOsHealthcheckSuppressed = errors.New("upstream os health check suppressed")
|
||||
|
||||
// checkUpstreamOnce sends a test query to the specified upstream.
|
||||
// Returns nil if the upstream responds successfully.
|
||||
func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) error {
|
||||
@@ -1686,11 +1689,19 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro
|
||||
duration := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
// Demote upstream.os check failures to debug while WFP loopback
|
||||
// protect is active: an external WFP block filter is interfering
|
||||
// with plain DNS so repeated failures here are expected. Other
|
||||
// upstreams keep error level so real outages stay visible.
|
||||
if upstream == upstreamOS && p.osHealthcheckSuppressed() {
|
||||
mainLog.Load().Debug().Err(err).Msgf("Upstream %s check failed after %v (WFP loopback protect active)", upstream, duration)
|
||||
return errOsHealthcheckSuppressed
|
||||
}
|
||||
mainLog.Load().Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration)
|
||||
} else {
|
||||
mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
|
||||
return err
|
||||
}
|
||||
return err
|
||||
mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
|
||||
return nil
|
||||
}
|
||||
|
||||
// recoveryDebounceWindow is the time to wait after the last network change
|
||||
@@ -1909,7 +1920,8 @@ func (p *prog) waitForUpstreamRecovery(ctx context.Context, upstreams map[string
|
||||
default:
|
||||
attempts++
|
||||
// checkUpstreamOnce will reset any failure counters on success.
|
||||
if err := p.checkUpstreamOnce(name, uc); err == nil {
|
||||
err := p.checkUpstreamOnce(name, uc)
|
||||
if err == nil || errors.Is(err, errOsHealthcheckSuppressed) {
|
||||
mainLog.Load().Debug().Msgf("Upstream %s recovered successfully", name)
|
||||
select {
|
||||
case recoveredCh <- name:
|
||||
|
||||
Reference in New Issue
Block a user