Do the reset after recovery has finished

This commit is contained in:
Alex
2025-02-07 16:03:36 -05:00
committed by Cuong Manh Le
parent d37d0e942c
commit 60e65a37a6
2 changed files with 4 additions and 57 deletions

View File

@@ -1429,6 +1429,9 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
}
mainLog.Load().Info().Msgf("Upstream %q recovered; re-applying DNS settings", recovered)
// reset the upstream failure count and down state
p.um.reset(recovered)
// For network changes we also reinitialize the OS resolver.
if reason == RecoveryReasonNetworkChange {
ns := ctrld.InitializeOsResolver(true)

View File

@@ -1,12 +1,9 @@
package cli
import (
"context"
"sync"
"time"
"github.com/miekg/dns"
"github.com/Control-D-Inc/ctrld"
)
@@ -80,11 +77,10 @@ func (um *upstreamMonitor) isDown(upstream string) bool {
// reset marks an upstream as up and sets its failed-queries counter to zero.
func (um *upstreamMonitor) reset(upstream string) {
um.mu.Lock()
defer um.mu.Unlock()
um.failureReq[upstream] = 0
um.down[upstream] = false
um.recovered[upstream] = true
um.mu.Unlock()
go func() {
// debounce the recovery to avoid incrementing failure counts already in flight
time.Sleep(1 * time.Second)
@@ -94,58 +90,6 @@ func (um *upstreamMonitor) reset(upstream string) {
}()
}
// checkUpstream probes the given upstream by repeatedly sending it a query until
// one succeeds, then resets the upstream's status/counter via p.um.reset.
//
// At most one check runs per upstream at a time: if p.um.checking already marks
// the upstream as being checked, the call returns immediately. Otherwise the
// call does not return until a probe succeeds (or the resolver cannot be
// created), sleeping checkUpstreamBackoffSleep between failed attempts.
func (p *prog) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) {
	// Claim the "checking" slot for this upstream, or bail out if someone
	// else already holds it.
	p.um.mu.Lock()
	if p.um.checking[upstream] {
		p.um.mu.Unlock()
		return
	}
	p.um.checking[upstream] = true
	p.um.mu.Unlock()
	// Release the slot on every exit path.
	defer func() {
		p.um.mu.Lock()
		p.um.checking[upstream] = false
		p.um.mu.Unlock()
	}()

	resolver, err := ctrld.NewResolver(uc)
	if err != nil {
		mainLog.Load().Warn().Err(err).Msg("could not check upstream")
		return
	}

	// Probe message: an NS question for the root zone.
	msg := new(dns.Msg)
	msg.SetQuestion(".", dns.TypeNS)

	// Per-attempt timeout: uc.Timeout is treated as milliseconds, with a
	// one second default when it is not set to a positive value.
	timeout := 1000 * time.Millisecond
	if uc.Timeout > 0 {
		timeout = time.Duration(uc.Timeout) * time.Millisecond
	}

	// check performs a single probe bounded by timeout.
	check := func() error {
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()
		// NOTE(review): ReBootstrap is invoked before every attempt; presumably
		// it refreshes the upstream's bootstrap state — confirm against ctrld.
		uc.ReBootstrap()
		_, err := resolver.Resolve(ctx, msg)
		return err
	}

	// Prefer the endpoint for log messages, falling back to the upstream name.
	endpoint := uc.Endpoint
	if endpoint == "" {
		endpoint = uc.Name
	}

	mainLog.Load().Warn().Msgf("upstream %q is offline", endpoint)
	for {
		checkErr := check()
		if checkErr == nil {
			mainLog.Load().Warn().Msgf("upstream %q is online", endpoint)
			p.um.reset(upstream)
			return
		}
		mainLog.Load().Debug().Msgf("checked upstream %q failed: %v", endpoint, checkErr)
		time.Sleep(checkUpstreamBackoffSleep)
	}
}
// countHealthy returns the number of upstreams in the provided map that are considered healthy.
func (um *upstreamMonitor) countHealthy(upstreams []string) int {
var count int