mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-02-03 22:18:39 +00:00
Do the reset after recovery has finished.
This commit is contained in:
@@ -1429,6 +1429,9 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
|
||||
}
|
||||
mainLog.Load().Info().Msgf("Upstream %q recovered; re-applying DNS settings", recovered)
|
||||
|
||||
// reset the upstream failure count and down state
|
||||
p.um.reset(recovered)
|
||||
|
||||
// For network changes we also reinitialize the OS resolver.
|
||||
if reason == RecoveryReasonNetworkChange {
|
||||
ns := ctrld.InitializeOsResolver(true)
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
|
||||
"github.com/Control-D-Inc/ctrld"
|
||||
)
|
||||
|
||||
@@ -80,11 +77,10 @@ func (um *upstreamMonitor) isDown(upstream string) bool {
|
||||
// reset marks an upstream as up and set failed queries counter to zero.
|
||||
func (um *upstreamMonitor) reset(upstream string) {
|
||||
um.mu.Lock()
|
||||
defer um.mu.Unlock()
|
||||
|
||||
um.failureReq[upstream] = 0
|
||||
um.down[upstream] = false
|
||||
um.recovered[upstream] = true
|
||||
um.mu.Unlock()
|
||||
go func() {
|
||||
// debounce the recovery to avoid incrementing failure counts already in flight
|
||||
time.Sleep(1 * time.Second)
|
||||
@@ -94,58 +90,6 @@ func (um *upstreamMonitor) reset(upstream string) {
|
||||
}()
|
||||
}
|
||||
|
||||
// checkUpstream checks the given upstream status, periodically sending query to upstream
|
||||
// until successfully. An upstream status/counter will be reset once it becomes reachable.
|
||||
func (p *prog) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) {
|
||||
p.um.mu.Lock()
|
||||
isChecking := p.um.checking[upstream]
|
||||
if isChecking {
|
||||
p.um.mu.Unlock()
|
||||
return
|
||||
}
|
||||
p.um.checking[upstream] = true
|
||||
p.um.mu.Unlock()
|
||||
defer func() {
|
||||
p.um.mu.Lock()
|
||||
p.um.checking[upstream] = false
|
||||
p.um.mu.Unlock()
|
||||
}()
|
||||
|
||||
resolver, err := ctrld.NewResolver(uc)
|
||||
if err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not check upstream")
|
||||
return
|
||||
}
|
||||
msg := new(dns.Msg)
|
||||
msg.SetQuestion(".", dns.TypeNS)
|
||||
timeout := 1000 * time.Millisecond
|
||||
if uc.Timeout > 0 {
|
||||
timeout = time.Duration(uc.Timeout) * time.Millisecond
|
||||
}
|
||||
check := func() error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
uc.ReBootstrap()
|
||||
_, err := resolver.Resolve(ctx, msg)
|
||||
return err
|
||||
}
|
||||
endpoint := uc.Endpoint
|
||||
if endpoint == "" {
|
||||
endpoint = uc.Name
|
||||
}
|
||||
mainLog.Load().Warn().Msgf("upstream %q is offline", endpoint)
|
||||
for {
|
||||
if err := check(); err == nil {
|
||||
mainLog.Load().Warn().Msgf("upstream %q is online", endpoint)
|
||||
p.um.reset(upstream)
|
||||
return
|
||||
} else {
|
||||
mainLog.Load().Debug().Msgf("checked upstream %q failed: %v", endpoint, err)
|
||||
}
|
||||
time.Sleep(checkUpstreamBackoffSleep)
|
||||
}
|
||||
}
|
||||
|
||||
// countHealthy returns the number of upstreams in the provided map that are considered healthy.
|
||||
func (um *upstreamMonitor) countHealthy(upstreams []string) int {
|
||||
var count int
|
||||
|
||||
Reference in New Issue
Block a user