diff --git a/cmd/cli/loop.go b/cmd/cli/loop.go index 5e6d911..a9d3972 100644 --- a/cmd/cli/loop.go +++ b/cmd/cli/loop.go @@ -56,7 +56,10 @@ func (p *prog) checkDnsLoop() { mainLog.Load().Debug().Msg("start checking DNS loop") upstream := make(map[string]*ctrld.UpstreamConfig) p.loopMu.Lock() - for _, uc := range p.cfg.Upstream { + for n, uc := range p.cfg.Upstream { + if p.um.isDown("upstream." + n) { + continue + } uid := uc.UID() p.loop[uid] = false upstream[uid] = uc diff --git a/cmd/cli/prog.go b/cmd/cli/prog.go index a475a77..d304cce 100644 --- a/cmd/cli/prog.go +++ b/cmd/cli/prog.go @@ -282,14 +282,14 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) { } } - // Check for possible DNS loop. - p.checkDnsLoop() close(p.onStartedDone) - // Start check DNS loop ticker. wg.Add(1) go func() { defer wg.Done() + // Check for possible DNS loop. + p.checkDnsLoop() + // Start check DNS loop ticker. p.checkDnsLoopTicker(ctx) }() diff --git a/cmd/cli/upstream_monitor.go b/cmd/cli/upstream_monitor.go index 4b3ee69..83087a4 100644 --- a/cmd/cli/upstream_monitor.go +++ b/cmd/cli/upstream_monitor.go @@ -7,7 +7,6 @@ import ( "time" "github.com/miekg/dns" - "tailscale.com/logtail/backoff" "github.com/Control-D-Inc/ctrld" ) @@ -15,8 +14,8 @@ import ( const ( // maxFailureRequest is the maximum failed queries allowed before an upstream is marked as down. maxFailureRequest = 100 - // checkUpstreamMaxBackoff is the max backoff time when checking upstream status. - checkUpstreamMaxBackoff = 2 * time.Minute + // checkUpstreamBackoffSleep is the time interval between upstream checks. + checkUpstreamBackoffSleep = 2 * time.Second ) // upstreamMonitor performs monitoring upstreams health. 
@@ -76,7 +75,6 @@ func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConf um.checking[upstream] = true um.mu.Unlock() - bo := backoff.NewBackoff("checkUpstream", logf, checkUpstreamMaxBackoff) resolver, err := ctrld.NewResolver(uc) if err != nil { mainLog.Load().Warn().Err(err).Msg("could not check upstream") @@ -84,15 +82,20 @@ func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConf } msg := new(dns.Msg) msg.SetQuestion(".", dns.TypeNS) - ctx := context.Background() - for { + check := func() error { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + uc.ReBootstrap() _, err := resolver.Resolve(ctx, msg) - if err == nil { + return err + } + for { + if err := check(); err == nil { mainLog.Load().Debug().Msgf("upstream %q is online", uc.Endpoint) um.reset(upstream) return } - bo.BackOff(ctx, err) + time.Sleep(checkUpstreamBackoffSleep) } }