From 823195c5040faf79536af9d0f9d9dab749f55a07 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Thu, 7 Sep 2023 11:12:50 +0000 Subject: [PATCH] internal/clientinfo: monitor nameserver health If the resolver cannot reach the nameserver, PTR discovery should log the error only once, then stop querying until the nameserver is reachable again. This prevents PTR discovery from flooding the ctrld log with duplicate messages. --- internal/clientinfo/ptr_lookup.go | 37 ++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/internal/clientinfo/ptr_lookup.go b/internal/clientinfo/ptr_lookup.go index 0a8867b..6a9d99b 100644 --- a/internal/clientinfo/ptr_lookup.go +++ b/internal/clientinfo/ptr_lookup.go @@ -3,16 +3,19 @@ package clientinfo import ( "context" "sync" + "sync/atomic" "time" "github.com/miekg/dns" + "tailscale.com/logtail/backoff" "github.com/Control-D-Inc/ctrld" ) type ptrDiscover struct { - hostname sync.Map // ip => hostname - resolver ctrld.Resolver + hostname sync.Map // ip => hostname + resolver ctrld.Resolver + serverDown atomic.Bool } func (p *ptrDiscover) refresh() error { @@ -60,6 +63,10 @@ func (p *ptrDiscover) lookupHostnameFromCache(ip string) string { } func (p *ptrDiscover) lookupHostname(ip string) string { + // If nameserver is down, do nothing. 
+	if p.serverDown.Load() {
+		return ""
+	}
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
 	defer cancel()
 	msg := new(dns.Msg)
@@ -71,7 +78,12 @@ func (p *ptrDiscover) lookupHostname(ip string) string {
 	msg.SetQuestion(addr, dns.TypePTR)
 	ans, err := p.resolver.Resolve(ctx, msg)
 	if err != nil {
-		ctrld.ProxyLogger.Load().Warn().Str("discovery", "ptr").Err(err).Msg("could not lookup IP")
+		// CompareAndSwap lets exactly one failing lookup log the error and start the
+		// health monitor; concurrent failures must not spawn extra checkServer goroutines.
+		if p.serverDown.CompareAndSwap(false, true) {
+			ctrld.ProxyLogger.Load().Warn().Str("discovery", "ptr").Err(err).Msg("could not perform PTR lookup")
+			go p.checkServer()
+		}
 		return ""
 	}
 	for _, rr := range ans.Answer {
@@ -83,3 +95,22 @@ func (p *ptrDiscover) lookupHostname(ip string) string {
 	}
 	return ""
 }
+
+// checkServer blocks until the resolver answers a root NS query, retrying with backoff
+// (capped at 5 minutes), then clears p.serverDown so p.lookupHostname resumes querying.
+// It must run in its own goroutine; lookupHostname starts it once per outage.
+func (p *ptrDiscover) checkServer() {
+	bo := backoff.NewBackoff("ptrDiscover", func(format string, args ...any) {}, time.Minute*5)
+	m := new(dns.Msg)
+	m.SetQuestion(".", dns.TypeNS)
+	ping := func() error {
+		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+		defer cancel()
+		_, err := p.resolver.Resolve(ctx, m)
+		return err
+	}
+	for err := ping(); err != nil; err = ping() {
+		bo.BackOff(context.Background(), err)
+	}
+	p.serverDown.Store(false)
+}