internal/clientinfo: monitor nameserver health

In case the resolver could not reach nameserver, ptr discover should
only print error message once, then stop doing the query until the
nameserver is reachable. This would prevent ptr discover from flooding
ctrld log with a lot of duplicated messages.
This commit is contained in:
Cuong Manh Le
2023-09-07 11:12:50 +00:00
committed by Cuong Manh Le
parent 0f3e8c7ada
commit 823195c504

View File

@@ -3,16 +3,19 @@ package clientinfo
import (
"context"
"sync"
"sync/atomic"
"time"
"github.com/miekg/dns"
"tailscale.com/logtail/backoff"
"github.com/Control-D-Inc/ctrld"
)
type ptrDiscover struct {
hostname sync.Map // ip => hostname
resolver ctrld.Resolver
hostname sync.Map // ip => hostname
resolver ctrld.Resolver
serverDown atomic.Bool
}
func (p *ptrDiscover) refresh() error {
@@ -60,6 +63,10 @@ func (p *ptrDiscover) lookupHostnameFromCache(ip string) string {
}
func (p *ptrDiscover) lookupHostname(ip string) string {
// If nameserver is down, do nothing.
if p.serverDown.Load() {
return ""
}
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
msg := new(dns.Msg)
@@ -71,7 +78,9 @@ func (p *ptrDiscover) lookupHostname(ip string) string {
msg.SetQuestion(addr, dns.TypePTR)
ans, err := p.resolver.Resolve(ctx, msg)
if err != nil {
ctrld.ProxyLogger.Load().Warn().Str("discovery", "ptr").Err(err).Msg("could not lookup IP")
ctrld.ProxyLogger.Load().Warn().Str("discovery", "ptr").Err(err).Msg("could not perform PTR lookup")
p.serverDown.Store(true)
go p.checkServer()
return ""
}
for _, rr := range ans.Answer {
@@ -83,3 +92,25 @@ func (p *ptrDiscover) lookupHostname(ip string) string {
}
return ""
}
// checkServer monitors if the resolver can reach its nameserver. When the nameserver
// is reachable, set p.serverDown to false, so p.lookupHostname can continue working.
func (p *ptrDiscover) checkServer() {
bo := backoff.NewBackoff("ptrDiscover", func(format string, args ...any) {}, time.Minute*5)
m := new(dns.Msg)
m.SetQuestion(".", dns.TypeNS)
ping := func() error {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
_, err := p.resolver.Resolve(ctx, m)
return err
}
for {
if err := ping(); err != nil {
bo.BackOff(context.Background(), err)
continue
}
break
}
p.serverDown.Store(false)
}