mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-02-03 22:18:39 +00:00
internal/clientinfo: monitor nameserver health
If the resolver cannot reach the nameserver, PTR discovery should print the error message only once and then stop sending queries until the nameserver is reachable again. This prevents PTR discovery from flooding the ctrld log with duplicate messages.
This commit is contained in:
committed by
Cuong Manh Le
parent
0f3e8c7ada
commit
823195c504
@@ -3,16 +3,19 @@ package clientinfo
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
"tailscale.com/logtail/backoff"
|
||||
|
||||
"github.com/Control-D-Inc/ctrld"
|
||||
)
|
||||
|
||||
type ptrDiscover struct {
|
||||
hostname sync.Map // ip => hostname
|
||||
resolver ctrld.Resolver
|
||||
hostname sync.Map // ip => hostname
|
||||
resolver ctrld.Resolver
|
||||
serverDown atomic.Bool
|
||||
}
|
||||
|
||||
func (p *ptrDiscover) refresh() error {
|
||||
@@ -60,6 +63,10 @@ func (p *ptrDiscover) lookupHostnameFromCache(ip string) string {
|
||||
}
|
||||
|
||||
func (p *ptrDiscover) lookupHostname(ip string) string {
|
||||
// If nameserver is down, do nothing.
|
||||
if p.serverDown.Load() {
|
||||
return ""
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
msg := new(dns.Msg)
|
||||
@@ -71,7 +78,9 @@ func (p *ptrDiscover) lookupHostname(ip string) string {
|
||||
msg.SetQuestion(addr, dns.TypePTR)
|
||||
ans, err := p.resolver.Resolve(ctx, msg)
|
||||
if err != nil {
|
||||
ctrld.ProxyLogger.Load().Warn().Str("discovery", "ptr").Err(err).Msg("could not lookup IP")
|
||||
ctrld.ProxyLogger.Load().Warn().Str("discovery", "ptr").Err(err).Msg("could not perform PTR lookup")
|
||||
p.serverDown.Store(true)
|
||||
go p.checkServer()
|
||||
return ""
|
||||
}
|
||||
for _, rr := range ans.Answer {
|
||||
@@ -83,3 +92,25 @@ func (p *ptrDiscover) lookupHostname(ip string) string {
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// checkServer monitors if the resolver can reach its nameserver. When the nameserver
|
||||
// is reachable, set p.serverDown to false, so p.lookupHostname can continue working.
|
||||
func (p *ptrDiscover) checkServer() {
|
||||
bo := backoff.NewBackoff("ptrDiscover", func(format string, args ...any) {}, time.Minute*5)
|
||||
m := new(dns.Msg)
|
||||
m.SetQuestion(".", dns.TypeNS)
|
||||
ping := func() error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
_, err := p.resolver.Resolve(ctx, m)
|
||||
return err
|
||||
}
|
||||
for {
|
||||
if err := ping(); err != nil {
|
||||
bo.BackOff(context.Background(), err)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
p.serverDown.Store(false)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user