mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-05-27 12:52:27 +02:00
cmd/cli: avoid accessing mainLog when possible
By adding a logger field to "prog" struct, and use this field inside its method instead of always accessing global mainLog variable. This at least ensure more consistent usage of the logger during ctrld prog runtime, and also help refactoring the code more easily in the future (like replacing the logger library).
This commit is contained in:
committed by
Cuong Manh Le
parent
fc527dbdfb
commit
b9b9cfcade
+91
-90
@@ -87,14 +87,14 @@ type upstreamForResult struct {
|
||||
func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
// Start network monitoring
|
||||
if err := p.monitorNetworkChanges(mainCtx); err != nil {
|
||||
mainLog.Load().Error().Err(err).Msg("Failed to start network monitoring")
|
||||
p.Error().Err(err).Msg("Failed to start network monitoring")
|
||||
// Don't return here as we still want DNS service to run
|
||||
}
|
||||
|
||||
listenerConfig := p.cfg.Listener[listenerNum]
|
||||
// make sure ip is allocated
|
||||
if allocErr := p.allocateIP(listenerConfig.IP); allocErr != nil {
|
||||
mainLog.Load().Error().Err(allocErr).Str("ip", listenerConfig.IP).Msg("serveUDP: failed to allocate listen ip")
|
||||
p.Error().Err(allocErr).Str("ip", listenerConfig.IP).Msg("serveUDP: failed to allocate listen ip")
|
||||
return allocErr
|
||||
}
|
||||
|
||||
@@ -110,9 +110,9 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
listenerConfig := p.cfg.Listener[listenerNum]
|
||||
reqId := requestID()
|
||||
ctx := context.WithValue(context.Background(), ctrld.ReqIdCtxKey{}, reqId)
|
||||
ctx = ctrld.LoggerCtx(ctx, mainLog.Load())
|
||||
ctx = ctrld.LoggerCtx(ctx, p.logger.Load())
|
||||
if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "query refused, listener does not allow WAN clients: %s", w.RemoteAddr().String())
|
||||
ctrld.Log(ctx, p.Debug(), "query refused, listener does not allow WAN clients: %s", w.RemoteAddr().String())
|
||||
answer := new(dns.Msg)
|
||||
answer.SetRcode(m, dns.RcodeRefused)
|
||||
_ = w.WriteMsg(answer)
|
||||
@@ -135,7 +135,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
|
||||
if _, ok := p.cacheFlushDomainsMap[domain]; ok && p.cache != nil {
|
||||
p.cache.Purge()
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "received query %q, local cache is purged", domain)
|
||||
ctrld.Log(ctx, p.Debug(), "received query %q, local cache is purged", domain)
|
||||
}
|
||||
remoteIP, _, _ := net.SplitHostPort(w.RemoteAddr().String())
|
||||
ci := p.getClientInfo(remoteIP, m)
|
||||
@@ -144,7 +144,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
remoteAddr := spoofRemoteAddr(w.RemoteAddr(), ci)
|
||||
fmtSrcToDest := fmtRemoteToLocal(listenerNum, ci.Hostname, remoteAddr.String())
|
||||
t := time.Now()
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "QUERY: %s: %s %s", fmtSrcToDest, dns.TypeToString[q.Qtype], domain)
|
||||
ctrld.Log(ctx, p.Info(), "QUERY: %s: %s %s", fmtSrcToDest, dns.TypeToString[q.Qtype], domain)
|
||||
ur := p.upstreamFor(ctx, listenerNum, listenerConfig, remoteAddr, ci.Mac, domain)
|
||||
|
||||
labelValues := make([]string, 0, len(statsQueriesCountLabels))
|
||||
@@ -155,7 +155,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
|
||||
var answer *dns.Msg
|
||||
if !ur.matched && listenerConfig.Restricted {
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "query refused, %s does not match any network policy", remoteAddr.String())
|
||||
ctrld.Log(ctx, p.Info(), "query refused, %s does not match any network policy", remoteAddr.String())
|
||||
answer = new(dns.Msg)
|
||||
answer.SetRcode(m, dns.RcodeRefused)
|
||||
labelValues = append(labelValues, "") // no upstream
|
||||
@@ -174,7 +174,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
|
||||
answer = pr.answer
|
||||
rtt := time.Since(t)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "received response of %d bytes in %s", answer.Len(), rtt)
|
||||
ctrld.Log(ctx, p.Debug(), "received response of %d bytes in %s", answer.Len(), rtt)
|
||||
upstream := pr.upstream
|
||||
switch {
|
||||
case pr.cached:
|
||||
@@ -192,7 +192,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
p.forceFetchingAPI(domain)
|
||||
}()
|
||||
if err := w.WriteMsg(answer); err != nil {
|
||||
ctrld.Log(ctx, mainLog.Load().Error().Err(err), "serveDNS: failed to send DNS response to client")
|
||||
ctrld.Log(ctx, p.Error().Err(err), "serveDNS: failed to send DNS response to client")
|
||||
}
|
||||
})
|
||||
|
||||
@@ -209,7 +209,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
case err := <-errCh:
|
||||
// Local ipv6 listener should not terminate ctrld.
|
||||
// It's a workaround for a quirk on Windows.
|
||||
mainLog.Load().Warn().Err(err).Msg("local ipv6 listener failed")
|
||||
p.Warn().Err(err).Msg("local ipv6 listener failed")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
@@ -229,7 +229,7 @@ func (p *prog) serveDNS(mainCtx context.Context, listenerNum string) error {
|
||||
case err := <-errCh:
|
||||
// RFC1918 listener should not terminate ctrld.
|
||||
// It's a workaround for a quirk on system with systemd-resolved.
|
||||
mainLog.Load().Warn().Err(err).Msgf("could not listen on %s: %s", proto, listenAddr)
|
||||
p.Warn().Err(err).Msgf("could not listen on %s: %s", proto, listenAddr)
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -371,8 +371,8 @@ func (p *prog) proxyPrivatePtrLookup(ctx context.Context, msg *dns.Msg) *dns.Msg
|
||||
},
|
||||
Ptr: dns.Fqdn(name),
|
||||
}}
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "private PTR lookup, using client info table")
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "client info: %v", ctrld.ClientInfo{
|
||||
ctrld.Log(ctx, p.Info(), "private PTR lookup, using client info table")
|
||||
ctrld.Log(ctx, p.Debug(), "client info: %v", ctrld.ClientInfo{
|
||||
Mac: p.ciTable.LookupMac(ip.String()),
|
||||
IP: ip.String(),
|
||||
Hostname: name,
|
||||
@@ -416,8 +416,8 @@ func (p *prog) proxyLanHostnameQuery(ctx context.Context, msg *dns.Msg) *dns.Msg
|
||||
AAAA: ip.AsSlice(),
|
||||
}}
|
||||
}
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "lan hostname lookup, using client info table")
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "client info: %v", ctrld.ClientInfo{
|
||||
ctrld.Log(ctx, p.Info(), "lan hostname lookup, using client info table")
|
||||
ctrld.Log(ctx, p.Debug(), "client info: %v", ctrld.ClientInfo{
|
||||
Mac: p.ciTable.LookupMac(ip.String()),
|
||||
IP: ip.String(),
|
||||
Hostname: hostname,
|
||||
@@ -441,7 +441,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
// running and listening on local addresses, these local addresses must be used
|
||||
// as nameservers, so queries for ADDC could be resolved as expected.
|
||||
if p.isAdDomainQuery(req.msg) {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(),
|
||||
ctrld.Log(ctx, p.Debug(),
|
||||
"AD domain query detected for %s in domain %s",
|
||||
req.msg.Question[0].Name, p.adDomain)
|
||||
upstreamConfigs = []*ctrld.UpstreamConfig{localUpstreamConfig}
|
||||
@@ -459,14 +459,14 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
// 4. Try remote upstream.
|
||||
isLanOrPtrQuery := false
|
||||
if req.ufr.matched {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "%s, %s, %s -> %v", req.ufr.matchedPolicy, req.ufr.matchedNetwork, req.ufr.matchedRule, upstreams)
|
||||
ctrld.Log(ctx, p.Debug(), "%s, %s, %s -> %v", req.ufr.matchedPolicy, req.ufr.matchedNetwork, req.ufr.matchedRule, upstreams)
|
||||
} else {
|
||||
switch {
|
||||
case isSrvLanLookup(req.msg):
|
||||
upstreams = []string{upstreamOS}
|
||||
upstreamConfigs = []*ctrld.UpstreamConfig{osUpstreamConfig}
|
||||
ctx = ctrld.LanQueryCtx(ctx)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "SRV record lookup, using upstreams: %v", upstreams)
|
||||
ctrld.Log(ctx, p.Debug(), "SRV record lookup, using upstreams: %v", upstreams)
|
||||
case isPrivatePtrLookup(req.msg):
|
||||
isLanOrPtrQuery = true
|
||||
if answer := p.proxyPrivatePtrLookup(ctx, req.msg); answer != nil {
|
||||
@@ -476,7 +476,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
}
|
||||
upstreams, upstreamConfigs = p.upstreamsAndUpstreamConfigForPtr(upstreams, upstreamConfigs)
|
||||
ctx = ctrld.LanQueryCtx(ctx)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "private PTR lookup, using upstreams: %v", upstreams)
|
||||
ctrld.Log(ctx, p.Debug(), "private PTR lookup, using upstreams: %v", upstreams)
|
||||
case isLanHostnameQuery(req.msg):
|
||||
isLanOrPtrQuery = true
|
||||
if answer := p.proxyLanHostnameQuery(ctx, req.msg); answer != nil {
|
||||
@@ -487,9 +487,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
upstreams = []string{upstreamOS}
|
||||
upstreamConfigs = []*ctrld.UpstreamConfig{osUpstreamConfig}
|
||||
ctx = ctrld.LanQueryCtx(ctx)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "lan hostname lookup, using upstreams: %v", upstreams)
|
||||
ctrld.Log(ctx, p.Debug(), "lan hostname lookup, using upstreams: %v", upstreams)
|
||||
default:
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "no explicit policy matched, using default routing -> %v", upstreams)
|
||||
ctrld.Log(ctx, p.Debug(), "no explicit policy matched, using default routing -> %v", upstreams)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -504,7 +504,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
ctrld.SetCacheReply(answer, req.msg, answer.Rcode)
|
||||
now := time.Now()
|
||||
if cachedValue.Expire.After(now) {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "hit cached response")
|
||||
ctrld.Log(ctx, p.Debug(), "hit cached response")
|
||||
setCachedAnswerTTL(answer, now, cachedValue.Expire)
|
||||
res.answer = answer
|
||||
res.cached = true
|
||||
@@ -514,10 +514,10 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
}
|
||||
}
|
||||
resolve1 := func(upstream string, upstreamConfig *ctrld.UpstreamConfig, msg *dns.Msg) (*dns.Msg, error) {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "sending query to %s: %s", upstream, upstreamConfig.Name)
|
||||
ctrld.Log(ctx, p.Debug(), "sending query to %s: %s", upstream, upstreamConfig.Name)
|
||||
dnsResolver, err := ctrld.NewResolver(ctx, upstreamConfig)
|
||||
if err != nil {
|
||||
ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create resolver")
|
||||
ctrld.Log(ctx, p.Error().Err(err), "failed to create resolver")
|
||||
return nil, err
|
||||
}
|
||||
resolveCtx, cancel := upstreamConfig.Context(ctx)
|
||||
@@ -526,7 +526,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
}
|
||||
resolve := func(upstream string, upstreamConfig *ctrld.UpstreamConfig, msg *dns.Msg) *dns.Msg {
|
||||
if upstreamConfig.UpstreamSendClientInfo() && req.ci != nil {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "including client info with the request")
|
||||
ctrld.Log(ctx, p.Debug(), "including client info with the request")
|
||||
ctx = context.WithValue(ctx, ctrld.ClientInfoCtxKey{}, req.ci)
|
||||
}
|
||||
answer, err := resolve1(upstream, upstreamConfig, msg)
|
||||
@@ -540,7 +540,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
return answer
|
||||
}
|
||||
|
||||
ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to resolve query")
|
||||
ctrld.Log(ctx, p.Error().Err(err), "failed to resolve query")
|
||||
|
||||
// increase failure count when there is no answer
|
||||
// rehardless of what kind of error we get
|
||||
@@ -564,7 +564,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
if upstreamConfig == nil {
|
||||
continue
|
||||
}
|
||||
logger := mainLog.Load().Debug().
|
||||
logger := p.Debug().
|
||||
Str("upstream", upstreamConfig.String()).
|
||||
Str("query", req.msg.Question[0].Name).
|
||||
Bool("is_ad_query", p.isAdDomainQuery(req.msg)).
|
||||
@@ -577,7 +577,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
answer := resolve(upstreams[n], upstreamConfig, req.msg)
|
||||
if answer == nil {
|
||||
if serveStaleCache && staleAnswer != nil {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "serving stale cached response")
|
||||
ctrld.Log(ctx, p.Debug(), "serving stale cached response")
|
||||
now := time.Now()
|
||||
setCachedAnswerTTL(staleAnswer, now, now.Add(staleTTL))
|
||||
res.answer = staleAnswer
|
||||
@@ -589,11 +589,11 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
// We are doing LAN/PTR lookup using private resolver, so always process next one.
|
||||
// Except for the last, we want to send response instead of saying all upstream failed.
|
||||
if answer.Rcode != dns.RcodeSuccess && isLanOrPtrQuery && n != len(upstreamConfigs)-1 {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "no response from %s, process to next upstream", upstreams[n])
|
||||
ctrld.Log(ctx, p.Debug(), "no response from %s, process to next upstream", upstreams[n])
|
||||
continue
|
||||
}
|
||||
if answer.Rcode != dns.RcodeSuccess && len(upstreamConfigs) > 1 && containRcode(req.failoverRcodes, answer.Rcode) {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "failover rcode matched, process to next upstream")
|
||||
ctrld.Log(ctx, p.Debug(), "failover rcode matched, process to next upstream")
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -609,18 +609,18 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
}
|
||||
setCachedAnswerTTL(answer, now, expired)
|
||||
p.cache.Add(dnscache.NewKey(req.msg, upstreams[n]), dnscache.NewValue(answer, expired))
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "add cached response")
|
||||
ctrld.Log(ctx, p.Debug(), "add cached response")
|
||||
}
|
||||
hostname := ""
|
||||
if req.ci != nil {
|
||||
hostname = req.ci.Hostname
|
||||
}
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "REPLY: %s -> %s (%s): %s", upstreams[n], req.ufr.srcAddr, hostname, dns.RcodeToString[answer.Rcode])
|
||||
ctrld.Log(ctx, p.Info(), "REPLY: %s -> %s (%s): %s", upstreams[n], req.ufr.srcAddr, hostname, dns.RcodeToString[answer.Rcode])
|
||||
res.answer = answer
|
||||
res.upstream = upstreamConfig.Endpoint
|
||||
return res
|
||||
}
|
||||
ctrld.Log(ctx, mainLog.Load().Error(), "all %v endpoints failed", upstreams)
|
||||
ctrld.Log(ctx, p.Error(), "all %v endpoints failed", upstreams)
|
||||
|
||||
// if we have no healthy upstreams, trigger recovery flow
|
||||
if p.leakOnUpstreamFailure() {
|
||||
@@ -633,28 +633,28 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
} else {
|
||||
reason = RecoveryReasonRegularFailure
|
||||
}
|
||||
mainLog.Load().Debug().Msgf("No healthy upstreams, triggering recovery with reason: %v", reason)
|
||||
p.Debug().Msgf("No healthy upstreams, triggering recovery with reason: %v", reason)
|
||||
go p.handleRecovery(reason)
|
||||
} else {
|
||||
mainLog.Load().Debug().Msg("Recovery already in progress; skipping duplicate trigger from down detection")
|
||||
p.Debug().Msg("Recovery already in progress; skipping duplicate trigger from down detection")
|
||||
}
|
||||
p.recoveryCancelMu.Unlock()
|
||||
} else {
|
||||
mainLog.Load().Debug().Msg("One upstream is down but at least one is healthy; skipping recovery trigger")
|
||||
p.Debug().Msg("One upstream is down but at least one is healthy; skipping recovery trigger")
|
||||
}
|
||||
|
||||
// attempt query to OS resolver while as a retry catch all
|
||||
// we dont want this to happen if leakOnUpstreamFailure is false
|
||||
if upstreams[0] != upstreamOS {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "attempting query to OS resolver as a retry catch all")
|
||||
ctrld.Log(ctx, p.Debug(), "attempting query to OS resolver as a retry catch all")
|
||||
answer := resolve(upstreamOS, osUpstreamConfig, req.msg)
|
||||
if answer != nil {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "OS resolver retry query successful")
|
||||
ctrld.Log(ctx, p.Debug(), "OS resolver retry query successful")
|
||||
res.answer = answer
|
||||
res.upstream = osUpstreamConfig.Endpoint
|
||||
return res
|
||||
}
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "OS resolver retry query failed")
|
||||
ctrld.Log(ctx, p.Debug(), "OS resolver retry query failed")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -958,10 +958,10 @@ func (p *prog) doSelfUninstall(answer *dns.Msg) {
|
||||
return
|
||||
}
|
||||
|
||||
logger := mainLog.Load().With().Str("mode", "self-uninstall").Logger()
|
||||
logger := p.logger.Load().With().Str("mode", "self-uninstall").Logger()
|
||||
if p.refusedQueryCount > selfUninstallMaxQueries {
|
||||
p.checkingSelfUninstall = true
|
||||
loggerCtx := ctrld.LoggerCtx(context.Background(), mainLog.Load())
|
||||
loggerCtx := ctrld.LoggerCtx(context.Background(), p.logger.Load())
|
||||
_, err := controld.FetchResolverConfig(loggerCtx, cdUID, rootCmd.Version, cdDev)
|
||||
logger.Debug().Msg("maximum number of refused queries reached, checking device status")
|
||||
selfUninstallCheck(err, p, logger)
|
||||
@@ -1031,7 +1031,7 @@ func (p *prog) queryFromSelf(ip string) bool {
|
||||
netIP := netip.MustParseAddr(ip)
|
||||
regularIPs, loopbackIPs, err := netmon.LocalAddresses()
|
||||
if err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not get local addresses")
|
||||
p.Warn().Err(err).Msg("could not get local addresses")
|
||||
return false
|
||||
}
|
||||
for _, localIP := range slices.Concat(regularIPs, loopbackIPs) {
|
||||
@@ -1151,7 +1151,8 @@ func isWanClient(na net.Addr) bool {
|
||||
|
||||
// resolveInternalDomainTestQuery resolves internal test domain query, returning the answer to the caller.
|
||||
func resolveInternalDomainTestQuery(ctx context.Context, domain string, m *dns.Msg) *dns.Msg {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "internal domain test query")
|
||||
logger := ctrld.LoggerFromCtx(ctx)
|
||||
ctrld.Log(ctx, logger.Debug(), "internal domain test query")
|
||||
|
||||
q := m.Question[0]
|
||||
answer := new(dns.Msg)
|
||||
@@ -1192,7 +1193,7 @@ func FlushDNSCache() error {
|
||||
func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
mon, err := netmon.New(func(format string, args ...any) {
|
||||
// Always fetch the latest logger (and inject the prefix)
|
||||
mainLog.Load().Printf("netmon: "+format, args...)
|
||||
p.logger.Load().Printf("netmon: "+format, args...)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating network monitor: %w", err)
|
||||
@@ -1204,7 +1205,7 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
|
||||
isMajorChange := mon.IsMajorChangeFrom(delta.Old, delta.New)
|
||||
|
||||
mainLog.Load().Debug().
|
||||
p.Debug().
|
||||
Interface("old_state", delta.Old).
|
||||
Interface("new_state", delta.New).
|
||||
Bool("is_major_change", isMajorChange).
|
||||
@@ -1232,7 +1233,7 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
if newIface.IsUp() && len(usableNewIPs) > 0 {
|
||||
changed = true
|
||||
changeIPs = usableNewIPs
|
||||
mainLog.Load().Debug().
|
||||
p.Debug().
|
||||
Str("interface", ifaceName).
|
||||
Interface("new_ips", usableNewIPs).
|
||||
Msg("Interface newly appeared (was not present in old state)")
|
||||
@@ -1254,7 +1255,7 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
if newIface.IsUp() && len(usableNewIPs) > 0 {
|
||||
changed = true
|
||||
changeIPs = usableNewIPs
|
||||
mainLog.Load().Debug().
|
||||
p.Debug().
|
||||
Str("interface", ifaceName).
|
||||
Interface("old_ips", oldIPs).
|
||||
Interface("new_ips", usableNewIPs).
|
||||
@@ -1267,39 +1268,39 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
// if the default route changed, set changed to true
|
||||
if delta.New.DefaultRouteInterface != delta.Old.DefaultRouteInterface {
|
||||
changed = true
|
||||
mainLog.Load().Debug().Msgf("Default route changed from %s to %s", delta.Old.DefaultRouteInterface, delta.New.DefaultRouteInterface)
|
||||
p.Debug().Msgf("Default route changed from %s to %s", delta.Old.DefaultRouteInterface, delta.New.DefaultRouteInterface)
|
||||
}
|
||||
|
||||
if !changed {
|
||||
mainLog.Load().Debug().Msg("Ignoring interface change - no valid interfaces affected")
|
||||
p.Debug().Msg("Ignoring interface change - no valid interfaces affected")
|
||||
// check if the default IPs are still on an interface that is up
|
||||
ValidateDefaultLocalIPsFromDelta(delta.New)
|
||||
return
|
||||
}
|
||||
|
||||
if !activeInterfaceExists {
|
||||
mainLog.Load().Debug().Msg("No active interfaces found, skipping reinitialization")
|
||||
p.Debug().Msg("No active interfaces found, skipping reinitialization")
|
||||
return
|
||||
}
|
||||
|
||||
// Get IPs from default route interface in new state
|
||||
selfIP := defaultRouteIP()
|
||||
selfIP := p.defaultRouteIP()
|
||||
|
||||
// Ensure that selfIP is an IPv4 address.
|
||||
// If defaultRouteIP mistakenly returns an IPv6 (such as a ULA), clear it
|
||||
if ip := net.ParseIP(selfIP); ip != nil && ip.To4() == nil {
|
||||
mainLog.Load().Debug().Msgf("defaultRouteIP returned a non-IPv4 address: %s, ignoring it", selfIP)
|
||||
p.Debug().Msgf("defaultRouteIP returned a non-IPv4 address: %s, ignoring it", selfIP)
|
||||
selfIP = ""
|
||||
}
|
||||
var ipv6 string
|
||||
|
||||
if delta.New.DefaultRouteInterface != "" {
|
||||
mainLog.Load().Debug().Msgf("default route interface: %s, IPs: %v", delta.New.DefaultRouteInterface, delta.New.InterfaceIPs[delta.New.DefaultRouteInterface])
|
||||
p.Debug().Msgf("default route interface: %s, IPs: %v", delta.New.DefaultRouteInterface, delta.New.InterfaceIPs[delta.New.DefaultRouteInterface])
|
||||
for _, ip := range delta.New.InterfaceIPs[delta.New.DefaultRouteInterface] {
|
||||
ipAddr, _ := netip.ParsePrefix(ip.String())
|
||||
addr := ipAddr.Addr()
|
||||
if selfIP == "" && addr.Is4() {
|
||||
mainLog.Load().Debug().Msgf("checking IP: %s", addr.String())
|
||||
p.Debug().Msgf("checking IP: %s", addr.String())
|
||||
if !addr.IsLoopback() && !addr.IsLinkLocalUnicast() {
|
||||
selfIP = addr.String()
|
||||
}
|
||||
@@ -1310,12 +1311,12 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
}
|
||||
} else {
|
||||
// If no default route interface is set yet, use the changed IPs
|
||||
mainLog.Load().Debug().Msgf("no default route interface found, using changed IPs: %v", changeIPs)
|
||||
p.Debug().Msgf("no default route interface found, using changed IPs: %v", changeIPs)
|
||||
for _, ip := range changeIPs {
|
||||
ipAddr, _ := netip.ParsePrefix(ip.String())
|
||||
addr := ipAddr.Addr()
|
||||
if selfIP == "" && addr.Is4() {
|
||||
mainLog.Load().Debug().Msgf("checking IP: %s", addr.String())
|
||||
p.Debug().Msgf("checking IP: %s", addr.String())
|
||||
if !addr.IsLoopback() && !addr.IsLinkLocalUnicast() {
|
||||
selfIP = addr.String()
|
||||
}
|
||||
@@ -1328,15 +1329,15 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
|
||||
// Only set the IPv4 default if selfIP is a valid IPv4 address.
|
||||
if ip := net.ParseIP(selfIP); ip != nil && ip.To4() != nil {
|
||||
ctrld.SetDefaultLocalIPv4(ctrld.LoggerCtx(ctx, mainLog.Load()), ip)
|
||||
ctrld.SetDefaultLocalIPv4(ctrld.LoggerCtx(ctx, p.logger.Load()), ip)
|
||||
if !isMobile() && p.ciTable != nil {
|
||||
p.ciTable.SetSelfIP(selfIP)
|
||||
}
|
||||
}
|
||||
if ip := net.ParseIP(ipv6); ip != nil {
|
||||
ctrld.SetDefaultLocalIPv6(ctrld.LoggerCtx(ctx, mainLog.Load()), ip)
|
||||
ctrld.SetDefaultLocalIPv6(ctrld.LoggerCtx(ctx, p.logger.Load()), ip)
|
||||
}
|
||||
mainLog.Load().Debug().Msgf("Set default local IPv4: %s, IPv6: %s", selfIP, ipv6)
|
||||
p.Debug().Msgf("Set default local IPv4: %s, IPv6: %s", selfIP, ipv6)
|
||||
|
||||
// we only trigger recovery flow for network changes on non router devices
|
||||
if router.Name() == "" {
|
||||
@@ -1345,7 +1346,7 @@ func (p *prog) monitorNetworkChanges(ctx context.Context) error {
|
||||
})
|
||||
|
||||
mon.Start()
|
||||
mainLog.Load().Debug().Msg("Network monitor started")
|
||||
p.Debug().Msg("Network monitor started")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1400,11 +1401,11 @@ func interfaceIPsEqual(a, b []netip.Prefix) bool {
|
||||
// checkUpstreamOnce sends a test query to the specified upstream.
|
||||
// Returns nil if the upstream responds successfully.
|
||||
func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) error {
|
||||
mainLog.Load().Debug().Msgf("Starting check for upstream: %s", upstream)
|
||||
p.Debug().Msgf("Starting check for upstream: %s", upstream)
|
||||
|
||||
resolver, err := ctrld.NewResolver(ctrld.LoggerCtx(context.Background(), mainLog.Load()), uc)
|
||||
resolver, err := ctrld.NewResolver(ctrld.LoggerCtx(context.Background(), p.logger.Load()), uc)
|
||||
if err != nil {
|
||||
mainLog.Load().Error().Err(err).Msgf("Failed to create resolver for upstream %s", upstream)
|
||||
p.Error().Err(err).Msgf("Failed to create resolver for upstream %s", upstream)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1415,22 +1416,22 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro
|
||||
if uc.Timeout > 0 {
|
||||
timeout = time.Millisecond * time.Duration(uc.Timeout)
|
||||
}
|
||||
mainLog.Load().Debug().Msgf("Timeout for upstream %s: %s", upstream, timeout)
|
||||
p.Debug().Msgf("Timeout for upstream %s: %s", upstream, timeout)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
uc.ReBootstrap(ctrld.LoggerCtx(ctx, mainLog.Load()))
|
||||
mainLog.Load().Debug().Msgf("Rebootstrapping resolver for upstream: %s", upstream)
|
||||
uc.ReBootstrap(ctrld.LoggerCtx(ctx, p.logger.Load()))
|
||||
p.Debug().Msgf("Rebootstrapping resolver for upstream: %s", upstream)
|
||||
|
||||
start := time.Now()
|
||||
_, err = resolver.Resolve(ctx, msg)
|
||||
duration := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
mainLog.Load().Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration)
|
||||
p.Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration)
|
||||
} else {
|
||||
mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
|
||||
p.Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration)
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -1440,13 +1441,13 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro
|
||||
// upstream failure recoveries, waiting for recovery to complete (using a cancellable context without timeout),
|
||||
// and then re-applying the DNS settings.
|
||||
func (p *prog) handleRecovery(reason RecoveryReason) {
|
||||
mainLog.Load().Debug().Msg("Starting recovery process: removing DNS settings")
|
||||
p.Debug().Msg("Starting recovery process: removing DNS settings")
|
||||
|
||||
// For network changes, cancel any existing recovery check because the network state has changed.
|
||||
if reason == RecoveryReasonNetworkChange {
|
||||
p.recoveryCancelMu.Lock()
|
||||
if p.recoveryCancel != nil {
|
||||
mainLog.Load().Debug().Msg("Cancelling existing recovery check (network change)")
|
||||
p.Debug().Msg("Cancelling existing recovery check (network change)")
|
||||
p.recoveryCancel()
|
||||
p.recoveryCancel = nil
|
||||
}
|
||||
@@ -1455,7 +1456,7 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
|
||||
// For upstream failures, if a recovery is already in progress, do nothing new.
|
||||
p.recoveryCancelMu.Lock()
|
||||
if p.recoveryCancel != nil {
|
||||
mainLog.Load().Debug().Msg("Upstream recovery already in progress; skipping duplicate trigger")
|
||||
p.Debug().Msg("Upstream recovery already in progress; skipping duplicate trigger")
|
||||
p.recoveryCancelMu.Unlock()
|
||||
return
|
||||
}
|
||||
@@ -1476,15 +1477,15 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
|
||||
// will be appended to nameservers from the saved interface values
|
||||
p.resetDNS(false, false)
|
||||
|
||||
loggerCtx := ctrld.LoggerCtx(context.Background(), mainLog.Load())
|
||||
loggerCtx := ctrld.LoggerCtx(context.Background(), p.logger.Load())
|
||||
// For an OS failure, reinitialize OS resolver nameservers immediately.
|
||||
if reason == RecoveryReasonOSFailure {
|
||||
mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers")
|
||||
p.Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers")
|
||||
ns := ctrld.InitializeOsResolver(loggerCtx, true)
|
||||
if len(ns) == 0 {
|
||||
mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values")
|
||||
p.Warn().Msg("No nameservers found for OS resolver; using existing values")
|
||||
} else {
|
||||
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
|
||||
p.Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1494,13 +1495,13 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
|
||||
// Wait indefinitely until one of the upstreams recovers.
|
||||
recovered, err := p.waitForUpstreamRecovery(recoveryCtx, upstreams)
|
||||
if err != nil {
|
||||
mainLog.Load().Error().Err(err).Msg("Recovery canceled; DNS settings remain removed")
|
||||
p.Error().Err(err).Msg("Recovery canceled; DNS settings remain removed")
|
||||
p.recoveryCancelMu.Lock()
|
||||
p.recoveryCancel = nil
|
||||
p.recoveryCancelMu.Unlock()
|
||||
return
|
||||
}
|
||||
mainLog.Load().Info().Msgf("Upstream %q recovered; re-applying DNS settings", recovered)
|
||||
p.Info().Msgf("Upstream %q recovered; re-applying DNS settings", recovered)
|
||||
|
||||
// reset the upstream failure count and down state
|
||||
p.um.reset(recovered)
|
||||
@@ -1509,9 +1510,9 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
|
||||
if reason == RecoveryReasonNetworkChange {
|
||||
ns := ctrld.InitializeOsResolver(loggerCtx, true)
|
||||
if len(ns) == 0 {
|
||||
mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values")
|
||||
p.Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values")
|
||||
} else {
|
||||
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
|
||||
p.Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1534,44 +1535,44 @@ func (p *prog) waitForUpstreamRecovery(ctx context.Context, upstreams map[string
|
||||
recoveredCh := make(chan string, 1)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
mainLog.Load().Debug().Msgf("Starting upstream recovery check for %d upstreams", len(upstreams))
|
||||
p.Debug().Msgf("Starting upstream recovery check for %d upstreams", len(upstreams))
|
||||
|
||||
for name, uc := range upstreams {
|
||||
wg.Add(1)
|
||||
go func(name string, uc *ctrld.UpstreamConfig) {
|
||||
defer wg.Done()
|
||||
mainLog.Load().Debug().Msgf("Starting recovery check loop for upstream: %s", name)
|
||||
p.Debug().Msgf("Starting recovery check loop for upstream: %s", name)
|
||||
attempts := 0
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
mainLog.Load().Debug().Msgf("Context canceled for upstream %s", name)
|
||||
p.Debug().Msgf("Context canceled for upstream %s", name)
|
||||
return
|
||||
default:
|
||||
attempts++
|
||||
// checkUpstreamOnce will reset any failure counters on success.
|
||||
if err := p.checkUpstreamOnce(name, uc); err == nil {
|
||||
mainLog.Load().Debug().Msgf("Upstream %s recovered successfully", name)
|
||||
p.Debug().Msgf("Upstream %s recovered successfully", name)
|
||||
select {
|
||||
case recoveredCh <- name:
|
||||
mainLog.Load().Debug().Msgf("Sent recovery notification for upstream %s", name)
|
||||
p.Debug().Msgf("Sent recovery notification for upstream %s", name)
|
||||
default:
|
||||
mainLog.Load().Debug().Msg("Recovery channel full, another upstream already recovered")
|
||||
p.Debug().Msg("Recovery channel full, another upstream already recovered")
|
||||
}
|
||||
return
|
||||
}
|
||||
mainLog.Load().Debug().Msgf("Upstream %s check failed, sleeping before retry", name)
|
||||
p.Debug().Msgf("Upstream %s check failed, sleeping before retry", name)
|
||||
time.Sleep(checkUpstreamBackoffSleep)
|
||||
|
||||
// if this is the upstreamOS and it's the 3rd attempt (or multiple of 3),
|
||||
// we should try to reinit the OS resolver to ensure we can recover
|
||||
if name == upstreamOS && attempts%3 == 0 {
|
||||
mainLog.Load().Debug().Msgf("UpstreamOS check failed on attempt %d, reinitializing OS resolver", attempts)
|
||||
ns := ctrld.InitializeOsResolver(ctrld.LoggerCtx(ctx, mainLog.Load()), true)
|
||||
p.Debug().Msgf("UpstreamOS check failed on attempt %d, reinitializing OS resolver", attempts)
|
||||
ns := ctrld.InitializeOsResolver(ctrld.LoggerCtx(ctx, p.logger.Load()), true)
|
||||
if len(ns) == 0 {
|
||||
mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values")
|
||||
p.Warn().Msg("No nameservers found for OS resolver; using existing values")
|
||||
} else {
|
||||
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
|
||||
p.Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user