Mirror of https://github.com/Control-D-Inc/ctrld.git
all: leak queries to OS resolver instead of SERVFAIL
This makes the fallback work in more general cases than just captive portal networks, which ctrld has supported recently. Users who do not want this leaking behavior can turn the feature off with a config option.
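For example, opting out of the new fallback might look like the snippet below in the ctrld TOML config. This is a minimal sketch, not taken from the commit: it assumes the option sits in the `[service]` table, based on the `leak_on_upstream_failure` toml tag added to `ServiceConfig` in this change.

```toml
# Hypothetical ctrld.toml snippet -- a minimal sketch, assuming the option
# lives under the [service] table (the ServiceConfig field added in this
# commit carries the toml tag "leak_on_upstream_failure").
[service]
    # Keep answering SERVFAIL instead of falling back to the OS resolver
    # when all upstreams are unreachable.
    leak_on_upstream_failure = false
```

When the option is left unset, the default added in this commit applies: true everywhere except on routers.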
Committed by: Cuong Manh Le
Parent: cfe1209d61
Commit: 3e388c2857
@@ -6,7 +6,6 @@ import (
    "encoding/hex"
    "errors"
    "fmt"
    "io"
    "net"
    "net/netip"
    "runtime"
@@ -17,7 +16,6 @@ import (

    "github.com/miekg/dns"
    "golang.org/x/sync/errgroup"
    "tailscale.com/net/captivedetection"
    "tailscale.com/net/netaddr"
    "tailscale.com/net/netmon"
    "tailscale.com/net/tsaddr"
@@ -412,6 +410,16 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
    upstreams := req.ufr.upstreams
    serveStaleCache := p.cache != nil && p.cfg.Service.CacheServeStale
    upstreamConfigs := p.upstreamConfigsFromUpstreamNumbers(upstreams)

    // If ctrld is going to leak query to OS resolver, check remote upstream in background,
    // so ctrld could be back to normal operation as long as the network is back online.
    if len(upstreamConfigs) > 0 && p.leakingQuery.Load() {
        for n, uc := range upstreamConfigs {
            go p.checkUpstream(upstreams[n], uc)
        }
        upstreamConfigs = nil
    }

    if len(upstreamConfigs) == 0 {
        upstreamConfigs = []*ctrld.UpstreamConfig{osUpstreamConfig}
        upstreams = []string{upstreamOS}
@@ -501,17 +509,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
    if isNetworkErr {
        p.um.increaseFailureCount(upstreams[n])
        if p.um.isDown(upstreams[n]) {
            go p.um.checkUpstream(upstreams[n], upstreamConfig)
            go p.checkUpstream(upstreams[n], upstreamConfig)
        }
    }
    if cdUID != "" && (isNetworkErr || err == io.EOF) {
        p.captivePortalMu.Lock()
        if !p.captivePortalCheckWasRun {
            p.captivePortalCheckWasRun = true
            go p.performCaptivePortalDetection()
        }
        p.captivePortalMu.Unlock()
    }
    // For timeout error (i.e: context deadline exceed), force re-bootstrapping.
    var e net.Error
    if errors.As(err, &e) && e.Timeout() {
@@ -580,6 +580,14 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
        return res
    }
    ctrld.Log(ctx, mainLog.Load().Error(), "all %v endpoints failed", upstreams)
    if cdUID != "" && p.leakOnUpstreamFailure() {
        p.leakingQueryMu.Lock()
        if !p.leakingQueryWasRun {
            p.leakingQueryWasRun = true
            go p.performLeakingQuery()
        }
        p.leakingQueryMu.Unlock()
    }
    answer := new(dns.Msg)
    answer.SetRcode(req.msg, dns.RcodeServerFailure)
    res.answer = answer
@@ -597,9 +605,6 @@ func (p *prog) upstreamsAndUpstreamConfigForLanAndPtr(upstreams []string, upstre
}

func (p *prog) upstreamConfigsFromUpstreamNumbers(upstreams []string) []*ctrld.UpstreamConfig {
    if p.captivePortalDetected.Load() {
        return nil // always use OS resolver if behind captive portal.
    }
    upstreamConfigs := make([]*ctrld.UpstreamConfig, 0, len(upstreams))
    for _, upstream := range upstreams {
        upstreamNum := strings.TrimPrefix(upstream, upstreamPrefix)
@@ -903,31 +908,16 @@ func (p *prog) selfUninstallCoolOfPeriod() {
    p.selfUninstallMu.Unlock()
}

// performCaptivePortalDetection check if ctrld is running behind a captive portal.
func (p *prog) performCaptivePortalDetection() {
    mainLog.Load().Warn().Msg("Performing captive portal detection")
    d := captivedetection.NewDetector(logf)
    found := true
    var resetDnsOnce sync.Once
    for found {
        time.Sleep(2 * time.Second)
        found = d.Detect(context.Background(), netmon.NewStatic(), nil, 0)
        if found {
            resetDnsOnce.Do(func() {
                mainLog.Load().Warn().Msg("found captive portal, leaking query to OS resolver")
                // Store the result once here, so changes made below won't be reverted by DNS watchers.
                p.captivePortalDetected.Store(found)
                p.resetDNS()
            })
        }
        p.captivePortalDetected.Store(found)
    }

    p.captivePortalMu.Lock()
    p.captivePortalCheckWasRun = false
    p.captivePortalMu.Unlock()
// performLeakingQuery performs necessary works to leak queries to OS resolver.
func (p *prog) performLeakingQuery() {
    mainLog.Load().Warn().Msg("leaking query to OS resolver")
    // Signal dns watchers to stop, so changes made below won't be reverted.
    p.leakingQuery.Store(true)
    p.resetDNS()
    ns := ctrld.InitializeOsResolver()
    mainLog.Load().Debug().Msgf("re-initialized OS resolver with nameservers: %v", ns)
    p.dnsWg.Wait()
    p.setDNS()
    mainLog.Load().Warn().Msg("captive portal login finished, stop leaking query")
}

// forceFetchingAPI sends signal to force syncing API config if run in cd mode,
@@ -107,9 +107,9 @@ type prog struct {
    loopMu sync.Mutex
    loop map[string]bool

    captivePortalMu sync.Mutex
    captivePortalCheckWasRun bool
    captivePortalDetected atomic.Bool
    leakingQueryMu sync.Mutex
    leakingQueryWasRun bool
    leakingQuery atomic.Bool

    started chan struct{}
    onStartedDone chan struct{}
@@ -685,7 +685,7 @@ func (p *prog) dnsWatchdog(iface *net.Interface, nameservers []string, allIfaces
        mainLog.Load().Debug().Msg("stop dns watchdog")
        return
    case <-ticker.C:
        if p.captivePortalDetected.Load() {
        if p.leakingQuery.Load() {
            return
        }
        if dnsChanged(iface, ns) {
@@ -742,6 +742,18 @@ func (p *prog) resetDNS() {
    }
}

// leakOnUpstreamFailure reports whether ctrld should leak query to OS resolver when failed to connect all upstreams.
func (p *prog) leakOnUpstreamFailure() bool {
    if ptr := p.cfg.Service.LeakOnUpstreamFailure; ptr != nil {
        return *ptr
    }
    // Default is false on routers, since this leaking is only useful for devices that move between networks.
    if router.Name() != "" {
        return false
    }
    return true
}

func randomLocalIP() string {
    n := rand.Intn(254-2) + 2
    return fmt.Sprintf("127.0.0.%d", n)
@@ -40,7 +40,7 @@ func (p *prog) watchResolvConf(iface *net.Interface, ns []netip.Addr, setDnsFn f
        mainLog.Load().Debug().Msgf("stopping watcher for %s", resolvConfPath)
        return
    case event, ok := <-watcher.Events:
        if p.captivePortalDetected.Load() {
        if p.leakingQuery.Load() {
            return
        }
        if !ok {
@@ -71,19 +71,19 @@ func (um *upstreamMonitor) reset(upstream string) {

// checkUpstream checks the given upstream status, periodically sending query to upstream
// until successfully. An upstream status/counter will be reset once it becomes reachable.
func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) {
    um.mu.Lock()
    isChecking := um.checking[upstream]
func (p *prog) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) {
    p.um.mu.Lock()
    isChecking := p.um.checking[upstream]
    if isChecking {
        um.mu.Unlock()
        p.um.mu.Unlock()
        return
    }
    um.checking[upstream] = true
    um.mu.Unlock()
    p.um.checking[upstream] = true
    p.um.mu.Unlock()
    defer func() {
        um.mu.Lock()
        um.checking[upstream] = false
        um.mu.Unlock()
        p.um.mu.Lock()
        p.um.checking[upstream] = false
        p.um.mu.Unlock()
    }()

    resolver, err := ctrld.NewResolver(uc)
@@ -104,7 +104,13 @@ func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConf
    for {
        if err := check(); err == nil {
            mainLog.Load().Debug().Msgf("upstream %q is online", uc.Endpoint)
            um.reset(upstream)
            p.um.reset(upstream)
            if p.leakingQuery.CompareAndSwap(true, false) {
                p.leakingQueryMu.Lock()
                p.leakingQueryWasRun = false
                p.leakingQueryMu.Unlock()
                mainLog.Load().Warn().Msg("stop leaking query")
            }
            return
        }
        time.Sleep(checkUpstreamBackoffSleep)
@@ -218,6 +218,7 @@ type ServiceConfig struct {
    DnsWatchdogInvterval *time.Duration `mapstructure:"dns_watchdog_interval" toml:"dns_watchdog_interval,omitempty"`
    RefetchTime *int `mapstructure:"refetch_time" toml:"refetch_time,omitempty"`
    ForceRefetchWaitTime *int `mapstructure:"force_refetch_wait_time" toml:"force_refetch_wait_time,omitempty"`
    LeakOnUpstreamFailure *bool `mapstructure:"leak_on_upstream_failure" toml:"leak_on_upstream_failure,omitempty"`
    Daemon bool `mapstructure:"-" toml:"-"`
    AllocateIP bool `mapstructure:"-" toml:"-"`
}
@@ -281,6 +281,13 @@ The value must be a positive number, any invalid value will be ignored and defau
- Required: no
- Default: 3600

### leak_on_upstream_failure
Once ctrld is "offline", meaning it could not connect to any upstream, subsequent queries will be leaked to the OS resolver.

- Type: boolean
- Required: no
- Default: true on Windows, macOS, and non-router Linux.

## Upstream
The `[upstream]` section specifies the DNS upstream servers that `ctrld` will forward DNS requests to.