diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index 5652f07..81be1d0 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -6,7 +6,6 @@ import ( "encoding/hex" "errors" "fmt" - "io" "net" "net/netip" "runtime" @@ -17,7 +16,6 @@ import ( "github.com/miekg/dns" "golang.org/x/sync/errgroup" - "tailscale.com/net/captivedetection" "tailscale.com/net/netaddr" "tailscale.com/net/netmon" "tailscale.com/net/tsaddr" @@ -412,6 +410,16 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { upstreams := req.ufr.upstreams serveStaleCache := p.cache != nil && p.cfg.Service.CacheServeStale upstreamConfigs := p.upstreamConfigsFromUpstreamNumbers(upstreams) + + // If ctrld is going to leak queries to the OS resolver, check the remote upstreams in the background, + // so that ctrld can return to normal operation once the network is back online. + if len(upstreamConfigs) > 0 && p.leakingQuery.Load() { + for n, uc := range upstreamConfigs { + go p.checkUpstream(upstreams[n], uc) + } + upstreamConfigs = nil + } + if len(upstreamConfigs) == 0 { upstreamConfigs = []*ctrld.UpstreamConfig{osUpstreamConfig} upstreams = []string{upstreamOS} @@ -501,17 +509,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { if isNetworkErr { p.um.increaseFailureCount(upstreams[n]) if p.um.isDown(upstreams[n]) { - go p.um.checkUpstream(upstreams[n], upstreamConfig) + go p.checkUpstream(upstreams[n], upstreamConfig) } } - if cdUID != "" && (isNetworkErr || err == io.EOF) { - p.captivePortalMu.Lock() - if !p.captivePortalCheckWasRun { - p.captivePortalCheckWasRun = true - go p.performCaptivePortalDetection() - } - p.captivePortalMu.Unlock() - } // For timeout error (i.e: context deadline exceed), force re-bootstrapping. 
var e net.Error if errors.As(err, &e) && e.Timeout() { @@ -580,6 +580,14 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { return res } ctrld.Log(ctx, mainLog.Load().Error(), "all %v endpoints failed", upstreams) + if cdUID != "" && p.leakOnUpstreamFailure() { + p.leakingQueryMu.Lock() + if !p.leakingQueryWasRun { + p.leakingQueryWasRun = true + go p.performLeakingQuery() + } + p.leakingQueryMu.Unlock() + } answer := new(dns.Msg) answer.SetRcode(req.msg, dns.RcodeServerFailure) res.answer = answer @@ -597,9 +605,6 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { return res } func (p *prog) upstreamsAndUpstreamConfigForLanAndPtr(upstreams []string, upstre } func (p *prog) upstreamConfigsFromUpstreamNumbers(upstreams []string) []*ctrld.UpstreamConfig { - if p.captivePortalDetected.Load() { - return nil // always use OS resolver if behind captive portal. - } upstreamConfigs := make([]*ctrld.UpstreamConfig, 0, len(upstreams)) for _, upstream := range upstreams { upstreamNum := strings.TrimPrefix(upstream, upstreamPrefix) @@ -903,31 +908,16 @@ func (p *prog) selfUninstallCoolOfPeriod() { p.selfUninstallMu.Unlock() } -// performCaptivePortalDetection check if ctrld is running behind a captive portal. -func (p *prog) performCaptivePortalDetection() { - mainLog.Load().Warn().Msg("Performing captive portal detection") - d := captivedetection.NewDetector(logf) - found := true - var resetDnsOnce sync.Once - for found { - time.Sleep(2 * time.Second) - found = d.Detect(context.Background(), netmon.NewStatic(), nil, 0) - if found { - resetDnsOnce.Do(func() { - mainLog.Load().Warn().Msg("found captive portal, leaking query to OS resolver") - // Store the result once here, so changes made below won't be reverted by DNS watchers. - p.captivePortalDetected.Store(found) - p.resetDNS() - }) - } - p.captivePortalDetected.Store(found) - } - - p.captivePortalMu.Lock() - p.captivePortalCheckWasRun = false - p.captivePortalMu.Unlock() +// performLeakingQuery performs the necessary work to leak queries to the OS resolver. 
+func (p *prog) performLeakingQuery() { + mainLog.Load().Warn().Msg("leaking query to OS resolver") + // Signal dns watchers to stop, so changes made below won't be reverted. + p.leakingQuery.Store(true) + p.resetDNS() + ns := ctrld.InitializeOsResolver() + mainLog.Load().Debug().Msgf("re-initialized OS resolver with nameservers: %v", ns) + p.dnsWg.Wait() p.setDNS() - mainLog.Load().Warn().Msg("captive portal login finished, stop leaking query") } // forceFetchingAPI sends signal to force syncing API config if run in cd mode, diff --git a/cmd/cli/prog.go b/cmd/cli/prog.go index a87d7e8..54ea194 100644 --- a/cmd/cli/prog.go +++ b/cmd/cli/prog.go @@ -107,9 +107,9 @@ type prog struct { loopMu sync.Mutex loop map[string]bool - captivePortalMu sync.Mutex - captivePortalCheckWasRun bool - captivePortalDetected atomic.Bool + leakingQueryMu sync.Mutex + leakingQueryWasRun bool + leakingQuery atomic.Bool started chan struct{} onStartedDone chan struct{} @@ -685,7 +685,7 @@ func (p *prog) dnsWatchdog(iface *net.Interface, nameservers []string, allIfaces mainLog.Load().Debug().Msg("stop dns watchdog") return case <-ticker.C: - if p.captivePortalDetected.Load() { + if p.leakingQuery.Load() { return } if dnsChanged(iface, ns) { @@ -742,6 +742,18 @@ func (p *prog) resetDNS() { } } +// leakOnUpstreamFailure reports whether ctrld should leak queries to the OS resolver when it fails to connect to all upstreams. +func (p *prog) leakOnUpstreamFailure() bool { + if ptr := p.cfg.Service.LeakOnUpstreamFailure; ptr != nil { + return *ptr + } + // Default is false on routers, since this leaking is only useful for devices that move between networks. 
+ if router.Name() != "" { + return false + } + return true +} + func randomLocalIP() string { n := rand.Intn(254-2) + 2 return fmt.Sprintf("127.0.0.%d", n) diff --git a/cmd/cli/resolvconf.go b/cmd/cli/resolvconf.go index 21e435d..6df7be6 100644 --- a/cmd/cli/resolvconf.go +++ b/cmd/cli/resolvconf.go @@ -40,7 +40,7 @@ func (p *prog) watchResolvConf(iface *net.Interface, ns []netip.Addr, setDnsFn f mainLog.Load().Debug().Msgf("stopping watcher for %s", resolvConfPath) return case event, ok := <-watcher.Events: - if p.captivePortalDetected.Load() { + if p.leakingQuery.Load() { return } if !ok { diff --git a/cmd/cli/upstream_monitor.go b/cmd/cli/upstream_monitor.go index 67ae13d..b17cb32 100644 --- a/cmd/cli/upstream_monitor.go +++ b/cmd/cli/upstream_monitor.go @@ -71,19 +71,19 @@ func (um *upstreamMonitor) reset(upstream string) { // checkUpstream checks the given upstream status, periodically sending query to upstream // until successfully. An upstream status/counter will be reset once it becomes reachable. 
-func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) { - um.mu.Lock() - isChecking := um.checking[upstream] +func (p *prog) checkUpstream(upstream string, uc *ctrld.UpstreamConfig) { + p.um.mu.Lock() + isChecking := p.um.checking[upstream] if isChecking { - um.mu.Unlock() + p.um.mu.Unlock() return } - um.checking[upstream] = true - um.mu.Unlock() + p.um.checking[upstream] = true + p.um.mu.Unlock() defer func() { - um.mu.Lock() - um.checking[upstream] = false - um.mu.Unlock() + p.um.mu.Lock() + p.um.checking[upstream] = false + p.um.mu.Unlock() }() resolver, err := ctrld.NewResolver(uc) @@ -104,7 +104,13 @@ func (um *upstreamMonitor) checkUpstream(upstream string, uc *ctrld.UpstreamConf for { if err := check(); err == nil { mainLog.Load().Debug().Msgf("upstream %q is online", uc.Endpoint) - um.reset(upstream) + p.um.reset(upstream) + if p.leakingQuery.CompareAndSwap(true, false) { + p.leakingQueryMu.Lock() + p.leakingQueryWasRun = false + p.leakingQueryMu.Unlock() + mainLog.Load().Warn().Msg("stop leaking query") + } return } time.Sleep(checkUpstreamBackoffSleep) diff --git a/config.go b/config.go index d20c695..6c66f62 100644 --- a/config.go +++ b/config.go @@ -218,6 +218,7 @@ type ServiceConfig struct { DnsWatchdogInvterval *time.Duration `mapstructure:"dns_watchdog_interval" toml:"dns_watchdog_interval,omitempty"` RefetchTime *int `mapstructure:"refetch_time" toml:"refetch_time,omitempty"` ForceRefetchWaitTime *int `mapstructure:"force_refetch_wait_time" toml:"force_refetch_wait_time,omitempty"` + LeakOnUpstreamFailure *bool `mapstructure:"leak_on_upstream_failure" toml:"leak_on_upstream_failure,omitempty"` Daemon bool `mapstructure:"-" toml:"-"` AllocateIP bool `mapstructure:"-" toml:"-"` } diff --git a/docs/config.md b/docs/config.md index 8c216ec..136cb04 100644 --- a/docs/config.md +++ b/docs/config.md @@ -281,6 +281,13 @@ The value must be a positive number, any invalid value will be ignored and defau - Required: no - Default: 
3600 +### leak_on_upstream_failure +Once ctrld is "offline", meaning it could not connect to any upstream, subsequent queries will be leaked to the OS resolver. + +- Type: boolean +- Required: no +- Default: true on Windows, macOS and non-router Linux. + ## Upstream The `[upstream]` section specifies the DNS upstream servers that `ctrld` will forward DNS requests to.