From 1e8240bd1c1e2237ce0914ab40cb29f1e3c3c990 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 3 Mar 2026 02:06:49 -0500 Subject: [PATCH] feat: introduce DNS intercept mode infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add --intercept-mode flag (dns/hard/off) with configuration support, recovery bypass for captive portals, probe-based interception verification, VPN DNS coexistence in the proxy layer, and IPv6 loopback listener guard. Remove standalone mDNSResponder hack files — the port 53 binding logic is now handled within the intercept mode infrastructure. Squashed from intercept mode development on v1.0 branch (#497). --- .gitignore | 2 + cmd/cli/cli.go | 126 ++++-- cmd/cli/commands.go | 170 +++++++-- cmd/cli/dns_intercept_others.go | 39 ++ cmd/cli/dns_proxy.go | 386 ++++++++++++++++--- cmd/cli/dns_proxy_test.go | 24 +- cmd/cli/main.go | 37 ++ cmd/cli/mdnsresponder_hack_darwin.go | 154 -------- cmd/cli/mdnsresponder_hack_others.go | 21 - cmd/cli/prog.go | 119 +++++- cmd/cli/service_args_darwin.go | 134 +++++++ cmd/cli/service_args_others.go | 38 ++ cmd/cli/service_args_windows.go | 153 ++++++++ config.go | 71 ++++ config_internal_test.go | 6 +- docs/dns-intercept-mode.md | 551 +++++++++++++++++++++++++++ resolver.go | 73 ++++ 17 files changed, 1813 insertions(+), 291 deletions(-) create mode 100644 cmd/cli/dns_intercept_others.go delete mode 100644 cmd/cli/mdnsresponder_hack_darwin.go delete mode 100644 cmd/cli/mdnsresponder_hack_others.go create mode 100644 cmd/cli/service_args_darwin.go create mode 100644 cmd/cli/service_args_others.go create mode 100644 cmd/cli/service_args_windows.go create mode 100644 docs/dns-intercept-mode.md diff --git a/.gitignore b/.gitignore index 8e70cc6..799011f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ ctrld-* # generated file cmd/cli/rsrc_*.syso +ctrld +ctrld.exe diff --git a/cmd/cli/cli.go b/cmd/cli/cli.go index 70c2312..691d308 100644 --- 
a/cmd/cli/cli.go +++ b/cmd/cli/cli.go @@ -345,6 +345,16 @@ func run(appCallback *AppCallback, stopCh chan struct{}) { processLogAndCacheFlags(v, &cfg) } + // Persist intercept_mode to config when provided via CLI flag on full install. + // This ensures the config file reflects the actual running mode for RMM/MDM visibility. + if interceptMode == "dns" || interceptMode == "hard" { + if cfg.Service.InterceptMode != interceptMode { + cfg.Service.InterceptMode = interceptMode + updated = true + mainLog.Load().Info().Msgf("writing intercept_mode = %q to config", interceptMode) + } + } + if updated { if err := writeConfigFile(&cfg); err != nil { notifyExitToLogServer() @@ -647,7 +657,7 @@ func processCDFlags(cfg *ctrld.Config) (*controld.ResolverConfig, error) { req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(ctx), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } resolverConfig, err := controld.FetchResolverConfig(req, cdDev) for { @@ -901,9 +911,6 @@ func selfCheckStatus(ctx context.Context, s service.Service, sockDir string) (bo lc := cfg.FirstListener() addr := net.JoinHostPort(lc.IP, strconv.Itoa(lc.Port)) - if needMdnsResponderHack { - addr = "127.0.0.1:53" - } mainLog.Load().Debug().Msgf("performing listener test, sending queries to %s", addr) @@ -1116,10 +1123,6 @@ func uninstall(p *prog, s service.Service) { // Stop already did router.Cleanup and report any error if happens, // ignoring error here to prevent false positive. _ = p.router.Cleanup() - - // Run mDNS responder cleanup if necessary - doMdnsResponderCleanup() - mainLog.Load().Notice().Msg("Service uninstalled") return } @@ -1227,18 +1230,105 @@ func updateListenerConfig(cfg *ctrld.Config, notifyToLogServerFunc func()) bool return updated } +// tryUpdateListenerConfigIntercept handles listener binding for dns-intercept mode on macOS. 
+// In intercept mode, pf redirects all outbound port-53 traffic to ctrld's listener, +// so ctrld can safely listen on a non-standard port if port 53 is unavailable +// (e.g., mDNSResponder holds *:53). +// +// Flow: +// 1. If config has explicit (non-default) IP:port → use exactly that, no fallback +// 2. Otherwise → try 127.0.0.1:53, then 127.0.0.1:5354, then fatal +func tryUpdateListenerConfigIntercept(cfg *ctrld.Config, notifyFunc func(), fatal bool) (updated, ok bool) { + ok = true + lc := cfg.FirstListener() + if lc == nil { + return false, true + } + + hasExplicitConfig := lc.IP != "" && lc.IP != "0.0.0.0" && lc.Port != 0 + if !hasExplicitConfig { + // Set defaults for intercept mode + if lc.IP == "" || lc.IP == "0.0.0.0" { + lc.IP = "127.0.0.1" + updated = true + } + if lc.Port == 0 { + lc.Port = 53 + updated = true + } + } + + tryListen := func(ip string, port int) bool { + addr := net.JoinHostPort(ip, strconv.Itoa(port)) + udpLn, udpErr := net.ListenPacket("udp", addr) + if udpLn != nil { + udpLn.Close() + } + tcpLn, tcpErr := net.Listen("tcp", addr) + if tcpLn != nil { + tcpLn.Close() + } + return udpErr == nil && tcpErr == nil + } + + addr := net.JoinHostPort(lc.IP, strconv.Itoa(lc.Port)) + if tryListen(lc.IP, lc.Port) { + mainLog.Load().Debug().Msgf("DNS intercept: listener available at %s", addr) + return updated, true + } + + mainLog.Load().Info().Msgf("DNS intercept: cannot bind %s", addr) + + if hasExplicitConfig { + // User specified explicit address — don't guess, just fail + if fatal { + notifyFunc() + mainLog.Load().Fatal().Msgf("DNS intercept: cannot listen on configured address %s", addr) + } + return updated, false + } + + // Fallback: try port 5354 (mDNSResponder likely holds *:53) + if tryListen("127.0.0.1", 5354) { + mainLog.Load().Info().Msg("DNS intercept: port 53 unavailable (likely mDNSResponder), using 127.0.0.1:5354") + lc.IP = "127.0.0.1" + lc.Port = 5354 + return true, true + } + + if fatal { + notifyFunc() + 
mainLog.Load().Fatal().Msg("DNS intercept: cannot bind 127.0.0.1:53 or 127.0.0.1:5354") + } + return updated, false +} + // tryUpdateListenerConfig tries updating listener config with a working one. // If fatal is true, and there's listen address conflicted, the function do // fatal error. func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, notifyFunc func(), fatal bool) (updated, ok bool) { + // In intercept mode (macOS), pf redirects all port-53 traffic to ctrld's listener, + // so ctrld can safely listen on a non-standard port. Use a simple two-attempt flow: + // 1. If config has explicit non-default IP:port, use exactly that + // 2. Otherwise: try 127.0.0.1:53, then 127.0.0.1:5354, then fatal + // This bypasses the full cd-mode listener probing loop entirely. + // Check interceptMode (CLI flag) first, then fall back to config value. + // dnsIntercept bool is derived later in prog.run(), but we need to know + // the intercept mode here to select the right listener probing strategy. + im := interceptMode + if im == "" || im == "off" { + im = cfg.Service.InterceptMode + } + if (im == "dns" || im == "hard") && runtime.GOOS == "darwin" { + return tryUpdateListenerConfigIntercept(cfg, notifyFunc, fatal) + } + ok = true lcc := make(map[string]*listenerConfigCheck) cdMode := cdUID != "" nextdnsMode := nextdns != "" // For Windows server with local Dns server running, we can only try on random local IP. hasLocalDnsServer := hasLocalDnsServerRunning() - // For Macos with mDNSResponder running on port 53, we must use 0.0.0.0 to prevent conflicting. 
- needMdnsResponderHack := needMdnsResponderHack notRouter := router.Name() == "" isDesktop := ctrld.IsDesktopPlatform() for n, listener := range cfg.Listener { @@ -1272,12 +1362,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti lcc[n].Port = false } } - if needMdnsResponderHack { - listener.IP = "0.0.0.0" - listener.Port = 53 - lcc[n].IP = false - lcc[n].Port = false - } updated = updated || lcc[n].IP || lcc[n].Port } @@ -1310,9 +1394,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti // Created listeners will be kept in listeners slice above, and close // before function finished. tryListen := func(addr string) error { - if needMdnsResponderHack { - killMdnsResponder() - } udpLn, udpErr := net.ListenPacket("udp", addr) if udpLn != nil { closers = append(closers, udpLn) @@ -1376,9 +1457,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti } attempts := 0 maxAttempts := 10 - if needMdnsResponderHack { - maxAttempts = 1 - } for { if attempts == maxAttempts { notifyFunc() @@ -1889,10 +1967,12 @@ func runningIface(s service.Service) *ifaceResponse { // doValidateCdRemoteConfig fetches and validates custom config for cdUID. func doValidateCdRemoteConfig(cdUID string, fatal bool) error { + // Username is only sent during initial provisioning (cdUIDFromProvToken). + // All subsequent calls use lightweight metadata to avoid EDR triggers. 
req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } rc, err := controld.FetchResolverConfig(req, cdDev) if err != nil { diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index dbd13bf..eaee812 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -190,6 +190,7 @@ func initRunCmd() *cobra.Command { _ = runCmd.Flags().MarkHidden("iface") runCmd.Flags().StringVarP(&cdUpstreamProto, "proto", "", ctrld.ResolverTypeDOH, `Control D upstream type, either "doh" or "doh3"`) runCmd.Flags().BoolVarP(&rfc1918, "rfc1918", "", false, "Listen on RFC1918 addresses when 127.0.0.1 is the only listener") + runCmd.Flags().StringVarP(&interceptMode, "intercept-mode", "", "", "OS-level DNS interception mode: 'dns' (with VPN split routing) or 'hard' (all DNS through ctrld, no VPN split routing)") runCmd.FParseErrWhitelist = cobra.FParseErrWhitelist{UnknownFlags: true} rootCmd.AddCommand(runCmd) @@ -229,6 +230,14 @@ NOTE: running "ctrld start" without any arguments will start already installed c setDependencies(sc) sc.Arguments = append([]string{"run"}, osArgs...) + // Validate --intercept-mode early, before installing the service. + // Without this, a typo like "--intercept-mode fds" would install the service, + // the child process would Fatal() on the invalid value, and the parent would + // then uninstall — confusing and destructive. + if interceptMode != "" && !validInterceptMode(interceptMode) { + mainLog.Load().Fatal().Msgf("invalid --intercept-mode value %q: must be 'off', 'dns', or 'hard'", interceptMode) + } + p := &prog{ router: router.New(&cfg, cdUID != ""), cfg: &cfg, @@ -247,6 +256,49 @@ NOTE: running "ctrld start" without any arguments will start already installed c // Get current running iface, if any. 
var currentIface *ifaceResponse + // Handle "ctrld start --intercept-mode dns|hard" on an existing + // service BEFORE the pin check. Adding intercept mode is an enhancement, not + // deactivation, so it doesn't require the deactivation pin. We modify the + // plist/registry directly and restart the service via the OS service manager. + osArgsEarly := os.Args[2:] + if os.Args[1] == "service" { + osArgsEarly = os.Args[3:] + } + osArgsEarly = filterEmptyStrings(osArgsEarly) + interceptOnly := onlyInterceptFlags(osArgsEarly) + svcExists := serviceConfigFileExists() + mainLog.Load().Debug().Msgf("intercept upgrade check: args=%v interceptOnly=%v svcConfigExists=%v interceptMode=%q", osArgsEarly, interceptOnly, svcExists, interceptMode) + if interceptOnly && svcExists { + // Remove any existing intercept flags before applying the new value. + _ = removeServiceFlag("--intercept-mode") + + if interceptMode == "off" { + // "off" = remove intercept mode entirely (just the removal above). + mainLog.Load().Notice().Msg("Existing service detected — removing --intercept-mode from service arguments") + } else { + // Add the new mode value. + mainLog.Load().Notice().Msgf("Existing service detected — appending --intercept-mode %s to service arguments", interceptMode) + if err := appendServiceFlag("--intercept-mode"); err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to append intercept flag to service arguments") + } + if err := appendServiceFlag(interceptMode); err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to append intercept mode value to service arguments") + } + } + + // Stop the service if running (bypasses ctrld pin — this is an + // enhancement, not deactivation). Then fall through to the normal + // startOnly path which handles start, self-check, and reporting. 
+ if isCtrldRunning { + mainLog.Load().Notice().Msg("Stopping service for intercept mode upgrade") + _ = s.Stop() + isCtrldRunning = false + } + startOnly = true + isCtrldInstalled = true + // Fall through to startOnly path below. + } + // If pin code was set, do not allow running start command. if isCtrldRunning { if err := checkDeactivationPin(s, nil); isCheckDeactivationPinErr(err) { @@ -271,20 +323,31 @@ NOTE: running "ctrld start" without any arguments will start already installed c return } if res.OK { - name := res.Name - if iff, err := net.InterfaceByName(name); err == nil { - _, _ = patchNetIfaceName(iff) - name = iff.Name - } - logger := mainLog.Load().With().Str("iface", name).Logger() - logger.Debug().Msg("setting DNS successfully") - if res.All { - // Log that DNS is set for other interfaces. - withEachPhysicalInterfaces( - name, - "set DNS", - func(i *net.Interface) error { return nil }, - ) + // In intercept mode, show intercept-specific status instead of + // per-interface DNS messages (which are irrelevant). + if res.InterceptMode != "" { + switch res.InterceptMode { + case "hard": + mainLog.Load().Notice().Msg("DNS hard intercept mode active — all DNS traffic intercepted, no VPN split routing") + default: + mainLog.Load().Notice().Msg("DNS intercept mode active — all DNS traffic intercepted via OS packet filter") + } + } else { + name := res.Name + if iff, err := net.InterfaceByName(name); err == nil { + _, _ = patchNetIfaceName(iff) + name = iff.Name + } + logger := mainLog.Load().With().Str("iface", name).Logger() + logger.Debug().Msg("setting DNS successfully") + if res.All { + // Log that DNS is set for other interfaces. 
+ withEachPhysicalInterfaces( + name, + "set DNS", + func(i *net.Interface) error { return nil }, + ) + } } } } @@ -344,6 +407,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c if !startOnly { startOnly = len(osArgs) == 0 } + // If user run "ctrld start" and ctrld is already installed, starting existing service. if startOnly && isCtrldInstalled { tryReadingConfigWithNotice(false, true) @@ -359,10 +423,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c initInteractiveLogging() tasks := []task{ - {func() error { - doMdnsResponderCleanup() - return nil - }, false, "Cleanup service before installation"}, {func() error { // Save current DNS so we can restore later. withEachPhysicalInterfaces("", "saveCurrentStaticDNS", func(i *net.Interface) error { @@ -378,10 +438,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c }, false, "Configure service failure actions"}, {s.Start, true, "Start"}, {noticeWritingControlDConfig, false, "Notice writing ControlD config"}, - {func() error { - doMdnsResponderHackPostInstall() - return nil - }, false, "Configure service post installation"}, } mainLog.Load().Notice().Msg("Starting existing ctrld service") if doTasks(tasks) { @@ -392,6 +448,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c os.Exit(1) } reportSetDnsOk(sockDir) + // Verify service registration after successful start. + if err := verifyServiceRegistration(); err != nil { + mainLog.Load().Warn().Err(err).Msg("Service registry verification failed") + } } else { mainLog.Load().Error().Err(err).Msg("Failed to start existing ctrld service") os.Exit(1) @@ -400,7 +460,8 @@ NOTE: running "ctrld start" without any arguments will start already installed c } if cdUID != "" { - _ = doValidateCdRemoteConfig(cdUID, true) + // Skip doValidateCdRemoteConfig() here - run command will handle + // validation and config fetch via processCDFlags(). 
} else if uid := cdUIDFromProvToken(); uid != "" { cdUID = uid mainLog.Load().Debug().Msg("using uid from provision token") @@ -445,10 +506,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c } tasks := []task{ - {func() error { - doMdnsResponderCleanup() - return nil - }, false, "Cleanup service before installation"}, {s.Stop, false, "Stop"}, {func() error { return doGenerateNextDNSConfig(nextdns) }, true, "Checking config"}, {func() error { return ensureUninstall(s) }, false, "Ensure uninstall"}, @@ -471,10 +528,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c // Note that startCmd do not actually write ControlD config, but the config file was // generated after s.Start, so we notice users here for consistent with nextdns mode. {noticeWritingControlDConfig, false, "Notice writing ControlD config"}, - {func() error { - doMdnsResponderHackPostInstall() - return nil - }, false, "Configure service post installation"}, } mainLog.Load().Notice().Msg("Starting service") if doTasks(tasks) { @@ -525,6 +578,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c os.Exit(1) } reportSetDnsOk(sockDir) + // Verify service registration after successful start. 
+ if err := verifyServiceRegistration(); err != nil { + mainLog.Load().Warn().Err(err).Msg("Service registry verification failed") + } } }, } @@ -549,6 +606,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c startCmd.Flags().BoolVarP(&startOnly, "start_only", "", false, "Do not install new service") _ = startCmd.Flags().MarkHidden("start_only") startCmd.Flags().BoolVarP(&rfc1918, "rfc1918", "", false, "Listen on RFC1918 addresses when 127.0.0.1 is the only listener") + startCmd.Flags().StringVarP(&interceptMode, "intercept-mode", "", "", "OS-level DNS interception mode: 'dns' (with VPN split routing) or 'hard' (all DNS through ctrld, no VPN split routing)") routerCmd := &cobra.Command{ Use: "setup", @@ -1411,3 +1469,53 @@ func filterEmptyStrings(slice []string) []string { return s == "" }) } + +// validInterceptMode reports whether the given value is a recognized --intercept-mode. +// This is the single source of truth for mode validation — used by the early start +// command check, the runtime validation in prog.go, and onlyInterceptFlags below. +// Add new modes here to have them recognized everywhere. +func validInterceptMode(mode string) bool { + switch mode { + case "off", "dns", "hard": + return true + } + return false +} + +// onlyInterceptFlags reports whether args contain only intercept mode +// flags (--intercept-mode <mode>) and flags that are auto-added by the +// start command alias (--iface). This is used to detect "ctrld start --intercept-mode dns" +// (or "off" to disable) on an existing installation, where the intent is to modify the +// intercept flag on the existing service without replacing other arguments. +// +// Note: the startCmdAlias appends "--iface=auto" to os.Args when --iface isn't +// explicitly provided, so we must allow it here.
+func onlyInterceptFlags(args []string) bool { + hasIntercept := false + for i := 0; i < len(args); i++ { + arg := args[i] + switch { + case arg == "--intercept-mode": + // Next arg must be a valid mode value. + if i+1 < len(args) && validInterceptMode(args[i+1]) { + hasIntercept = true + i++ // skip the value + } else { + return false + } + case strings.HasPrefix(arg, "--intercept-mode="): + val := strings.TrimPrefix(arg, "--intercept-mode=") + if validInterceptMode(val) { + hasIntercept = true + } else { + return false + } + case arg == "--iface=auto" || arg == "--iface" || arg == "auto": + // Auto-added by startCmdAlias or its value; safe to ignore. + continue + default: + return false + } + } + return hasIntercept +} diff --git a/cmd/cli/dns_intercept_others.go b/cmd/cli/dns_intercept_others.go new file mode 100644 index 0000000..9f3c903 --- /dev/null +++ b/cmd/cli/dns_intercept_others.go @@ -0,0 +1,39 @@ +//go:build !windows && !darwin + +package cli + +import ( + "fmt" +) + +// startDNSIntercept is not supported on this platform. +// DNS intercept mode is only available on Windows (via WFP) and macOS (via pf). +func (p *prog) startDNSIntercept() error { + return fmt.Errorf("dns intercept: not supported on this platform (only Windows and macOS)") +} + +// stopDNSIntercept is a no-op on unsupported platforms. +func (p *prog) stopDNSIntercept() error { + return nil +} + +// exemptVPNDNSServers is a no-op on unsupported platforms. +func (p *prog) exemptVPNDNSServers(exemptions []vpnDNSExemption) error { + return nil +} + +// ensurePFAnchorActive is a no-op on unsupported platforms. +func (p *prog) ensurePFAnchorActive() bool { + return false +} + +// checkTunnelInterfaceChanges is a no-op on unsupported platforms. +func (p *prog) checkTunnelInterfaceChanges() bool { + return false +} + +// scheduleDelayedRechecks is a no-op on unsupported platforms. +func (p *prog) scheduleDelayedRechecks() {} + +// pfInterceptMonitor is a no-op on unsupported platforms. 
+func (p *prog) pfInterceptMonitor() {} diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index 60dfd49..ac9d10b 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -101,19 +101,10 @@ func (p *prog) serveDNS(listenerNum string) error { _ = w.WriteMsg(answer) return } - // When mDNSResponder hack has been done, ctrld was listening on 0.0.0.0:53, but only requests - // to 127.0.0.1:53 are accepted. Since binding to 0.0.0.0 will make the IP info of the local address - // hidden (appeared as [::]), we checked for requests originated from 127.0.0.1 instead. - if needMdnsResponderHack && !strings.HasPrefix(w.RemoteAddr().String(), "127.0.0.1:") { - answer := new(dns.Msg) - answer.SetRcode(m, dns.RcodeRefused) - _ = w.WriteMsg(answer) - return - } listenerConfig := p.cfg.Listener[listenerNum] reqId := requestID() ctx := context.WithValue(context.Background(), ctrld.ReqIdCtxKey{}, reqId) - if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) { + if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) && !isIPv6LoopbackListener(w.LocalAddr()) { ctrld.Log(ctx, mainLog.Load().Debug(), "query refused, listener does not allow WAN clients: %s", w.RemoteAddr().String()) answer := new(dns.Msg) answer.SetRcode(m, dns.RcodeRefused) @@ -135,6 +126,23 @@ func (p *prog) serveDNS(listenerNum string) error { return } + // Interception probe: if we're expecting a probe query and this matches, + // signal the prober and respond NXDOMAIN. Used by both macOS pf probes + // (_pf-probe-*) and Windows NRPT probes (_nrpt-probe-*) to verify that + // DNS interception is actually routing queries to ctrld's listener. 
+ if probeID, ok := p.pfProbeExpected.Load().(string); ok && probeID != "" && domain == probeID { + if chPtr, ok := p.pfProbeCh.Load().(*chan struct{}); ok && chPtr != nil { + select { + case *chPtr <- struct{}{}: + default: + } + } + answer := new(dns.Msg) + answer.SetRcode(m, dns.RcodeNameError) // NXDOMAIN + _ = w.WriteMsg(answer) + return + } + if _, ok := p.cacheFlushDomainsMap[domain]; ok && p.cache != nil { p.cache.Purge() ctrld.Log(ctx, mainLog.Load().Debug(), "received query %q, local cache is purged", domain) @@ -201,7 +209,7 @@ func (p *prog) serveDNS(listenerNum string) error { g, ctx := errgroup.WithContext(context.Background()) for _, proto := range []string{"udp", "tcp"} { proto := proto - if needLocalIPv6Listener() { + if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { g.Go(func() error { s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, handler) defer s.Shutdown() @@ -430,6 +438,24 @@ func (p *prog) proxyLanHostnameQuery(ctx context.Context, msg *dns.Msg) *dns.Msg } func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { + // DNS intercept recovery bypass: forward all queries to OS/DHCP resolver. + // This runs when upstreams are unreachable (e.g., captive portal network) + // and allows the network's DNS to handle authentication pages. 
+ if dnsIntercept && p.recoveryBypass.Load() { + ctrld.Log(ctx, mainLog.Load().Debug(), "Recovery bypass active: forwarding to OS resolver") + resolver, err := ctrld.NewResolver(osUpstreamConfig) + if err == nil { + resolveCtx, cancel := osUpstreamConfig.Context(ctx) + defer cancel() + answer, _ := resolver.Resolve(resolveCtx, req.msg) + if answer != nil { + return &proxyResponse{answer: answer} + } + } + ctrld.Log(ctx, mainLog.Load().Debug(), "OS resolver failed during recovery bypass") + // Fall through to normal flow as last resort + } + var staleAnswer *dns.Msg upstreams := req.ufr.upstreams serveStaleCache := p.cache != nil && p.cfg.Service.CacheServeStale @@ -442,9 +468,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { // However, on Active Directory Domain Controller, where it has local DNS server // running and listening on local addresses, these local addresses must be used // as nameservers, so queries for ADDC could be resolved as expected. - if p.isAdDomainQuery(req.msg) { + if p.isAdDomainQuery(req.msg) && p.hasLocalDNS { ctrld.Log(ctx, mainLog.Load().Debug(), - "AD domain query detected for %s in domain %s", + "AD domain query detected for %s in domain %s, using local DNS server", req.msg.Question[0].Name, p.adDomain) upstreamConfigs = []*ctrld.UpstreamConfig{localUpstreamConfig} upstreams = []string{upstreamOSLocal} @@ -515,6 +541,92 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { staleAnswer = answer } } + + // VPN DNS split routing (only in dns-intercept mode) + if dnsIntercept && p.vpnDNS != nil && len(req.msg.Question) > 0 { + domain := req.msg.Question[0].Name + if vpnServers := p.vpnDNS.UpstreamForDomain(domain); len(vpnServers) > 0 { + ctrld.Log(ctx, mainLog.Load().Debug(), "VPN DNS route matched for domain %s, using servers: %v", domain, vpnServers) + + for _, server := range vpnServers { + upstreamConfig := p.vpnDNS.upstreamConfigFor(server) + ctrld.Log(ctx, 
mainLog.Load().Debug(), "Querying VPN DNS server: %s", server) + + dnsResolver, err := ctrld.NewResolver(upstreamConfig) + if err != nil { + ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create VPN DNS resolver") + continue + } + resolveCtx, cancel := upstreamConfig.Context(ctx) + answer, err := dnsResolver.Resolve(resolveCtx, req.msg) + cancel() + if answer != nil { + ctrld.Log(ctx, mainLog.Load().Debug(), "VPN DNS query successful") + if p.cache != nil { + ttl := 60 * time.Second + if len(answer.Answer) > 0 { + ttl = time.Duration(answer.Answer[0].Header().Ttl) * time.Second + } + for _, upstream := range upstreams { + p.cache.Add(dnscache.NewKey(req.msg, upstream), dnscache.NewValue(answer, time.Now().Add(ttl))) + } + } + return &proxyResponse{answer: answer} + } + ctrld.Log(ctx, mainLog.Load().Debug().Err(err), "VPN DNS server %s failed", server) + } + + ctrld.Log(ctx, mainLog.Load().Debug(), "All VPN DNS servers failed, falling back to normal upstreams") + } + } + + // Domain-less VPN DNS fallback: when a query is going to upstream.os via a + // split-rule (matched policy) and we have VPN DNS servers with no associated + // domains, try those servers for this query. This handles cases like F5 VPN + // where the VPN doesn't advertise DNS search domains but its DNS servers + // know the internal zones referenced by split-rules (e.g., *.provisur.local). + // These servers are NOT used for general OS resolver queries to avoid + // polluting captive portal / DHCP flows. 
+ if dnsIntercept && p.vpnDNS != nil && req.ufr.matched && + len(upstreams) > 0 && upstreams[0] == upstreamOS && + len(req.msg.Question) > 0 && !p.isAdDomainQuery(req.msg) { + if dlServers := p.vpnDNS.DomainlessServers(); len(dlServers) > 0 { + domain := req.msg.Question[0].Name + ctrld.Log(ctx, mainLog.Load().Debug(), + "Split-rule query %s going to upstream.os, trying %d domain-less VPN DNS servers first: %v", + domain, len(dlServers), dlServers) + + for _, server := range dlServers { + upstreamCfg := p.vpnDNS.upstreamConfigFor(server) + ctrld.Log(ctx, mainLog.Load().Debug(), "Querying domain-less VPN DNS server: %s", server) + + dnsResolver, err := ctrld.NewResolver(upstreamCfg) + if err != nil { + ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create domain-less VPN DNS resolver") + continue + } + resolveCtx, cancel := upstreamCfg.Context(ctx) + answer, err := dnsResolver.Resolve(resolveCtx, req.msg) + cancel() + if answer != nil && answer.Rcode == dns.RcodeSuccess { + ctrld.Log(ctx, mainLog.Load().Debug(), + "Domain-less VPN DNS server %s answered %s successfully", server, domain) + return &proxyResponse{answer: answer} + } + if answer != nil { + ctrld.Log(ctx, mainLog.Load().Debug(), + "Domain-less VPN DNS server %s returned %s for %s, trying next", + server, dns.RcodeToString[answer.Rcode], domain) + } else { + ctrld.Log(ctx, mainLog.Load().Debug().Err(err), + "Domain-less VPN DNS server %s failed for %s", server, domain) + } + } + ctrld.Log(ctx, mainLog.Load().Debug(), + "All domain-less VPN DNS servers failed for %s, falling back to OS resolver", domain) + } + } + resolve1 := func(upstream string, upstreamConfig *ctrld.UpstreamConfig, msg *dns.Msg) (*dns.Msg, error) { ctrld.Log(ctx, mainLog.Load().Debug(), "sending query to %s: %s", upstream, upstreamConfig.Name) dnsResolver, err := ctrld.NewResolver(upstreamConfig) @@ -780,10 +892,30 @@ func ttlFromMsg(msg *dns.Msg) uint32 { return 0 } -func needLocalIPv6Listener() bool { +func 
needLocalIPv6Listener(interceptMode string) bool { + if !ctrldnet.SupportsIPv6ListenLocal() { + mainLog.Load().Debug().Msg("IPv6 listener: not needed — SupportsIPv6ListenLocal() is false") + return false + } // On Windows, there's no easy way for disabling/removing IPv6 DNS resolver, so we check whether we can // listen on ::1, then spawn a listener for receiving DNS requests. - return ctrldnet.SupportsIPv6ListenLocal() && runtime.GOOS == "windows" + if runtime.GOOS == "windows" { + mainLog.Load().Debug().Msg("IPv6 listener: enabled (Windows)") + return true + } + // On macOS in intercept mode, pf can't redirect IPv6 DNS to an IPv4 listener (cross-AF rdr + // not supported), and blocking IPv6 DNS causes ~1s timeouts (BSD doesn't deliver ICMP errors + // to unconnected UDP sockets). Listening on [::1] lets us intercept IPv6 DNS directly. + // + // NOTE: We accept the intercept mode string as a parameter instead of reading the global + // dnsIntercept bool, because dnsIntercept is derived later in prog.run() — after the + // listener goroutines are already spawned. Same pattern as the port 5354 fallback fix (MR !860). + if (interceptMode == "dns" || interceptMode == "hard") && runtime.GOOS == "darwin" { + mainLog.Load().Debug().Msg("IPv6 listener: enabled (macOS intercept mode)") + return true + } + mainLog.Load().Debug().Str("os", runtime.GOOS).Str("interceptMode", interceptMode).Msg("IPv6 listener: not needed") + return false } // ipAndMacFromMsg extracts IP and MAC information included in a DNS message, if any. 
@@ -863,9 +995,6 @@ func runDNSServer(addr, network string, handler dns.Handler) (*dns.Server, <-cha errCh := make(chan error) go func() { defer close(errCh) - if needMdnsResponderHack { - killMdnsResponder() - } if err := s.ListenAndServe(); err != nil { s.NotifyStartedFunc() mainLog.Load().Error().Err(err).Msgf("could not listen and serve on: %s", s.Addr) @@ -928,12 +1057,30 @@ func (p *prog) getClientInfo(remoteIP string, msg *dns.Msg) *ctrld.ClientInfo { } else { ci.Self = p.queryFromSelf(ci.IP) } + + // In DNS intercept mode, ALL queries are from the local machine — pf/WFP + // intercepts outbound DNS and redirects to ctrld. The source IP may be a + // virtual interface (Tailscale, VPN) that has no ARP/MAC entry, causing + // missing x-cd-mac, x-cd-host, and x-cd-os headers. Force Self=true and + // populate from the primary physical interface info. + if dnsIntercept && !ci.Self { + ci.Self = true + } + // If this is a query from self, but ci.IP is not loopback IP, // try using hostname mapping for lookback IP if presents. if ci.Self { if name := p.ciTable.LocalHostname(); name != "" { ci.Hostname = name } + // If MAC is still empty (e.g., query arrived via virtual interface IP + // like Tailscale), fall back to the loopback MAC mapping which addSelf() + // populates from the primary physical interface. 
+ if ci.Mac == "" { + if mac := p.ciTable.LookupMac("127.0.0.1"); mac != "" { + ci.Mac = mac + } + } } p.spoofLoopbackIpInClientInfo(ci) return ci @@ -975,7 +1122,7 @@ func (p *prog) doSelfUninstall(answer *dns.Msg) { req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } _, err := controld.FetchResolverConfig(req, cdDev) logger.Debug().Msg("maximum number of refused queries reached, checking device status") @@ -1169,6 +1316,18 @@ func isWanClient(na net.Addr) bool { !tsaddr.CGNATRange().Contains(ip) } +// isIPv6LoopbackListener reports whether the listener address is [::1]. +// The [::1] listener only serves locally-redirected traffic (via pf on macOS +// or system DNS on Windows), so queries arriving on it are always from this +// machine — even when the source IP is a global IPv6 address (pf preserves the +// original source IP during rdr). +func isIPv6LoopbackListener(na net.Addr) bool { + if ap, err := netip.ParseAddrPort(na.String()); err == nil { + return ap.Addr() == netip.IPv6Loopback() + } + return false +} + // resolveInternalDomainTestQuery resolves internal test domain query, returning the answer to the caller. func resolveInternalDomainTestQuery(ctx context.Context, domain string, m *dns.Msg) *dns.Msg { ctrld.Log(ctx, mainLog.Load().Debug(), "internal domain test query") @@ -1294,6 +1453,65 @@ func (p *prog) monitorNetworkChanges() error { mainLog.Load().Debug().Msg("Ignoring interface change - no valid interfaces affected") // check if the default IPs are still on an interface that is up ValidateDefaultLocalIPsFromDelta(delta.New) + // Even minor interface changes can trigger macOS pf reloads — verify anchor. 
+ // We check immediately AND schedule delayed re-checks (2s + 4s) to catch + // programs like Windscribe that modify pf rules and DNS settings + // asynchronously after the network change event fires. + if dnsIntercept && p.dnsInterceptState != nil { + if !p.pfStabilizing.Load() { + p.ensurePFAnchorActive() + } + // Check tunnel interfaces unconditionally — it decides internally + // whether to enter stabilization or rebuild immediately. + p.checkTunnelInterfaceChanges() + // Schedule delayed re-checks to catch async VPN teardown changes. + // These also refresh the OS resolver and VPN DNS routes. + p.scheduleDelayedRechecks() + + // Detect interface appearance/disappearance — hypervisors (Parallels, + // VMware, VirtualBox) reload pf when creating/destroying virtual network + // interfaces, which can corrupt pf's internal translation state. The rdr + // rules survive in text form (watchdog says "intact") but stop evaluating. + // Spawn an async monitor that probes pf interception with backoff and + // forces a full pf reload if broken. + if delta.Old != nil { + interfaceChanged := false + var changedIface string + for ifaceName := range delta.Old.Interface { + if ifaceName == "lo0" { + continue + } + if _, exists := delta.New.Interface[ifaceName]; !exists { + interfaceChanged = true + changedIface = ifaceName + break + } + } + if !interfaceChanged { + for ifaceName := range delta.New.Interface { + if ifaceName == "lo0" { + continue + } + if _, exists := delta.Old.Interface[ifaceName]; !exists { + interfaceChanged = true + changedIface = ifaceName + break + } + } + } + if interfaceChanged { + mainLog.Load().Info().Str("interface", changedIface). + Msg("DNS intercept: interface appeared/disappeared — starting interception probe monitor") + go p.pfInterceptMonitor() + } + } + } + // Refresh VPN DNS on tunnel interface changes (e.g., Tailscale connect/disconnect) + // even though the physical interface didn't change. 
Runs after tunnel checks + // so the pf anchor rebuild includes current VPN DNS exemptions. + if dnsIntercept && p.vpnDNS != nil { + p.vpnDNS.Refresh(true) + } return } @@ -1367,6 +1585,26 @@ func (p *prog) monitorNetworkChanges() error { if router.Name() == "" { p.handleRecovery(RecoveryReasonNetworkChange) } + + // After network changes, verify our pf anchor is still active and + // refresh VPN DNS state. Order matters: tunnel checks first (may rebuild + // anchor), then VPN DNS refresh (updates exemptions in anchor), then + // delayed re-checks for async VPN teardown. + if dnsIntercept && p.dnsInterceptState != nil { + if !p.pfStabilizing.Load() { + p.ensurePFAnchorActive() + } + // Check tunnel interfaces unconditionally — it decides internally + // whether to enter stabilization or rebuild immediately. + p.checkTunnelInterfaceChanges() + // Refresh VPN DNS routes — runs after tunnel checks so the anchor + // rebuild includes current VPN DNS exemptions. + if p.vpnDNS != nil { + p.vpnDNS.Refresh(true) + } + // Schedule delayed re-checks to catch async VPN teardown changes. + p.scheduleDelayedRechecks() + } }) mon.Start() @@ -1491,22 +1729,57 @@ func (p *prog) handleRecovery(reason RecoveryReason) { p.recoveryCancel = cancel p.recoveryCancelMu.Unlock() - // Immediately remove our DNS settings from the interface. // set recoveryRunning to true to prevent watchdogs from putting the listener back on the interface p.recoveryRunning.Store(true) - // we do not want to restore any static DNS settings - // we must try to get the DHCP values, any static DNS settings - // will be appended to nameservers from the saved interface values - p.resetDNS(false, false) - // For an OS failure, reinitialize OS resolver nameservers immediately. 
- if reason == RecoveryReasonOSFailure { - mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers") - ns := ctrld.InitializeOsResolver(true) - if len(ns) == 0 { - mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values") + // In DNS intercept mode, don't tear down WFP/pf filters. + // Instead, enable recovery bypass so proxy() forwards queries to + // the OS/DHCP resolver. This handles captive portal authentication + // without the overhead of filter teardown/rebuild. + if dnsIntercept && p.dnsInterceptState != nil { + p.recoveryBypass.Store(true) + mainLog.Load().Info().Msg("DNS intercept recovery: enabling DHCP bypass (filters stay active)") + + // Reinitialize OS resolver to discover DHCP servers on the new network. + mainLog.Load().Debug().Msg("DNS intercept recovery: discovering DHCP nameservers") + dhcpServers := ctrld.InitializeOsResolver(true) + if len(dhcpServers) == 0 { + mainLog.Load().Warn().Msg("DNS intercept recovery: no DHCP nameservers found") } else { - mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + mainLog.Load().Info().Msgf("DNS intercept recovery: found DHCP nameservers: %v", dhcpServers) + } + + // Exempt DHCP nameservers from intercept filters so the OS resolver + // can actually reach them on port 53. + if len(dhcpServers) > 0 { + // Build exemptions without an Interface — DHCP servers are not VPN-specific, + // so they only generate group-scoped pf rules (ctrld process only). 
+ exemptions := make([]vpnDNSExemption, 0, len(dhcpServers)) + for _, s := range dhcpServers { + host := s + if h, _, err := net.SplitHostPort(s); err == nil { + host = h + } + exemptions = append(exemptions, vpnDNSExemption{Server: host}) + } + mainLog.Load().Info().Msgf("DNS intercept recovery: exempting DHCP nameservers from filters: %v", exemptions) + if err := p.exemptVPNDNSServers(exemptions); err != nil { + mainLog.Load().Warn().Err(err).Msg("DNS intercept recovery: failed to exempt DHCP nameservers — recovery queries may fail") + } + } + } else { + // Traditional flow: remove DNS settings to expose DHCP nameservers + p.resetDNS(false, false) + + // For an OS failure, reinitialize OS resolver nameservers immediately. + if reason == RecoveryReasonOSFailure { + mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers") + ns := ctrld.InitializeOsResolver(true) + if len(ns) == 0 { + mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values") + } else { + mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + } } } @@ -1527,23 +1800,46 @@ func (p *prog) handleRecovery(reason RecoveryReason) { // reset the upstream failure count and down state p.um.reset(recovered) - // For network changes we also reinitialize the OS resolver. - if reason == RecoveryReasonNetworkChange { - ns := ctrld.InitializeOsResolver(true) - if len(ns) == 0 { - mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values") - } else { - mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + // In DNS intercept mode, just disable the bypass — filters are still active. 
+ if dnsIntercept && p.dnsInterceptState != nil { + p.recoveryBypass.Store(false) + mainLog.Load().Info().Msg("DNS intercept recovery complete: disabling DHCP bypass, resuming normal flow") + + // Refresh VPN DNS routes in case VPN state changed during recovery. + if p.vpnDNS != nil { + p.vpnDNS.Refresh(true) } + + // Reinitialize OS resolver for the recovered state. + if reason == RecoveryReasonNetworkChange { + ns := ctrld.InitializeOsResolver(true) + if len(ns) == 0 { + mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values") + } else { + mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + } + } + + p.recoveryRunning.Store(false) + } else { + // For network changes we also reinitialize the OS resolver. + if reason == RecoveryReasonNetworkChange { + ns := ctrld.InitializeOsResolver(true) + if len(ns) == 0 { + mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values") + } else { + mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns) + } + } + + // Apply our DNS settings back and log the interface state. + p.setDNS() + p.logInterfacesState() + + // allow watchdogs to put the listener back on the interface if its changed for any reason + p.recoveryRunning.Store(false) } - // Apply our DNS settings back and log the interface state. - p.setDNS() - p.logInterfacesState() - - // allow watchdogs to put the listener back on the interface if its changed for any reason - p.recoveryRunning.Store(false) - // Clear the recovery cancellation for a clean slate. 
p.recoveryCancelMu.Lock() p.recoveryCancel = nil diff --git a/cmd/cli/dns_proxy_test.go b/cmd/cli/dns_proxy_test.go index f909e96..7d94dbd 100644 --- a/cmd/cli/dns_proxy_test.go +++ b/cmd/cli/dns_proxy_test.go @@ -22,15 +22,15 @@ func Test_wildcardMatches(t *testing.T) { domain string match bool }{ - {"domain - prefix parent should not match", "*.windscribe.com", "windscribe.com", false}, - {"domain - prefix", "*.windscribe.com", "anything.windscribe.com", true}, - {"domain - prefix not match other s", "*.windscribe.com", "example.com", false}, - {"domain - prefix not match s in name", "*.windscribe.com", "wwindscribe.com", false}, - {"domain - suffix", "suffix.*", "suffix.windscribe.com", true}, - {"domain - suffix not match other", "suffix.*", "suffix1.windscribe.com", false}, - {"domain - both", "suffix.*.windscribe.com", "suffix.anything.windscribe.com", true}, - {"domain - both not match", "suffix.*.windscribe.com", "suffix1.suffix.windscribe.com", false}, - {"domain - case-insensitive", "*.WINDSCRIBE.com", "anything.windscribe.com", true}, + {"domain - prefix parent should not match", "*.example.com", "example.com", false}, + {"domain - prefix", "*.example.com", "anything.example.com", true}, + {"domain - prefix not match other s", "*.example.com", "other.org", false}, + {"domain - prefix not match s in name", "*.example.com", "eexample.com", false}, + {"domain - suffix", "suffix.*", "suffix.example.com", true}, + {"domain - suffix not match other", "suffix.*", "suffix1.example.com", false}, + {"domain - both", "suffix.*.example.com", "suffix.anything.example.com", true}, + {"domain - both not match", "suffix.*.example.com", "suffix1.suffix.example.com", false}, + {"domain - case-insensitive", "*.EXAMPLE.com", "anything.example.com", true}, {"mac - prefix", "*:98:05:b4:2b", "d4:67:98:05:b4:2b", true}, {"mac - prefix not match other s", "*:98:05:b4:2b", "0d:ba:54:09:94:2c", false}, {"mac - prefix not match s in name", "*:98:05:b4:2b", "e4:67:97:05:b4:2b", 
false}, @@ -57,9 +57,9 @@ func Test_canonicalName(t *testing.T) { domain string canonical string }{ - {"fqdn to canonical", "windscribe.com.", "windscribe.com"}, - {"already canonical", "windscribe.com", "windscribe.com"}, - {"case insensitive", "Windscribe.Com.", "windscribe.com"}, + {"fqdn to canonical", "example.com.", "example.com"}, + {"already canonical", "example.com", "example.com"}, + {"case insensitive", "Example.Com.", "example.com"}, } for _, tc := range tests { diff --git a/cmd/cli/main.go b/cmd/cli/main.go index 0783975..972c308 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -1,7 +1,9 @@ package cli import ( + "encoding/hex" "io" + "net" "os" "path/filepath" "sync/atomic" @@ -40,6 +42,9 @@ var ( cleanup bool startOnly bool rfc1918 bool + interceptMode string // "", "dns", or "hard" — set via --intercept-mode flag or config + dnsIntercept bool // derived: interceptMode == "dns" || interceptMode == "hard" + hardIntercept bool // derived: interceptMode == "hard" mainLog atomic.Pointer[zerolog.Logger] consoleWriter zerolog.ConsoleWriter @@ -59,6 +64,16 @@ func init() { } func Main() { + // Fast path for pf interception probe subprocess. This runs before cobra + // initialization to minimize startup time. The parent process spawns us with + // "pf-probe-send HOST HEX_PACKET" and a non-_ctrld GID so pf + // intercepts the DNS query. If pf rdr is working, the query reaches ctrld's + // listener; if not, it goes to the real DNS server and ctrld detects the miss. + if len(os.Args) >= 4 && os.Args[1] == "pf-probe-send" { + pfProbeSend(os.Args[2], os.Args[3]) + return + } + ctrld.InitConfig(v, "ctrld") initCLI() if err := rootCmd.Execute(); err != nil { @@ -189,3 +204,25 @@ func initCache() { cfg.Service.CacheSize = 4096 } } + +// pfProbeSend is a minimal subprocess that sends a pre-built DNS query packet +// to the specified host on port 53. It's invoked by probePFIntercept() with a +// non-_ctrld GID so pf interception applies to the query. 
+// +// Usage: ctrld pf-probe-send HOST HEX_PACKET +func pfProbeSend(host, hexPacket string) { + packet, err := hex.DecodeString(hexPacket) + if err != nil { + os.Exit(1) + } + conn, err := net.DialTimeout("udp", net.JoinHostPort(host, "53"), time.Second) + if err != nil { + os.Exit(1) + } + defer conn.Close() + conn.SetDeadline(time.Now().Add(time.Second)) + _, _ = conn.Write(packet) + // Read response (don't care about result, just need the send to happen) + buf := make([]byte, 512) + _, _ = conn.Read(buf) +} diff --git a/cmd/cli/mdnsresponder_hack_darwin.go b/cmd/cli/mdnsresponder_hack_darwin.go deleted file mode 100644 index 6687bc5..0000000 --- a/cmd/cli/mdnsresponder_hack_darwin.go +++ /dev/null @@ -1,154 +0,0 @@ -package cli - -import ( - "bufio" - "errors" - "io" - "os" - "os/exec" - "path/filepath" - "strings" - - "tailscale.com/net/netmon" -) - -// On macOS, the system daemon mDNSResponder (used for proxy/mDNS/Bonjour discovery) -// listens on UDP and TCP port 53. That conflicts with ctrld when it needs to -// run a DNS proxy on port 53. The kernel does not allow two processes to bind -// the same address/port, so ctrld would fail with "address already in use" if we -// did nothing. -// -// If ctrld started before mDNSResponder and listened only on 127.0.0.1, mDNSResponder -// would bind port 53 on other interfaces, so system processes would use it as the -// DNS resolver instead of ctrld, leading to inconsistent behavior. -// -// This file implements a Darwin-only workaround: -// -// - We detect at startup whether mDNSResponder is using port 53 (or a -// persisted marker file exists from a previous run). -// - When the workaround is active, we force the listener to 0.0.0.0:53 and, -// before binding, run killall mDNSResponder so that ctrld can bind to port 53. -// - We use SO_REUSEPORT (see listener setup) so that the socket can be bound -// even when the port was recently used. 
-// - On install we create a marker file in the user's home directory so that -// the workaround is applied on subsequent starts; on uninstall we remove -// that file and bounce the en0 interface to restore normal mDNSResponder -// behavior. -// -// Without this, users on macOS would be unable to run ctrld as the system DNS -// on port 53 when mDNSResponder is active. - -var ( - - // needMdnsResponderHack determines if a system-specific workaround for mDNSResponder is necessary at runtime. - needMdnsResponderHack = mDNSResponderHack() - mDNSResponderHackFilename = ".mdnsResponderHack" -) - -// mDNSResponderHack checks if the mDNSResponder process and its environments meet specific criteria for operation. -func mDNSResponderHack() bool { - if st, err := os.Stat(mDNSResponderFile()); err == nil && st.Mode().IsRegular() { - return true - } - out, err := lsofCheckPort53() - if err != nil { - return false - } - if !isMdnsResponderListeningPort53(strings.NewReader(out)) { - return false - } - return true -} - -// mDNSResponderFile constructs and returns the absolute path to the mDNSResponder hack file in the user's home directory. -func mDNSResponderFile() string { - if d, err := userHomeDir(); err == nil && d != "" { - return filepath.Join(d, mDNSResponderHackFilename) - } - return "" -} - -// doMdnsResponderCleanup performs cleanup tasks for the mDNSResponder hack file and resets the network interface "en0". 
-func doMdnsResponderCleanup() { - fn := mDNSResponderFile() - if fn == "" { - return - } - if st, err := os.Stat(fn); err != nil || !st.Mode().IsRegular() { - return - } - if err := os.Remove(fn); err != nil { - mainLog.Load().Error().Err(err).Msg("failed to remove mDNSResponder hack file") - } - - ifName := "en0" - if din, err := netmon.DefaultRouteInterface(); err == nil { - ifName = din - } - if err := exec.Command("ifconfig", ifName, "down").Run(); err != nil { - mainLog.Load().Error().Err(err).Msg("failed to disable en0") - } - if err := exec.Command("ifconfig", ifName, "up").Run(); err != nil { - mainLog.Load().Error().Err(err).Msg("failed to enable en0") - } -} - -// doMdnsResponderHackPostInstall creates a hack file for mDNSResponder if required and logs debug or error messages. -func doMdnsResponderHackPostInstall() { - if !needMdnsResponderHack { - return - } - fn := mDNSResponderFile() - if fn == "" { - return - } - if f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0400); err != nil { - mainLog.Load().Warn().Err(err).Msgf("Could not create %s", fn) - } else { - if err := f.Close(); err != nil { - mainLog.Load().Warn().Err(err).Msgf("Could not close %s", fn) - } else { - mainLog.Load().Debug().Msgf("Created %s", fn) - } - } -} - -// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times. -// Logs any accumulated errors if the attempts to terminate the process fail. -func killMdnsResponder() { - numAttempts := 10 - errs := make([]error, 0, numAttempts) - for range numAttempts { - if err := exec.Command("killall", "mDNSResponder").Run(); err != nil { - // Exit code 1 means the process not found, do not log it. 
- if !strings.Contains(err.Error(), "exit status 1") { - errs = append(errs, err) - } - } - } - if len(errs) > 0 { - mainLog.Load().Debug().Err(errors.Join(errs...)).Msg("failed to kill mDNSResponder") - } -} - -// lsofCheckPort53 executes the lsof command to check if any process is listening on port 53 and returns the output. -func lsofCheckPort53() (string, error) { - cmd := exec.Command("lsof", "+c0", "-i:53", "-n", "-P") - out, err := cmd.CombinedOutput() - if err != nil { - return "", err - } - return string(out), nil -} - -// isMdnsResponderListeningPort53 checks if the output provided by the reader contains an mDNSResponder process. -func isMdnsResponderListeningPort53(r io.Reader) bool { - scanner := bufio.NewScanner(r) - for scanner.Scan() { - fields := strings.Fields(scanner.Text()) - if len(fields) > 0 && strings.EqualFold(fields[0], "mDNSResponder") { - return true - } - } - return false -} diff --git a/cmd/cli/mdnsresponder_hack_others.go b/cmd/cli/mdnsresponder_hack_others.go deleted file mode 100644 index 5d6ada5..0000000 --- a/cmd/cli/mdnsresponder_hack_others.go +++ /dev/null @@ -1,21 +0,0 @@ -//go:build !darwin - -package cli - -// needMdnsResponderHack determines if a system-specific workaround for mDNSResponder is necessary at runtime. -var needMdnsResponderHack = mDNSResponderHack() - -// mDNSResponderHack checks if the mDNSResponder process and its environments meet specific criteria for operation. -func mDNSResponderHack() bool { - return false -} - -// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times. -// Logs any accumulated errors if the attempts to terminate the process fail. -func killMdnsResponder() {} - -// doMdnsResponderCleanup performs cleanup tasks for the mDNSResponder hack file and resets the network interface "en0". 
-func doMdnsResponderCleanup() {} - -// doMdnsResponderHackPostInstall creates a hack file for mDNSResponder if required and logs debug or error messages. -func doMdnsResponderHackPostInstall() {} diff --git a/cmd/cli/prog.go b/cmd/cli/prog.go index c499f84..c579a80 100644 --- a/cmd/cli/prog.go +++ b/cmd/cli/prog.go @@ -131,6 +131,7 @@ type prog struct { runningIface string requiredMultiNICsConfig bool adDomain string + hasLocalDNS bool runningOnDomainController bool selfUninstallMu sync.Mutex @@ -145,6 +146,55 @@ type prog struct { recoveryCancel context.CancelFunc recoveryRunning atomic.Bool + // recoveryBypass is set when dns-intercept mode enters recovery. + // When true, proxy() forwards all queries to OS/DHCP resolver + // instead of using the normal upstream flow. + recoveryBypass atomic.Bool + + // DNS intercept mode state (platform-specific). + // On Windows: *wfpState, on macOS: *pfState, nil on other platforms. + dnsInterceptState any + + // lastTunnelIfaces tracks the set of active VPN/tunnel interfaces (utun*, ipsec*, etc.) + // discovered during the last pf anchor rule build. When the set changes (e.g., a VPN + // connects and creates utun420), we rebuild the pf anchor to add interface-specific + // intercept rules for the new interface. Protected by mu. + lastTunnelIfaces []string //lint:ignore U1000 used on darwin + + // pfStabilizing is true while we're waiting for a VPN's pf ruleset to settle. + // While true, the watchdog and network change callbacks do NOT restore our rules. + pfStabilizing atomic.Bool + + // pfStabilizeCancel cancels the active stabilization goroutine, if any. + // Protected by mu. + pfStabilizeCancel context.CancelFunc //lint:ignore U1000 used on darwin + + // pfLastRestoreTime records when we last restored our anchor (unix millis). + // Used to detect immediate re-wipes (VPN reconnect cycle). 
+ pfLastRestoreTime atomic.Int64 //lint:ignore U1000 used on darwin + + // pfBackoffMultiplier tracks exponential backoff for stabilization. + // Resets to 0 when rules survive for >60s. + pfBackoffMultiplier atomic.Int32 //lint:ignore U1000 used on darwin + + // pfMonitorRunning ensures only one pfInterceptMonitor goroutine runs at a time. + // When an interface appears/disappears, we spawn a monitor that probes pf + // interception with exponential backoff and auto-heals if broken. + pfMonitorRunning atomic.Bool //lint:ignore U1000 used on darwin + + // pfProbeExpected holds the domain name of a pending pf interception probe. + // When non-empty, the DNS handler checks incoming queries against this value + // and signals pfProbeCh if matched. The probe verifies that pf's rdr rules + // are actually translating packets (not just present in rule text). + pfProbeExpected atomic.Value // string + + // pfProbeCh is signaled when the DNS handler receives the expected probe query. + // The channel is created by probePFIntercept() and closed when the probe arrives. + pfProbeCh atomic.Value // *chan struct{} + + // VPN DNS manager for split DNS routing when intercept mode is active. 
+ vpnDNS *vpnDNSManager + started chan struct{} onStartedDone chan struct{} onStarted []func() @@ -328,7 +378,7 @@ func (p *prog) apiConfigReload() { req := &controld.ResolverConfigRequest{ RawUID: cdUID, Version: rootCmd.Version, - Metadata: ctrld.SystemMetadata(context.Background()), + Metadata: ctrld.SystemMetadataRuntime(context.Background()), } resolverConfig, err := controld.FetchResolverConfig(req, cdDev) selfUninstallCheck(err, p, logger) @@ -491,9 +541,13 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) { } } } - if domain, err := getActiveDirectoryDomain(); err == nil && domain != "" && hasLocalDnsServerRunning() { + if domain, err := getActiveDirectoryDomain(); err == nil && domain != "" { mainLog.Load().Debug().Msgf("active directory domain: %s", domain) p.adDomain = domain + if hasLocalDnsServerRunning() { + mainLog.Load().Debug().Msg("local DNS server detected (Domain Controller)") + p.hasLocalDNS = true + } } var wg sync.WaitGroup @@ -724,6 +778,54 @@ func (p *prog) setDNS() { p.csSetDnsOk = setDnsOK }() + // Validate and resolve intercept mode. + // CLI flag (--intercept-mode) takes priority over config file. + // Valid values: "" (off), "dns" (with VPN split routing), "hard" (all DNS through ctrld). + if interceptMode != "" && !validInterceptMode(interceptMode) { + mainLog.Load().Fatal().Msgf("invalid --intercept-mode value %q: must be 'off', 'dns', or 'hard'", interceptMode) + } + if interceptMode == "" || interceptMode == "off" { + interceptMode = cfg.Service.InterceptMode + if interceptMode != "" && interceptMode != "off" { + mainLog.Load().Info().Msgf("Intercept mode enabled via config (intercept_mode = %q)", interceptMode) + } + } + + // Derive convenience bools from interceptMode. + switch interceptMode { + case "dns": + dnsIntercept = true + case "hard": + dnsIntercept = true + hardIntercept = true + } + + // DNS intercept mode: use OS-level packet interception (WFP/pf) instead of + // modifying interface DNS settings. 
This eliminates race conditions with VPN + // software that also manages DNS. See issue #489. + if dnsIntercept { + if err := p.startDNSIntercept(); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept mode failed — falling back to interface DNS settings") + // Fall through to traditional setDNS behavior. + } else { + if hardIntercept { + mainLog.Load().Info().Msg("Hard intercept mode active — all DNS through ctrld, no VPN split routing") + } else { + mainLog.Load().Info().Msg("DNS intercept mode active — skipping interface DNS configuration and watchdog") + + // Initialize VPN DNS manager for split DNS routing. + // Discovers search domains from virtual/VPN interfaces and forwards + // matching queries to the DNS server on that interface. + // Skipped in --intercept-mode hard where all DNS goes through ctrld. + p.vpnDNS = newVPNDNSManager(p.exemptVPNDNSServers) + p.vpnDNS.Refresh(true) + } + + setDnsOK = true + return + } + } + if cfg.Listener == nil { return } @@ -750,7 +852,7 @@ func (p *prog) setDNS() { if needRFC1918Listeners(lc) { nameservers = append(nameservers, ctrld.Rfc1918Addresses()...) } - if needLocalIPv6Listener() { + if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { nameservers = append(nameservers, "::1") } @@ -945,7 +1047,18 @@ func (p *prog) dnsWatchdog(iface *net.Interface, nameservers []string) { } // resetDNS performs a DNS reset for all interfaces. +// In DNS intercept mode, this tears down the WFP/pf filters instead. 
func (p *prog) resetDNS(isStart bool, restoreStatic bool) { + if dnsIntercept && p.dnsInterceptState != nil { + if err := p.stopDNSIntercept(); err != nil { + mainLog.Load().Error().Err(err).Msg("Failed to stop DNS intercept mode during reset") + } + + // Clean up VPN DNS manager + p.vpnDNS = nil + + return + } netIfaceName := "" if netIface := p.resetDNSForRunningIface(isStart, restoreStatic); netIface != nil { netIfaceName = netIface.Name diff --git a/cmd/cli/service_args_darwin.go b/cmd/cli/service_args_darwin.go new file mode 100644 index 0000000..d588960 --- /dev/null +++ b/cmd/cli/service_args_darwin.go @@ -0,0 +1,134 @@ +//go:build darwin + +package cli + +import ( + "fmt" + "os" + "os/exec" + "strings" +) + +const launchdPlistPath = "/Library/LaunchDaemons/ctrld.plist" + +// serviceConfigFileExists returns true if the launchd plist for ctrld exists on disk. +// This is more reliable than checking launchctl status, which may report "not found" +// if the service was unloaded but the plist file still exists. +func serviceConfigFileExists() bool { + _, err := os.Stat(launchdPlistPath) + return err == nil +} + +// appendServiceFlag appends a CLI flag (e.g., "--intercept-mode") to the installed +// service's launch arguments. This is used when upgrading an existing installation +// to intercept mode without losing the existing --cd flag and other arguments. +// +// On macOS, this modifies the launchd plist at /Library/LaunchDaemons/ctrld.plist +// using the "defaults" command, which is the standard way to edit plists. +// +// The function is idempotent: if the flag already exists, it's a no-op. +func appendServiceFlag(flag string) error { + // Read current ProgramArguments from plist. 
+ out, err := exec.Command("defaults", "read", launchdPlistPath, "ProgramArguments").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to read plist ProgramArguments: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + + // Check if the flag is already present (idempotent). + args := string(out) + if strings.Contains(args, flag) { + mainLog.Load().Debug().Msgf("Service flag %q already present in plist, skipping", flag) + return nil + } + + // Use PlistBuddy to append the flag to ProgramArguments array. + // PlistBuddy is more reliable than "defaults" for array manipulation. + addCmd := exec.Command( + "/usr/libexec/PlistBuddy", + "-c", fmt.Sprintf("Add :ProgramArguments: string %s", flag), + launchdPlistPath, + ) + if out, err := addCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to append %q to plist ProgramArguments: %w (output: %s)", flag, err, strings.TrimSpace(string(out))) + } + + mainLog.Load().Info().Msgf("Appended %q to service launch arguments", flag) + return nil +} + +// verifyServiceRegistration is a no-op on macOS (launchd plist verification not needed). +func verifyServiceRegistration() error { + return nil +} + +// removeServiceFlag removes a CLI flag (and its value, if the next argument is not +// a flag) from the installed service's launch arguments. For example, removing +// "--intercept-mode" also removes the following "dns" or "hard" value argument. +// +// The function is idempotent: if the flag doesn't exist, it's a no-op. +func removeServiceFlag(flag string) error { + // Read current ProgramArguments to find the index. + out, err := exec.Command("/usr/libexec/PlistBuddy", "-c", "Print :ProgramArguments", launchdPlistPath).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to read plist ProgramArguments: %w (output: %s)", err, strings.TrimSpace(string(out))) + } + + // Parse the PlistBuddy output to find the flag's index. 
+ // PlistBuddy prints arrays as: + // Array { + // /path/to/ctrld + // run + // --cd=xxx + // --intercept-mode + // dns + // } + lines := strings.Split(string(out), "\n") + var entries []string + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "Array {" || trimmed == "}" || trimmed == "" { + continue + } + entries = append(entries, trimmed) + } + + index := -1 + for i, entry := range entries { + if entry == flag { + index = i + break + } + } + + if index < 0 { + mainLog.Load().Debug().Msgf("Service flag %q not present in plist, skipping removal", flag) + return nil + } + + // Check if the next entry is a value (not a flag). If so, delete it first + // (deleting by index shifts subsequent entries down, so delete value before flag). + hasValue := index+1 < len(entries) && !strings.HasPrefix(entries[index+1], "-") + if hasValue { + delVal := exec.Command( + "/usr/libexec/PlistBuddy", + "-c", fmt.Sprintf("Delete :ProgramArguments:%d", index+1), + launchdPlistPath, + ) + if out, err := delVal.CombinedOutput(); err != nil { + return fmt.Errorf("failed to remove value for %q from plist: %w (output: %s)", flag, err, strings.TrimSpace(string(out))) + } + } + + // Delete the flag itself. 
+ delCmd := exec.Command( + "/usr/libexec/PlistBuddy", + "-c", fmt.Sprintf("Delete :ProgramArguments:%d", index), + launchdPlistPath, + ) + if out, err := delCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to remove %q from plist ProgramArguments: %w (output: %s)", flag, err, strings.TrimSpace(string(out))) + } + + mainLog.Load().Info().Msgf("Removed %q from service launch arguments", flag) + return nil +} diff --git a/cmd/cli/service_args_others.go b/cmd/cli/service_args_others.go new file mode 100644 index 0000000..07edda2 --- /dev/null +++ b/cmd/cli/service_args_others.go @@ -0,0 +1,38 @@ +//go:build !darwin && !windows + +package cli + +import ( + "fmt" + "os" +) + +// serviceConfigFileExists checks common service config file locations on Linux. +func serviceConfigFileExists() bool { + // systemd unit file + if _, err := os.Stat("/etc/systemd/system/ctrld.service"); err == nil { + return true + } + // SysV init script + if _, err := os.Stat("/etc/init.d/ctrld"); err == nil { + return true + } + return false +} + +// appendServiceFlag is not yet implemented on this platform. +// Linux services (systemd) store args in unit files; intercept mode +// should be set via the config file (intercept_mode) on these platforms. +func appendServiceFlag(flag string) error { + return fmt.Errorf("appending service flags is not supported on this platform; use intercept_mode in config instead") +} + +// verifyServiceRegistration is a no-op on this platform. +func verifyServiceRegistration() error { + return nil +} + +// removeServiceFlag is not yet implemented on this platform. 
+func removeServiceFlag(flag string) error { + return fmt.Errorf("removing service flags is not supported on this platform; use intercept_mode in config instead") +} diff --git a/cmd/cli/service_args_windows.go b/cmd/cli/service_args_windows.go new file mode 100644 index 0000000..246a009 --- /dev/null +++ b/cmd/cli/service_args_windows.go @@ -0,0 +1,153 @@ +//go:build windows + +package cli + +import ( + "fmt" + "strings" + + "golang.org/x/sys/windows/svc/mgr" +) + +// serviceConfigFileExists returns true if the ctrld Windows service is registered. +func serviceConfigFileExists() bool { + m, err := mgr.Connect() + if err != nil { + return false + } + defer m.Disconnect() + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return false + } + s.Close() + return true +} + +// appendServiceFlag appends a CLI flag (e.g., "--intercept-mode") to the installed +// Windows service's BinPath arguments. This is used when upgrading an existing +// installation to intercept mode without losing the existing --cd flag. +// +// The function is idempotent: if the flag already exists, it's a no-op. +func appendServiceFlag(flag string) error { + m, err := mgr.Connect() + if err != nil { + return fmt.Errorf("failed to connect to Windows SCM: %w", err) + } + defer m.Disconnect() + + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err) + } + defer s.Close() + + config, err := s.Config() + if err != nil { + return fmt.Errorf("failed to read service config: %w", err) + } + + // Check if flag already present (idempotent). + if strings.Contains(config.BinaryPathName, flag) { + mainLog.Load().Debug().Msgf("Service flag %q already present in BinPath, skipping", flag) + return nil + } + + // Append the flag to BinPath. 
+ config.BinaryPathName = strings.TrimSpace(config.BinaryPathName) + " " + flag + + if err := s.UpdateConfig(config); err != nil { + return fmt.Errorf("failed to update service config with %q: %w", flag, err) + } + + mainLog.Load().Info().Msgf("Appended %q to service BinPath", flag) + return nil +} + +// verifyServiceRegistration opens the Windows Service Control Manager and verifies +// that the ctrld service is correctly registered: logs the BinaryPathName, checks +// that --intercept-mode is present if expected, and verifies SERVICE_AUTO_START. +func verifyServiceRegistration() error { + m, err := mgr.Connect() + if err != nil { + return fmt.Errorf("failed to connect to Windows SCM: %w", err) + } + defer m.Disconnect() + + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err) + } + defer s.Close() + + config, err := s.Config() + if err != nil { + return fmt.Errorf("failed to read service config: %w", err) + } + + mainLog.Load().Debug().Msgf("Service registry: BinaryPathName = %q", config.BinaryPathName) + + // If intercept mode is set, verify the flag is present in BinPath. + if interceptMode == "dns" || interceptMode == "hard" { + if !strings.Contains(config.BinaryPathName, "--intercept-mode") { + return fmt.Errorf("service registry: --intercept-mode flag missing from BinaryPathName (expected mode %q)", interceptMode) + } + mainLog.Load().Debug().Msgf("Service registry: --intercept-mode flag present in BinaryPathName") + } + + // Verify auto-start. mgr.StartAutomatic == 2 == SERVICE_AUTO_START. + if config.StartType != mgr.StartAutomatic { + return fmt.Errorf("service registry: StartType is %d, expected SERVICE_AUTO_START (%d)", config.StartType, mgr.StartAutomatic) + } + + return nil +} + +// removeServiceFlag removes a CLI flag (and its value, if present) from the installed +// Windows service's BinPath. 
For example, removing "--intercept-mode" also removes +// the following "dns" or "hard" value. The function is idempotent. +func removeServiceFlag(flag string) error { + m, err := mgr.Connect() + if err != nil { + return fmt.Errorf("failed to connect to Windows SCM: %w", err) + } + defer m.Disconnect() + + s, err := m.OpenService(ctrldServiceName) + if err != nil { + return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err) + } + defer s.Close() + + config, err := s.Config() + if err != nil { + return fmt.Errorf("failed to read service config: %w", err) + } + + if !strings.Contains(config.BinaryPathName, flag) { + mainLog.Load().Debug().Msgf("Service flag %q not present in BinPath, skipping removal", flag) + return nil + } + + // Split BinPath into parts, find and remove the flag + its value (if any). + parts := strings.Fields(config.BinaryPathName) + var newParts []string + for i := 0; i < len(parts); i++ { + if parts[i] == flag { + // Skip the flag. Also skip the next part if it's a value (not a flag). 
+ if i+1 < len(parts) && !strings.HasPrefix(parts[i+1], "-") { + i++ // skip value too + } + continue + } + newParts = append(newParts, parts[i]) + } + config.BinaryPathName = strings.Join(newParts, " ") + + if err := s.UpdateConfig(config); err != nil { + return fmt.Errorf("failed to update service config: %w", err) + } + + mainLog.Load().Info().Msgf("Removed %q from service BinPath", flag) + return nil +} diff --git a/config.go b/config.go index bdfa389..edba183 100644 --- a/config.go +++ b/config.go @@ -240,6 +240,7 @@ type ServiceConfig struct { RefetchTime *int `mapstructure:"refetch_time" toml:"refetch_time,omitempty"` ForceRefetchWaitTime *int `mapstructure:"force_refetch_wait_time" toml:"force_refetch_wait_time,omitempty"` LeakOnUpstreamFailure *bool `mapstructure:"leak_on_upstream_failure" toml:"leak_on_upstream_failure,omitempty"` + InterceptMode string `mapstructure:"intercept_mode" toml:"intercept_mode,omitempty" validate:"omitempty,oneof=off dns hard"` Daemon bool `mapstructure:"-" toml:"-"` AllocateIP bool `mapstructure:"-" toml:"-"` } @@ -511,6 +512,69 @@ func (uc *UpstreamConfig) ReBootstrap() { }) } +// ForceReBootstrap immediately replaces the upstream transport, closing old +// connections and creating new ones synchronously. Unlike ReBootstrap() which +// sets a lazy flag (new transport created on next query), this ensures the +// transport is ready before any queries arrive. Use when external events +// (e.g. firewall state flush) are known to have killed existing connections. +func (uc *UpstreamConfig) ForceReBootstrap() { + switch uc.Type { + case ResolverTypeDOH, ResolverTypeDOH3, ResolverTypeDOQ, ResolverTypeDOT: + default: + return + } + ProxyLogger.Load().Debug().Msgf("force re-bootstrapping upstream transport for %v", uc) + uc.SetupTransport() + // Clear any pending lazy re-bootstrap flag so ensureSetupTransport() + // doesn't redundantly recreate the transport we just built. 
+ uc.rebootstrap.Store(rebootstrapNotStarted) +} + +// closeTransports closes idle connections on all existing transports. +// This is called before creating new transports during re-bootstrap to +// force in-flight requests on stale connections to fail quickly, rather +// than waiting for the full context deadline (e.g. 5s) after a firewall +// state table flush kills the underlying TCP/QUIC connections. +func (uc *UpstreamConfig) closeTransports() { + if t := uc.transport; t != nil { + t.CloseIdleConnections() + } + if t := uc.transport4; t != nil { + t.CloseIdleConnections() + } + if t := uc.transport6; t != nil { + t.CloseIdleConnections() + } + if p := uc.doqConnPool; p != nil { + p.CloseIdleConnections() + } + if p := uc.doqConnPool4; p != nil { + p.CloseIdleConnections() + } + if p := uc.doqConnPool6; p != nil { + p.CloseIdleConnections() + } + if p := uc.dotClientPool; p != nil { + p.CloseIdleConnections() + } + if p := uc.dotClientPool4; p != nil { + p.CloseIdleConnections() + } + if p := uc.dotClientPool6; p != nil { + p.CloseIdleConnections() + } + // http3RoundTripper is stored as http.RoundTripper but the concrete type + // (*http3.Transport) exposes CloseIdleConnections via this interface. + type idleCloser interface { + CloseIdleConnections() + } + for _, rt := range []http.RoundTripper{uc.http3RoundTripper, uc.http3RoundTripper4, uc.http3RoundTripper6} { + if c, ok := rt.(idleCloser); ok { + c.CloseIdleConnections() + } + } +} + // SetupTransport initializes the network transport used to connect to upstream servers. // For now, DoH/DoH3/DoQ/DoT upstreams are supported. func (uc *UpstreamConfig) SetupTransport() { @@ -519,6 +583,13 @@ func (uc *UpstreamConfig) SetupTransport() { default: return } + + // Close existing transport connections before creating new ones. + // This forces in-flight requests on stale connections (e.g. after a + // firewall state table flush) to fail fast instead of waiting for + // the full context deadline timeout. 
+ uc.closeTransports() + ips := uc.bootstrapIPs switch uc.IPStack { case IpStackV4: diff --git a/config_internal_test.go b/config_internal_test.go index ca2b381..d470a14 100644 --- a/config_internal_test.go +++ b/config_internal_test.go @@ -541,10 +541,12 @@ func TestRebootstrapRace(t *testing.T) { <-started var wg sync.WaitGroup + wg.Add(goroutines) for range goroutines { - wg.Go(func() { + go func() { + defer wg.Done() uc.ensureSetupTransport() - }) + }() } wg.Wait() diff --git a/docs/dns-intercept-mode.md b/docs/dns-intercept-mode.md new file mode 100644 index 0000000..41dae1f --- /dev/null +++ b/docs/dns-intercept-mode.md @@ -0,0 +1,551 @@ +# DNS Intercept Mode + +## Overview + +DNS intercept mode is an alternative approach to DNS management that uses OS-level packet interception instead of modifying network interface DNS settings. This eliminates race conditions with VPN software, endpoint security tools, and other programs that also manage DNS. + +## The Problem + +By default, ctrld sets DNS to `127.0.0.1` on network interfaces so all queries go through ctrld's local listener. However, VPN software (F5 BIG-IP, Cisco AnyConnect, Palo Alto GlobalProtect, etc.) also overwrites interface DNS settings, creating conflicts: + +1. **DNS Setting War**: ctrld sets DNS to `127.0.0.1`, VPN overwrites to its DNS servers, ctrld's watchdog detects the change and restores `127.0.0.1`, VPN overwrites again — infinitely. + +2. **Bypass Window**: During the watchdog polling interval (up to 20 seconds), DNS queries may go to the VPN's DNS servers, bypassing ctrld's filtering profiles (malware blocking, content filtering, etc.). + +3. **Resolution Failures**: During the brief moments when DNS is being rewritten, queries may fail entirely, causing intermittent connectivity loss. 
+ +## The Solution + +DNS intercept mode works at a lower level than interface settings: + +- **Windows**: Uses NRPT (Name Resolution Policy Table) to route all DNS queries to `127.0.0.1` (ctrld's listener) via the Windows DNS Client service. In `hard` mode, additionally uses WFP (Windows Filtering Platform) to block all outbound DNS (port 53) except to localhost and private ranges, preventing any bypass. VPN software can set interface DNS freely — NRPT's most-specific-match ensures VPN-specific domains still resolve correctly while ctrld handles everything else. + +- **macOS**: Uses pf (packet filter) to redirect all outbound DNS (port 53) traffic to ctrld's listener at `127.0.0.1:53`. Any DNS query, regardless of which DNS server the OS thinks it's using, gets transparently redirected to ctrld. + +## Usage + +```bash +# Start ctrld with DNS intercept mode (auto-detects VPN search domains) +ctrld start --intercept-mode dns --cd + +# Hard intercept: all DNS through ctrld, no VPN split routing +ctrld start --intercept-mode hard --cd + +# Or with a config file +ctrld start --intercept-mode dns -c /path/to/ctrld.toml + +# Run in foreground (debug) +ctrld run --intercept-mode dns --cd +ctrld run --intercept-mode hard --cd +``` + +### Intercept Modes + +| Flag | DNS Interception | VPN Split Routing | Captive Portal Recovery | +|------|-----------------|-------------------|------------------------| +| `--intercept-mode dns` | ✅ NRPT/pf | ✅ Auto-detect & forward | ✅ Active | +| `--intercept-mode hard` | ✅ NRPT + WFP/pf | ❌ All through ctrld | ✅ Active | + +**`--intercept-mode dns`** (recommended): Intercepts all DNS via NRPT (Windows) or pf (macOS), but automatically discovers search domains from VPN and virtual network adapters (Tailscale, F5, Cisco AnyConnect, etc.) and forwards matching queries to the DNS server on that interface. This allows VPN internal resources (e.g., `*.corp.local`) to resolve correctly while ctrld handles everything else.
+ +**`--intercept-mode hard`**: Same OS-level interception, but does NOT forward any queries to VPN DNS servers. Every DNS query goes through ctrld's configured upstreams. Use this when you want total DNS control and don't need VPN internal domain resolution. Captive portal recovery still works — network authentication pages are handled automatically. + +## How It Works + +### Windows (NRPT + WFP) + +Windows DNS intercept uses a two-tier architecture with mode-dependent enforcement: + +- **`dns` mode**: NRPT only — graceful DNS routing through the Windows DNS Client service. At worst, a VPN overwrites NRPT and queries bypass ctrld temporarily. DNS never breaks. +- **`hard` mode**: NRPT + WFP — same NRPT routing, plus WFP kernel-level block filters that prevent any outbound DNS bypass. Equivalent enforcement to macOS pf. + +#### Why This Design? + +WFP can only **block** or **permit** connections — it **cannot redirect** them (redirection requires kernel-mode callout drivers). Without NRPT, WFP blocks outbound DNS but doesn't tell applications where to send queries instead — they see DNS failures. NRPT provides the "positive routing" while WFP provides enforcement. + +Separating them into modes means most users get `dns` mode (safe, can never break DNS) while high-security deployments use `hard` mode (full enforcement, same guarantees as macOS pf). + +#### Startup Sequence (dns mode) + +1. Creates NRPT catch-all registry rule (`.` → `127.0.0.1`) under `HKLM\...\DnsPolicyConfig\CtrldCatchAll` +2. Triggers Group Policy refresh via `RefreshPolicyEx` (userenv.dll) so DNS Client loads NRPT immediately +3. Flushes DNS cache to clear stale entries +4. Starts NRPT health monitor (30s periodic check) +5. Launches async NRPT probe-and-heal to verify NRPT is actually routing queries + +#### Startup Sequence (hard mode) + +1. Creates NRPT catch-all rule + GP refresh + DNS flush (same as dns mode) +2. Opens WFP engine with `RPC_C_AUTHN_DEFAULT` (0xFFFFFFFF) +3. 
Cleans up any stale sublayer from a previous unclean shutdown +4. Creates sublayer with maximum weight (0xFFFF) +5. Adds **permit** filters (weight 10) for DNS to localhost (`127.0.0.1`/`::1` port 53) +6. Adds **permit** filters (weight 10) for DNS to RFC1918 + CGNAT subnets (10/8, 172.16/12, 192.168/16, 100.64/10) +7. Adds **block** filters (weight 1) for all other outbound DNS (port 53 UDP+TCP) +8. Starts NRPT health monitor (also verifies WFP sublayer in hard mode) +9. Launches async NRPT probe-and-heal + +**Atomic guarantee:** NRPT must succeed before WFP starts. If NRPT fails, WFP is not attempted. If WFP fails, NRPT is rolled back. This prevents DNS blackholes where WFP blocks everything but nothing routes to ctrld. + +On shutdown: stops health monitor, removes NRPT rule, flushes DNS, then (hard mode only) removes all WFP filters and closes engine. + +#### NRPT Details + +The **Name Resolution Policy Table** is a Windows feature (originally for DirectAccess) that tells the DNS Client service to route queries matching specific namespace patterns to specific DNS servers. ctrld adds a catch-all rule: + +| Registry Value | Type | Value | Purpose | +|---|---|---|---| +| `Name` | REG_MULTI_SZ | `.` | Namespace pattern (`.` = catch-all, matches everything) | +| `GenericDNSServers` | REG_SZ | `127.0.0.1` | DNS server to use for matching queries | +| `ConfigOptions` | REG_DWORD | `0x8` | Standard DNS resolution (no DirectAccess) | +| `Version` | REG_DWORD | `0x2` | NRPT rule version 2 | + +**Registry path**: `HKLM\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig\CtrldCatchAll` + +**Group Policy refresh**: The DNS Client service only reads NRPT from registry during Group Policy processing cycles (default: every 90 minutes). ctrld calls `RefreshPolicyEx(bMachine=TRUE, dwOptions=RP_FORCE)` from `userenv.dll` to trigger an immediate refresh. Falls back to `gpupdate /target:computer /force` if the DLL call fails. 
+ +#### WFP Filter Architecture + +**Filter priority**: Permit filters have weight 10, block filters have weight 1. WFP evaluates higher-weight filters first, so localhost and private-range DNS is always permitted. + +**RFC1918 + CGNAT permits**: Static subnet permit filters allow DNS to private IP ranges (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 100.64.0.0/10). This means VPN DNS servers on private IPs (Tailscale MagicDNS on 100.100.100.100, corporate VPN DNS on 10.x.x.x, etc.) work without needing dynamic per-server exemptions. + +**VPN coexistence**: VPN software can set DNS to whatever it wants on the interface — for public IPs, the WFP block filter prevents those servers from being reached on port 53. For private IPs, the subnet permits allow it. ctrld handles all DNS routing through NRPT and can forward VPN-specific domains to VPN DNS servers through its own upstream mechanism. + +#### NRPT Probe and Auto-Heal + +`RefreshPolicyEx` returns immediately — it does NOT wait for the DNS Client service to actually load the NRPT rule. On cold machines (first boot, fresh install), the DNS Client may take several seconds to process the policy refresh. During this window, the NRPT rule exists in the registry but isn't active. + +ctrld verifies NRPT is actually working by sending a probe DNS query (`_nrpt-probe-<hex>.nrpt-probe.ctrld.test`) through Go's `net.Resolver` (which calls `GetAddrInfoW` → DNS Client → NRPT path). If ctrld receives the probe on its listener, NRPT is active. + +**Startup probe (async, non-blocking):** After NRPT setup, an async goroutine probes with escalating remediation: (1) immediate probe, (2) GP refresh + retry, (3) DNS Client service restart + retry, (4) final retry. Only one probe sequence runs at a time. + +**DNS Client restart (nuclear option):** If GP refresh alone isn't enough, ctrld restarts the `Dnscache` service to force full NRPT re-initialization.
This briefly interrupts all DNS (~100ms) but only fires when NRPT is already not working. + +#### NRPT Health Monitor + +A dedicated background goroutine (`nrptHealthMonitor`) runs every 30 seconds and performs active probing: + +1. **Registry check:** If the NRPT catch-all rule is missing from the registry, restore it + GP refresh + probe-and-heal +2. **Active probe:** If the rule exists, send a probe query to verify it's actually routing — catches cases where the registry key is present but DNS Client hasn't loaded it +3. **(hard mode)** Verify WFP sublayer exists; full restart on loss + +This is periodic (not just network-event-driven) because VPN software can clear NRPT at any time. Additionally, `scheduleDelayedRechecks()` (called on network change events) re-verifies NRPT 2s and 4s after changes. + +#### Known Caveats + +- **`nslookup` bypasses NRPT**: `nslookup.exe` uses its own DNS resolver implementation and does NOT go through the Windows DNS Client service, so it ignores NRPT rules entirely. Use `Resolve-DnsName` (PowerShell) or `ping` to verify DNS resolution through NRPT. This is a well-known Windows behavior, not a ctrld bug. +- **`RPC_C_AUTHN_DEFAULT`**: `FwpmEngineOpen0` requires `RPC_C_AUTHN_DEFAULT` (0xFFFFFFFF) for the authentication service parameter. Using `RPC_C_AUTHN_NONE` (0) returns `ERROR_NOT_SUPPORTED` on some configurations (e.g., Parallels VMs). +- **FWP_DATA_TYPE enum**: The `FWP_DATA_TYPE` enum starts at `FWP_EMPTY=0`, making `FWP_UINT8=1`, `FWP_UINT16=2`, etc. Some documentation examples incorrectly start at 0. + +### macOS (pf) + +1. ctrld writes a pf anchor file at `/etc/pf.anchors/com.controld.ctrld` +2. Adds the anchor reference to `/etc/pf.conf` (if not present) +3. Loads the anchor with `pfctl -a com.controld.ctrld -f /etc/pf.anchors/com.controld.ctrld` +4. Enables pf with `pfctl -e` (if not already enabled) +5. The anchor redirects all outbound DNS (port 53) on non-loopback interfaces to `127.0.0.1:53` +6.
On shutdown, the anchor is flushed, the file removed, and references cleaned from `pf.conf` + +**ctrld's own traffic**: ctrld's upstream queries use DoH (HTTPS on port 443), not plain DNS on port 53, so the pf redirect does not create a loop for DoH upstreams. **Warning:** If an "os" upstream is configured (which uses plain DNS on port 53 to external servers), the pf redirect will capture ctrld's own outbound queries and create a loop. ctrld will log a warning at startup if this is detected. Use DoH upstreams when DNS intercept mode is active. + +## What Changes vs Default Mode + +| Behavior | Default Mode | DNS Intercept Mode | +|----------|-------------|-------------------| +| Interface DNS settings | Set to `127.0.0.1` | **Not modified** | +| DNS watchdog | Active (polls every 20s) | **Disabled** | +| VPN DNS conflict | Race condition possible | **Eliminated** | +| Profile bypass window | Up to 20 seconds | **Zero** | +| Requires admin/root | Yes | Yes | +| Additional OS requirements | None | WFP (Windows), pf (macOS) | + +## Logging + +DNS intercept mode produces detailed logs for troubleshooting: + +``` +DNS intercept: initializing Windows Filtering Platform (WFP) +DNS intercept: WFP engine opened (handle: 0x1a2b3c) +DNS intercept: WFP sublayer created (weight: 0xFFFF — maximum priority) +DNS intercept: added permit filter "Permit DNS to localhost (IPv4/UDP)" (ID: 12345) +DNS intercept: added block filter "Block outbound DNS (IPv4/UDP)" (ID: 12349) +DNS intercept: WFP filters active — all outbound DNS (port 53) blocked except to localhost +``` + +On macOS: +``` +DNS intercept: initializing macOS packet filter (pf) redirect +DNS intercept: wrote pf anchor file: /etc/pf.anchors/com.controld.ctrld +DNS intercept: loaded pf anchor "com.controld.ctrld" +DNS intercept: pf anchor "com.controld.ctrld" active with 3 rules +DNS intercept: pf redirect active — all outbound DNS (port 53) redirected to 127.0.0.1:53 +``` + +## Troubleshooting + +### Windows + +```powershell 
+# Check NRPT rules (should show CtrldCatchAll with . → 127.0.0.1) +Get-DnsClientNrptRule + +# Check NRPT registry directly +Get-ChildItem "HKLM:\SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig" + +# Force Group Policy refresh (if NRPT not taking effect) +gpupdate /target:computer /force + +# Check if WFP filters are active +netsh wfp show filters + +# Check ctrld's specific filters (look for "ctrld" in output) +netsh wfp show filters | Select-String "ctrld" + +# Test DNS resolution (use Resolve-DnsName, NOT nslookup!) +# nslookup bypasses DNS Client / NRPT — it will NOT reflect NRPT routing +Resolve-DnsName example.com +ping example.com + +# If you must use nslookup, specify localhost explicitly: +nslookup example.com 127.0.0.1 +``` + +### macOS + +```bash +# Check if pf is enabled +sudo pfctl -si + +# Check ctrld's anchor rules +sudo pfctl -a com.controld.ctrld -sr +sudo pfctl -a com.controld.ctrld -sn + +# Check pf.conf for anchor reference +cat /etc/pf.conf | grep ctrld + +# Test DNS is going through ctrld +dig @127.0.0.1 example.com +``` + +## Limitations + +- **Linux**: Not supported. Linux uses `systemd-resolved` or `/etc/resolv.conf` which don't have the same VPN conflict issues. If needed in the future, `iptables`/`nftables` REDIRECT could be used. + +- **Split DNS for VPN internal domains**: In `--intercept-mode dns` mode, VPN search domains are auto-detected from virtual network adapters and forwarded to the VPN's DNS servers automatically. In `--intercept-mode hard` mode, VPN internal domains (e.g., `*.corp.local`) will NOT resolve unless configured as explicit upstream rules in ctrld's configuration. + +- **macOS mDNSResponder interaction**: On macOS, ctrld uses a workaround ("mDNSResponder hack") that binds to `0.0.0.0:53` instead of `127.0.0.1:53` and refuses queries from non-localhost sources. In dns-intercept mode, pf's `rdr` rewrites the destination IP to `127.0.0.1:53` but preserves the original source IP (e.g., `192.168.2.73`). 
The mDNSResponder source-IP check is automatically bypassed in dns-intercept mode because the pf/WFP rules already ensure only legitimate intercepted DNS traffic reaches ctrld's listener. + +- **Other WFP/pf users**: If other software (VPN, firewall, endpoint security) also uses WFP or pf for DNS interception, there may be priority conflicts. ctrld uses maximum sublayer weight on Windows and a named anchor on macOS to minimize this risk. See "VPN App Coexistence" below for macOS-specific defenses. + +## VPN App Coexistence (macOS) + +VPN apps (Windscribe, Cisco AnyConnect, F5 BIG-IP, etc.) often manage pf rules themselves, which can interfere with ctrld's DNS intercept. ctrld uses a multi-layered defense strategy: + +### 1. Anchor Priority Enforcement + +When injecting our anchor reference into the running pf ruleset, ctrld **prepends** both the `rdr-anchor` and `anchor` references before all other anchors. pf evaluates rules top-to-bottom, so our DNS intercept `quick` rules match port 53 traffic before a VPN app's broader rules in their own anchor. + +### 2. Interface-Specific Tunnel Rules + +VPN apps commonly add rules like `pass out quick on ipsec0 inet all` that match ALL traffic on the VPN interface. If their anchor is evaluated before ours (e.g., after a ruleset reload), these broad rules capture DNS. ctrld counters this by adding explicit DNS intercept rules for each active tunnel interface (ipsec*, utun*, ppp*, tap*, tun*). These interface-specific rules match port 53 only, so they take priority over the VPN app's broader "all" match even within the same anchor evaluation pass. + +### 3. Dynamic Tunnel Interface Detection + +The network change monitor (`validInterfacesMap()`) only tracks physical hardware ports (en0, bridge0, etc.) — it doesn't see tunnel interfaces (utun*, ipsec*, etc.) created by VPN software. 
When a VPN connects and creates a new interface (e.g., utun420 for WireGuard), ctrld detects this through a separate tunnel interface change check and rebuilds the pf anchor to include explicit intercept rules for the new interface. This runs on every network change event, even if no physical interface changed. + +### 4. pf Watchdog + Network Change Hooks + +A background watchdog (30s interval) plus immediate checks on network change events detect when another program replaces the entire pf ruleset (e.g., Windscribe's `pfctl -f /etc/pf.conf`). When detected, ctrld rebuilds its anchor with up-to-date tunnel interface rules and re-injects the anchor reference at the top of the ruleset. A 2-second delayed re-check catches race conditions where the other program clears rules slightly after the network event. + +### 4a. Active Interception Probe (pf Translation State Corruption) + +Programs like Parallels Desktop reload `/etc/pf.conf` when creating/destroying virtual network interfaces (bridge100, vmenet0). This can corrupt pf's internal translation engine — rdr rules survive in text form but stop evaluating, causing DNS interception to silently fail while the watchdog reports "intact." + +ctrld detects interface appearance/disappearance and spawns an async probe monitor: + +1. **Probe mechanism:** A subprocess runs with GID=0 (wheel, not `_ctrld`) and sends a DNS query to the OS resolver. If pf interception is working, the query gets redirected to ctrld (127.0.0.1:53) and is detected in the DNS handler. If broken, it times out after 1s. +2. **Backoff schedule:** Probes at 0, 0.5, 1, 2, 4 seconds (~8s window) to win the race against async pf reloads by the hypervisor. Only one monitor runs at a time (atomic singleton). +3. **Auto-heal:** On probe failure, `forceReloadPFMainRuleset()` dumps the running ruleset and pipes it back through `pfctl -f -`, resetting pf's translation engine. VPN-safe because it reassembles from the current running state. +4. 
**Watchdog integration:** The 30s watchdog also runs the probe when rule text checks pass, as a safety net for unknown corruption causes. + +This approach detects **actual broken DNS** rather than guessing from trigger events, making it robust against future unknown corruption scenarios. + +### 5. Proactive DoH Connection Pool Reset + +When the watchdog detects a pf ruleset replacement, it force-rebootstraps all upstream transports via `ForceReBootstrap()`. This is necessary because `pfctl -f` flushes the entire pf state table, which kills existing TCP connections (including ctrld's DoH connections to upstream DNS servers like 76.76.2.22:443). + +The force-rebootstrap does two things that the lazy `ReBootstrap()` cannot: +1. **Closes idle connections on the old transport** (`CloseIdleConnections()`), causing in-flight HTTP/2 requests on dead connections to fail immediately instead of waiting for the 5s context deadline +2. **Creates the new transport synchronously**, so it's ready before any DNS queries arrive post-wipe + +Without this, Go's `http.Transport` keeps trying dead connections until each request's context deadline expires (~5s), then the lazy rebootstrap creates a new transport for the *next* request. With force-rebootstrap, the blackout is reduced from ~5s to ~100ms (one fresh TLS handshake). + +### 6. Blanket Process Exemption (group _ctrld) + +ctrld creates a macOS system group (`_ctrld`) and sets its effective GID at startup via `syscall.Setegid()`. The pf anchor includes a blanket rule: + +``` +pass out quick group _ctrld +``` + +This exempts **all** outbound traffic from the ctrld process — not just DNS (port 53), but also DoH (TCP 443), DoT (TCP 853), health checks, and any other connections. This is essential because VPN firewalls like Windscribe load `block drop all` rulesets that would otherwise block ctrld's upstream connections even after the pf anchor is restored. 
+ +Because ctrld's anchor is prepended before all other anchors, and this rule uses `quick`, it evaluates before any VPN firewall rules. The result: ctrld's traffic is never blocked regardless of what other pf rulesets are loaded. + +The per-IP exemptions (OS resolver, VPN DNS) remain as defense-in-depth for the DNS redirect loop prevention — the blanket rule handles everything else. + +### 7. Loopback Outbound Pass Rule + +When `route-to lo0` redirects a DNS packet to loopback, pf re-evaluates the packet **outbound on lo0**. None of the existing route-to rules match on lo0 (they're all `on ! lo0` or `on utunX`), so without an explicit pass rule, the packet falls through to the main ruleset where VPN firewalls' `block drop all` drops it — before it ever reaches the inbound rdr rule. + +``` +pass out quick on lo0 inet proto udp from any to ! 127.0.0.1 port 53 +pass out quick on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 +``` + +This bridges the route-to → rdr gap: route-to sends outbound on lo0 → this rule passes it → loopback reflects it inbound → rdr rewrites destination to 127.0.0.1:53 → ctrld receives the query. Without this rule, DNS intercept fails whenever a `block drop all` firewall (Windscribe, etc.) is active. + +### 8. Response Routing via `reply-to lo0` + +After rdr redirects DNS to 127.0.0.1:53, ctrld responds to the original client source IP (e.g., 100.94.163.168 — a VPN tunnel IP). Without intervention, the kernel routes this response through the VPN tunnel interface (utun420) based on its routing table, and the response is lost. + +``` +pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53 +``` + +`reply-to lo0` tells pf to force response packets for this connection back through lo0, overriding the kernel routing table. 
The response stays local, rdr reverse NAT rewrites the source from 127.0.0.1 back to the original DNS server IP (e.g., 10.255.255.3), and the client process receives a correctly-addressed response. + +### 9. VPN DNS Split Routing and Exit Mode Detection + +When a VPN like Tailscale MagicDNS is active, two distinct modes require different pf handling: + +#### The Problem: DNS Proxy Loop + +VPN DNS handlers like Tailscale's MagicDNS run as macOS Network Extensions. MagicDNS +listens on 100.100.100.100 and forwards queries to internal upstream nameservers +(e.g., 10.0.0.11, 10.0.0.12) via the VPN tunnel interface (utun13). + +Without special handling, pf's generic `pass out quick on ! lo0 route-to lo0` rule +intercepts MagicDNS's upstream queries on the tunnel interface, routing them back +to ctrld → which matches VPN DNS split routing → forwards to MagicDNS → loop: + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ THE LOOP (without passthrough rules) │ +│ │ +│ 1. dig vpn-internal.example.com │ +│ → pf intercepts → route-to lo0 → rdr → ctrld (127.0.0.1:53) │ +│ │ +│ 2. ctrld: VPN DNS match → forward to 100.100.100.100:53 │ +│ → group _ctrld exempts → reaches MagicDNS │ +│ │ +│ 3. MagicDNS: forward to upstream 10.0.0.11:53 via utun13 │ +│ → pf generic rule matches (utun13 ≠ lo0, 10.0.0.11 ≠ skip) │ +│ → route-to lo0 → rdr → back to ctrld ← LOOP! │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +#### The Fix: Interface Passthrough + Exit Mode Detection + +**Split DNS mode** (VPN handles only specific domains): + +ctrld adds passthrough rules for VPN DNS interfaces that let MagicDNS's upstream +queries flow without interception. A `<vpn_dns>` table contains the VPN DNS server +IPs (e.g., 100.100.100.100) — traffic TO those IPs is NOT passed through (still +intercepted by pf → ctrld enforces profile): + +``` +table <vpn_dns> { 100.100.100.100 } + +# MagicDNS upstream queries (to 10.0.0.11 etc.) 
— pass through +pass out quick on utun13 inet proto udp from any to ! <vpn_dns> port 53 +pass out quick on utun13 inet proto tcp from any to ! <vpn_dns> port 53 + +# Queries TO MagicDNS (100.100.100.100) — not matched above, +# falls through to generic rule → intercepted → ctrld → profile enforced +``` + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ SPLIT DNS MODE (with passthrough rules) │ +│ │ +│ Non-VPN domain (popads.net): │ +│ dig popads.net → system routes to 100.100.100.100 on utun13 │ +│ → passthrough rule: dest IS in <vpn_dns> → NOT matched │ +│ → generic rule: route-to lo0 → rdr → ctrld → profile blocks it ✅ │ +│ │ +│ VPN domain (vpn-internal.example.com): │ +│ dig vpn-internal.example.com → pf intercepts → ctrld │ +│ → VPN DNS match → forward to 100.100.100.100 (group exempt) │ +│ → MagicDNS → upstream 10.0.0.11 on utun13 │ +│ → passthrough rule: dest NOT in <vpn_dns> → MATCHED → passes ✅ │ +│ → 10.0.0.11 returns correct internal answer (10.0.0.113) │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +**Exit mode** (all traffic through VPN): + +When Tailscale exit node is enabled, MagicDNS becomes the system's **default** +resolver (not just supplemental). If we added passthrough rules, ALL DNS would +bypass ctrld — losing profile enforcement. + +Exit mode is detected using two independent signals (either triggers exit mode): + +**1. Default route detection (primary, most reliable):** +Uses `netmon.DefaultRouteInterface()` to check if the system's default route +(0.0.0.0/0) goes through a VPN DNS interface. If `DefaultRouteInterface` matches +a VPN DNS interface name (e.g., utun13), the VPN owns the default route — it's +exit mode. This is the ground truth: the routing table directly reflects whether +all traffic flows through the VPN, regardless of how the VPN presents itself in +scutil. + +**2. 
scutil flag detection (secondary, fallback):** +If the VPN DNS server IP appears in a `scutil --dns` resolver entry that has +**no search domains** and **no Supplemental flag**, it's acting as the system's +default resolver (exit mode). This catches edge cases where the default route +hasn't changed yet but scutil already shows the VPN as the default DNS. + +``` +# Non-exit mode — default route on en0, 100.100.100.100 is Supplemental: +$ route -n get 0.0.0.0 | grep interface + interface: en0 ← physical NIC, not VPN +resolver #1 + search domain[0] : vpn.example.com + nameserver[0] : 100.100.100.100 + flags : Supplemental, Request A records + +# Exit mode — default route on utun13, 100.100.100.100 is default resolver: +$ route -n get 0.0.0.0 | grep interface + interface: utun13 ← VPN interface! +resolver #2 + nameserver[0] : 100.100.100.100 ← MagicDNS is default + flags : Request A records ← no Supplemental! +``` + +In exit mode, NO passthrough rules are generated. pf intercepts all DNS → ctrld +enforces its profile on everything. VPN search domains still resolve correctly +via ctrld's VPN DNS split routing (forwarded to MagicDNS through the group +exemption). + +#### Summary Table + +| Scenario | Passthrough | Profile Enforced | VPN Domains | +|----------|-------------|-----------------|-------------| +| No VPN | None | ✅ All traffic | N/A | +| Split DNS (Tailscale non-exit) | ✅ VPN interface | ✅ Non-VPN domains | ✅ Via MagicDNS | +| Exit mode (Tailscale exit node) | ❌ None | ✅ All traffic | ✅ Via ctrld split routing | +| Windscribe | None (different flow) | ✅ All traffic | N/A | +| Hard intercept | None | ✅ All traffic | ❌ Not forwarded | + +### Nuclear Option (Future) + +If anchor ordering + interface rules prove insufficient, an alternative approach is available: inject DNS intercept rules directly into the **main pf ruleset** (not inside an anchor). 
Main ruleset rules are evaluated before ALL anchors, making them impossible for another app to override without explicitly removing them. This is more invasive and not currently implemented, but documented here as a known escalation path. + +## Known VPN Conflicts + +### F5 BIG-IP APM + +F5 BIG-IP APM VPN is a known source of DNS conflicts with ctrld (a known support scenario). The conflict occurs because F5's VPN client aggressively manages DNS: + +**How the conflict manifests:** + +1. ctrld sets system DNS to `127.0.0.1` / `::1` for local forwarding +2. F5 VPN connects and **overwrites DNS on all interfaces** by prepending its own servers (e.g., `10.20.30.1`, `10.20.30.2`) +3. F5 enforces split DNS patterns (e.g., `*.corp.example.com`) and activates its DNS Relay Proxy (`F5FltSrv.exe` / `F5FltSrv.sys`) +4. ctrld's watchdog detects the change and restores `127.0.0.1` — F5 overwrites again +5. This loop causes intermittent resolution failures, slow responses, and VPN disconnects + +**Why `--intercept-mode dns` solves this:** + +- ctrld no longer modifies interface DNS settings — there is nothing for F5 to overwrite +- WFP (Windows) blocks all outbound DNS except to localhost, so F5's prepended DNS servers are unreachable on port 53 +- F5's DNS Relay Proxy (`F5FltSrv`) becomes irrelevant since no queries reach it +- In `--intercept-mode dns` mode, F5's split DNS domains (e.g., `*.corp.example.com`) are auto-detected from the VPN adapter and forwarded to F5's DNS servers through ctrld's upstream mechanism + +**F5-side mitigations (if `--intercept-mode dns` is not available):** + +- In APM Network Access DNS settings, enable **"Allow Local DNS Servers"** (`AllowLocalDNSServersAccess = 1`) +- Disable **"Enforce DNS Name Resolution Order"** +- Switch to IP-based split tunneling instead of DNS-pattern-based to avoid activating F5's relay proxy +- Update F5 to version 17.x+ which includes DNS handling fixes (see F5 KB K80231353) + +**Additional considerations:** + +- 
CrowdStrike Falcon and similar endpoint security with network inspection can compound the conflict (three-way DNS stomping) +- F5's relay proxy (`F5FltSrv`) performs similar functions to ctrld — they are in direct conflict when both active +- The seemingly random failure pattern is caused by timing-dependent race conditions between ctrld's watchdog, F5's DNS enforcement, and (optionally) endpoint security inspection + +### Cisco AnyConnect + +Cisco AnyConnect exhibits similar DNS override behavior. `--intercept-mode dns` mode prevents the conflict by operating at the packet filter level rather than competing for interface DNS settings. + +### Windscribe Desktop App + +Windscribe's macOS firewall implementation (`FirewallController_mac`) replaces the entire pf ruleset when connecting/disconnecting via `pfctl -f`, which wipes ctrld's anchor references and flushes the pf state table (killing active DoH connections). ctrld handles this with multiple defenses: + +1. **pf watchdog** detects the wipe and restores anchor rules immediately on network change events (or within 30s via periodic check) +2. **DoH transport force-reset** immediately replaces upstream transports when a pf wipe is detected (closing old connections + creating new ones synchronously), reducing the DNS blackout from ~5s to ~100ms +3. **Tunnel interface detection** adds explicit intercept rules for Windscribe's WireGuard interface (e.g., utun420) when it appears +4. **Dual delayed re-checks** (2s + 4s after network event) catch race conditions where VPN apps modify pf rules and DNS settings asynchronously after the initial network change +5. **Deferred pf restore** waits for VPN to finish its pf modifications before restoring ctrld's rules, preventing the reconnect death spiral +6. **Blanket group exemption** (`pass out quick group _ctrld`) ensures all ctrld traffic (including DoH on port 443) passes through VPN firewalls like Windscribe's `block drop all` + +## 7. 
VPN DNS Lifecycle + +When VPN software connects or disconnects, ctrld must track DNS state changes to ensure correct routing and avoid stale state. + +### Network Change Event Flow (macOS) + +``` +Network change detected (netmon callback) + │ + ├─ Immediate actions: + │ ├─ ensurePFAnchorActive() — verify/restore pf anchor references + │ ├─ checkTunnelInterfaceChanges() — detect new/removed VPN interfaces + │ │ ├─ New tunnel → pfStartStabilization() (wait for VPN to finish pf changes) + │ │ └─ Removed tunnel → rebuild anchor immediately (with VPN DNS exemptions) + │ └─ vpnDNS.Refresh() — re-discover VPN DNS from scutil --dns + │ + ├─ Delayed re-check at 2s: + │ ├─ ensurePFAnchorActive() — catch async pf wipes + │ ├─ checkTunnelInterfaceChanges() + │ ├─ InitializeOsResolver() — clear stale DNS from scutil + │ └─ vpnDNS.Refresh() — clear stale VPN DNS routes + │ + └─ Delayed re-check at 4s: + └─ (same as 2s — catches slower VPN teardowns) +``` + +### VPN Connect Sequence + +1. VPN creates tunnel interface (e.g., utun420) +2. Network change fires → `checkTunnelInterfaceChanges()` detects new tunnel +3. **Stabilization mode** activates — suppresses pf restores while VPN modifies rules +4. Stabilization loop polls `pfctl -sr` hash every 1.5s +5. When hash stable for 6s → VPN finished → restore ctrld's pf anchor +6. `vpnDNS.Refresh()` discovers VPN's search domains and DNS servers from `scutil --dns` +7. Anchor rebuild includes VPN DNS exemptions (so ctrld can reach VPN DNS on port 53) + +### VPN Disconnect Sequence + +1. VPN removes tunnel interface +2. Network change fires → `checkTunnelInterfaceChanges()` detects removal +3. Anchor rebuilt immediately (no stabilization needed for removals) +4. VPN app may asynchronously wipe pf rules (`pfctl -f /etc/pf.conf`) +5. VPN app may asynchronously clean up DNS settings from `scutil --dns` +6. **2s delayed re-check**: restores pf anchor if wiped, refreshes OS resolver +7. 
**4s delayed re-check**: catches slower VPN teardowns +8. `vpnDNS.Refresh()` returns empty → `onServersChanged(nil)` clears stale exemptions +9. `InitializeOsResolver()` re-reads `scutil --dns` → clears stale LAN nameservers + +### Key Design Decisions + +- **`buildPFAnchorRules()` receives VPN DNS servers**: All call sites (tunnel rebuild, watchdog restore, stabilization exit) pass `vpnDNS.CurrentServers()` so exemptions are preserved for still-active VPNs. +- **`onServersChanged` called even when server list is empty**: Ensures stale pf exemptions from a previous VPN session are cleaned up on disconnect. +- **OS resolver refresh in delayed re-checks**: VPN apps often finish DNS cleanup 1-3s after the network change event. The delayed `InitializeOsResolver()` call ensures stale LAN nameservers (e.g., a VPN's DNS IP such as 10.255.255.3) don't cause 2s query timeouts. +- **Ordering: tunnel checks → VPN DNS refresh → delayed re-checks**: Ensures anchor rebuilds from tunnel changes include current VPN DNS exemptions. + +## Related + +- F5 BIG-IP APM VPN DNS conflict (a known support scenario) diff --git a/resolver.go b/resolver.go index 914233d..fbd2ad6 100644 --- a/resolver.go +++ b/resolver.go @@ -234,6 +234,79 @@ type publicResponse struct { server string } +// OsResolverNameservers returns the current OS resolver nameservers (host:port format). +// Returns nil if the OS resolver has not been initialized. +func OsResolverNameservers() []string { + resolverMutex.Lock() + r := or + resolverMutex.Unlock() + if r == nil { + return nil + } + var nss []string + if lan := r.lanServers.Load(); lan != nil { + nss = append(nss, *lan...) + } + if pub := r.publicServers.Load(); pub != nil { + nss = append(nss, *pub...) + } + return nss +} + +// AppendOsResolverNameservers adds additional nameservers to the existing OS resolver +// without reinitializing it. This is used for late-arriving nameservers such as AD +// domain controller IPs discovered via background retry. 
+// Returns true if nameservers were actually added. +func AppendOsResolverNameservers(servers []string) bool { + if len(servers) == 0 { + return false + } + resolverMutex.Lock() + defer resolverMutex.Unlock() + if or == nil { + return false + } + + // Collect existing nameservers to avoid duplicates. + existing := make(map[string]bool) + if lan := or.lanServers.Load(); lan != nil { + for _, s := range *lan { + existing[s] = true + } + } + if pub := or.publicServers.Load(); pub != nil { + for _, s := range *pub { + existing[s] = true + } + } + + var added bool + for _, s := range servers { + // Normalize to host:port format. + if _, _, err := net.SplitHostPort(s); err != nil { + s = net.JoinHostPort(s, "53") + } + if existing[s] { + continue + } + existing[s] = true + added = true + + ip, _, _ := net.SplitHostPort(s) + addr, _ := netip.ParseAddr(ip) + if isLanAddr(addr) { + lan := or.lanServers.Load() + newLan := append(append([]string{}, (*lan)...), s) + or.lanServers.Store(&newLan) + } else { + pub := or.publicServers.Load() + newPub := append(append([]string{}, (*pub)...), s) + or.publicServers.Store(&newPub) + } + } + return added +} + // SetDefaultLocalIPv4 updates the stored local IPv4. func SetDefaultLocalIPv4(ip net.IP) { Log(context.Background(), ProxyLogger.Load().Debug(), "SetDefaultLocalIPv4: %s", ip)