feat: introduce DNS intercept mode infrastructure

Add --intercept-mode flag (dns/hard/off) with configuration support,
recovery bypass for captive portals, probe-based interception
verification, VPN DNS coexistence in the proxy layer, and IPv6
loopback listener guard.

Remove standalone mDNSResponder hack files — the port 53 binding
logic is now handled within the intercept mode infrastructure.

Squashed from intercept mode development on v1.0 branch (#497).
This commit is contained in:
Codescribe
2026-03-03 02:06:49 -05:00
committed by Cuong Manh Le
parent 12715e6f24
commit 1e8240bd1c
17 changed files with 1813 additions and 291 deletions

View File

@@ -345,6 +345,16 @@ func run(appCallback *AppCallback, stopCh chan struct{}) {
processLogAndCacheFlags(v, &cfg)
}
// Persist intercept_mode to config when provided via CLI flag on full install.
// This ensures the config file reflects the actual running mode for RMM/MDM visibility.
if interceptMode == "dns" || interceptMode == "hard" {
if cfg.Service.InterceptMode != interceptMode {
cfg.Service.InterceptMode = interceptMode
updated = true
mainLog.Load().Info().Msgf("writing intercept_mode = %q to config", interceptMode)
}
}
if updated {
if err := writeConfigFile(&cfg); err != nil {
notifyExitToLogServer()
@@ -647,7 +657,7 @@ func processCDFlags(cfg *ctrld.Config) (*controld.ResolverConfig, error) {
req := &controld.ResolverConfigRequest{
RawUID: cdUID,
Version: rootCmd.Version,
Metadata: ctrld.SystemMetadata(ctx),
Metadata: ctrld.SystemMetadataRuntime(context.Background()),
}
resolverConfig, err := controld.FetchResolverConfig(req, cdDev)
for {
@@ -901,9 +911,6 @@ func selfCheckStatus(ctx context.Context, s service.Service, sockDir string) (bo
lc := cfg.FirstListener()
addr := net.JoinHostPort(lc.IP, strconv.Itoa(lc.Port))
if needMdnsResponderHack {
addr = "127.0.0.1:53"
}
mainLog.Load().Debug().Msgf("performing listener test, sending queries to %s", addr)
@@ -1116,10 +1123,6 @@ func uninstall(p *prog, s service.Service) {
// Stop already did router.Cleanup and report any error if happens,
// ignoring error here to prevent false positive.
_ = p.router.Cleanup()
// Run mDNS responder cleanup if necessary
doMdnsResponderCleanup()
mainLog.Load().Notice().Msg("Service uninstalled")
return
}
@@ -1227,18 +1230,105 @@ func updateListenerConfig(cfg *ctrld.Config, notifyToLogServerFunc func()) bool
return updated
}
// tryUpdateListenerConfigIntercept picks the listener address used in
// dns-intercept mode on macOS. In that mode pf redirects outbound port-53
// traffic to ctrld's listener, so a non-standard port is acceptable when
// port 53 is already taken (e.g., mDNSResponder holds *:53).
//
// Only the first listener of cfg is inspected and, if needed, modified in place:
//   - A listener whose IP is neither empty nor "0.0.0.0" and whose port is
//     non-zero counts as explicitly configured: that exact address is probed
//     and no fallback is attempted.
//   - Otherwise a missing IP becomes 127.0.0.1 and a missing port becomes 53;
//     if that address cannot be bound, 127.0.0.1:5354 is probed as a fallback.
//
// An address is considered usable when both a UDP and a TCP listener can be
// opened on it; the probe sockets are closed immediately.
//
// updated reports whether the listener config was changed, ok whether a usable
// address was found. When no address is usable and fatal is true, notifyFunc
// is invoked and a fatal log entry is emitted.
func tryUpdateListenerConfigIntercept(cfg *ctrld.Config, notifyFunc func(), fatal bool) (updated, ok bool) {
	const (
		loopbackIP   = "127.0.0.1"
		fallbackPort = 5354
	)

	listener := cfg.FirstListener()
	if listener == nil {
		return false, true
	}

	// Decide "explicit" before filling in defaults, since the defaults below
	// would otherwise make every listener look explicitly configured.
	ipUnset := listener.IP == "" || listener.IP == "0.0.0.0"
	explicit := !ipUnset && listener.Port != 0
	if ipUnset {
		listener.IP = loopbackIP
		updated = true
	}
	if listener.Port == 0 {
		listener.Port = 53
		updated = true
	}

	// canBind probes UDP first, then TCP, releasing each socket right away.
	canBind := func(hostPort string) bool {
		pc, udpErr := net.ListenPacket("udp", hostPort)
		if pc != nil {
			pc.Close()
		}
		ln, tcpErr := net.Listen("tcp", hostPort)
		if ln != nil {
			ln.Close()
		}
		return udpErr == nil && tcpErr == nil
	}

	primary := net.JoinHostPort(listener.IP, strconv.Itoa(listener.Port))
	if canBind(primary) {
		mainLog.Load().Debug().Msgf("DNS intercept: listener available at %s", primary)
		return updated, true
	}
	mainLog.Load().Info().Msgf("DNS intercept: cannot bind %s", primary)

	switch {
	case explicit:
		// User specified an explicit address: do not guess an alternative.
		if fatal {
			notifyFunc()
			mainLog.Load().Fatal().Msgf("DNS intercept: cannot listen on configured address %s", primary)
		}
	case canBind(net.JoinHostPort(loopbackIP, strconv.Itoa(fallbackPort))):
		// Fallback port is free (mDNSResponder likely holds *:53).
		mainLog.Load().Info().Msg("DNS intercept: port 53 unavailable (likely mDNSResponder), using 127.0.0.1:5354")
		listener.IP = loopbackIP
		listener.Port = fallbackPort
		return true, true
	case fatal:
		notifyFunc()
		mainLog.Load().Fatal().Msg("DNS intercept: cannot bind 127.0.0.1:53 or 127.0.0.1:5354")
	}
	return updated, false
}
// tryUpdateListenerConfig tries updating listener config with a working one.
// If fatal is true and there is a listen address conflict, the function
// reports a fatal error.
func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, notifyFunc func(), fatal bool) (updated, ok bool) {
// In intercept mode (macOS), pf redirects all port-53 traffic to ctrld's listener,
// so ctrld can safely listen on a non-standard port. Use a simple two-attempt flow:
// 1. If config has explicit non-default IP:port, use exactly that
// 2. Otherwise: try 127.0.0.1:53, then 127.0.0.1:5354, then fatal
// This bypasses the full cd-mode listener probing loop entirely.
// Check interceptMode (CLI flag) first, then fall back to config value.
// dnsIntercept bool is derived later in prog.run(), but we need to know
// the intercept mode here to select the right listener probing strategy.
im := interceptMode
if im == "" || im == "off" {
im = cfg.Service.InterceptMode
}
if (im == "dns" || im == "hard") && runtime.GOOS == "darwin" {
return tryUpdateListenerConfigIntercept(cfg, notifyFunc, fatal)
}
ok = true
lcc := make(map[string]*listenerConfigCheck)
cdMode := cdUID != ""
nextdnsMode := nextdns != ""
// For Windows server with local Dns server running, we can only try on random local IP.
hasLocalDnsServer := hasLocalDnsServerRunning()
// For Macos with mDNSResponder running on port 53, we must use 0.0.0.0 to prevent conflicting.
needMdnsResponderHack := needMdnsResponderHack
notRouter := router.Name() == ""
isDesktop := ctrld.IsDesktopPlatform()
for n, listener := range cfg.Listener {
@@ -1272,12 +1362,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti
lcc[n].Port = false
}
}
if needMdnsResponderHack {
listener.IP = "0.0.0.0"
listener.Port = 53
lcc[n].IP = false
lcc[n].Port = false
}
updated = updated || lcc[n].IP || lcc[n].Port
}
@@ -1310,9 +1394,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti
// Created listeners will be kept in listeners slice above, and close
// before function finished.
tryListen := func(addr string) error {
if needMdnsResponderHack {
killMdnsResponder()
}
udpLn, udpErr := net.ListenPacket("udp", addr)
if udpLn != nil {
closers = append(closers, udpLn)
@@ -1376,9 +1457,6 @@ func tryUpdateListenerConfig(cfg *ctrld.Config, infoLogger *zerolog.Logger, noti
}
attempts := 0
maxAttempts := 10
if needMdnsResponderHack {
maxAttempts = 1
}
for {
if attempts == maxAttempts {
notifyFunc()
@@ -1889,10 +1967,12 @@ func runningIface(s service.Service) *ifaceResponse {
// doValidateCdRemoteConfig fetches and validates custom config for cdUID.
func doValidateCdRemoteConfig(cdUID string, fatal bool) error {
// Username is only sent during initial provisioning (cdUIDFromProvToken).
// All subsequent calls use lightweight metadata to avoid EDR triggers.
req := &controld.ResolverConfigRequest{
RawUID: cdUID,
Version: rootCmd.Version,
Metadata: ctrld.SystemMetadata(context.Background()),
Metadata: ctrld.SystemMetadataRuntime(context.Background()),
}
rc, err := controld.FetchResolverConfig(req, cdDev)
if err != nil {

View File

@@ -190,6 +190,7 @@ func initRunCmd() *cobra.Command {
_ = runCmd.Flags().MarkHidden("iface")
runCmd.Flags().StringVarP(&cdUpstreamProto, "proto", "", ctrld.ResolverTypeDOH, `Control D upstream type, either "doh" or "doh3"`)
runCmd.Flags().BoolVarP(&rfc1918, "rfc1918", "", false, "Listen on RFC1918 addresses when 127.0.0.1 is the only listener")
runCmd.Flags().StringVarP(&interceptMode, "intercept-mode", "", "", "OS-level DNS interception mode: 'dns' (with VPN split routing) or 'hard' (all DNS through ctrld, no VPN split routing)")
runCmd.FParseErrWhitelist = cobra.FParseErrWhitelist{UnknownFlags: true}
rootCmd.AddCommand(runCmd)
@@ -229,6 +230,14 @@ NOTE: running "ctrld start" without any arguments will start already installed c
setDependencies(sc)
sc.Arguments = append([]string{"run"}, osArgs...)
// Validate --intercept-mode early, before installing the service.
// Without this, a typo like "--intercept-mode fds" would install the service,
// the child process would Fatal() on the invalid value, and the parent would
// then uninstall — confusing and destructive.
if interceptMode != "" && !validInterceptMode(interceptMode) {
mainLog.Load().Fatal().Msgf("invalid --intercept-mode value %q: must be 'off', 'dns', or 'hard'", interceptMode)
}
p := &prog{
router: router.New(&cfg, cdUID != ""),
cfg: &cfg,
@@ -247,6 +256,49 @@ NOTE: running "ctrld start" without any arguments will start already installed c
// Get current running iface, if any.
var currentIface *ifaceResponse
// Handle "ctrld start --intercept-mode dns|hard" on an existing
// service BEFORE the pin check. Adding intercept mode is an enhancement, not
// deactivation, so it doesn't require the deactivation pin. We modify the
// plist/registry directly and restart the service via the OS service manager.
osArgsEarly := os.Args[2:]
if os.Args[1] == "service" {
osArgsEarly = os.Args[3:]
}
osArgsEarly = filterEmptyStrings(osArgsEarly)
interceptOnly := onlyInterceptFlags(osArgsEarly)
svcExists := serviceConfigFileExists()
mainLog.Load().Debug().Msgf("intercept upgrade check: args=%v interceptOnly=%v svcConfigExists=%v interceptMode=%q", osArgsEarly, interceptOnly, svcExists, interceptMode)
if interceptOnly && svcExists {
// Remove any existing intercept flags before applying the new value.
_ = removeServiceFlag("--intercept-mode")
if interceptMode == "off" {
// "off" = remove intercept mode entirely (just the removal above).
mainLog.Load().Notice().Msg("Existing service detected — removing --intercept-mode from service arguments")
} else {
// Add the new mode value.
mainLog.Load().Notice().Msgf("Existing service detected — appending --intercept-mode %s to service arguments", interceptMode)
if err := appendServiceFlag("--intercept-mode"); err != nil {
mainLog.Load().Fatal().Err(err).Msg("failed to append intercept flag to service arguments")
}
if err := appendServiceFlag(interceptMode); err != nil {
mainLog.Load().Fatal().Err(err).Msg("failed to append intercept mode value to service arguments")
}
}
// Stop the service if running (bypasses ctrld pin — this is an
// enhancement, not deactivation). Then fall through to the normal
// startOnly path which handles start, self-check, and reporting.
if isCtrldRunning {
mainLog.Load().Notice().Msg("Stopping service for intercept mode upgrade")
_ = s.Stop()
isCtrldRunning = false
}
startOnly = true
isCtrldInstalled = true
// Fall through to startOnly path below.
}
// If pin code was set, do not allow running start command.
if isCtrldRunning {
if err := checkDeactivationPin(s, nil); isCheckDeactivationPinErr(err) {
@@ -271,20 +323,31 @@ NOTE: running "ctrld start" without any arguments will start already installed c
return
}
if res.OK {
name := res.Name
if iff, err := net.InterfaceByName(name); err == nil {
_, _ = patchNetIfaceName(iff)
name = iff.Name
}
logger := mainLog.Load().With().Str("iface", name).Logger()
logger.Debug().Msg("setting DNS successfully")
if res.All {
// Log that DNS is set for other interfaces.
withEachPhysicalInterfaces(
name,
"set DNS",
func(i *net.Interface) error { return nil },
)
// In intercept mode, show intercept-specific status instead of
// per-interface DNS messages (which are irrelevant).
if res.InterceptMode != "" {
switch res.InterceptMode {
case "hard":
mainLog.Load().Notice().Msg("DNS hard intercept mode active — all DNS traffic intercepted, no VPN split routing")
default:
mainLog.Load().Notice().Msg("DNS intercept mode active — all DNS traffic intercepted via OS packet filter")
}
} else {
name := res.Name
if iff, err := net.InterfaceByName(name); err == nil {
_, _ = patchNetIfaceName(iff)
name = iff.Name
}
logger := mainLog.Load().With().Str("iface", name).Logger()
logger.Debug().Msg("setting DNS successfully")
if res.All {
// Log that DNS is set for other interfaces.
withEachPhysicalInterfaces(
name,
"set DNS",
func(i *net.Interface) error { return nil },
)
}
}
}
}
@@ -344,6 +407,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c
if !startOnly {
startOnly = len(osArgs) == 0
}
// If the user runs "ctrld start" and ctrld is already installed, start the existing service.
if startOnly && isCtrldInstalled {
tryReadingConfigWithNotice(false, true)
@@ -359,10 +423,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c
initInteractiveLogging()
tasks := []task{
{func() error {
doMdnsResponderCleanup()
return nil
}, false, "Cleanup service before installation"},
{func() error {
// Save current DNS so we can restore later.
withEachPhysicalInterfaces("", "saveCurrentStaticDNS", func(i *net.Interface) error {
@@ -378,10 +438,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c
}, false, "Configure service failure actions"},
{s.Start, true, "Start"},
{noticeWritingControlDConfig, false, "Notice writing ControlD config"},
{func() error {
doMdnsResponderHackPostInstall()
return nil
}, false, "Configure service post installation"},
}
mainLog.Load().Notice().Msg("Starting existing ctrld service")
if doTasks(tasks) {
@@ -392,6 +448,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c
os.Exit(1)
}
reportSetDnsOk(sockDir)
// Verify service registration after successful start.
if err := verifyServiceRegistration(); err != nil {
mainLog.Load().Warn().Err(err).Msg("Service registry verification failed")
}
} else {
mainLog.Load().Error().Err(err).Msg("Failed to start existing ctrld service")
os.Exit(1)
@@ -400,7 +460,8 @@ NOTE: running "ctrld start" without any arguments will start already installed c
}
if cdUID != "" {
_ = doValidateCdRemoteConfig(cdUID, true)
// Skip doValidateCdRemoteConfig() here - run command will handle
// validation and config fetch via processCDFlags().
} else if uid := cdUIDFromProvToken(); uid != "" {
cdUID = uid
mainLog.Load().Debug().Msg("using uid from provision token")
@@ -445,10 +506,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c
}
tasks := []task{
{func() error {
doMdnsResponderCleanup()
return nil
}, false, "Cleanup service before installation"},
{s.Stop, false, "Stop"},
{func() error { return doGenerateNextDNSConfig(nextdns) }, true, "Checking config"},
{func() error { return ensureUninstall(s) }, false, "Ensure uninstall"},
@@ -471,10 +528,6 @@ NOTE: running "ctrld start" without any arguments will start already installed c
// Note that startCmd does not actually write the ControlD config; the config file is
// generated after s.Start, so we notify users here for consistency with nextdns mode.
{noticeWritingControlDConfig, false, "Notice writing ControlD config"},
{func() error {
doMdnsResponderHackPostInstall()
return nil
}, false, "Configure service post installation"},
}
mainLog.Load().Notice().Msg("Starting service")
if doTasks(tasks) {
@@ -525,6 +578,10 @@ NOTE: running "ctrld start" without any arguments will start already installed c
os.Exit(1)
}
reportSetDnsOk(sockDir)
// Verify service registration after successful start.
if err := verifyServiceRegistration(); err != nil {
mainLog.Load().Warn().Err(err).Msg("Service registry verification failed")
}
}
},
}
@@ -549,6 +606,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c
startCmd.Flags().BoolVarP(&startOnly, "start_only", "", false, "Do not install new service")
_ = startCmd.Flags().MarkHidden("start_only")
startCmd.Flags().BoolVarP(&rfc1918, "rfc1918", "", false, "Listen on RFC1918 addresses when 127.0.0.1 is the only listener")
startCmd.Flags().StringVarP(&interceptMode, "intercept-mode", "", "", "OS-level DNS interception mode: 'dns' (with VPN split routing) or 'hard' (all DNS through ctrld, no VPN split routing)")
routerCmd := &cobra.Command{
Use: "setup",
@@ -1411,3 +1469,53 @@ func filterEmptyStrings(slice []string) []string {
return s == ""
})
}
// validInterceptMode reports whether mode is one of the recognized
// --intercept-mode values: "off", "dns", or "hard". The comparison is exact
// (case-sensitive, no trimming).
//
// This is the single source of truth for mode validation — used by the early
// start command check, the runtime validation in prog.go, and
// onlyInterceptFlags. Add new modes here to have them recognized everywhere.
func validInterceptMode(mode string) bool {
	return mode == "off" || mode == "dns" || mode == "hard"
}
// onlyInterceptFlags reports whether args consist solely of an intercept mode
// flag ("--intercept-mode <value>" or "--intercept-mode=<value>" with a valid
// value) plus, optionally, the tokens "--iface=auto", "--iface" and "auto".
// At least one intercept mode flag must be present for the result to be true.
//
// It is used to detect "ctrld start --intercept-mode dns" (or "off" to
// disable) on an existing installation, where the intent is to modify the
// intercept flag on the existing service without replacing other arguments.
//
// The iface tokens are tolerated because the startCmdAlias appends
// "--iface=auto" to os.Args when --iface isn't explicitly provided.
func onlyInterceptFlags(args []string) bool {
	const (
		flagName   = "--intercept-mode"
		flagPrefix = flagName + "="
	)

	sawIntercept := false
	for idx := 0; idx < len(args); idx++ {
		current := args[idx]

		// Space-separated form: the following argument carries the mode.
		if current == flagName {
			if idx+1 >= len(args) || !validInterceptMode(args[idx+1]) {
				return false
			}
			sawIntercept = true
			idx++ // consume the value as well
			continue
		}

		// Inline form: --intercept-mode=<value>.
		if strings.HasPrefix(current, flagPrefix) {
			if !validInterceptMode(current[len(flagPrefix):]) {
				return false
			}
			sawIntercept = true
			continue
		}

		switch current {
		case "--iface=auto", "--iface", "auto":
			// Auto-added by startCmdAlias or its value; safe to ignore.
		default:
			return false
		}
	}
	return sawIntercept
}

View File

@@ -0,0 +1,39 @@
//go:build !windows && !darwin
package cli
import (
"fmt"
)
// startDNSIntercept is the stub used on platforms other than Windows and macOS
// (this file is excluded from those builds by its build constraint).
// DNS intercept mode is only available on Windows (via WFP) and macOS (via pf),
// so this implementation performs no work and always returns a non-nil error
// stating that the platform is unsupported.
func (p *prog) startDNSIntercept() error {
return fmt.Errorf("dns intercept: not supported on this platform (only Windows and macOS)")
}
// stopDNSIntercept is a no-op on unsupported platforms: there is nothing to
// tear down, so it always returns nil.
func (p *prog) stopDNSIntercept() error {
return nil
}
// exemptVPNDNSServers is a no-op on unsupported platforms. The exemptions
// argument is ignored and the function always returns nil.
func (p *prog) exemptVPNDNSServers(exemptions []vpnDNSExemption) error {
return nil
}
// ensurePFAnchorActive is a no-op on unsupported platforms and always
// returns false.
// NOTE(review): the meaning of the boolean result is defined by the
// platform-specific implementation, which is not visible here — confirm.
func (p *prog) ensurePFAnchorActive() bool {
return false
}
// checkTunnelInterfaceChanges is a no-op on unsupported platforms and always
// returns false.
// NOTE(review): the meaning of the boolean result is defined by the
// platform-specific implementation, which is not visible here — confirm.
func (p *prog) checkTunnelInterfaceChanges() bool {
return false
}
// scheduleDelayedRechecks is a no-op on unsupported platforms; it returns
// immediately without scheduling anything.
func (p *prog) scheduleDelayedRechecks() {}
// pfInterceptMonitor is a no-op on unsupported platforms; it returns
// immediately, so callers that launch it in a goroutine get a goroutine that
// exits at once.
func (p *prog) pfInterceptMonitor() {}

View File

@@ -101,19 +101,10 @@ func (p *prog) serveDNS(listenerNum string) error {
_ = w.WriteMsg(answer)
return
}
// When mDNSResponder hack has been done, ctrld was listening on 0.0.0.0:53, but only requests
// to 127.0.0.1:53 are accepted. Since binding to 0.0.0.0 will make the IP info of the local address
// hidden (appeared as [::]), we checked for requests originated from 127.0.0.1 instead.
if needMdnsResponderHack && !strings.HasPrefix(w.RemoteAddr().String(), "127.0.0.1:") {
answer := new(dns.Msg)
answer.SetRcode(m, dns.RcodeRefused)
_ = w.WriteMsg(answer)
return
}
listenerConfig := p.cfg.Listener[listenerNum]
reqId := requestID()
ctx := context.WithValue(context.Background(), ctrld.ReqIdCtxKey{}, reqId)
if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) {
if !listenerConfig.AllowWanClients && isWanClient(w.RemoteAddr()) && !isIPv6LoopbackListener(w.LocalAddr()) {
ctrld.Log(ctx, mainLog.Load().Debug(), "query refused, listener does not allow WAN clients: %s", w.RemoteAddr().String())
answer := new(dns.Msg)
answer.SetRcode(m, dns.RcodeRefused)
@@ -135,6 +126,23 @@ func (p *prog) serveDNS(listenerNum string) error {
return
}
// Interception probe: if we're expecting a probe query and this matches,
// signal the prober and respond NXDOMAIN. Used by both macOS pf probes
// (_pf-probe-*) and Windows NRPT probes (_nrpt-probe-*) to verify that
// DNS interception is actually routing queries to ctrld's listener.
if probeID, ok := p.pfProbeExpected.Load().(string); ok && probeID != "" && domain == probeID {
if chPtr, ok := p.pfProbeCh.Load().(*chan struct{}); ok && chPtr != nil {
select {
case *chPtr <- struct{}{}:
default:
}
}
answer := new(dns.Msg)
answer.SetRcode(m, dns.RcodeNameError) // NXDOMAIN
_ = w.WriteMsg(answer)
return
}
if _, ok := p.cacheFlushDomainsMap[domain]; ok && p.cache != nil {
p.cache.Purge()
ctrld.Log(ctx, mainLog.Load().Debug(), "received query %q, local cache is purged", domain)
@@ -201,7 +209,7 @@ func (p *prog) serveDNS(listenerNum string) error {
g, ctx := errgroup.WithContext(context.Background())
for _, proto := range []string{"udp", "tcp"} {
proto := proto
if needLocalIPv6Listener() {
if needLocalIPv6Listener(p.cfg.Service.InterceptMode) {
g.Go(func() error {
s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, handler)
defer s.Shutdown()
@@ -430,6 +438,24 @@ func (p *prog) proxyLanHostnameQuery(ctx context.Context, msg *dns.Msg) *dns.Msg
}
func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
// DNS intercept recovery bypass: forward all queries to OS/DHCP resolver.
// This runs when upstreams are unreachable (e.g., captive portal network)
// and allows the network's DNS to handle authentication pages.
if dnsIntercept && p.recoveryBypass.Load() {
ctrld.Log(ctx, mainLog.Load().Debug(), "Recovery bypass active: forwarding to OS resolver")
resolver, err := ctrld.NewResolver(osUpstreamConfig)
if err == nil {
resolveCtx, cancel := osUpstreamConfig.Context(ctx)
defer cancel()
answer, _ := resolver.Resolve(resolveCtx, req.msg)
if answer != nil {
return &proxyResponse{answer: answer}
}
}
ctrld.Log(ctx, mainLog.Load().Debug(), "OS resolver failed during recovery bypass")
// Fall through to normal flow as last resort
}
var staleAnswer *dns.Msg
upstreams := req.ufr.upstreams
serveStaleCache := p.cache != nil && p.cfg.Service.CacheServeStale
@@ -442,9 +468,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
// However, on Active Directory Domain Controller, where it has local DNS server
// running and listening on local addresses, these local addresses must be used
// as nameservers, so queries for ADDC could be resolved as expected.
if p.isAdDomainQuery(req.msg) {
if p.isAdDomainQuery(req.msg) && p.hasLocalDNS {
ctrld.Log(ctx, mainLog.Load().Debug(),
"AD domain query detected for %s in domain %s",
"AD domain query detected for %s in domain %s, using local DNS server",
req.msg.Question[0].Name, p.adDomain)
upstreamConfigs = []*ctrld.UpstreamConfig{localUpstreamConfig}
upstreams = []string{upstreamOSLocal}
@@ -515,6 +541,92 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
staleAnswer = answer
}
}
// VPN DNS split routing (only in dns-intercept mode)
if dnsIntercept && p.vpnDNS != nil && len(req.msg.Question) > 0 {
domain := req.msg.Question[0].Name
if vpnServers := p.vpnDNS.UpstreamForDomain(domain); len(vpnServers) > 0 {
ctrld.Log(ctx, mainLog.Load().Debug(), "VPN DNS route matched for domain %s, using servers: %v", domain, vpnServers)
for _, server := range vpnServers {
upstreamConfig := p.vpnDNS.upstreamConfigFor(server)
ctrld.Log(ctx, mainLog.Load().Debug(), "Querying VPN DNS server: %s", server)
dnsResolver, err := ctrld.NewResolver(upstreamConfig)
if err != nil {
ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create VPN DNS resolver")
continue
}
resolveCtx, cancel := upstreamConfig.Context(ctx)
answer, err := dnsResolver.Resolve(resolveCtx, req.msg)
cancel()
if answer != nil {
ctrld.Log(ctx, mainLog.Load().Debug(), "VPN DNS query successful")
if p.cache != nil {
ttl := 60 * time.Second
if len(answer.Answer) > 0 {
ttl = time.Duration(answer.Answer[0].Header().Ttl) * time.Second
}
for _, upstream := range upstreams {
p.cache.Add(dnscache.NewKey(req.msg, upstream), dnscache.NewValue(answer, time.Now().Add(ttl)))
}
}
return &proxyResponse{answer: answer}
}
ctrld.Log(ctx, mainLog.Load().Debug().Err(err), "VPN DNS server %s failed", server)
}
ctrld.Log(ctx, mainLog.Load().Debug(), "All VPN DNS servers failed, falling back to normal upstreams")
}
}
// Domain-less VPN DNS fallback: when a query is going to upstream.os via a
// split-rule (matched policy) and we have VPN DNS servers with no associated
// domains, try those servers for this query. This handles cases like F5 VPN
// where the VPN doesn't advertise DNS search domains but its DNS servers
// know the internal zones referenced by split-rules (e.g., *.provisur.local).
// These servers are NOT used for general OS resolver queries to avoid
// polluting captive portal / DHCP flows.
if dnsIntercept && p.vpnDNS != nil && req.ufr.matched &&
len(upstreams) > 0 && upstreams[0] == upstreamOS &&
len(req.msg.Question) > 0 && !p.isAdDomainQuery(req.msg) {
if dlServers := p.vpnDNS.DomainlessServers(); len(dlServers) > 0 {
domain := req.msg.Question[0].Name
ctrld.Log(ctx, mainLog.Load().Debug(),
"Split-rule query %s going to upstream.os, trying %d domain-less VPN DNS servers first: %v",
domain, len(dlServers), dlServers)
for _, server := range dlServers {
upstreamCfg := p.vpnDNS.upstreamConfigFor(server)
ctrld.Log(ctx, mainLog.Load().Debug(), "Querying domain-less VPN DNS server: %s", server)
dnsResolver, err := ctrld.NewResolver(upstreamCfg)
if err != nil {
ctrld.Log(ctx, mainLog.Load().Error().Err(err), "failed to create domain-less VPN DNS resolver")
continue
}
resolveCtx, cancel := upstreamCfg.Context(ctx)
answer, err := dnsResolver.Resolve(resolveCtx, req.msg)
cancel()
if answer != nil && answer.Rcode == dns.RcodeSuccess {
ctrld.Log(ctx, mainLog.Load().Debug(),
"Domain-less VPN DNS server %s answered %s successfully", server, domain)
return &proxyResponse{answer: answer}
}
if answer != nil {
ctrld.Log(ctx, mainLog.Load().Debug(),
"Domain-less VPN DNS server %s returned %s for %s, trying next",
server, dns.RcodeToString[answer.Rcode], domain)
} else {
ctrld.Log(ctx, mainLog.Load().Debug().Err(err),
"Domain-less VPN DNS server %s failed for %s", server, domain)
}
}
ctrld.Log(ctx, mainLog.Load().Debug(),
"All domain-less VPN DNS servers failed for %s, falling back to OS resolver", domain)
}
}
resolve1 := func(upstream string, upstreamConfig *ctrld.UpstreamConfig, msg *dns.Msg) (*dns.Msg, error) {
ctrld.Log(ctx, mainLog.Load().Debug(), "sending query to %s: %s", upstream, upstreamConfig.Name)
dnsResolver, err := ctrld.NewResolver(upstreamConfig)
@@ -780,10 +892,30 @@ func ttlFromMsg(msg *dns.Msg) uint32 {
return 0
}
func needLocalIPv6Listener() bool {
func needLocalIPv6Listener(interceptMode string) bool {
if !ctrldnet.SupportsIPv6ListenLocal() {
mainLog.Load().Debug().Msg("IPv6 listener: not needed — SupportsIPv6ListenLocal() is false")
return false
}
// On Windows, there's no easy way for disabling/removing IPv6 DNS resolver, so we check whether we can
// listen on ::1, then spawn a listener for receiving DNS requests.
return ctrldnet.SupportsIPv6ListenLocal() && runtime.GOOS == "windows"
if runtime.GOOS == "windows" {
mainLog.Load().Debug().Msg("IPv6 listener: enabled (Windows)")
return true
}
// On macOS in intercept mode, pf can't redirect IPv6 DNS to an IPv4 listener (cross-AF rdr
// not supported), and blocking IPv6 DNS causes ~1s timeouts (BSD doesn't deliver ICMP errors
// to unconnected UDP sockets). Listening on [::1] lets us intercept IPv6 DNS directly.
//
// NOTE: We accept the intercept mode string as a parameter instead of reading the global
// dnsIntercept bool, because dnsIntercept is derived later in prog.run() — after the
// listener goroutines are already spawned. Same pattern as the port 5354 fallback fix (MR !860).
if (interceptMode == "dns" || interceptMode == "hard") && runtime.GOOS == "darwin" {
mainLog.Load().Debug().Msg("IPv6 listener: enabled (macOS intercept mode)")
return true
}
mainLog.Load().Debug().Str("os", runtime.GOOS).Str("interceptMode", interceptMode).Msg("IPv6 listener: not needed")
return false
}
// ipAndMacFromMsg extracts IP and MAC information included in a DNS message, if any.
@@ -863,9 +995,6 @@ func runDNSServer(addr, network string, handler dns.Handler) (*dns.Server, <-cha
errCh := make(chan error)
go func() {
defer close(errCh)
if needMdnsResponderHack {
killMdnsResponder()
}
if err := s.ListenAndServe(); err != nil {
s.NotifyStartedFunc()
mainLog.Load().Error().Err(err).Msgf("could not listen and serve on: %s", s.Addr)
@@ -928,12 +1057,30 @@ func (p *prog) getClientInfo(remoteIP string, msg *dns.Msg) *ctrld.ClientInfo {
} else {
ci.Self = p.queryFromSelf(ci.IP)
}
// In DNS intercept mode, ALL queries are from the local machine — pf/WFP
// intercepts outbound DNS and redirects to ctrld. The source IP may be a
// virtual interface (Tailscale, VPN) that has no ARP/MAC entry, causing
// missing x-cd-mac, x-cd-host, and x-cd-os headers. Force Self=true and
// populate from the primary physical interface info.
if dnsIntercept && !ci.Self {
ci.Self = true
}
// If this is a query from self, but ci.IP is not a loopback IP,
// try using the hostname mapping for the loopback IP, if present.
if ci.Self {
if name := p.ciTable.LocalHostname(); name != "" {
ci.Hostname = name
}
// If MAC is still empty (e.g., query arrived via virtual interface IP
// like Tailscale), fall back to the loopback MAC mapping which addSelf()
// populates from the primary physical interface.
if ci.Mac == "" {
if mac := p.ciTable.LookupMac("127.0.0.1"); mac != "" {
ci.Mac = mac
}
}
}
p.spoofLoopbackIpInClientInfo(ci)
return ci
@@ -975,7 +1122,7 @@ func (p *prog) doSelfUninstall(answer *dns.Msg) {
req := &controld.ResolverConfigRequest{
RawUID: cdUID,
Version: rootCmd.Version,
Metadata: ctrld.SystemMetadata(context.Background()),
Metadata: ctrld.SystemMetadataRuntime(context.Background()),
}
_, err := controld.FetchResolverConfig(req, cdDev)
logger.Debug().Msg("maximum number of refused queries reached, checking device status")
@@ -1169,6 +1316,18 @@ func isWanClient(na net.Addr) bool {
!tsaddr.CGNATRange().Contains(ip)
}
// isIPv6LoopbackListener reports whether the listener address na is [::1].
// The address is taken from na.String() and must parse as an "ip:port" pair;
// anything that does not parse yields false.
//
// The [::1] listener only serves locally-redirected traffic (via pf on macOS
// or system DNS on Windows), so queries arriving on it are always from this
// machine — even when the source IP is a global IPv6 address (pf preserves the
// original source IP during rdr).
func isIPv6LoopbackListener(na net.Addr) bool {
	addrPort, err := netip.ParseAddrPort(na.String())
	if err != nil {
		return false
	}
	return addrPort.Addr() == netip.IPv6Loopback()
}
// resolveInternalDomainTestQuery resolves internal test domain query, returning the answer to the caller.
func resolveInternalDomainTestQuery(ctx context.Context, domain string, m *dns.Msg) *dns.Msg {
ctrld.Log(ctx, mainLog.Load().Debug(), "internal domain test query")
@@ -1294,6 +1453,65 @@ func (p *prog) monitorNetworkChanges() error {
mainLog.Load().Debug().Msg("Ignoring interface change - no valid interfaces affected")
// check if the default IPs are still on an interface that is up
ValidateDefaultLocalIPsFromDelta(delta.New)
// Even minor interface changes can trigger macOS pf reloads — verify anchor.
// We check immediately AND schedule delayed re-checks (2s + 4s) to catch
// programs like Windscribe that modify pf rules and DNS settings
// asynchronously after the network change event fires.
if dnsIntercept && p.dnsInterceptState != nil {
if !p.pfStabilizing.Load() {
p.ensurePFAnchorActive()
}
// Check tunnel interfaces unconditionally — it decides internally
// whether to enter stabilization or rebuild immediately.
p.checkTunnelInterfaceChanges()
// Schedule delayed re-checks to catch async VPN teardown changes.
// These also refresh the OS resolver and VPN DNS routes.
p.scheduleDelayedRechecks()
// Detect interface appearance/disappearance — hypervisors (Parallels,
// VMware, VirtualBox) reload pf when creating/destroying virtual network
// interfaces, which can corrupt pf's internal translation state. The rdr
// rules survive in text form (watchdog says "intact") but stop evaluating.
// Spawn an async monitor that probes pf interception with backoff and
// forces a full pf reload if broken.
if delta.Old != nil {
interfaceChanged := false
var changedIface string
for ifaceName := range delta.Old.Interface {
if ifaceName == "lo0" {
continue
}
if _, exists := delta.New.Interface[ifaceName]; !exists {
interfaceChanged = true
changedIface = ifaceName
break
}
}
if !interfaceChanged {
for ifaceName := range delta.New.Interface {
if ifaceName == "lo0" {
continue
}
if _, exists := delta.Old.Interface[ifaceName]; !exists {
interfaceChanged = true
changedIface = ifaceName
break
}
}
}
if interfaceChanged {
mainLog.Load().Info().Str("interface", changedIface).
Msg("DNS intercept: interface appeared/disappeared — starting interception probe monitor")
go p.pfInterceptMonitor()
}
}
}
// Refresh VPN DNS on tunnel interface changes (e.g., Tailscale connect/disconnect)
// even though the physical interface didn't change. Runs after tunnel checks
// so the pf anchor rebuild includes current VPN DNS exemptions.
if dnsIntercept && p.vpnDNS != nil {
p.vpnDNS.Refresh(true)
}
return
}
@@ -1367,6 +1585,26 @@ func (p *prog) monitorNetworkChanges() error {
if router.Name() == "" {
p.handleRecovery(RecoveryReasonNetworkChange)
}
// After network changes, verify our pf anchor is still active and
// refresh VPN DNS state. Order matters: tunnel checks first (may rebuild
// anchor), then VPN DNS refresh (updates exemptions in anchor), then
// delayed re-checks for async VPN teardown.
if dnsIntercept && p.dnsInterceptState != nil {
if !p.pfStabilizing.Load() {
p.ensurePFAnchorActive()
}
// Check tunnel interfaces unconditionally — it decides internally
// whether to enter stabilization or rebuild immediately.
p.checkTunnelInterfaceChanges()
// Refresh VPN DNS routes — runs after tunnel checks so the anchor
// rebuild includes current VPN DNS exemptions.
if p.vpnDNS != nil {
p.vpnDNS.Refresh(true)
}
// Schedule delayed re-checks to catch async VPN teardown changes.
p.scheduleDelayedRechecks()
}
})
mon.Start()
@@ -1491,22 +1729,57 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
p.recoveryCancel = cancel
p.recoveryCancelMu.Unlock()
// Immediately remove our DNS settings from the interface.
// set recoveryRunning to true to prevent watchdogs from putting the listener back on the interface
p.recoveryRunning.Store(true)
// we do not want to restore any static DNS settings
// we must try to get the DHCP values, any static DNS settings
// will be appended to nameservers from the saved interface values
p.resetDNS(false, false)
// For an OS failure, reinitialize OS resolver nameservers immediately.
if reason == RecoveryReasonOSFailure {
mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers")
ns := ctrld.InitializeOsResolver(true)
if len(ns) == 0 {
mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values")
// In DNS intercept mode, don't tear down WFP/pf filters.
// Instead, enable recovery bypass so proxy() forwards queries to
// the OS/DHCP resolver. This handles captive portal authentication
// without the overhead of filter teardown/rebuild.
if dnsIntercept && p.dnsInterceptState != nil {
p.recoveryBypass.Store(true)
mainLog.Load().Info().Msg("DNS intercept recovery: enabling DHCP bypass (filters stay active)")
// Reinitialize OS resolver to discover DHCP servers on the new network.
mainLog.Load().Debug().Msg("DNS intercept recovery: discovering DHCP nameservers")
dhcpServers := ctrld.InitializeOsResolver(true)
if len(dhcpServers) == 0 {
mainLog.Load().Warn().Msg("DNS intercept recovery: no DHCP nameservers found")
} else {
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
mainLog.Load().Info().Msgf("DNS intercept recovery: found DHCP nameservers: %v", dhcpServers)
}
// Exempt DHCP nameservers from intercept filters so the OS resolver
// can actually reach them on port 53.
if len(dhcpServers) > 0 {
// Build exemptions without an Interface — DHCP servers are not VPN-specific,
// so they only generate group-scoped pf rules (ctrld process only).
exemptions := make([]vpnDNSExemption, 0, len(dhcpServers))
for _, s := range dhcpServers {
host := s
if h, _, err := net.SplitHostPort(s); err == nil {
host = h
}
exemptions = append(exemptions, vpnDNSExemption{Server: host})
}
mainLog.Load().Info().Msgf("DNS intercept recovery: exempting DHCP nameservers from filters: %v", exemptions)
if err := p.exemptVPNDNSServers(exemptions); err != nil {
mainLog.Load().Warn().Err(err).Msg("DNS intercept recovery: failed to exempt DHCP nameservers — recovery queries may fail")
}
}
} else {
// Traditional flow: remove DNS settings to expose DHCP nameservers
p.resetDNS(false, false)
// For an OS failure, reinitialize OS resolver nameservers immediately.
if reason == RecoveryReasonOSFailure {
mainLog.Load().Debug().Msg("OS resolver failure detected; reinitializing OS resolver nameservers")
ns := ctrld.InitializeOsResolver(true)
if len(ns) == 0 {
mainLog.Load().Warn().Msg("No nameservers found for OS resolver; using existing values")
} else {
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
}
}
}
@@ -1527,23 +1800,46 @@ func (p *prog) handleRecovery(reason RecoveryReason) {
// reset the upstream failure count and down state
p.um.reset(recovered)
// For network changes we also reinitialize the OS resolver.
if reason == RecoveryReasonNetworkChange {
ns := ctrld.InitializeOsResolver(true)
if len(ns) == 0 {
mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values")
} else {
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
// In DNS intercept mode, just disable the bypass — filters are still active.
if dnsIntercept && p.dnsInterceptState != nil {
p.recoveryBypass.Store(false)
mainLog.Load().Info().Msg("DNS intercept recovery complete: disabling DHCP bypass, resuming normal flow")
// Refresh VPN DNS routes in case VPN state changed during recovery.
if p.vpnDNS != nil {
p.vpnDNS.Refresh(true)
}
// Reinitialize OS resolver for the recovered state.
if reason == RecoveryReasonNetworkChange {
ns := ctrld.InitializeOsResolver(true)
if len(ns) == 0 {
mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values")
} else {
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
}
}
p.recoveryRunning.Store(false)
} else {
// For network changes we also reinitialize the OS resolver.
if reason == RecoveryReasonNetworkChange {
ns := ctrld.InitializeOsResolver(true)
if len(ns) == 0 {
mainLog.Load().Warn().Msg("No nameservers found for OS resolver during network-change recovery; using existing values")
} else {
mainLog.Load().Info().Msgf("Reinitialized OS resolver with nameservers: %v", ns)
}
}
// Apply our DNS settings back and log the interface state.
p.setDNS()
p.logInterfacesState()
// allow watchdogs to put the listener back on the interface if its changed for any reason
p.recoveryRunning.Store(false)
}
// Apply our DNS settings back and log the interface state.
p.setDNS()
p.logInterfacesState()
// allow watchdogs to put the listener back on the interface if its changed for any reason
p.recoveryRunning.Store(false)
// Clear the recovery cancellation for a clean slate.
p.recoveryCancelMu.Lock()
p.recoveryCancel = nil

View File

@@ -22,15 +22,15 @@ func Test_wildcardMatches(t *testing.T) {
domain string
match bool
}{
{"domain - prefix parent should not match", "*.windscribe.com", "windscribe.com", false},
{"domain - prefix", "*.windscribe.com", "anything.windscribe.com", true},
{"domain - prefix not match other s", "*.windscribe.com", "example.com", false},
{"domain - prefix not match s in name", "*.windscribe.com", "wwindscribe.com", false},
{"domain - suffix", "suffix.*", "suffix.windscribe.com", true},
{"domain - suffix not match other", "suffix.*", "suffix1.windscribe.com", false},
{"domain - both", "suffix.*.windscribe.com", "suffix.anything.windscribe.com", true},
{"domain - both not match", "suffix.*.windscribe.com", "suffix1.suffix.windscribe.com", false},
{"domain - case-insensitive", "*.WINDSCRIBE.com", "anything.windscribe.com", true},
{"domain - prefix parent should not match", "*.example.com", "example.com", false},
{"domain - prefix", "*.example.com", "anything.example.com", true},
{"domain - prefix not match other s", "*.example.com", "other.org", false},
{"domain - prefix not match s in name", "*.example.com", "eexample.com", false},
{"domain - suffix", "suffix.*", "suffix.example.com", true},
{"domain - suffix not match other", "suffix.*", "suffix1.example.com", false},
{"domain - both", "suffix.*.example.com", "suffix.anything.example.com", true},
{"domain - both not match", "suffix.*.example.com", "suffix1.suffix.example.com", false},
{"domain - case-insensitive", "*.EXAMPLE.com", "anything.example.com", true},
{"mac - prefix", "*:98:05:b4:2b", "d4:67:98:05:b4:2b", true},
{"mac - prefix not match other s", "*:98:05:b4:2b", "0d:ba:54:09:94:2c", false},
{"mac - prefix not match s in name", "*:98:05:b4:2b", "e4:67:97:05:b4:2b", false},
@@ -57,9 +57,9 @@ func Test_canonicalName(t *testing.T) {
domain string
canonical string
}{
{"fqdn to canonical", "windscribe.com.", "windscribe.com"},
{"already canonical", "windscribe.com", "windscribe.com"},
{"case insensitive", "Windscribe.Com.", "windscribe.com"},
{"fqdn to canonical", "example.com.", "example.com"},
{"already canonical", "example.com", "example.com"},
{"case insensitive", "Example.Com.", "example.com"},
}
for _, tc := range tests {

View File

@@ -1,7 +1,9 @@
package cli
import (
"encoding/hex"
"io"
"net"
"os"
"path/filepath"
"sync/atomic"
@@ -40,6 +42,9 @@ var (
cleanup bool
startOnly bool
rfc1918 bool
interceptMode string // "", "dns", or "hard" — set via --intercept-mode flag or config
dnsIntercept bool // derived: interceptMode == "dns" || interceptMode == "hard"
hardIntercept bool // derived: interceptMode == "hard"
mainLog atomic.Pointer[zerolog.Logger]
consoleWriter zerolog.ConsoleWriter
@@ -59,6 +64,16 @@ func init() {
}
func Main() {
// Fast path for pf interception probe subprocess. This runs before cobra
// initialization to minimize startup time. The parent process spawns us with
// "pf-probe-send <host> <hex-dns-packet>" and a non-_ctrld GID so pf
// intercepts the DNS query. If pf rdr is working, the query reaches ctrld's
// listener; if not, it goes to the real DNS server and ctrld detects the miss.
if len(os.Args) >= 4 && os.Args[1] == "pf-probe-send" {
pfProbeSend(os.Args[2], os.Args[3])
return
}
ctrld.InitConfig(v, "ctrld")
initCLI()
if err := rootCmd.Execute(); err != nil {
@@ -189,3 +204,25 @@ func initCache() {
cfg.Service.CacheSize = 4096
}
}
// pfProbeSend is the body of the "pf-probe-send" subprocess: it decodes a
// hex-encoded, pre-built DNS query and sends it over UDP to host on port 53.
// probePFIntercept() invokes it with a non-_ctrld GID so that pf interception
// applies to the query.
//
// The process exits with status 1 when the packet cannot be decoded or the
// UDP socket cannot be opened. Send and receive errors are ignored on purpose;
// only the attempt to send matters.
//
// Usage: ctrld pf-probe-send <host> <hex-encoded-dns-packet>
func pfProbeSend(host, hexPacket string) {
	payload, decodeErr := hex.DecodeString(hexPacket)
	if decodeErr != nil {
		os.Exit(1)
	}

	target := net.JoinHostPort(host, "53")
	udpConn, dialErr := net.DialTimeout("udp", target, time.Second)
	if dialErr != nil {
		os.Exit(1)
	}
	defer udpConn.Close()

	// Bound both the write and the read below to one second in total.
	deadline := time.Now().Add(time.Second)
	udpConn.SetDeadline(deadline)

	_, _ = udpConn.Write(payload)

	// Wait for a reply (or the deadline); its content is irrelevant.
	reply := make([]byte, 512)
	_, _ = udpConn.Read(reply)
}

View File

@@ -1,154 +0,0 @@
package cli
import (
"bufio"
"errors"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"tailscale.com/net/netmon"
)
// On macOS, the system daemon mDNSResponder (used for proxy/mDNS/Bonjour discovery)
// listens on UDP and TCP port 53. That conflicts with ctrld when it needs to
// run a DNS proxy on port 53. The kernel does not allow two processes to bind
// the same address/port, so ctrld would fail with "address already in use" if we
// did nothing.
//
// If ctrld started before mDNSResponder and listened only on 127.0.0.1, mDNSResponder
// would bind port 53 on other interfaces, so system processes would use it as the
// DNS resolver instead of ctrld, leading to inconsistent behavior.
//
// This file implements a Darwin-only workaround:
//
// - We detect at startup whether mDNSResponder is using port 53 (or a
// persisted marker file exists from a previous run).
// - When the workaround is active, we force the listener to 0.0.0.0:53 and,
// before binding, run killall mDNSResponder so that ctrld can bind to port 53.
// - We use SO_REUSEPORT (see listener setup) so that the socket can be bound
// even when the port was recently used.
// - On install we create a marker file in the user's home directory so that
// the workaround is applied on subsequent starts; on uninstall we remove
// that file and bounce the en0 interface to restore normal mDNSResponder
// behavior.
//
// Without this, users on macOS would be unable to run ctrld as the system DNS
// on port 53 when mDNSResponder is active.
var (
// needMdnsResponderHack records whether the mDNSResponder port-53 workaround
// is needed. It is computed once, at package initialization, by
// mDNSResponderHack().
needMdnsResponderHack = mDNSResponderHack()
// mDNSResponderHackFilename is the base name of the marker file;
// mDNSResponderFile() joins it with the user's home directory.
mDNSResponderHackFilename = ".mdnsResponderHack"
)
// mDNSResponderHack reports whether the mDNSResponder port-53 workaround
// should be applied. It returns true when the marker file exists as a regular
// file, or when the lsof output for port 53 lists an mDNSResponder process.
// A failing lsof invocation is treated as "workaround not needed".
func mDNSResponderHack() bool {
	markerInfo, statErr := os.Stat(mDNSResponderFile())
	if statErr == nil && markerInfo.Mode().IsRegular() {
		return true
	}

	lsofOut, lsofErr := lsofCheckPort53()
	if lsofErr != nil {
		return false
	}
	return isMdnsResponderListeningPort53(strings.NewReader(lsofOut))
}
// mDNSResponderFile returns the path of the mDNSResponder marker file inside
// the user's home directory. It returns the empty string when the home
// directory cannot be determined or is empty.
func mDNSResponderFile() string {
	home, err := userHomeDir()
	if err != nil || home == "" {
		return ""
	}
	return filepath.Join(home, mDNSResponderHackFilename)
}
// doMdnsResponderCleanup removes the mDNSResponder marker file and then
// bounces a network interface (ifconfig down, then up).
//
// The interface is the default-route interface when it can be determined,
// otherwise "en0". Nothing happens when the marker path is unknown or the
// marker is not an existing regular file. Failures to remove the file or to
// toggle the interface are logged and do not stop the remaining steps.
func doMdnsResponderCleanup() {
	fn := mDNSResponderFile()
	if fn == "" {
		return
	}
	if st, err := os.Stat(fn); err != nil || !st.Mode().IsRegular() {
		return
	}
	if err := os.Remove(fn); err != nil {
		mainLog.Load().Error().Err(err).Msg("failed to remove mDNSResponder hack file")
	}

	// Fall back to en0 when no usable default-route interface is reported.
	ifName := "en0"
	if din, err := netmon.DefaultRouteInterface(); err == nil && din != "" {
		ifName = din
	}
	// Log the interface actually used; it is not necessarily en0.
	if err := exec.Command("ifconfig", ifName, "down").Run(); err != nil {
		mainLog.Load().Error().Err(err).Msgf("failed to disable %s", ifName)
	}
	if err := exec.Command("ifconfig", ifName, "up").Run(); err != nil {
		mainLog.Load().Error().Err(err).Msgf("failed to enable %s", ifName)
	}
}
// doMdnsResponderHackPostInstall creates (or truncates) the marker file when
// the mDNSResponder workaround is needed. It does nothing when the workaround
// is not needed or the marker path is unknown. Failures are logged as
// warnings; success is logged at debug level.
func doMdnsResponderHackPostInstall() {
	if !needMdnsResponderHack {
		return
	}
	markerPath := mDNSResponderFile()
	if markerPath == "" {
		return
	}

	marker, err := os.OpenFile(markerPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0400)
	if err != nil {
		mainLog.Load().Warn().Err(err).Msgf("Could not create %s", markerPath)
		return
	}
	if err := marker.Close(); err != nil {
		mainLog.Load().Warn().Err(err).Msgf("Could not close %s", markerPath)
		return
	}
	mainLog.Load().Debug().Msgf("Created %s", markerPath)
}
// killMdnsResponder attempts to terminate the mDNSResponder process by running the "killall" command multiple times.
// Logs any accumulated errors if the attempts to terminate the process fail.
func killMdnsResponder() {
numAttempts := 10
errs := make([]error, 0, numAttempts)
for range numAttempts {
if err := exec.Command("killall", "mDNSResponder").Run(); err != nil {
// Exit code 1 means the process not found, do not log it.
if !strings.Contains(err.Error(), "exit status 1") {
errs = append(errs, err)
}
}
}
if len(errs) > 0 {
mainLog.Load().Debug().Err(errors.Join(errs...)).Msg("failed to kill mDNSResponder")
}
}
// lsofCheckPort53 runs "lsof +c0 -i:53 -n -P" and returns its combined
// stdout/stderr as a string. When the command fails, the output is discarded
// and the error is returned with an empty string.
func lsofCheckPort53() (string, error) {
	raw, err := exec.Command("lsof", "+c0", "-i:53", "-n", "-P").CombinedOutput()
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// isMdnsResponderListeningPort53 scans r line by line and reports whether any
// line's first whitespace-separated field equals "mDNSResponder", compared
// case-insensitively. Blank lines are skipped.
func isMdnsResponderListeningPort53(r io.Reader) bool {
	const procName = "mDNSResponder"
	for sc := bufio.NewScanner(r); sc.Scan(); {
		cols := strings.Fields(sc.Text())
		if len(cols) == 0 {
			continue
		}
		if strings.EqualFold(cols[0], procName) {
			return true
		}
	}
	return false
}

View File

@@ -1,21 +0,0 @@
//go:build !darwin
package cli
// needMdnsResponderHack records whether the mDNSResponder workaround is needed.
// In this non-darwin build it is initialized from a mDNSResponderHack() that
// always returns false.
var needMdnsResponderHack = mDNSResponderHack()
// mDNSResponderHack always reports false in this non-darwin build: the
// mDNSResponder workaround is never applied here.
func mDNSResponderHack() bool {
return false
}
// killMdnsResponder is a no-op in this non-darwin build.
func killMdnsResponder() {}
// doMdnsResponderCleanup is a no-op in this non-darwin build.
func doMdnsResponderCleanup() {}
// doMdnsResponderHackPostInstall is a no-op in this non-darwin build.
func doMdnsResponderHackPostInstall() {}

View File

@@ -131,6 +131,7 @@ type prog struct {
runningIface string
requiredMultiNICsConfig bool
adDomain string
hasLocalDNS bool
runningOnDomainController bool
selfUninstallMu sync.Mutex
@@ -145,6 +146,55 @@ type prog struct {
recoveryCancel context.CancelFunc
recoveryRunning atomic.Bool
// recoveryBypass is set when dns-intercept mode enters recovery.
// When true, proxy() forwards all queries to OS/DHCP resolver
// instead of using the normal upstream flow.
recoveryBypass atomic.Bool
// DNS intercept mode state (platform-specific).
// On Windows: *wfpState, on macOS: *pfState, nil on other platforms.
dnsInterceptState any
// lastTunnelIfaces tracks the set of active VPN/tunnel interfaces (utun*, ipsec*, etc.)
// discovered during the last pf anchor rule build. When the set changes (e.g., a VPN
// connects and creates utun420), we rebuild the pf anchor to add interface-specific
// intercept rules for the new interface. Protected by mu.
lastTunnelIfaces []string //lint:ignore U1000 used on darwin
// pfStabilizing is true while we're waiting for a VPN's pf ruleset to settle.
// While true, the watchdog and network change callbacks do NOT restore our rules.
pfStabilizing atomic.Bool
// pfStabilizeCancel cancels the active stabilization goroutine, if any.
// Protected by mu.
pfStabilizeCancel context.CancelFunc //lint:ignore U1000 used on darwin
// pfLastRestoreTime records when we last restored our anchor (unix millis).
// Used to detect immediate re-wipes (VPN reconnect cycle).
pfLastRestoreTime atomic.Int64 //lint:ignore U1000 used on darwin
// pfBackoffMultiplier tracks exponential backoff for stabilization.
// Resets to 0 when rules survive for >60s.
pfBackoffMultiplier atomic.Int32 //lint:ignore U1000 used on darwin
// pfMonitorRunning ensures only one pfInterceptMonitor goroutine runs at a time.
// When an interface appears/disappears, we spawn a monitor that probes pf
// interception with exponential backoff and auto-heals if broken.
pfMonitorRunning atomic.Bool //lint:ignore U1000 used on darwin
// pfProbeExpected holds the domain name of a pending pf interception probe.
// When non-empty, the DNS handler checks incoming queries against this value
// and signals pfProbeCh if matched. The probe verifies that pf's rdr rules
// are actually translating packets (not just present in rule text).
pfProbeExpected atomic.Value // string
// pfProbeCh is signaled when the DNS handler receives the expected probe query.
// The channel is created by probePFIntercept() and closed when the probe arrives.
pfProbeCh atomic.Value // *chan struct{}
// VPN DNS manager for split DNS routing when intercept mode is active.
vpnDNS *vpnDNSManager
started chan struct{}
onStartedDone chan struct{}
onStarted []func()
@@ -328,7 +378,7 @@ func (p *prog) apiConfigReload() {
req := &controld.ResolverConfigRequest{
RawUID: cdUID,
Version: rootCmd.Version,
Metadata: ctrld.SystemMetadata(context.Background()),
Metadata: ctrld.SystemMetadataRuntime(context.Background()),
}
resolverConfig, err := controld.FetchResolverConfig(req, cdDev)
selfUninstallCheck(err, p, logger)
@@ -491,9 +541,13 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) {
}
}
}
if domain, err := getActiveDirectoryDomain(); err == nil && domain != "" && hasLocalDnsServerRunning() {
if domain, err := getActiveDirectoryDomain(); err == nil && domain != "" {
mainLog.Load().Debug().Msgf("active directory domain: %s", domain)
p.adDomain = domain
if hasLocalDnsServerRunning() {
mainLog.Load().Debug().Msg("local DNS server detected (Domain Controller)")
p.hasLocalDNS = true
}
}
var wg sync.WaitGroup
@@ -724,6 +778,54 @@ func (p *prog) setDNS() {
p.csSetDnsOk = setDnsOK
}()
// Validate and resolve intercept mode.
// CLI flag (--intercept-mode) takes priority over config file.
// Valid values: "" (off), "dns" (with VPN split routing), "hard" (all DNS through ctrld).
if interceptMode != "" && !validInterceptMode(interceptMode) {
mainLog.Load().Fatal().Msgf("invalid --intercept-mode value %q: must be 'off', 'dns', or 'hard'", interceptMode)
}
if interceptMode == "" || interceptMode == "off" {
interceptMode = cfg.Service.InterceptMode
if interceptMode != "" && interceptMode != "off" {
mainLog.Load().Info().Msgf("Intercept mode enabled via config (intercept_mode = %q)", interceptMode)
}
}
// Derive convenience bools from interceptMode.
switch interceptMode {
case "dns":
dnsIntercept = true
case "hard":
dnsIntercept = true
hardIntercept = true
}
// DNS intercept mode: use OS-level packet interception (WFP/pf) instead of
// modifying interface DNS settings. This eliminates race conditions with VPN
// software that also manages DNS. See issue #489.
if dnsIntercept {
if err := p.startDNSIntercept(); err != nil {
mainLog.Load().Error().Err(err).Msg("DNS intercept mode failed — falling back to interface DNS settings")
// Fall through to traditional setDNS behavior.
} else {
if hardIntercept {
mainLog.Load().Info().Msg("Hard intercept mode active — all DNS through ctrld, no VPN split routing")
} else {
mainLog.Load().Info().Msg("DNS intercept mode active — skipping interface DNS configuration and watchdog")
// Initialize VPN DNS manager for split DNS routing.
// Discovers search domains from virtual/VPN interfaces and forwards
// matching queries to the DNS server on that interface.
// Skipped in --intercept-mode hard where all DNS goes through ctrld.
p.vpnDNS = newVPNDNSManager(p.exemptVPNDNSServers)
p.vpnDNS.Refresh(true)
}
setDnsOK = true
return
}
}
if cfg.Listener == nil {
return
}
@@ -750,7 +852,7 @@ func (p *prog) setDNS() {
if needRFC1918Listeners(lc) {
nameservers = append(nameservers, ctrld.Rfc1918Addresses()...)
}
if needLocalIPv6Listener() {
if needLocalIPv6Listener(p.cfg.Service.InterceptMode) {
nameservers = append(nameservers, "::1")
}
@@ -945,7 +1047,18 @@ func (p *prog) dnsWatchdog(iface *net.Interface, nameservers []string) {
}
// resetDNS performs a DNS reset for all interfaces.
// In DNS intercept mode, this tears down the WFP/pf filters instead.
func (p *prog) resetDNS(isStart bool, restoreStatic bool) {
if dnsIntercept && p.dnsInterceptState != nil {
if err := p.stopDNSIntercept(); err != nil {
mainLog.Load().Error().Err(err).Msg("Failed to stop DNS intercept mode during reset")
}
// Clean up VPN DNS manager
p.vpnDNS = nil
return
}
netIfaceName := ""
if netIface := p.resetDNSForRunningIface(isStart, restoreStatic); netIface != nil {
netIfaceName = netIface.Name

View File

@@ -0,0 +1,134 @@
//go:build darwin
package cli
import (
"fmt"
"os"
"os/exec"
"strings"
)
// launchdPlistPath is the launchd plist file for ctrld that the helpers in
// this file stat, read and edit.
const launchdPlistPath = "/Library/LaunchDaemons/ctrld.plist"
// serviceConfigFileExists reports whether the ctrld launchd plist can be
// stat'ed on disk. Checking the file is more reliable than asking launchctl,
// which may report "not found" for a service that was unloaded while its
// plist still exists.
func serviceConfigFileExists() bool {
	if _, err := os.Stat(launchdPlistPath); err != nil {
		return false
	}
	return true
}
// appendServiceFlag appends a CLI flag (e.g., "--intercept-mode") to the
// ProgramArguments array of the installed launchd plist, so an existing
// installation can be upgraded without losing its --cd flag and other
// arguments.
//
// The current arguments are read with "defaults read"; the new entry is added
// with PlistBuddy, which is more reliable for array manipulation.
//
// The call is idempotent: if flag already occurs anywhere in the printed
// arguments (substring match), nothing is changed and nil is returned.
// Errors from either command are returned wrapped together with the
// command's trimmed output.
func appendServiceFlag(flag string) error {
	current, err := exec.Command("defaults", "read", launchdPlistPath, "ProgramArguments").CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to read plist ProgramArguments: %w (output: %s)", err, strings.TrimSpace(string(current)))
	}

	// Already installed with this flag: leave the plist untouched.
	if strings.Contains(string(current), flag) {
		mainLog.Load().Debug().Msgf("Service flag %q already present in plist, skipping", flag)
		return nil
	}

	// "Add :ProgramArguments:" with a trailing colon appends to the array.
	addEntry := fmt.Sprintf("Add :ProgramArguments: string %s", flag)
	addOut, err := exec.Command("/usr/libexec/PlistBuddy", "-c", addEntry, launchdPlistPath).CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to append %q to plist ProgramArguments: %w (output: %s)", flag, err, strings.TrimSpace(string(addOut)))
	}

	mainLog.Load().Info().Msgf("Appended %q to service launch arguments", flag)
	return nil
}
// verifyServiceRegistration is a no-op on macOS: launchd plist verification is
// not needed, so it always returns nil.
func verifyServiceRegistration() error {
return nil
}
// removeServiceFlag deletes a CLI flag from the ProgramArguments array of the
// installed launchd plist. When the entry right after the flag does not start
// with "-", it is treated as the flag's value and deleted too; removing
// "--intercept-mode" therefore also removes a following "dns" or "hard".
//
// The flag must match an array entry exactly. The call is idempotent: when
// no entry matches, nothing is changed and nil is returned. PlistBuddy
// failures are returned wrapped together with the command's trimmed output.
func removeServiceFlag(flag string) error {
	const plistBuddy = "/usr/libexec/PlistBuddy"

	printed, err := exec.Command(plistBuddy, "-c", "Print :ProgramArguments", launchdPlistPath).CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to read plist ProgramArguments: %w (output: %s)", err, strings.TrimSpace(string(printed)))
	}

	// PlistBuddy prints arrays as:
	//   Array {
	//       /path/to/ctrld
	//       run
	//       --cd=xxx
	//       --intercept-mode
	//       dns
	//   }
	// Keep only the entries, in order, so slice positions equal array indexes.
	var args []string
	for _, raw := range strings.Split(string(printed), "\n") {
		switch item := strings.TrimSpace(raw); item {
		case "Array {", "}", "":
			// Array wrapper or blank line: not an entry.
		default:
			args = append(args, item)
		}
	}

	pos := -1
	for i := range args {
		if args[i] == flag {
			pos = i
			break
		}
	}
	if pos == -1 {
		mainLog.Load().Debug().Msgf("Service flag %q not present in plist, skipping removal", flag)
		return nil
	}

	// deleteAt removes the array entry at index i and returns PlistBuddy's output.
	deleteAt := func(i int) ([]byte, error) {
		cmd := exec.Command(plistBuddy, "-c", fmt.Sprintf("Delete :ProgramArguments:%d", i), launchdPlistPath)
		return cmd.CombinedOutput()
	}

	// Delete the value before the flag: deleting by index shifts later
	// entries down, so the higher index has to go first.
	if next := pos + 1; next < len(args) && !strings.HasPrefix(args[next], "-") {
		if delOut, err := deleteAt(next); err != nil {
			return fmt.Errorf("failed to remove value for %q from plist: %w (output: %s)", flag, err, strings.TrimSpace(string(delOut)))
		}
	}

	if delOut, err := deleteAt(pos); err != nil {
		return fmt.Errorf("failed to remove %q from plist ProgramArguments: %w (output: %s)", flag, err, strings.TrimSpace(string(delOut)))
	}

	mainLog.Load().Info().Msgf("Removed %q from service launch arguments", flag)
	return nil
}

View File

@@ -0,0 +1,38 @@
//go:build !darwin && !windows
package cli
import (
"fmt"
"os"
)
// serviceConfigFileExists reports whether a ctrld service definition can be
// stat'ed at one of the common locations: the systemd unit file or the SysV
// init script.
func serviceConfigFileExists() bool {
	candidates := [...]string{
		"/etc/systemd/system/ctrld.service", // systemd unit file
		"/etc/init.d/ctrld",                 // SysV init script
	}
	for _, path := range candidates {
		if _, err := os.Stat(path); err == nil {
			return true
		}
	}
	return false
}
// appendServiceFlag is not implemented on this platform and always returns an
// error. Linux services (systemd) store their arguments in unit files, so
// intercept mode should be set through the config file (intercept_mode).
// The flag argument is ignored.
func appendServiceFlag(flag string) error {
	const msg = "appending service flags is not supported on this platform; use intercept_mode in config instead"
	return fmt.Errorf(msg)
}
// verifyServiceRegistration is a no-op on this platform; it always returns nil.
func verifyServiceRegistration() error {
return nil
}
// removeServiceFlag is not implemented on this platform and always returns an
// error pointing the caller at the intercept_mode config setting instead.
// The flag argument is ignored.
func removeServiceFlag(flag string) error {
	const msg = "removing service flags is not supported on this platform; use intercept_mode in config instead"
	return fmt.Errorf(msg)
}

View File

@@ -0,0 +1,153 @@
//go:build windows
package cli
import (
"fmt"
"strings"
"golang.org/x/sys/windows/svc/mgr"
)
// serviceConfigFileExists reports whether the ctrld Windows service is
// registered, i.e. whether it can be opened through the service manager.
// Failure to connect to the service manager is reported as false.
func serviceConfigFileExists() bool {
	scm, err := mgr.Connect()
	if err != nil {
		return false
	}
	defer scm.Disconnect()

	svc, err := scm.OpenService(ctrldServiceName)
	if err != nil {
		return false
	}
	defer svc.Close()
	return true
}
// appendServiceFlag appends a CLI flag (e.g., "--intercept-mode") to the
// BinPath of the installed Windows service, so an existing installation can
// be upgraded to intercept mode without losing its --cd flag.
//
// The call is idempotent: if flag already occurs anywhere in the current
// BinPath (substring match), the service is left untouched and nil is
// returned. Otherwise the BinPath is trimmed, the flag is appended after a
// single space, and the service configuration is updated.
func appendServiceFlag(flag string) error {
	scm, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("failed to connect to Windows SCM: %w", err)
	}
	defer scm.Disconnect()

	svc, err := scm.OpenService(ctrldServiceName)
	if err != nil {
		return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err)
	}
	defer svc.Close()

	config, err := svc.Config()
	if err != nil {
		return fmt.Errorf("failed to read service config: %w", err)
	}

	binPath := config.BinaryPathName
	if strings.Contains(binPath, flag) {
		mainLog.Load().Debug().Msgf("Service flag %q already present in BinPath, skipping", flag)
		return nil
	}

	config.BinaryPathName = strings.TrimSpace(binPath) + " " + flag
	if err := svc.UpdateConfig(config); err != nil {
		return fmt.Errorf("failed to update service config with %q: %w", flag, err)
	}

	mainLog.Load().Info().Msgf("Appended %q to service BinPath", flag)
	return nil
}
// verifyServiceRegistration checks, through the Windows Service Control
// Manager, that the ctrld service is registered as expected:
//
//   - the service's BinaryPathName is logged at debug level;
//   - when interceptMode is "dns" or "hard", the BinaryPathName must contain
//     "--intercept-mode";
//   - the start type must be automatic (SERVICE_AUTO_START).
//
// It returns an error when the SCM or the service cannot be opened, when the
// configuration cannot be read, or when one of the checks fails.
func verifyServiceRegistration() error {
	scm, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("failed to connect to Windows SCM: %w", err)
	}
	defer scm.Disconnect()

	svc, err := scm.OpenService(ctrldServiceName)
	if err != nil {
		return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err)
	}
	defer svc.Close()

	config, err := svc.Config()
	if err != nil {
		return fmt.Errorf("failed to read service config: %w", err)
	}

	binPath := config.BinaryPathName
	mainLog.Load().Debug().Msgf("Service registry: BinaryPathName = %q", binPath)

	// The flag is only required when an intercept mode is actually selected.
	switch interceptMode {
	case "dns", "hard":
		if !strings.Contains(binPath, "--intercept-mode") {
			return fmt.Errorf("service registry: --intercept-mode flag missing from BinaryPathName (expected mode %q)", interceptMode)
		}
		mainLog.Load().Debug().Msgf("Service registry: --intercept-mode flag present in BinaryPathName")
	}

	// mgr.StartAutomatic == 2 == SERVICE_AUTO_START.
	if config.StartType == mgr.StartAutomatic {
		return nil
	}
	return fmt.Errorf("service registry: StartType is %d, expected SERVICE_AUTO_START (%d)", config.StartType, mgr.StartAutomatic)
}
// removeServiceFlag removes a CLI flag (and its value, if present) from the installed
// Windows service's BinPath. For example, removing "--intercept-mode" also removes
// the following "dns" or "hard" value. Both the "--flag value" and "--flag=value"
// spellings are recognised.
//
// The function is idempotent: when no argument matches the flag, the service
// configuration is left untouched and nil is returned.
//
// It returns an error when the SCM cannot be reached, the service cannot be
// opened, or its configuration cannot be read or updated.
func removeServiceFlag(flag string) error {
	m, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("failed to connect to Windows SCM: %w", err)
	}
	defer m.Disconnect()

	s, err := m.OpenService(ctrldServiceName)
	if err != nil {
		return fmt.Errorf("failed to open service %q: %w", ctrldServiceName, err)
	}
	defer s.Close()

	config, err := s.Config()
	if err != nil {
		return fmt.Errorf("failed to read service config: %w", err)
	}

	// Cheap pre-check: if the text does not occur at all there is nothing to do.
	if !strings.Contains(config.BinaryPathName, flag) {
		mainLog.Load().Debug().Msgf("Service flag %q not present in BinPath, skipping removal", flag)
		return nil
	}

	// Split BinPath into whitespace-separated parts and drop the flag together
	// with its value. NOTE(review): re-joining with single spaces collapses runs
	// of whitespace, including any inside a quoted executable path.
	parts := strings.Fields(config.BinaryPathName)
	kept := make([]string, 0, len(parts))
	removed := false
	for i := 0; i < len(parts); i++ {
		switch {
		case parts[i] == flag:
			removed = true
			// "--flag value": also skip the next part unless it is another flag.
			if i+1 < len(parts) && !strings.HasPrefix(parts[i+1], "-") {
				i++
			}
		case strings.HasPrefix(parts[i], flag+"="):
			// "--flag=value": flag and value live in a single part.
			removed = true
		default:
			kept = append(kept, parts[i])
		}
	}

	// The substring pre-check can match inside a longer argument (for example a
	// different flag sharing the same prefix). In that case nothing was removed,
	// so do not rewrite the service config or claim a removal in the log.
	if !removed {
		mainLog.Load().Debug().Msgf("Service flag %q not present in BinPath, skipping removal", flag)
		return nil
	}

	config.BinaryPathName = strings.Join(kept, " ")
	if err := s.UpdateConfig(config); err != nil {
		return fmt.Errorf("failed to update service config: %w", err)
	}

	mainLog.Load().Info().Msgf("Removed %q from service BinPath", flag)
	return nil
}