From 82fc628bf30fb159f223cfb8a55853a98580fc7f Mon Sep 17 00:00:00 2001 From: Codescribe Date: Thu, 5 Mar 2026 06:40:09 -0500 Subject: [PATCH 01/16] docs: add DNS Intercept Mode section to README --- README.md | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5b048ca..312352c 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ docker build -t controldns/ctrld . -f docker/Dockerfile # Usage -The cli is self documenting, so free free to run `--help` on any sub-command to get specific usages. +The cli is self documenting, so feel free to run `--help` on any sub-command to get specific usages. ## Arguments ``` @@ -266,5 +266,67 @@ The above will start a foreground process and: - Excluding `*.company.int` and `very-secure.local` matching queries, that are forwarded to `10.0.10.1:53` - Write a debug log to `/path/to/log.log` +## DNS Intercept Mode +When running `ctrld` alongside VPN software, DNS conflicts can cause intermittent failures, bypassed filtering, or configuration loops. DNS Intercept Mode prevents these issues by transparently capturing all DNS traffic on the system and routing it through `ctrld`, without modifying network adapter DNS settings. + +### When to Use +Enable DNS Intercept Mode if you: +- Use corporate VPN software (F5, Cisco AnyConnect, Palo Alto GlobalProtect, Zscaler) +- Run overlay networks like Tailscale or WireGuard +- Experience random DNS failures when VPN connects/disconnects +- See gaps in your Control D analytics when VPN is active +- Have endpoint security software that also manages DNS + +### Command + +Windows (Admin Shell) +```shell +ctrld.exe start --intercept-mode dns --cd RESOLVER_ID_HERE +``` + +macOS +```shell +sudo ctrld start --intercept-mode dns --cd RESOLVER_ID_HERE +``` + +`--intercept-mode dns` automatically detects VPN internal domains and routes them to the VPN's DNS server, while Control D handles everything else. + +To disable intercept mode on a service that already has it enabled: + +Windows (Admin Shell) +```shell +ctrld.exe start --intercept-mode off +``` + +macOS +```shell +sudo ctrld start --intercept-mode off +``` + +This removes the intercept rules and reverts to standard interface-based DNS configuration. + +### Platform Support +| Platform | Supported | Mechanism | +|----------|-----------|-----------| +| Windows | ✅ | NRPT (Name Resolution Policy Table) | +| macOS | ✅ | pf (packet filter) redirect | +| Linux | ❌ | Not currently supported | + +### Features +- **VPN split routing** — VPN-specific domains are automatically detected and forwarded to the VPN's DNS server +- **Captive portal recovery** — Wi-Fi login pages (hotels, airports, coffee shops) work automatically +- **No network adapter changes** — DNS settings stay untouched, eliminating conflicts entirely +- **Automatic port 53 conflict resolution** — if another process (e.g., `mDNSResponder` on macOS) is already using port 53, `ctrld` automatically listens on a different port. OS-level packet interception redirects all DNS traffic to `ctrld` transparently, so no manual configuration is needed. This only applies to intercept mode. + +### Tested VPN Software +- F5 BIG-IP APM +- Cisco AnyConnect +- Palo Alto GlobalProtect +- Tailscale (including Exit Nodes) +- Windscribe +- WireGuard + +For more details, see the [DNS Intercept Mode documentation](https://docs.controld.com/docs/dns-intercept). + ## Contributing See [Contribution Guideline](./docs/contributing.md) From bd9bb90dd4927061773db5f3ca23d0f2597479a0 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 3 Mar 2026 13:25:36 -0500 Subject: [PATCH 02/16] Fix dnsFromResolvConf not filtering loopback IPs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The continue statement only broke out of the inner loop, so loopback/local IPs (e.g. 127.0.0.1) were never filtered. This caused ctrld to use itself as bootstrap DNS when already installed as the system resolver — a self-referential loop. Use the same isLocal flag pattern as getDNSFromScutil() and getAllDHCPNameservers(). --- nameservers_unix.go | 45 ++++++++++------- nameservers_unix_test.go | 105 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 18 deletions(-) create mode 100644 nameservers_unix_test.go diff --git a/nameservers_unix.go b/nameservers_unix.go index d8e6035..1cbad68 100644 --- a/nameservers_unix.go +++ b/nameservers_unix.go @@ -4,6 +4,7 @@ package ctrld import ( "net" + "net/netip" "slices" "time" @@ -17,6 +18,31 @@ func currentNameserversFromResolvconf() []string { return resolvconffile.NameServers() } +// localNameservers filters a list of nameserver strings, returning only those +// that are not loopback or local machine IP addresses. +func localNameservers(nss []string, regularIPs, loopbackIPs []netip.Addr) []string { + var result []string + seen := make(map[string]bool) + + for _, ns := range nss { + if ip := net.ParseIP(ns); ip != nil { + // skip loopback and local IPs + isLocal := false + for _, v := range slices.Concat(regularIPs, loopbackIPs) { + if ip.String() == v.String() { + isLocal = true + break + } + } + if !isLocal && !seen[ip.String()] { + seen[ip.String()] = true + result = append(result, ip.String()) + } + } + } + return result +} + // dnsFromResolvConf reads usable nameservers from /etc/resolv.conf file. // A nameserver is usable if it's not one of current machine's IP addresses // and loopback IP addresses. @@ -35,24 +61,7 @@ func dnsFromResolvConf() []string { } nss := resolvconffile.NameServers() - var localDNS []string - seen := make(map[string]bool) - - for _, ns := range nss { - if ip := net.ParseIP(ns); ip != nil { - // skip loopback IPs - for _, v := range slices.Concat(regularIPs, loopbackIPs) { - ipStr := v.String() - if ip.String() == ipStr { - continue - } - } - if !seen[ip.String()] { - seen[ip.String()] = true - localDNS = append(localDNS, ip.String()) - } - } - } + localDNS := localNameservers(nss, regularIPs, loopbackIPs) // If we successfully read the file and found nameservers, return them if len(localDNS) > 0 { diff --git a/nameservers_unix_test.go b/nameservers_unix_test.go new file mode 100644 index 0000000..a771dc1 --- /dev/null +++ b/nameservers_unix_test.go @@ -0,0 +1,105 @@ +//go:build unix + +package ctrld + +import ( + "net/netip" + "testing" +) + +func Test_localNameservers(t *testing.T) { + loopbackIPs := []netip.Addr{ + netip.MustParseAddr("127.0.0.1"), + netip.MustParseAddr("::1"), + } + regularIPs := []netip.Addr{ + netip.MustParseAddr("192.168.1.100"), + netip.MustParseAddr("10.0.0.5"), + } + + tests := []struct { + name string + nss []string + regularIPs []netip.Addr + loopbackIPs []netip.Addr + want []string + }{ + { + name: "filters loopback IPv4", + nss: []string{"127.0.0.1", "8.8.8.8"}, + regularIPs: nil, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8"}, + }, + { + name: "filters loopback IPv6", + nss: []string{"::1", "1.1.1.1"}, + regularIPs: nil, + loopbackIPs: loopbackIPs, + want: []string{"1.1.1.1"}, + }, + { + name: "filters local machine IPs", + nss: []string{"192.168.1.100", "8.8.4.4"}, + regularIPs: regularIPs, + loopbackIPs: nil, + want: []string{"8.8.4.4"}, + }, + { + name: "filters both loopback and local IPs", + nss: []string{"127.0.0.1", "192.168.1.100", "8.8.8.8"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8"}, + }, + { + name: "deduplicates results", + nss: []string{"8.8.8.8", "8.8.8.8", "1.1.1.1"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8", "1.1.1.1"}, + }, + { + name: "all filtered returns nil", + nss: []string{"127.0.0.1", "::1", "192.168.1.100"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: nil, + }, + { + name: "empty input returns nil", + nss: nil, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: nil, + }, + { + name: "skips unparseable entries", + nss: []string{"not-an-ip", "8.8.8.8"}, + regularIPs: regularIPs, + loopbackIPs: loopbackIPs, + want: []string{"8.8.8.8"}, + }, + { + name: "no local IPs filters nothing", + nss: []string{"8.8.8.8", "1.1.1.1"}, + regularIPs: nil, + loopbackIPs: nil, + want: []string{"8.8.8.8", "1.1.1.1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := localNameservers(tt.nss, tt.regularIPs, tt.loopbackIPs) + if len(got) != len(tt.want) { + t.Fatalf("localNameservers() = %v, want %v", got, tt.want) + } + for i := range got { + if got[i] != tt.want[i] { + t.Errorf("localNameservers()[%d] = %q, want %q", i, got[i], tt.want[i]) + } + } + }) + } +} From 112d1cb5a9a52a5bf08cfc27b1e6392dd02ae73e Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 10 Mar 2026 14:05:50 -0400 Subject: [PATCH 03/16] fix: close handle leak in hasLocalDnsServerRunning() Add defer windows.CloseHandle(h) after CreateToolhelp32Snapshot to ensure the process snapshot handle is properly released on all code paths (match found, enumeration exhausted, or error). --- cmd/cli/service_windows.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/cli/service_windows.go b/cmd/cli/service_windows.go index fddb0ef..b12982c 100644 --- a/cmd/cli/service_windows.go +++ b/cmd/cli/service_windows.go @@ -160,6 +160,7 @@ func hasLocalDnsServerRunning() bool { if e != nil { return false } + defer windows.CloseHandle(h) p := windows.ProcessEntry32{Size: processEntrySize} for { e := windows.Process32Next(h, &p) From 5c0585b2e8a4ad164fa66fdac50e9150ea211785 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Wed, 4 Mar 2026 15:01:52 -0500 Subject: [PATCH 04/16] Add `log tail` command for live log streaming This commit adds a new `ctrld log tail` subcommand that streams runtime debug logs to the terminal in real-time, similar to `tail -f`. Changes: - log_writer.go: Add Subscribe/tailLastLines for fan-out to tail clients - control_server.go: Add /log/tail endpoint with streaming response - Internal logging: subscribes to logWriter for live data - File-based logging: polls log file for new data (200ms interval) - Sends last N lines as initial context on connect - commands.go: Add `log tail` cobra subcommand with --lines/-n flag - control_client.go: Add postStream() with no timeout for long-lived connections Usage: sudo ctrld log tail # shows last 10 lines then follows sudo ctrld log tail -n 50 # shows last 50 lines then follows Ctrl+C to stop --- cmd/cli/commands.go | 85 ++++++++++ cmd/cli/control_client.go | 6 + cmd/cli/control_server.go | 166 +++++++++++++++++++ cmd/cli/log_tail_test.go | 339 ++++++++++++++++++++++++++++++++++++++ cmd/cli/log_writer.go | 72 +++++++- 5 files changed, 665 insertions(+), 3 deletions(-) create mode 100644 cmd/cli/log_tail_test.go diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index eaee812..a03745f 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -11,12 +11,14 @@ import ( "net/http" "os" "os/exec" + "os/signal" "path/filepath" "runtime" "slices" "sort" "strconv" "strings" + "syscall" "time" "github.com/docker/go-units" @@ -146,6 +148,88 @@ func initLogCmd() *cobra.Command { fmt.Println(logs.Data) }, } + var tailLines int + logTailCmd := &cobra.Command{ + Use: "tail", + Short: "Tail live runtime debug logs", + Long: "Stream live runtime debug logs to the terminal, similar to tail -f. Press Ctrl+C to stop.", + Args: cobra.NoArgs, + PreRun: func(cmd *cobra.Command, args []string) { + checkHasElevatedPrivilege() + }, + Run: func(cmd *cobra.Command, args []string) { + + p := &prog{router: router.New(&cfg, false)} + s, _ := newService(p, svcConfig) + + status, err := s.Status() + if errors.Is(err, service.ErrNotInstalled) { + mainLog.Load().Warn().Msg("service not installed") + return + } + if status == service.StatusStopped { + mainLog.Load().Warn().Msg("service is not running") + return + } + + dir, err := socketDir() + if err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to find ctrld home dir") + } + cc := newControlClient(filepath.Join(dir, ctrldControlUnixSock)) + tailPath := fmt.Sprintf("%s?lines=%d", tailLogsPath, tailLines) + resp, err := cc.postStream(tailPath, nil) + if err != nil { + mainLog.Load().Fatal().Err(err).Msg("failed to connect for log tailing") + } + defer resp.Body.Close() + + switch resp.StatusCode { + case http.StatusMovedPermanently: + warnRuntimeLoggingNotEnabled() + return + case http.StatusOK: + default: + mainLog.Load().Fatal().Msgf("unexpected response status: %d", resp.StatusCode) + return + } + + // Set up signal handling for clean shutdown. + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + + done := make(chan struct{}) + go func() { + defer close(done) + // Stream output to stdout. + buf := make([]byte, 4096) + for { + n, readErr := resp.Body.Read(buf) + if n > 0 { + os.Stdout.Write(buf[:n]) + } + if readErr != nil { + if readErr != io.EOF { + mainLog.Load().Error().Err(readErr).Msg("error reading log stream") + } + return + } + } + }() + + select { + case <-ctx.Done(): + if errors.Is(ctx.Err(), context.Canceled) { + msg := fmt.Sprintf("\nexiting: %s\n", context.Cause(ctx).Error()) + os.Stdout.WriteString(msg) + } + case <-done: + } + + }, + } + logTailCmd.Flags().IntVarP(&tailLines, "lines", "n", 10, "Number of historical lines to show on connect") + logCmd := &cobra.Command{ Use: "log", Short: "Manage runtime debug logs", @@ -156,6 +240,7 @@ func initLogCmd() *cobra.Command { } logCmd.AddCommand(logSendCmd) logCmd.AddCommand(logViewCmd) + logCmd.AddCommand(logTailCmd) rootCmd.AddCommand(logCmd) return logCmd diff --git a/cmd/cli/control_client.go b/cmd/cli/control_client.go index 7382d4e..0e3f90f 100644 --- a/cmd/cli/control_client.go +++ b/cmd/cli/control_client.go @@ -32,6 +32,12 @@ func (c *controlClient) post(path string, data io.Reader) (*http.Response, error return c.c.Post("http://unix"+path, contentTypeJson, data) } +// postStream sends a POST request with no timeout, suitable for long-lived streaming connections. +func (c *controlClient) postStream(path string, data io.Reader) (*http.Response, error) { + c.c.Timeout = 0 + return c.c.Post("http://unix"+path, contentTypeJson, data) +} + // deactivationRequest represents request for validating deactivation pin. type deactivationRequest struct { Pin int64 `json:"pin"` diff --git a/cmd/cli/control_server.go b/cmd/cli/control_server.go index 064e7fe..6f4388a 100644 --- a/cmd/cli/control_server.go +++ b/cmd/cli/control_server.go @@ -10,6 +10,7 @@ import ( "os" "reflect" "sort" + "strconv" "time" "github.com/kardianos/service" @@ -29,6 +30,7 @@ const ( ifacePath = "/iface" viewLogsPath = "/log/view" sendLogsPath = "/log/send" + tailLogsPath = "/log/tail" ) type ifaceResponse struct { @@ -344,6 +346,170 @@ func (p *prog) registerControlServerHandler() { } p.internalLogSent = time.Now() })) + p.cs.register(tailLogsPath, http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) { + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "streaming unsupported", http.StatusInternalServerError) + return + } + + // Determine logging mode and validate before starting the stream. + var lw *logWriter + useInternalLog := p.needInternalLogging() + if useInternalLog { + p.mu.Lock() + lw = p.internalLogWriter + p.mu.Unlock() + if lw == nil { + w.WriteHeader(http.StatusMovedPermanently) + return + } + } else if p.cfg.Service.LogPath == "" { + // No logging configured at all. + w.WriteHeader(http.StatusMovedPermanently) + return + } + + // Parse optional "lines" query param for initial context. + numLines := 10 + if v := request.URL.Query().Get("lines"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 0 { + numLines = n + } + } + + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("Transfer-Encoding", "chunked") + w.Header().Set("X-Content-Type-Options", "nosniff") + w.WriteHeader(http.StatusOK) + + if useInternalLog { + // Internal logging mode: subscribe to the logWriter. + + // Send last N lines as initial context. + if numLines > 0 { + if tail := lw.tailLastLines(numLines); len(tail) > 0 { + w.Write(tail) + flusher.Flush() + } + } + + ch, unsub := lw.Subscribe() + defer unsub() + for { + select { + case data, ok := <-ch: + if !ok { + return + } + if _, err := w.Write(data); err != nil { + return + } + flusher.Flush() + case <-request.Context().Done(): + return + } + } + } else { + // File-based logging mode: tail the log file. + logFile := normalizeLogFilePath(p.cfg.Service.LogPath) + f, err := os.Open(logFile) + if err != nil { + // Already committed 200, just return. + return + } + defer f.Close() + + // Seek to show last N lines. + if numLines > 0 { + if tail := tailFileLastLines(f, numLines); len(tail) > 0 { + w.Write(tail) + flusher.Flush() + } + } else { + // Seek to end. + f.Seek(0, io.SeekEnd) + } + + // Poll for new data. + buf := make([]byte, 4096) + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-ticker.C: + n, err := f.Read(buf) + if n > 0 { + if _, werr := w.Write(buf[:n]); werr != nil { + return + } + flusher.Flush() + } + if err != nil && err != io.EOF { + return + } + case <-request.Context().Done(): + return + } + } + } + })) +} + +// tailFileLastLines reads the last n lines from a file and returns them. +// The file position is left at the end of the file after this call. +func tailFileLastLines(f *os.File, n int) []byte { + stat, err := f.Stat() + if err != nil || stat.Size() == 0 { + return nil + } + + // Read from the end in chunks to find the last n lines. + const chunkSize = 4096 + fileSize := stat.Size() + var lines []byte + offset := fileSize + count := 0 + + for offset > 0 && count <= n { + readSize := int64(chunkSize) + if readSize > offset { + readSize = offset + } + offset -= readSize + buf := make([]byte, readSize) + nRead, err := f.ReadAt(buf, offset) + if err != nil && err != io.EOF { + break + } + buf = buf[:nRead] + lines = append(buf, lines...) + + // Count newlines in this chunk. + for _, b := range buf { + if b == '\n' { + count++ + } + } + } + + // Trim to last n lines. + idx := 0 + nlCount := 0 + for i := len(lines) - 1; i >= 0; i-- { + if lines[i] == '\n' { + nlCount++ + if nlCount == n+1 { + idx = i + 1 + break + } + } + } + lines = lines[idx:] + + // Seek to end of file for subsequent reads. + f.Seek(0, io.SeekEnd) + return lines } func jsonResponse(next http.Handler) http.Handler { diff --git a/cmd/cli/log_tail_test.go b/cmd/cli/log_tail_test.go new file mode 100644 index 0000000..37ad411 --- /dev/null +++ b/cmd/cli/log_tail_test.go @@ -0,0 +1,339 @@ +package cli + +import ( + "io" + "os" + "strings" + "sync" + "testing" + "time" +) + +// ============================================================================= +// logWriter.tailLastLines tests +// ============================================================================= + +func Test_logWriter_tailLastLines_Empty(t *testing.T) { + lw := newLogWriterWithSize(4096) + if got := lw.tailLastLines(10); got != nil { + t.Fatalf("expected nil for empty buffer, got %q", got) + } +} + +func Test_logWriter_tailLastLines_ZeroLines(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\n")) + if got := lw.tailLastLines(0); got != nil { + t.Fatalf("expected nil for n=0, got %q", got) + } +} + +func Test_logWriter_tailLastLines_NegativeLines(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\n")) + if got := lw.tailLastLines(-1); got != nil { + t.Fatalf("expected nil for n=-1, got %q", got) + } +} + +func Test_logWriter_tailLastLines_FewerThanN(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\n")) + got := string(lw.tailLastLines(10)) + want := "line1\nline2\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_ExactN(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\nline3\n")) + got := string(lw.tailLastLines(3)) + want := "line1\nline2\nline3\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_MoreThanN(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\nline3\nline4\nline5\n")) + got := string(lw.tailLastLines(2)) + want := "line4\nline5\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_NoTrailingNewline(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("line1\nline2\nline3")) + // Without trailing newline, "line3" is a partial line. + // Asking for 1 line returns the last newline-terminated line plus the partial. + got := string(lw.tailLastLines(1)) + want := "line2\nline3" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_SingleLineNoNewline(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("only line")) + got := string(lw.tailLastLines(5)) + want := "only line" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_logWriter_tailLastLines_SingleLineWithNewline(t *testing.T) { + lw := newLogWriterWithSize(4096) + lw.Write([]byte("only line\n")) + got := string(lw.tailLastLines(1)) + want := "only line\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +// ============================================================================= +// logWriter.Subscribe tests +// ============================================================================= + +func Test_logWriter_Subscribe_Basic(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch, unsub := lw.Subscribe() + defer unsub() + + msg := []byte("hello world\n") + lw.Write(msg) + + select { + case got := <-ch: + if string(got) != string(msg) { + t.Fatalf("got %q, want %q", got, msg) + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for subscriber data") + } +} + +func Test_logWriter_Subscribe_MultipleSubscribers(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch1, unsub1 := lw.Subscribe() + defer unsub1() + ch2, unsub2 := lw.Subscribe() + defer unsub2() + + msg := []byte("broadcast\n") + lw.Write(msg) + + for i, ch := range []<-chan []byte{ch1, ch2} { + select { + case got := <-ch: + if string(got) != string(msg) { + t.Fatalf("subscriber %d: got %q, want %q", i, got, msg) + } + case <-time.After(time.Second): + t.Fatalf("subscriber %d: timed out", i) + } + } +} + +func Test_logWriter_Subscribe_Unsubscribe(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch, unsub := lw.Subscribe() + + // Verify subscribed. + lw.Write([]byte("before unsub\n")) + select { + case <-ch: + case <-time.After(time.Second): + t.Fatal("timed out before unsub") + } + + unsub() + + // Channel should be closed after unsub. + if _, ok := <-ch; ok { + t.Fatal("channel should be closed after unsubscribe") + } + + // Verify subscriber list is empty. + lw.mu.Lock() + count := len(lw.subscribers) + lw.mu.Unlock() + if count != 0 { + t.Fatalf("expected 0 subscribers after unsub, got %d", count) + } +} + +func Test_logWriter_Subscribe_UnsubscribeIdempotent(t *testing.T) { + lw := newLogWriterWithSize(4096) + _, unsub := lw.Subscribe() + unsub() + // Second unsub should not panic. + unsub() +} + +func Test_logWriter_Subscribe_SlowSubscriberDropped(t *testing.T) { + lw := newLogWriterWithSize(4096) + ch, unsub := lw.Subscribe() + defer unsub() + + // Fill the subscriber channel (buffer size is 256). + for i := 0; i < 300; i++ { + lw.Write([]byte("msg\n")) + } + + // Should have 256 buffered messages, rest dropped. + count := 0 + for { + select { + case <-ch: + count++ + default: + goto done + } + } +done: + if count != 256 { + t.Fatalf("expected 256 buffered messages, got %d", count) + } +} + +func Test_logWriter_Subscribe_ConcurrentWriteAndRead(t *testing.T) { + lw := newLogWriterWithSize(64 * 1024) + ch, unsub := lw.Subscribe() + defer unsub() + + const numWrites = 100 + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < numWrites; i++ { + lw.Write([]byte("concurrent write\n")) + } + }() + + received := 0 + timeout := time.After(5 * time.Second) + for received < numWrites { + select { + case <-ch: + received++ + case <-timeout: + t.Fatalf("timed out after receiving %d/%d messages", received, numWrites) + } + } + wg.Wait() +} + +// ============================================================================= +// tailFileLastLines tests +// ============================================================================= + +func writeTempFile(t *testing.T, content string) *os.File { + t.Helper() + f, err := os.CreateTemp(t.TempDir(), "tail-test-*") + if err != nil { + t.Fatal(err) + } + if _, err := f.WriteString(content); err != nil { + t.Fatal(err) + } + return f +} + +func Test_tailFileLastLines_Empty(t *testing.T) { + f := writeTempFile(t, "") + defer f.Close() + if got := tailFileLastLines(f, 10); got != nil { + t.Fatalf("expected nil for empty file, got %q", got) + } +} + +func Test_tailFileLastLines_FewerThanN(t *testing.T) { + f := writeTempFile(t, "line1\nline2\n") + defer f.Close() + got := string(tailFileLastLines(f, 10)) + want := "line1\nline2\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_ExactN(t *testing.T) { + f := writeTempFile(t, "a\nb\nc\n") + defer f.Close() + got := string(tailFileLastLines(f, 3)) + want := "a\nb\nc\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_MoreThanN(t *testing.T) { + f := writeTempFile(t, "line1\nline2\nline3\nline4\nline5\n") + defer f.Close() + got := string(tailFileLastLines(f, 2)) + want := "line4\nline5\n" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_NoTrailingNewline(t *testing.T) { + f := writeTempFile(t, "line1\nline2\nline3") + defer f.Close() + // Without trailing newline, partial last line comes with the previous line. + got := string(tailFileLastLines(f, 1)) + want := "line2\nline3" + if got != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func Test_tailFileLastLines_LargerThanChunk(t *testing.T) { + // Build content larger than the 4096 chunk size to exercise multi-chunk reads. + var sb strings.Builder + for i := 0; i < 200; i++ { + sb.WriteString(strings.Repeat("x", 50)) + sb.WriteByte('\n') + } + f := writeTempFile(t, sb.String()) + defer f.Close() + got := string(tailFileLastLines(f, 3)) + lines := strings.Split(strings.TrimRight(got, "\n"), "\n") + if len(lines) != 3 { + t.Fatalf("expected 3 lines, got %d: %q", len(lines), got) + } + expectedLine := strings.Repeat("x", 50) + for _, line := range lines { + if line != expectedLine { + t.Fatalf("unexpected line content: %q", line) + } + } +} + +func Test_tailFileLastLines_SeeksToEnd(t *testing.T) { + f := writeTempFile(t, "line1\nline2\nline3\n") + defer f.Close() + tailFileLastLines(f, 1) + + // After tailFileLastLines, file position should be at the end. + pos, err := f.Seek(0, io.SeekCurrent) + if err != nil { + t.Fatal(err) + } + stat, err := f.Stat() + if err != nil { + t.Fatal(err) + } + if pos != stat.Size() { + t.Fatalf("expected file position at end (%d), got %d", stat.Size(), pos) + } +} diff --git a/cmd/cli/log_writer.go b/cmd/cli/log_writer.go index ab6b855..f84dcdf 100644 --- a/cmd/cli/log_writer.go +++ b/cmd/cli/log_writer.go @@ -38,11 +38,17 @@ type logReader struct { size int64 } +// logSubscriber represents a subscriber to live log output. +type logSubscriber struct { + ch chan []byte +} + // logWriter is an internal buffer to keep track of runtime log when no logging is enabled. type logWriter struct { - mu sync.Mutex - buf bytes.Buffer - size int + mu sync.Mutex + buf bytes.Buffer + size int + subscribers []*logSubscriber } // newLogWriter creates an internal log writer. @@ -61,10 +67,70 @@ func newLogWriterWithSize(size int) *logWriter { return lw } +// Subscribe returns a channel that receives new log data as it's written, +// and an unsubscribe function to clean up when done. +func (lw *logWriter) Subscribe() (<-chan []byte, func()) { + lw.mu.Lock() + defer lw.mu.Unlock() + sub := &logSubscriber{ch: make(chan []byte, 256)} + lw.subscribers = append(lw.subscribers, sub) + unsub := func() { + lw.mu.Lock() + defer lw.mu.Unlock() + for i, s := range lw.subscribers { + if s == sub { + lw.subscribers = append(lw.subscribers[:i], lw.subscribers[i+1:]...) + close(sub.ch) + break + } + } + } + return sub.ch, unsub +} + +// tailLastLines returns the last n lines from the current buffer. +func (lw *logWriter) tailLastLines(n int) []byte { + lw.mu.Lock() + defer lw.mu.Unlock() + data := lw.buf.Bytes() + if n <= 0 || len(data) == 0 { + return nil + } + // Find the last n newlines from the end. + count := 0 + pos := len(data) + for pos > 0 { + pos-- + if data[pos] == '\n' { + count++ + if count == n+1 { + pos++ // move past this newline + break + } + } + } + result := make([]byte, len(data)-pos) + copy(result, data[pos:]) + return result +} + func (lw *logWriter) Write(p []byte) (int, error) { lw.mu.Lock() defer lw.mu.Unlock() + // Fan-out to subscribers (non-blocking). + if len(lw.subscribers) > 0 { + cp := make([]byte, len(p)) + copy(cp, p) + for _, sub := range lw.subscribers { + select { + case sub.ch <- cp: + default: + // Drop if subscriber is slow to avoid blocking the logger. + } + } + } + // If writing p causes overflows, discard old data. if lw.buf.Len()+len(p) > lw.size { buf := lw.buf.Bytes() From 95dd871e2d648eafe00d27da4c3002946c22e5a3 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Mon, 30 Mar 2026 13:55:08 -0400 Subject: [PATCH 05/16] fix: bracket IPv6 addresses in VPN DNS upstream config upstreamConfigFor() used strings.Contains(":") to detect whether to append ":53", but IPv6 addresses contain colons, so IPv6 servers were passed as bare addresses (e.g. "2a0d:6fc0:9b0:3600::1") to net.Dial which rejects them with "too many colons in address". Use net.JoinHostPort() which handles both IPv4 and IPv6 correctly, producing "[2a0d:6fc0:9b0:3600::1]:53" for IPv6. --- cmd/cli/vpn_dns.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cmd/cli/vpn_dns.go b/cmd/cli/vpn_dns.go index 03b5478..bc4b1a1 100644 --- a/cmd/cli/vpn_dns.go +++ b/cmd/cli/vpn_dns.go @@ -2,6 +2,7 @@ package cli import ( "context" + "net" "strings" "sync" @@ -241,10 +242,12 @@ func (m *vpnDNSManager) Routes() map[string][]string { // upstreamConfigFor creates a legacy upstream configuration for the given VPN DNS server. func (m *vpnDNSManager) upstreamConfigFor(server string) *ctrld.UpstreamConfig { - endpoint := server - if !strings.Contains(server, ":") { - endpoint = server + ":53" - } + // Use net.JoinHostPort to correctly handle both IPv4 and IPv6 addresses. + // Previously, the strings.Contains(":") check would skip appending ":53" + // for IPv6 addresses (they contain colons), leaving a bare address like + // "2a0d:6fc0:9b0:3600::1" which net.Dial rejects with "too many colons". + // net.JoinHostPort produces "[2a0d:6fc0:9b0:3600::1]:53" as required. + endpoint := net.JoinHostPort(server, "53") return &ctrld.UpstreamConfig{ Name: "VPN DNS", From 22a796f673976287654d82f8ebe9bcbe6493defb Mon Sep 17 00:00:00 2001 From: Codescribe Date: Mon, 30 Mar 2026 13:55:52 -0400 Subject: [PATCH 06/16] fix: use raw IPv6 socket for DNS responses in macOS intercept mode macOS rejects sendmsg from [::1] to global unicast IPv6 (EINVAL), and nat on lo0 doesn't fire for route-to'd packets (pf skips translation on the second interface pass). ULA addresses on lo0 also fail (EHOSTUNREACH - kernel segregates lo0 routing). Solution: wrap the [::1] UDP listener's ResponseWriter with rawIPv6Writer that sends responses via SOCK_RAW (IPPROTO_UDP) on lo0, bypassing the kernel's routing validation. pf's rdr state reverses the address translation on the response path. Changes: - Add rawipv6_darwin.go: rawIPv6Writer wraps dns.ResponseWriter, sends UDP responses via raw IPv6 socket with proper checksum calculation - Add rawipv6_other.go: no-op wrapIPv6Handler for non-darwin platforms - Remove nat rules from pf anchor (no longer needed) - Block IPv6 TCP DNS (block return) - falls back to IPv4 (~1s, rare) - Remove IPv6 TCP rdr/route-to/pass rules (only UDP intercepted) --- cmd/cli/dns_intercept_darwin.go | 66 ++++--------- cmd/cli/dns_proxy.go | 6 +- cmd/cli/rawipv6_darwin.go | 163 ++++++++++++++++++++++++++++++++ cmd/cli/rawipv6_other.go | 12 +++ docs/pf-dns-intercept.md | 58 ++++++------ 5 files changed, 227 insertions(+), 78 deletions(-) create mode 100644 cmd/cli/rawipv6_darwin.go create mode 100644 cmd/cli/rawipv6_other.go diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go index b761b3a..0854efc 100644 --- a/cmd/cli/dns_intercept_darwin.go +++ b/cmd/cli/dns_intercept_darwin.go @@ -321,14 +321,13 @@ func (p *prog) startDNSIntercept() error { // options → normalization (scrub) → queueing → translation (nat/rdr) → filtering (pass/block/anchor) // // "pfctl -sr" returns BOTH scrub-anchor (normalization) AND anchor/pass/block (filter) rules. -// "pfctl -sn" returns nat-anchor AND rdr-anchor (translation) rules. +// "pfctl -sn" returns rdr-anchor (translation) rules. // Both commands emit "No ALTQ support in kernel" warnings on stderr. // // We must reassemble in correct order: scrub → nat/rdr → filter. // // The anchor reference does not survive a reboot, but ctrld re-adds it on every start. func (p *prog) ensurePFAnchorReference() error { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) @@ -347,11 +346,10 @@ func (p *prog) ensurePFAnchorReference() error { natLines := pfFilterRuleLines(string(natOut)) filterLines := pfFilterRuleLines(string(filterOut)) - hasNatAnchor := pfContainsRule(natLines, natAnchorRef) hasRdrAnchor := pfContainsRule(natLines, rdrAnchorRef) hasAnchor := pfContainsRule(filterLines, anchorRef) - if hasNatAnchor && hasRdrAnchor && hasAnchor { + if hasRdrAnchor && hasAnchor { // Verify anchor ordering: our anchor should appear before other anchors // for reliable DNS interception priority. Log a warning if out of order, // but don't force a reload (the interface-specific rules in our anchor @@ -380,15 +378,8 @@ func (p *prog) ensurePFAnchorReference() error { // rules in whichever anchor appears first win. By prepending, our DNS // intercept rules match port 53 traffic before a VPN app's broader // "pass out quick on all" rules in their anchor. - if !hasNatAnchor || !hasRdrAnchor { - var newRefs []string - if !hasNatAnchor { - newRefs = append(newRefs, natAnchorRef) - } - if !hasRdrAnchor { - newRefs = append(newRefs, rdrAnchorRef) - } - natLines = append(newRefs, natLines...) + if !hasRdrAnchor { + natLines = append([]string{rdrAnchorRef}, natLines...) } if !hasAnchor { pureFilterLines = append([]string{anchorRef}, pureFilterLines...) @@ -590,7 +581,6 @@ func (p *prog) stopDNSIntercept() error { // The anchor itself is already flushed by stopDNSIntercept, so even if removal // fails, the empty anchor is a no-op. func (p *prog) removePFAnchorReference() error { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) @@ -609,7 +599,7 @@ func (p *prog) removePFAnchorReference() error { var cleanNat []string for _, line := range natLines { - if !strings.Contains(line, rdrAnchorRef) && !strings.Contains(line, natAnchorRef) { + if !strings.Contains(line, rdrAnchorRef) { cleanNat = append(cleanNat, line) } } @@ -804,23 +794,13 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // a stateful entry that handles response routing. Using "rdr pass" would skip filter // evaluation, and its implicit state alone is insufficient for response delivery — // proven by commit 51cf029 where responses were silently dropped. - rules.WriteString("# --- Translation rules (nat + rdr) ---\n") + rules.WriteString("# --- Translation rules (rdr) ---\n") - // NAT source to ::1 for IPv6 DNS on loopback. macOS/BSD rejects sendmsg from - // [::1] to a global unicast IPv6 address (EINVAL), unlike IPv4 where sendmsg from - // 127.0.0.1 to local private IPs works fine. The rdr rewrites the destination but - // preserves the original source (machine's global IPv6). Without nat, ctrld cannot - // reply. pf reverses both translations on the response path. - // Note: nat must appear before rdr (pf evaluates nat first in translation phase). listenerAddr6 := fmt.Sprintf("::1 port %d", listenerPort) - rules.WriteString("nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1\n") - rules.WriteString("nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1\n") - rules.WriteString("# Redirect DNS on loopback to ctrld's listener.\n") rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto udp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto tcp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) - rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n", listenerAddr6)) - rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6)) + rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6)) // --- Filtering rules --- rules.WriteString("# --- Filtering rules (pass) ---\n\n") @@ -983,7 +963,6 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto udp from any to ! %s port 53\n", iface, listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto tcp from any to ! %s port 53\n", iface, listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto udp from any to ! ::1 port 53\n", iface)) - rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n", iface)) } rules.WriteString("\n") } @@ -1003,10 +982,13 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! %s port 53\n", listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! %s port 53\n\n", listenerIP)) - // Force remaining outbound IPv6 DNS through loopback for interception. - rules.WriteString("# Force remaining outbound IPv6 DNS through loopback for interception.\n") + // Force remaining outbound IPv6 UDP DNS through loopback for interception. + // IPv6 TCP DNS is blocked instead — raw socket response injection only handles UDP, + // and TCP DNS is rare (truncated responses, zone transfers). Apps fall back to IPv4 TCP. + rules.WriteString("# Force remaining outbound IPv6 UDP DNS through loopback for interception.\n") rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53\n") - rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n\n") + rules.WriteString("# Block IPv6 TCP DNS — raw socket can't handle TCP; apps fall back to IPv4.\n") + rules.WriteString("block return out quick on ! lo0 inet6 proto tcp from any to ! ::1 port 53\n\n") // Allow route-to'd DNS packets to pass outbound on lo0. // Without this, VPN firewalls with "block drop all" (e.g., Windscribe) drop the packet @@ -1018,8 +1000,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString("# Pass route-to'd DNS outbound on lo0 — no state to avoid bypassing rdr inbound.\n") rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto udp from any to ! %s port 53 no state\n", listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto tcp from any to ! %s port 53 no state\n", listenerIP)) - rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state\n") - rules.WriteString("pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state\n\n") + rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state\n\n") // Allow the redirected traffic through on loopback (inbound after rdr). // @@ -1034,7 +1015,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // (source 127.0.0.1 → original DNS server IP, e.g., 10.255.255.3). rules.WriteString("# Accept redirected DNS — reply-to lo0 forces response through loopback.\n") rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to %s\n", listenerAddr)) - rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to %s\n", listenerAddr6)) + rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto udp from any to %s\n", listenerAddr6)) return rules.String() } @@ -1043,12 +1024,11 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // It verifies both the anchor references in the main ruleset and the rules within // our anchor. Failures are logged at ERROR level to make them impossible to miss. func (p *prog) verifyPFState() { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) verified := true - // Check main ruleset for anchor references (nat-anchor + rdr-anchor in translation rules). + // Check main ruleset for anchor references (rdr-anchor in translation rules). natOut, err := exec.Command("pfctl", "-sn").CombinedOutput() if err != nil { mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump NAT rules") @@ -1059,10 +1039,6 @@ func (p *prog) verifyPFState() { mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — rdr-anchor reference missing from running NAT rules") verified = false } - if !strings.Contains(natStr, natAnchorRef) { - mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — nat-anchor reference missing from running NAT rules") - verified = false - } } filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() @@ -1229,6 +1205,7 @@ func stringSlicesEqual(a, b []string) bool { return true } + // pfStartStabilization enters stabilization mode, suppressing all pf restores // until the VPN's ruleset stops changing. This prevents a death spiral where // ctrld and the VPN repeatedly overwrite each other's pf rules. @@ -1347,7 +1324,6 @@ func (p *prog) ensurePFAnchorActive() bool { } } - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) needsRestore := false @@ -1363,10 +1339,6 @@ func (p *prog) ensurePFAnchorActive() bool { mainLog.Load().Warn().Msg("DNS intercept watchdog: rdr-anchor reference missing from running ruleset") needsRestore = true } - if !strings.Contains(natStr, natAnchorRef) { - mainLog.Load().Warn().Msg("DNS intercept watchdog: nat-anchor reference missing from running ruleset") - needsRestore = true - } if !needsRestore { filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput() @@ -1762,7 +1734,6 @@ func (p *prog) pfInterceptMonitor() { // The reload is safe for VPN interop because it reassembles from the current running // ruleset (pfctl -sr/-sn), preserving all existing anchors and rules. func (p *prog) forceReloadPFMainRuleset() { - natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName) rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName) anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName) @@ -1793,9 +1764,6 @@ func (p *prog) forceReloadPFMainRuleset() { } // Ensure our anchor references are present (they may have been wiped). - if !pfContainsRule(natLines, natAnchorRef) { - natLines = append([]string{natAnchorRef}, natLines...) - } if !pfContainsRule(natLines, rdrAnchorRef) { natLines = append([]string{rdrAnchorRef}, natLines...) } diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index ac9d10b..948162b 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -211,7 +211,11 @@ func (p *prog) serveDNS(listenerNum string) error { proto := proto if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { g.Go(func() error { - s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, handler) + ipv6Handler := handler + if proto == "udp" { + ipv6Handler = wrapIPv6Handler(handler) + } + s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, ipv6Handler) defer s.Shutdown() select { case <-p.stopCh: diff --git a/cmd/cli/rawipv6_darwin.go b/cmd/cli/rawipv6_darwin.go new file mode 100644 index 0000000..0509677 --- /dev/null +++ b/cmd/cli/rawipv6_darwin.go @@ -0,0 +1,163 @@ +//go:build darwin + +package cli + +import ( + "encoding/binary" + "fmt" + "net" + "syscall" + + "github.com/miekg/dns" +) + +// wrapIPv6Handler wraps a DNS handler so that UDP responses on the [::1] listener +// are sent via raw IPv6 sockets instead of the normal sendmsg path. This is needed +// because macOS rejects sendmsg from [::1] to global unicast IPv6 addresses (EINVAL). +func wrapIPv6Handler(h dns.Handler) dns.Handler { + return dns.HandlerFunc(func(w dns.ResponseWriter, r *dns.Msg) { + h.ServeDNS(&rawIPv6Writer{ResponseWriter: w}, r) + }) +} + +// rawIPv6Writer wraps a dns.ResponseWriter for the [::1] IPv6 listener on macOS. +// When pf redirects IPv6 DNS traffic via route-to + rdr to [::1]:53, the original +// client source address is a global unicast IPv6 (e.g., 2607:f0c8:...). macOS +// rejects sendmsg from [::1] to any non-loopback address (EINVAL), so the normal +// WriteMsg fails. This wrapper intercepts UDP writes and sends the response via a +// raw IPv6 socket on lo0, bypassing the kernel's routing validation. +// +// TCP is not handled — IPv6 TCP DNS is blocked by pf rules and falls back to IPv4. +type rawIPv6Writer struct { + dns.ResponseWriter +} + +// WriteMsg packs the DNS message and sends it via raw socket. +func (w *rawIPv6Writer) WriteMsg(m *dns.Msg) error { + data, err := m.Pack() + if err != nil { + return err + } + _, err = w.Write(data) + return err +} + +// Write sends raw DNS response bytes via a raw IPv6/UDP socket on lo0. +// It constructs a UDP packet (header + payload) and sends it using +// IPPROTO_RAW-like behavior via IPV6_HDRINCL-free raw UDP socket. +// +// pf's rdr state table will reverse-translate the addresses on the response: +// - src [::1]:53 → original DNS server IPv6 +// - dst [client]:port → unchanged +func (w *rawIPv6Writer) Write(payload []byte) (int, error) { + localAddr := w.ResponseWriter.LocalAddr() + remoteAddr := w.ResponseWriter.RemoteAddr() + + srcIP, srcPort, err := parseAddrPort(localAddr) + if err != nil { + return 0, fmt.Errorf("rawIPv6Writer: parse local addr %s: %w", localAddr, err) + } + dstIP, dstPort, err := parseAddrPort(remoteAddr) + if err != nil { + return 0, fmt.Errorf("rawIPv6Writer: parse remote addr %s: %w", remoteAddr, err) + } + + // Build UDP packet: 8-byte header + DNS payload. + udpLen := 8 + len(payload) + udpPacket := make([]byte, udpLen) + binary.BigEndian.PutUint16(udpPacket[0:2], uint16(srcPort)) + binary.BigEndian.PutUint16(udpPacket[2:4], uint16(dstPort)) + binary.BigEndian.PutUint16(udpPacket[4:6], uint16(udpLen)) + // Checksum placeholder — filled below. + binary.BigEndian.PutUint16(udpPacket[6:8], 0) + copy(udpPacket[8:], payload) + + // Compute UDP checksum over IPv6 pseudo-header + UDP packet. + // For IPv6, UDP checksum is mandatory (unlike IPv4 where it's optional). + csum := udp6Checksum(srcIP, dstIP, udpPacket) + binary.BigEndian.PutUint16(udpPacket[6:8], csum) + + // Open raw UDP socket. SOCK_RAW with IPPROTO_UDP lets us send + // hand-crafted UDP packets. The kernel adds the IPv6 header. + fd, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_UDP) + if err != nil { + return 0, fmt.Errorf("rawIPv6Writer: socket: %w", err) + } + defer syscall.Close(fd) + + // Bind to lo0 interface so the packet exits on loopback where pf can + // reverse-translate via its rdr state table. + if err := bindToLoopback6(fd); err != nil { + return 0, fmt.Errorf("rawIPv6Writer: bind to lo0: %w", err) + } + + // Send to the client's address. + sa := &syscall.SockaddrInet6{Port: 0} // Port is in the UDP header, not the sockaddr for raw sockets. + copy(sa.Addr[:], dstIP.To16()) + + if err := syscall.Sendto(fd, udpPacket, 0, sa); err != nil { + return 0, fmt.Errorf("rawIPv6Writer: sendto [%s]:%d: %w", dstIP, dstPort, err) + } + + return len(payload), nil +} + +// parseAddrPort extracts IP and port from a net.Addr (supports *net.UDPAddr and string parsing). +func parseAddrPort(addr net.Addr) (net.IP, int, error) { + if ua, ok := addr.(*net.UDPAddr); ok { + return ua.IP, ua.Port, nil + } + host, portStr, err := net.SplitHostPort(addr.String()) + if err != nil { + return nil, 0, err + } + ip := net.ParseIP(host) + if ip == nil { + return nil, 0, fmt.Errorf("invalid IP: %s", host) + } + port, err := net.LookupPort("udp", portStr) + if err != nil { + return nil, 0, err + } + return ip, port, nil +} + +// udp6Checksum computes the UDP checksum over the IPv6 pseudo-header and UDP packet. +// The pseudo-header includes: src IP (16), dst IP (16), UDP length (4), next header (4). +func udp6Checksum(src, dst net.IP, udpPacket []byte) uint16 { + // IPv6 pseudo-header for checksum: + // Source Address (16 bytes) + // Destination Address (16 bytes) + // UDP Length (4 bytes, upper layer packet length) + // Zero (3 bytes) + Next Header (1 byte) = 17 (UDP) + psh := make([]byte, 40) + copy(psh[0:16], src.To16()) + copy(psh[16:32], dst.To16()) + binary.BigEndian.PutUint32(psh[32:36], uint32(len(udpPacket))) + psh[39] = 17 // Next Header: UDP + + // Checksum over pseudo-header + UDP packet. + var sum uint32 + data := append(psh, udpPacket...) + for i := 0; i+1 < len(data); i += 2 { + sum += uint32(binary.BigEndian.Uint16(data[i : i+2])) + } + if len(data)%2 == 1 { + sum += uint32(data[len(data)-1]) << 8 + } + for sum > 0xffff { + sum = (sum >> 16) + (sum & 0xffff) + } + return ^uint16(sum) +} + +// bindToLoopback6 binds a raw IPv6 socket to the loopback interface (lo0) +// and sets the source address to ::1. This ensures the packet exits on lo0 +// where pf's rdr state can reverse-translate the addresses. +func bindToLoopback6(fd int) error { + // Bind source to ::1 — this is the address ctrld is listening on, + // and what pf's rdr state expects as the source of the response. + sa := &syscall.SockaddrInet6{Port: 0} + copy(sa.Addr[:], net.IPv6loopback.To16()) + return syscall.Bind(fd, sa) +} diff --git a/cmd/cli/rawipv6_other.go b/cmd/cli/rawipv6_other.go new file mode 100644 index 0000000..f99895d --- /dev/null +++ b/cmd/cli/rawipv6_other.go @@ -0,0 +1,12 @@ +//go:build !darwin + +package cli + +import "github.com/miekg/dns" + +// wrapIPv6Handler is a no-op on non-darwin platforms. The raw IPv6 response +// writer is only needed on macOS where pf's rdr preserves the original global +// unicast source address, and the kernel rejects sendmsg from [::1] to it. +func wrapIPv6Handler(h dns.Handler) dns.Handler { + return h +} diff --git a/docs/pf-dns-intercept.md b/docs/pf-dns-intercept.md index 6c19925..213619b 100644 --- a/docs/pf-dns-intercept.md +++ b/docs/pf-dns-intercept.md @@ -17,7 +17,7 @@ options (set) → normalization (scrub) → queueing → translation (nat/rdr) | Anchor Type | Section | Purpose | |-------------|---------|---------| | `scrub-anchor` | Normalization | Packet normalization | -| `nat-anchor` | Translation | NAT rules | +| `nat-anchor` | Translation | NAT rules (not used by ctrld) | | `rdr-anchor` | Translation | Redirect rules | | `anchor` | Filtering | Pass/block rules | @@ -122,57 +122,60 @@ Three problems prevent a simple "mirror the IPv4 rules" approach: 3. **sendmsg from `[::1]` to global unicast fails**: Unlike IPv4 where the kernel allows `sendmsg` from `127.0.0.1` to local private IPs (e.g., `10.x.x.x`), macOS/BSD rejects `sendmsg` from `[::1]` to a global unicast IPv6 address with `EINVAL`. Since pf's `rdr` preserves the original source IP (the machine's global IPv6 address), ctrld's reply would fail. -### Solution: nat + rdr + [::1] Listener +### Solution: Raw Socket Response + rdr + [::1] Listener + +**Key insight:** pf's `nat on lo0` doesn't fire for `route-to`'d packets (pf already ran the translation phase on the original outbound interface and skips it on lo0's outbound pass). `rdr` works because it fires on lo0's *inbound* side (a new direction after loopback reflection). So we can't use `nat` to rewrite the source, and any address bound to lo0 (including ULAs like `fd00:53::1`) can't send to global unicast addresses — the kernel segregates lo0's routing. + +Instead, we use a **raw IPv6 socket** to send UDP responses. The `[::1]` listener receives queries normally via `rdr`, but responses are sent via `SOCK_RAW` with `IPPROTO_UDP`, bypassing the kernel's routing validation. The raw socket constructs the UDP packet (header + DNS payload) with correct checksums and sends it on lo0. pf matches the response against the `rdr` state table and reverse-translates the addresses. + +**IPv6 TCP DNS** is blocked (`block return`) and falls back to IPv4 — TCP DNS is rare (truncated responses, zone transfers) and raw socket injection for TCP would require managing the full TCP state machine. ``` -# NAT: rewrite source to ::1 so ctrld can reply -nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1 -nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1 - -# RDR: redirect destination to ctrld's IPv6 listener +# RDR: redirect IPv6 UDP DNS to ctrld's listener (no nat needed) rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> ::1 port 53 -rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> ::1 port 53 -# Filter: route-to forces IPv6 DNS to loopback (mirrors IPv4 rules) +# Filter: route-to forces IPv6 UDP DNS to loopback pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53 -pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53 + +# Block IPv6 TCP DNS — raw socket can't handle TCP; apps fall back to IPv4 +block return out quick on ! lo0 inet6 proto tcp from any to ! ::1 port 53 # Pass on lo0 without state (mirrors IPv4) pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state -pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state # Accept redirected IPv6 DNS with reply-to (mirrors IPv4) -pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to ::1 port 53 +pass in quick on lo0 reply-to lo0 inet6 proto udp from any to ::1 port 53 ``` -### IPv6 Packet Flow +### IPv6 Packet Flow (UDP) ``` Application queries [2607:f0c8:8000:8210::1]:53 (IPv6 DNS server) ↓ -pf filter: "pass out route-to lo0 inet6 ... port 53" → redirects to lo0 +pf filter: "pass out route-to lo0 inet6 proto udp ... port 53" → redirects to lo0 ↓ pf (outbound lo0): "pass out on lo0 inet6 ... no state" → passes ↓ Loopback reflects packet inbound on lo0 ↓ -pf nat: rewrites source 2607:f0c8:...:ec6e → ::1 pf rdr: rewrites dest [2607:f0c8:8000:8210::1]:53 → [::1]:53 +(source remains: 2607:f0c8:...:ec6e — the machine's global IPv6) ↓ -ctrld receives query from [::1]:port → [::1]:53 +ctrld receives query from [2607:f0c8:...:ec6e]:port → [::1]:53 ↓ -ctrld resolves via DoH, replies to [::1]:port (kernel accepts ::1 → ::1) +ctrld resolves via DoH upstream ↓ -pf reverses both translations: - - nat reverse: dest ::1 → 2607:f0c8:...:ec6e (original client) - - rdr reverse: src ::1 → 2607:f0c8:8000:8210::1 (original DNS server) +Raw IPv6 socket sends response: [::1]:53 → [2607:f0c8:...:ec6e]:port +(bypasses kernel routing validation — raw socket on lo0) + ↓ +pf reverses rdr: src [::1]:53 → [2607:f0c8:8000:8210::1]:53 ↓ Application receives response from [2607:f0c8:8000:8210::1]:53 ✓ ``` ### Client IP Recovery -The `nat` rewrites the source to `::1`, so ctrld sees the client as `::1` (loopback). The existing `spoofLoopbackIpInClientInfo()` logic detects this and replaces it with the machine's real RFC1918 IPv4 address (e.g., `10.0.10.211`). This is the same mechanism used when queries arrive from `127.0.0.1` — no client identity is lost. +pf's `rdr` preserves the original source (machine's global IPv6), so ctrld sees the real address. The existing `spoofLoopbackIpInClientInfo()` logic replaces loopback IPs with the machine's real RFC1918 IPv4 address for `X-Cd-Ip` reporting. For IPv6 intercepted queries, the source is already the real address — no spoofing needed. ### IPv6 Listener @@ -180,12 +183,10 @@ The `[::1]` listener reuses the existing infrastructure from Windows (where it w - **Windows**: Always (if IPv6 is available) - **macOS**: Only in intercept mode +On macOS, the UDP handler is wrapped with `rawIPv6Writer` which intercepts `WriteMsg`/`Write` calls and sends responses via a raw IPv6 socket on lo0 instead of the normal `sendmsg` path. + If the `[::1]` listener fails to bind, it logs a warning and continues — the IPv4 listener is primary. -### nat-anchor Requirement - -The `nat` rules in our anchor require a `nat-anchor "com.controld.ctrld"` reference in the main pf ruleset, in addition to the existing `rdr-anchor` and `anchor` references. All pf management functions (inject, remove, verify, watchdog, force-reload) handle all three anchor types. - ## Rule Ordering Within the Anchor pf requires translation rules before filter rules, even within an anchor: @@ -236,7 +237,7 @@ The trickiest part. macOS only processes anchors declared in the active pf rules 1. Read `/etc/pf.conf` 2. If our anchor reference already exists, reload as-is -3. Otherwise, inject `nat-anchor "com.controld.ctrld"` and `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section +3. Otherwise, inject `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section 4. Write to a **temp file** and load with `pfctl -f ` 5. **We never modify `/etc/pf.conf` on disk** — changes are runtime-only and don't survive reboot (ctrld re-injects on every start) @@ -376,5 +377,6 @@ We chose `route-to + rdr` as the best balance of effectiveness and deployability 9. **`pass out quick` exemptions work with route-to** — they fire in the same phase (filter), so `quick` + rule ordering means exempted packets never hit the route-to rule 10. **pf cannot cross-AF redirect** — `rdr on lo0 inet6 ... -> 127.0.0.1` is invalid. IPv6 DNS must be handled by an `[::1]` listener. 11. **`block return` doesn't work for IPv6 DNS** — BSD doesn't deliver ICMPv6 unreachable to unconnected UDP sockets (`sendto`). Apps timeout waiting for a response that never comes. -12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. `nat` on lo0 is required to rewrite the source. -13. **`nat-anchor` is separate from `rdr-anchor`** — pf requires both in the main ruleset for nat and rdr rules in an anchor to be evaluated. `rdr-anchor` alone does not cover nat rules. +12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. Solved with raw socket response injection (SOCK_RAW + IPPROTO_UDP on lo0). +13. **`nat on lo0` doesn't fire for `route-to`'d packets** — pf runs translation on the original outbound interface (en0), then skips it on lo0's outbound pass. `rdr` works because lo0 inbound is a genuinely new direction. Any lo0 address (including ULAs) can't route to global unicast — the kernel segregates lo0's routing table. +14. **Raw IPv6 sockets bypass routing validation** — `SOCK_RAW` with `IPPROTO_UDP` can send from `::1` to global unicast on lo0, unlike normal `SOCK_DGRAM` sockets. The kernel doesn't apply the same routing checks for raw sockets. From c55e2a722cd8e8c274b17bbf277582b1cc38881c Mon Sep 17 00:00:00 2001 From: Codescribe Date: Mon, 30 Mar 2026 19:49:21 -0400 Subject: [PATCH 07/16] fix: declare ipv6Handler as dns.Handler to match wrapIPv6Handler return type The handler variable is dns.HandlerFunc but wrapIPv6Handler returns dns.Handler (interface). Go's type inference picked dns.HandlerFunc for ipv6Handler, causing a compile error on assignment. Explicit type declaration fixes the mismatch. --- cmd/cli/dns_proxy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index 948162b..cb8393d 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -211,7 +211,7 @@ func (p *prog) serveDNS(listenerNum string) error { proto := proto if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { g.Go(func() error { - ipv6Handler := handler + var ipv6Handler dns.Handler = handler if proto == "udp" { ipv6Handler = wrapIPv6Handler(handler) } From 3f59cdad1a6d2e9909fa064a3a43b1adb0f2c517 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Mon, 30 Mar 2026 20:52:35 -0400 Subject: [PATCH 08/16] fix: block IPv6 DNS in intercept mode, remove raw socket approach IPv6 DNS interception on macOS is not feasible with current pf capabilities. The kernel rejects sendmsg from [::1] to global unicast (EINVAL), nat on lo0 doesn't fire for route-to'd packets, raw sockets bypass routing but pf doesn't match them against rdr state, and DIOCNATLOOK can't be used because bind() fails for non-local addresses. Replace all IPv6 interception code with a simple pf block rule: block out quick on ! lo0 inet6 proto { udp, tcp } from any to any port 53 macOS automatically retries DNS over IPv4 when IPv6 is blocked. Changes: - Remove rawipv6_darwin.go and rawipv6_other.go - Remove [::1] listener spawn on macOS (needLocalIPv6Listener returns false) - Remove IPv6 rdr, route-to, pass, and reply-to pf rules - Add block rule for all outbound IPv6 DNS - Update docs/pf-dns-intercept.md with what was tried and why it failed --- cmd/cli/dns_intercept_darwin.go | 26 ++--- cmd/cli/dns_proxy.go | 23 ++--- cmd/cli/rawipv6_darwin.go | 163 -------------------------------- cmd/cli/rawipv6_other.go | 12 --- docs/pf-dns-intercept.md | 75 ++++----------- 5 files changed, 41 insertions(+), 258 deletions(-) delete mode 100644 cmd/cli/rawipv6_darwin.go delete mode 100644 cmd/cli/rawipv6_other.go diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go index 0854efc..62fc73f 100644 --- a/cmd/cli/dns_intercept_darwin.go +++ b/cmd/cli/dns_intercept_darwin.go @@ -796,11 +796,11 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // proven by commit 51cf029 where responses were silently dropped. rules.WriteString("# --- Translation rules (rdr) ---\n") - listenerAddr6 := fmt.Sprintf("::1 port %d", listenerPort) rules.WriteString("# Redirect DNS on loopback to ctrld's listener.\n") rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto udp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto tcp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr)) - rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6)) + // No IPv6 rdr — IPv6 DNS is blocked at the filter level (see below). + rules.WriteString("\n") // --- Filtering rules --- rules.WriteString("# --- Filtering rules (pass) ---\n\n") @@ -962,7 +962,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { } rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto udp from any to ! %s port 53\n", iface, listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto tcp from any to ! %s port 53\n", iface, listenerIP)) - rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto udp from any to ! ::1 port 53\n", iface)) + // No IPv6 route-to — IPv6 DNS is blocked, not intercepted. } rules.WriteString("\n") } @@ -982,13 +982,14 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! %s port 53\n", listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! %s port 53\n\n", listenerIP)) - // Force remaining outbound IPv6 UDP DNS through loopback for interception. - // IPv6 TCP DNS is blocked instead — raw socket response injection only handles UDP, - // and TCP DNS is rare (truncated responses, zone transfers). Apps fall back to IPv4 TCP. - rules.WriteString("# Force remaining outbound IPv6 UDP DNS through loopback for interception.\n") - rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53\n") - rules.WriteString("# Block IPv6 TCP DNS — raw socket can't handle TCP; apps fall back to IPv4.\n") - rules.WriteString("block return out quick on ! lo0 inet6 proto tcp from any to ! ::1 port 53\n\n") + // Block all outbound IPv6 DNS. ctrld only intercepts IPv4 DNS via the loopback + // redirect. IPv6 DNS interception on macOS is not feasible because the kernel rejects + // sendmsg from [::1] to global unicast IPv6 (EINVAL), and pf's nat-on-lo0 doesn't + // fire for route-to'd packets. Blocking forces macOS to fall back to IPv4 DNS, + // which is fully intercepted. See docs/pf-dns-intercept.md for details. + rules.WriteString("# Block outbound IPv6 DNS — ctrld intercepts IPv4 only.\n") + rules.WriteString("# macOS falls back to IPv4 DNS automatically.\n") + rules.WriteString("block out quick on ! lo0 inet6 proto { udp, tcp } from any to any port 53\n\n") // Allow route-to'd DNS packets to pass outbound on lo0. // Without this, VPN firewalls with "block drop all" (e.g., Windscribe) drop the packet @@ -1000,7 +1001,8 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { rules.WriteString("# Pass route-to'd DNS outbound on lo0 — no state to avoid bypassing rdr inbound.\n") rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto udp from any to ! %s port 53 no state\n", listenerIP)) rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto tcp from any to ! %s port 53 no state\n", listenerIP)) - rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state\n\n") + // No IPv6 lo0 pass — IPv6 DNS is blocked, not routed through lo0. + rules.WriteString("\n") // Allow the redirected traffic through on loopback (inbound after rdr). // @@ -1015,7 +1017,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string { // (source 127.0.0.1 → original DNS server IP, e.g., 10.255.255.3). rules.WriteString("# Accept redirected DNS — reply-to lo0 forces response through loopback.\n") rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to %s\n", listenerAddr)) - rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto udp from any to %s\n", listenerAddr6)) + // No IPv6 pass-in — IPv6 DNS is blocked, not redirected to [::1]. return rules.String() } diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index cb8393d..f1aa810 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -211,11 +211,7 @@ func (p *prog) serveDNS(listenerNum string) error { proto := proto if needLocalIPv6Listener(p.cfg.Service.InterceptMode) { g.Go(func() error { - var ipv6Handler dns.Handler = handler - if proto == "udp" { - ipv6Handler = wrapIPv6Handler(handler) - } - s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, ipv6Handler) + s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, handler) defer s.Shutdown() select { case <-p.stopCh: @@ -907,16 +903,13 @@ func needLocalIPv6Listener(interceptMode string) bool { mainLog.Load().Debug().Msg("IPv6 listener: enabled (Windows)") return true } - // On macOS in intercept mode, pf can't redirect IPv6 DNS to an IPv4 listener (cross-AF rdr - // not supported), and blocking IPv6 DNS causes ~1s timeouts (BSD doesn't deliver ICMP errors - // to unconnected UDP sockets). Listening on [::1] lets us intercept IPv6 DNS directly. - // - // NOTE: We accept the intercept mode string as a parameter instead of reading the global - // dnsIntercept bool, because dnsIntercept is derived later in prog.run() — after the - // listener goroutines are already spawned. Same pattern as the port 5354 fallback fix (MR !860). - if (interceptMode == "dns" || interceptMode == "hard") && runtime.GOOS == "darwin" { - mainLog.Load().Debug().Msg("IPv6 listener: enabled (macOS intercept mode)") - return true + // macOS: IPv6 DNS is blocked at the pf level (not intercepted). The [::1] listener + // is not needed — macOS falls back to IPv4 DNS automatically. See #507 and + // docs/pf-dns-intercept.md for why IPv6 interception on macOS is not feasible + // (sendmsg EINVAL from ::1 to global unicast, nat-on-lo0 doesn't fire for route-to). + if runtime.GOOS == "darwin" { + mainLog.Load().Debug().Msg("IPv6 listener: not needed (macOS — IPv6 DNS blocked at pf, fallback to IPv4)") + return false } mainLog.Load().Debug().Str("os", runtime.GOOS).Str("interceptMode", interceptMode).Msg("IPv6 listener: not needed") return false diff --git a/cmd/cli/rawipv6_darwin.go b/cmd/cli/rawipv6_darwin.go deleted file mode 100644 index 0509677..0000000 --- a/cmd/cli/rawipv6_darwin.go +++ /dev/null @@ -1,163 +0,0 @@ -//go:build darwin - -package cli - -import ( - "encoding/binary" - "fmt" - "net" - "syscall" - - "github.com/miekg/dns" -) - -// wrapIPv6Handler wraps a DNS handler so that UDP responses on the [::1] listener -// are sent via raw IPv6 sockets instead of the normal sendmsg path. This is needed -// because macOS rejects sendmsg from [::1] to global unicast IPv6 addresses (EINVAL). -func wrapIPv6Handler(h dns.Handler) dns.Handler { - return dns.HandlerFunc(func(w dns.ResponseWriter, r *dns.Msg) { - h.ServeDNS(&rawIPv6Writer{ResponseWriter: w}, r) - }) -} - -// rawIPv6Writer wraps a dns.ResponseWriter for the [::1] IPv6 listener on macOS. -// When pf redirects IPv6 DNS traffic via route-to + rdr to [::1]:53, the original -// client source address is a global unicast IPv6 (e.g., 2607:f0c8:...). macOS -// rejects sendmsg from [::1] to any non-loopback address (EINVAL), so the normal -// WriteMsg fails. This wrapper intercepts UDP writes and sends the response via a -// raw IPv6 socket on lo0, bypassing the kernel's routing validation. -// -// TCP is not handled — IPv6 TCP DNS is blocked by pf rules and falls back to IPv4. -type rawIPv6Writer struct { - dns.ResponseWriter -} - -// WriteMsg packs the DNS message and sends it via raw socket. -func (w *rawIPv6Writer) WriteMsg(m *dns.Msg) error { - data, err := m.Pack() - if err != nil { - return err - } - _, err = w.Write(data) - return err -} - -// Write sends raw DNS response bytes via a raw IPv6/UDP socket on lo0. -// It constructs a UDP packet (header + payload) and sends it using -// IPPROTO_RAW-like behavior via IPV6_HDRINCL-free raw UDP socket. -// -// pf's rdr state table will reverse-translate the addresses on the response: -// - src [::1]:53 → original DNS server IPv6 -// - dst [client]:port → unchanged -func (w *rawIPv6Writer) Write(payload []byte) (int, error) { - localAddr := w.ResponseWriter.LocalAddr() - remoteAddr := w.ResponseWriter.RemoteAddr() - - srcIP, srcPort, err := parseAddrPort(localAddr) - if err != nil { - return 0, fmt.Errorf("rawIPv6Writer: parse local addr %s: %w", localAddr, err) - } - dstIP, dstPort, err := parseAddrPort(remoteAddr) - if err != nil { - return 0, fmt.Errorf("rawIPv6Writer: parse remote addr %s: %w", remoteAddr, err) - } - - // Build UDP packet: 8-byte header + DNS payload. - udpLen := 8 + len(payload) - udpPacket := make([]byte, udpLen) - binary.BigEndian.PutUint16(udpPacket[0:2], uint16(srcPort)) - binary.BigEndian.PutUint16(udpPacket[2:4], uint16(dstPort)) - binary.BigEndian.PutUint16(udpPacket[4:6], uint16(udpLen)) - // Checksum placeholder — filled below. - binary.BigEndian.PutUint16(udpPacket[6:8], 0) - copy(udpPacket[8:], payload) - - // Compute UDP checksum over IPv6 pseudo-header + UDP packet. - // For IPv6, UDP checksum is mandatory (unlike IPv4 where it's optional). - csum := udp6Checksum(srcIP, dstIP, udpPacket) - binary.BigEndian.PutUint16(udpPacket[6:8], csum) - - // Open raw UDP socket. SOCK_RAW with IPPROTO_UDP lets us send - // hand-crafted UDP packets. The kernel adds the IPv6 header. - fd, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_UDP) - if err != nil { - return 0, fmt.Errorf("rawIPv6Writer: socket: %w", err) - } - defer syscall.Close(fd) - - // Bind to lo0 interface so the packet exits on loopback where pf can - // reverse-translate via its rdr state table. - if err := bindToLoopback6(fd); err != nil { - return 0, fmt.Errorf("rawIPv6Writer: bind to lo0: %w", err) - } - - // Send to the client's address. - sa := &syscall.SockaddrInet6{Port: 0} // Port is in the UDP header, not the sockaddr for raw sockets. - copy(sa.Addr[:], dstIP.To16()) - - if err := syscall.Sendto(fd, udpPacket, 0, sa); err != nil { - return 0, fmt.Errorf("rawIPv6Writer: sendto [%s]:%d: %w", dstIP, dstPort, err) - } - - return len(payload), nil -} - -// parseAddrPort extracts IP and port from a net.Addr (supports *net.UDPAddr and string parsing). -func parseAddrPort(addr net.Addr) (net.IP, int, error) { - if ua, ok := addr.(*net.UDPAddr); ok { - return ua.IP, ua.Port, nil - } - host, portStr, err := net.SplitHostPort(addr.String()) - if err != nil { - return nil, 0, err - } - ip := net.ParseIP(host) - if ip == nil { - return nil, 0, fmt.Errorf("invalid IP: %s", host) - } - port, err := net.LookupPort("udp", portStr) - if err != nil { - return nil, 0, err - } - return ip, port, nil -} - -// udp6Checksum computes the UDP checksum over the IPv6 pseudo-header and UDP packet. -// The pseudo-header includes: src IP (16), dst IP (16), UDP length (4), next header (4). -func udp6Checksum(src, dst net.IP, udpPacket []byte) uint16 { - // IPv6 pseudo-header for checksum: - // Source Address (16 bytes) - // Destination Address (16 bytes) - // UDP Length (4 bytes, upper layer packet length) - // Zero (3 bytes) + Next Header (1 byte) = 17 (UDP) - psh := make([]byte, 40) - copy(psh[0:16], src.To16()) - copy(psh[16:32], dst.To16()) - binary.BigEndian.PutUint32(psh[32:36], uint32(len(udpPacket))) - psh[39] = 17 // Next Header: UDP - - // Checksum over pseudo-header + UDP packet. - var sum uint32 - data := append(psh, udpPacket...) - for i := 0; i+1 < len(data); i += 2 { - sum += uint32(binary.BigEndian.Uint16(data[i : i+2])) - } - if len(data)%2 == 1 { - sum += uint32(data[len(data)-1]) << 8 - } - for sum > 0xffff { - sum = (sum >> 16) + (sum & 0xffff) - } - return ^uint16(sum) -} - -// bindToLoopback6 binds a raw IPv6 socket to the loopback interface (lo0) -// and sets the source address to ::1. This ensures the packet exits on lo0 -// where pf's rdr state can reverse-translate the addresses. -func bindToLoopback6(fd int) error { - // Bind source to ::1 — this is the address ctrld is listening on, - // and what pf's rdr state expects as the source of the response. - sa := &syscall.SockaddrInet6{Port: 0} - copy(sa.Addr[:], net.IPv6loopback.To16()) - return syscall.Bind(fd, sa) -} diff --git a/cmd/cli/rawipv6_other.go b/cmd/cli/rawipv6_other.go deleted file mode 100644 index f99895d..0000000 --- a/cmd/cli/rawipv6_other.go +++ /dev/null @@ -1,12 +0,0 @@ -//go:build !darwin - -package cli - -import "github.com/miekg/dns" - -// wrapIPv6Handler is a no-op on non-darwin platforms. The raw IPv6 response -// writer is only needed on macOS where pf's rdr preserves the original global -// unicast source address, and the kernel rejects sendmsg from [::1] to it. -func wrapIPv6Handler(h dns.Handler) dns.Handler { - return h -} diff --git a/docs/pf-dns-intercept.md b/docs/pf-dns-intercept.md index 213619b..f8cbb42 100644 --- a/docs/pf-dns-intercept.md +++ b/docs/pf-dns-intercept.md @@ -122,70 +122,31 @@ Three problems prevent a simple "mirror the IPv4 rules" approach: 3. **sendmsg from `[::1]` to global unicast fails**: Unlike IPv4 where the kernel allows `sendmsg` from `127.0.0.1` to local private IPs (e.g., `10.x.x.x`), macOS/BSD rejects `sendmsg` from `[::1]` to a global unicast IPv6 address with `EINVAL`. Since pf's `rdr` preserves the original source IP (the machine's global IPv6 address), ctrld's reply would fail. -### Solution: Raw Socket Response + rdr + [::1] Listener +### Solution: Block IPv6 DNS, Fallback to IPv4 -**Key insight:** pf's `nat on lo0` doesn't fire for `route-to`'d packets (pf already ran the translation phase on the original outbound interface and skips it on lo0's outbound pass). `rdr` works because it fires on lo0's *inbound* side (a new direction after loopback reflection). So we can't use `nat` to rewrite the source, and any address bound to lo0 (including ULAs like `fd00:53::1`) can't send to global unicast addresses — the kernel segregates lo0's routing. - -Instead, we use a **raw IPv6 socket** to send UDP responses. The `[::1]` listener receives queries normally via `rdr`, but responses are sent via `SOCK_RAW` with `IPPROTO_UDP`, bypassing the kernel's routing validation. The raw socket constructs the UDP packet (header + DNS payload) with correct checksums and sends it on lo0. pf matches the response against the `rdr` state table and reverse-translates the addresses. - -**IPv6 TCP DNS** is blocked (`block return`) and falls back to IPv4 — TCP DNS is rare (truncated responses, zone transfers) and raw socket injection for TCP would require managing the full TCP state machine. +After extensive testing (#507), IPv6 DNS interception on macOS is not feasible with current pf capabilities. The solution is to block all outbound IPv6 DNS: ``` -# RDR: redirect IPv6 UDP DNS to ctrld's listener (no nat needed) -rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> ::1 port 53 - -# Filter: route-to forces IPv6 UDP DNS to loopback -pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53 - -# Block IPv6 TCP DNS — raw socket can't handle TCP; apps fall back to IPv4 -block return out quick on ! lo0 inet6 proto tcp from any to ! ::1 port 53 - -# Pass on lo0 without state (mirrors IPv4) -pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state - -# Accept redirected IPv6 DNS with reply-to (mirrors IPv4) -pass in quick on lo0 reply-to lo0 inet6 proto udp from any to ::1 port 53 +block out quick on ! lo0 inet6 proto { udp, tcp } from any to any port 53 ``` -### IPv6 Packet Flow (UDP) +macOS automatically retries DNS over IPv4 when the IPv6 path is blocked. The IPv4 path is fully intercepted via the normal route-to + rdr mechanism. Impact is minimal — at most ~1s latency on the very first DNS query while the IPv6 attempt is blocked. -``` -Application queries [2607:f0c8:8000:8210::1]:53 (IPv6 DNS server) - ↓ -pf filter: "pass out route-to lo0 inet6 proto udp ... port 53" → redirects to lo0 - ↓ -pf (outbound lo0): "pass out on lo0 inet6 ... no state" → passes - ↓ -Loopback reflects packet inbound on lo0 - ↓ -pf rdr: rewrites dest [2607:f0c8:8000:8210::1]:53 → [::1]:53 -(source remains: 2607:f0c8:...:ec6e — the machine's global IPv6) - ↓ -ctrld receives query from [2607:f0c8:...:ec6e]:port → [::1]:53 - ↓ -ctrld resolves via DoH upstream - ↓ -Raw IPv6 socket sends response: [::1]:53 → [2607:f0c8:...:ec6e]:port -(bypasses kernel routing validation — raw socket on lo0) - ↓ -pf reverses rdr: src [::1]:53 → [2607:f0c8:8000:8210::1]:53 - ↓ -Application receives response from [2607:f0c8:8000:8210::1]:53 ✓ -``` +### What Was Tried and Why It Failed -### Client IP Recovery - -pf's `rdr` preserves the original source (machine's global IPv6), so ctrld sees the real address. The existing `spoofLoopbackIpInClientInfo()` logic replaces loopback IPs with the machine's real RFC1918 IPv4 address for `X-Cd-Ip` reporting. For IPv6 intercepted queries, the source is already the real address — no spoofing needed. +| Approach | Result | +|----------|--------| +| `nat on lo0 inet6` to rewrite source to `::1` | pf skips translation on second interface pass — nat doesn't fire for route-to'd packets arriving on lo0 | +| ULA address on lo0 (`fd00:53::1`) | Kernel rejects: `EHOSTUNREACH` — lo0's routing table is segregated from global unicast | +| Raw IPv6 socket (`SOCK_RAW` + `IPPROTO_UDP`) | Bypasses sendmsg validation, but pf doesn't match raw socket packets against rdr state — response arrives from `::1` not the original server | +| `DIOCNATLOOK` to get original dest + raw socket from that addr | Can't `bind()` to a non-local address (`EADDRNOTAVAIL`) — macOS has no `IPV6_HDRINCL` for source spoofing | +| BPF packet injection on lo0 | Theoretically possible but extremely complex — not justified for the marginal benefit | ### IPv6 Listener -The `[::1]` listener reuses the existing infrastructure from Windows (where it was added for the same reason — can't suppress IPv6 DNS resolvers from the system config). The `needLocalIPv6Listener()` function gates it, returning `true` on: -- **Windows**: Always (if IPv6 is available) -- **macOS**: Only in intercept mode - -On macOS, the UDP handler is wrapped with `rawIPv6Writer` which intercepts `WriteMsg`/`Write` calls and sends responses via a raw IPv6 socket on lo0 instead of the normal `sendmsg` path. - -If the `[::1]` listener fails to bind, it logs a warning and continues — the IPv4 listener is primary. +The `[::1]` listener is used on: +- **Windows**: Always (if IPv6 is available) — Windows can't easily suppress IPv6 DNS resolvers +- **macOS**: **Not used** — IPv6 DNS is blocked at pf, no listener needed ## Rule Ordering Within the Anchor @@ -377,6 +338,8 @@ We chose `route-to + rdr` as the best balance of effectiveness and deployability 9. **`pass out quick` exemptions work with route-to** — they fire in the same phase (filter), so `quick` + rule ordering means exempted packets never hit the route-to rule 10. **pf cannot cross-AF redirect** — `rdr on lo0 inet6 ... -> 127.0.0.1` is invalid. IPv6 DNS must be handled by an `[::1]` listener. 11. **`block return` doesn't work for IPv6 DNS** — BSD doesn't deliver ICMPv6 unreachable to unconnected UDP sockets (`sendto`). Apps timeout waiting for a response that never comes. -12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. Solved with raw socket response injection (SOCK_RAW + IPPROTO_UDP on lo0). +12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address (`EINVAL`). This is the fundamental asymmetry that makes IPv6 DNS interception infeasible. 13. **`nat on lo0` doesn't fire for `route-to`'d packets** — pf runs translation on the original outbound interface (en0), then skips it on lo0's outbound pass. `rdr` works because lo0 inbound is a genuinely new direction. Any lo0 address (including ULAs) can't route to global unicast — the kernel segregates lo0's routing table. -14. **Raw IPv6 sockets bypass routing validation** — `SOCK_RAW` with `IPPROTO_UDP` can send from `::1` to global unicast on lo0, unlike normal `SOCK_DGRAM` sockets. The kernel doesn't apply the same routing checks for raw sockets. +14. **Raw IPv6 sockets bypass routing validation but pf doesn't match them** — `SOCK_RAW` can send from `::1` to global unicast, but pf treats raw socket packets as new connections (not matching rdr state), so reverse-translation doesn't happen. The client sees `::1` as the source, not the original DNS server. +15. **`DIOCNATLOOK` can find the original dest but you can't use it** — The ioctl returns the pre-rdr destination, but `bind()` fails with `EADDRNOTAVAIL` because it's not a local address. macOS IPv6 raw sockets don't support `IPV6_HDRINCL` for source spoofing. +16. **Blocking IPv6 DNS is the pragmatic solution** — macOS automatically retries over IPv4. The ~1s penalty on the first blocked query is negligible compared to the complexity of working around the kernel's IPv6 loopback restrictions. From 839b8236e7028e9f7a5bbbdf041bd9f28e9f7ce0 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Thu, 2 Apr 2026 22:59:34 +0700 Subject: [PATCH 09/16] docs: add known issue for daemon crashing on Merlin --- docs/known-issues.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/known-issues.md b/docs/known-issues.md index 0d13bcc..e9a897b 100644 --- a/docs/known-issues.md +++ b/docs/known-issues.md @@ -22,6 +22,34 @@ This document outlines known issues with ctrld and their current status, workaro --- +## Merlin Issues + +### Daemon Crashing on `Ctrl+C` + +**Issue**: `ctrld` daemon terminates unexpectedly after stopping a log tailing command. This typically occurs when running the daemon and the log viewer within the same SSH session on ASUSWRT-Merlin routers. + +**Description** + +The issue is caused by `Signal Propagation` within a shared `Process Group (PGID)`. + +Steps to reproduce: + +1. You start the daemon manually: `ctrld start --cd=`. +2. You view internal logs in the same terminal: `ctrld log tail`. +3. You press `Ctrl+C` to stop viewing logs. +4. The `ctrld` daemon service stops immediately along with the log command. + +When you execute commands sequentially in a single interactive SSH session on Merlin, the shell often assigns them to the same Process Group. In Linux, the `SIGINT` signal (triggered by `Ctrl+C`) is not just sent to the foreground application, but is frequently propagated to every process belonging to that specific process group. + +Because the `ctrld` daemon remains "attached" to the terminal session's process group, it "hears" the interrupt signal intended for the `log tail` command and shuts down. + +**Workarounds**: + +To isolate the signals, avoid running the log viewer in the same window as the daemon: +* **Window A:** Start the daemon and leave it running. +* **Window B:** Open a new SSH connection to run `ctrld log tail`. +Because Window B has a different **Session ID** and **Process Group ID**, pressing `Ctrl+C` in Window B will not affect the process in Window A. + ## Contributing to Known Issues If you encounter an issue not listed here, please: From eaa171f66f92995f5ebafc1b8c2aa5d30f5cf264 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Thu, 2 Apr 2026 11:47:20 -0400 Subject: [PATCH 10/16] doq: configure QUIC keep-alive and retry on idle timeout Pass a quic.Config with KeepAlivePeriod (15s) to DoQ dial calls instead of nil, so pooled connections send periodic QUIC PINGs to stay alive and detect dead paths proactively. Also add IdleTimeoutError to the DoQ retry conditions alongside io.EOF, so stale pooled connections trigger a transparent retry instead of propagating as a query failure. --- doq.go | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/doq.go b/doq.go index eb7ed1c..9d0bdd9 100644 --- a/doq.go +++ b/doq.go @@ -42,11 +42,12 @@ const doqPoolSize = 16 // doqConnPool manages a pool of QUIC connections for DoQ queries using a buffered channel. type doqConnPool struct { - uc *UpstreamConfig - addrs []string - port string - tlsConfig *tls.Config - conns chan *doqConn + uc *UpstreamConfig + addrs []string + port string + tlsConfig *tls.Config + quicConfig *quic.Config + conns chan *doqConn } type doqConn struct { @@ -65,12 +66,17 @@ func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { ServerName: uc.Domain, } + quicConfig := &quic.Config{ + KeepAlivePeriod: 15 * time.Second, + } + pool := &doqConnPool{ - uc: uc, - addrs: addrs, - port: port, - tlsConfig: tlsConfig, - conns: make(chan *doqConn, doqPoolSize), + uc: uc, + addrs: addrs, + port: port, + tlsConfig: tlsConfig, + quicConfig: quicConfig, + conns: make(chan *doqConn, doqPoolSize), } // Use SetFinalizer here because we need to call a method on the pool itself. @@ -85,12 +91,17 @@ func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { // Resolve performs a DNS query using a pooled QUIC connection. func (p *doqConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { - // Retry logic for io.EOF errors (as per original implementation) + // Retry logic for transient errors: io.EOF (connection reset) and + // IdleTimeoutError (stale pooled connection timed out). for range 5 { answer, err := p.doResolve(ctx, msg) if err == io.EOF { continue } + var idleErr *quic.IdleTimeoutError + if errors.As(err, &idleErr) { + continue + } if err != nil { return nil, wrapCertificateVerificationError(err) } @@ -226,7 +237,7 @@ func (p *doqConnPool) dialConn(ctx context.Context) (string, *quic.Conn, error) udpConn.Close() return "", nil, err } - conn, err := quic.DialEarly(ctx, udpConn, remoteAddr, p.tlsConfig, nil) + conn, err := quic.DialEarly(ctx, udpConn, remoteAddr, p.tlsConfig, p.quicConfig) if err != nil { udpConn.Close() return "", nil, err @@ -241,7 +252,7 @@ func (p *doqConnPool) dialConn(ctx context.Context) (string, *quic.Conn, error) } pd := &quicParallelDialer{} - conn, err := pd.Dial(ctx, dialAddrs, p.tlsConfig, nil) + conn, err := pd.Dial(ctx, dialAddrs, p.tlsConfig, p.quicConfig) if err != nil { return "", nil, err } From ed981043840ee72ef7335b0256fb046b55e074ff Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Fri, 10 Apr 2026 14:56:43 +0700 Subject: [PATCH 11/16] doq: use OpenStreamSync and retry on StreamLimitReachedError Replace conn.OpenStream (non-blocking) with conn.OpenStreamSync so that the resolver waits for the server's MAX_STREAMS credit replenishment frame instead of immediately failing when the stream limit is temporarily exhausted. Also retry on StreamLimitReachedError as defense-in-depth for servers that are slow or fail to send MAX_STREAMS updates. --- doq.go | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/doq.go b/doq.go index 9d0bdd9..9729607 100644 --- a/doq.go +++ b/doq.go @@ -91,8 +91,9 @@ func newDOQConnPool(uc *UpstreamConfig, addrs []string) *doqConnPool { // Resolve performs a DNS query using a pooled QUIC connection. func (p *doqConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, error) { - // Retry logic for transient errors: io.EOF (connection reset) and - // IdleTimeoutError (stale pooled connection timed out). + // Retry logic for transient errors: io.EOF (connection reset), + // IdleTimeoutError (stale pooled connection timed out), and + // StreamLimitReachedError (stream credit exhausted before server MAX_STREAMS arrived). for range 5 { answer, err := p.doResolve(ctx, msg) if err == io.EOF { @@ -102,6 +103,10 @@ func (p *doqConnPool) Resolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, erro if errors.As(err, &idleErr) { continue } + var streamLimitErr quic.StreamLimitReachedError + if errors.As(err, &streamLimitErr) { + continue + } if err != nil { return nil, wrapCertificateVerificationError(err) } @@ -126,18 +131,25 @@ func (p *doqConnPool) doResolve(ctx context.Context, msg *dns.Msg) (*dns.Msg, er return nil, err } - // Open a new stream for this query - stream, err := conn.OpenStream() + // Ensure the context has a deadline before calling OpenStreamSync, which + // blocks until the server sends a MAX_STREAMS update. Without a deadline the + // call could block indefinitely when the server never sends the update. + deadline, ok := ctx.Deadline() + if !ok { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, 5*time.Second) + defer cancel() + deadline, _ = ctx.Deadline() + } + + // OpenStreamSync blocks until the server's MAX_STREAMS credit arrives, + // avoiding the StreamLimitReachedError race that OpenStream (non-blocking) + // triggers when the credit replenishment frame is still in flight. + stream, err := conn.OpenStreamSync(ctx) if err != nil { p.putConn(conn, false) return nil, err } - - // Set deadline - deadline, ok := ctx.Deadline() - if !ok { - deadline = time.Now().Add(5 * time.Second) - } _ = stream.SetDeadline(deadline) // Write message length (2 bytes) followed by message From d1ea70d6884696ca4ff23e791ece3a05ede996a3 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Mon, 20 Apr 2026 14:28:27 +0700 Subject: [PATCH 12/16] fix: prevent panic on network change during SetSelfIP SetSelfIP unconditionally accessed t.dhcp, but t.dhcp is only initialized when DHCP discovery is enabled. A network change event can fire SetSelfIP regardless of the discovery configuration, causing a nil pointer dereference. Guard the t.dhcp access with a nil check so the self IP is still updated on the Table even when DHCP discovery is disabled. --- internal/clientinfo/client_info.go | 6 ++- internal/clientinfo/client_info_test.go | 55 +++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/internal/clientinfo/client_info.go b/internal/clientinfo/client_info.go index f69b670..acae754 100644 --- a/internal/clientinfo/client_info.go +++ b/internal/clientinfo/client_info.go @@ -173,8 +173,10 @@ func (t *Table) SetSelfIP(ip string) { t.selfIPLock.Lock() defer t.selfIPLock.Unlock() t.selfIP = ip - t.dhcp.selfIP = t.selfIP - t.dhcp.addSelf() + if t.dhcp != nil { + t.dhcp.selfIP = t.selfIP + t.dhcp.addSelf() + } } // initSelfDiscover initializes necessary client metadata for self query. diff --git a/internal/clientinfo/client_info_test.go b/internal/clientinfo/client_info_test.go index b5bdfa5..a1e22a6 100644 --- a/internal/clientinfo/client_info_test.go +++ b/internal/clientinfo/client_info_test.go @@ -1,9 +1,64 @@ package clientinfo import ( + "sync" "testing" ) +// TestTable_SetSelfIP_NilDHCP ensures SetSelfIP does not panic when t.dhcp is +// nil, which happens when DHCP discovery is disabled and the network-change +// callback fires before or without initialisation. +func TestTable_SetSelfIP_NilDHCP(t *testing.T) { + table := &Table{} // dhcp is nil + // Must not panic. + table.SetSelfIP("192.168.1.1") + if got := table.SelfIP(); got != "192.168.1.1" { + t.Fatalf("SelfIP() = %q, want %q", got, "192.168.1.1") + } +} + +// TestTable_SetSelfIP_UpdatesDHCP ensures SetSelfIP propagates the new IP to +// the dhcp discover and calls addSelf when dhcp is initialised. +func TestTable_SetSelfIP_UpdatesDHCP(t *testing.T) { + table := &Table{ + dhcp: &dhcp{selfIP: "10.0.0.1"}, + } + table.SetSelfIP("10.0.0.2") + if got := table.SelfIP(); got != "10.0.0.2" { + t.Fatalf("SelfIP() = %q, want %q", got, "10.0.0.2") + } + if table.dhcp.selfIP != "10.0.0.2" { + t.Fatalf("dhcp.selfIP = %q, want %q", table.dhcp.selfIP, "10.0.0.2") + } +} + +// TestTable_SetSelfIP_Concurrent ensures concurrent calls to SetSelfIP do not +// race, regardless of whether dhcp is nil or not. +func TestTable_SetSelfIP_Concurrent(t *testing.T) { + for _, tc := range []struct { + name string + table *Table + }{ + {"nil dhcp", &Table{}}, + {"with dhcp", &Table{dhcp: &dhcp{}}}, + } { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + var wg sync.WaitGroup + for range 10 { + wg.Add(1) + go func() { + defer wg.Done() + tc.table.SetSelfIP("192.168.1.1") + _ = tc.table.SelfIP() + }() + } + wg.Wait() + }) + } +} + func Test_normalizeIP(t *testing.T) { tests := []struct { name string From afed925404c0bb9ca98914aa756ce269a78e5365 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Tue, 21 Apr 2026 17:40:51 -0400 Subject: [PATCH 13/16] log: persist internal runtime logs to disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add file-backed persistence to the internal logWriter so runtime logs survive service restarts. When internal logging is enabled (CD mode, no explicit log_path), writes are teed to both the existing in-memory ring buffer and a rotated file on disk (ctrld.log in the home directory). File rotation: 5MB max with 1 backup (ctrld.log.1), so max ~10MB on disk. Log view/send now reads from the persisted files (including backup) to provide complete history across restarts. Live tail continues to use the in-memory subscriber mechanism unchanged. Activation: same conditions as existing internal logging — CD mode only, no log_path configured. No new config options or dependencies. --- cmd/cli/log_writer.go | 199 ++++++++++++++++++++++++++++++++++++- cmd/cli/log_writer_test.go | 125 +++++++++++++++++++++++ 2 files changed, 323 insertions(+), 1 deletion(-) diff --git a/cmd/cli/log_writer.go b/cmd/cli/log_writer.go index f84dcdf..609b1d6 100644 --- a/cmd/cli/log_writer.go +++ b/cmd/cli/log_writer.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os" + "path/filepath" "strings" "sync" "time" @@ -22,6 +23,9 @@ const ( logWriterSentInterval = time.Minute logWriterInitEndMarker = "\n\n=== INIT_END ===\n\n" logWriterLogEndMarker = "\n\n=== LOG_END ===\n\n" + + logFileName = "ctrld.log" + logFileMaxSize = 1024 * 1024 * 5 // 5 MB ) type logViewResponse struct { @@ -44,11 +48,18 @@ type logSubscriber struct { } // logWriter is an internal buffer to keep track of runtime log when no logging is enabled. +// When a file path is configured via setLogFile, writes are also persisted to +// a rotated file on disk (max logFileMaxSize, 1 backup) so logs survive restarts. type logWriter struct { mu sync.Mutex buf bytes.Buffer size int subscribers []*logSubscriber + + // File persistence fields. + logFile *os.File + logFilePath string + logFileSize int64 } // newLogWriter creates an internal log writer. @@ -67,6 +78,80 @@ func newLogWriterWithSize(size int) *logWriter { return lw } +// setLogFile configures file-backed persistence for the log writer. +// The directory is created if it does not exist. An existing file is +// opened in append mode and its current size is tracked for rotation. +func (lw *logWriter) setLogFile(path string) error { + dir := filepath.Dir(path) + if err := os.MkdirAll(dir, 0750); err != nil { + return fmt.Errorf("creating log directory: %w", err) + } + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0600) + if err != nil { + return fmt.Errorf("opening log file: %w", err) + } + st, err := f.Stat() + if err != nil { + f.Close() + return fmt.Errorf("stat log file: %w", err) + } + lw.mu.Lock() + defer lw.mu.Unlock() + lw.logFile = f + lw.logFilePath = path + lw.logFileSize = st.Size() + return nil +} + +// rotateLogFile rotates the current log file to a .1 backup. +// It returns true if lw.logFile is usable after the call, false otherwise. +// Must be called with lw.mu held. +func (lw *logWriter) rotateLogFile() bool { + if lw.logFile == nil { + return false + } + lw.logFile.Close() + backupPath := lw.logFilePath + ".1" + // Best effort: rename current to backup (overwrites old backup). + os.Rename(lw.logFilePath, backupPath) + f, err := os.OpenFile(lw.logFilePath, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0600) + if err != nil { + // If we can't reopen, disable file logging. + lw.logFile = nil + lw.logFileSize = 0 + return false + } + lw.logFile = f + lw.logFileSize = 0 + return true +} + +// closeLogFile closes the backing file if open. +func (lw *logWriter) closeLogFile() { + lw.mu.Lock() + defer lw.mu.Unlock() + if lw.logFile != nil { + lw.logFile.Close() + lw.logFile = nil + } +} + +// logFilePaths returns the paths to the current log file and its backup +// (if they exist) for inclusion in log send payloads. +func (lw *logWriter) logFilePaths() (current, backup string) { + lw.mu.Lock() + defer lw.mu.Unlock() + if lw.logFilePath == "" { + return "", "" + } + current = lw.logFilePath + bp := lw.logFilePath + ".1" + if _, err := os.Stat(bp); err == nil { + backup = bp + } + return current, backup +} + // Subscribe returns a channel that receives new log data as it's written, // and an unsubscribe function to clean up when done. func (lw *logWriter) Subscribe() (<-chan []byte, func()) { @@ -131,6 +216,16 @@ func (lw *logWriter) Write(p []byte) (int, error) { } } + // Write to backing file if configured. + if lw.logFile != nil { + needsRotation := lw.logFileSize+int64(len(p)) > logFileMaxSize + if !needsRotation || lw.rotateLogFile() { + if n, err := lw.logFile.Write(p); err == nil { + lw.logFileSize += int64(n) + } + } + } + // If writing p causes overflows, discard old data. if lw.buf.Len()+len(p) > lw.size { buf := lw.buf.Bytes() @@ -168,6 +263,12 @@ func (p *prog) initLogging(backup bool) { p.initInternalLogging(logWriters) } +// internalLogFilePath returns the path for persisted internal logs. +// The file lives in the ctrld home directory alongside other runtime state. +func internalLogFilePath() string { + return absHomeDir(logFileName) +} + // initInternalLogging performs internal logging if there's no log enabled. func (p *prog) initInternalLogging(writers []io.Writer) { if !p.needInternalLogging() { @@ -178,6 +279,14 @@ func (p *prog) initInternalLogging(writers []io.Writer) { p.internalLogWriter = newLogWriter() p.internalLogSent = time.Now().Add(-logWriterSentInterval) p.internalWarnLogWriter = newSmallLogWriter() + // Persist internal logs to disk so they survive restarts. + if path := internalLogFilePath(); path != "" { + if err := p.internalLogWriter.setLogFile(path); err != nil { + mainLog.Load().Warn().Err(err).Msg("could not enable persistent internal logging") + } else { + mainLog.Load().Notice().Msgf("internal log file: %s", path) + } + } }) p.mu.Lock() lw := p.internalLogWriter @@ -232,7 +341,15 @@ func (p *prog) logReader() (*logReader, error) { if wlw == nil { return nil, errors.New("nil internal warn log writer") } - // Normal log content. + + // If we have a persisted log file, read from disk (includes data + // from previous runs that the in-memory buffer wouldn't have). + current, backup := lw.logFilePaths() + if current != "" { + return p.logReaderFromFiles(current, backup, wlw) + } + + // Fall back to in-memory buffer. lw.mu.Lock() lwReader := bytes.NewReader(lw.buf.Bytes()) lwSize := lw.buf.Len() @@ -268,3 +385,83 @@ func (p *prog) logReader() (*logReader, error) { } return lr, nil } + +// logReaderFromFiles builds a logReader that concatenates the backup file +// (if it exists), the current log file, and the in-memory warn log buffer. +func (p *prog) logReaderFromFiles(current, backup string, wlw *logWriter) (*logReader, error) { + var rcs []io.ReadCloser + var totalSize int64 + + closeAll := func() { + for _, rc := range rcs { + rc.Close() + } + } + + // Read backup file first (older entries). + if backup != "" { + if bf, err := os.Open(backup); err == nil { + if st, err := bf.Stat(); err == nil { + totalSize += st.Size() + } + rcs = append(rcs, bf) + } + } + + // Read current file. + cf, err := os.Open(current) + if err != nil { + closeAll() + return nil, fmt.Errorf("opening current log file: %w", err) + } + if st, err := cf.Stat(); err == nil { + totalSize += st.Size() + } + rcs = append(rcs, cf) + + // Append warn log content from memory. + wlw.mu.Lock() + warnData := make([]byte, wlw.buf.Len()) + copy(warnData, wlw.buf.Bytes()) + wlw.mu.Unlock() + + if len(warnData) > 0 { + rcs = append(rcs, io.NopCloser(bytes.NewReader([]byte(logWriterLogEndMarker)))) + rcs = append(rcs, io.NopCloser(bytes.NewReader(warnData))) + totalSize += int64(len(logWriterLogEndMarker) + len(warnData)) + } + + if totalSize == 0 { + closeAll() + return nil, errors.New("internal log is empty") + } + + readers := make([]io.Reader, len(rcs)) + closers := make([]io.Closer, len(rcs)) + for i, rc := range rcs { + readers[i] = rc + closers[i] = rc + } + combined := io.MultiReader(readers...) + lr := &logReader{ + r: &multiCloser{Reader: combined, closers: closers}, + size: totalSize, + } + return lr, nil +} + +// multiCloser wraps an io.Reader and closes multiple underlying closers. +type multiCloser struct { + io.Reader + closers []io.Closer +} + +func (mc *multiCloser) Close() error { + var firstErr error + for _, c := range mc.closers { + if err := c.Close(); err != nil && firstErr == nil { + firstErr = err + } + } + return firstErr +} diff --git a/cmd/cli/log_writer_test.go b/cmd/cli/log_writer_test.go index 5336d4e..f6d1f94 100644 --- a/cmd/cli/log_writer_test.go +++ b/cmd/cli/log_writer_test.go @@ -1,6 +1,8 @@ package cli import ( + "os" + "path/filepath" "strings" "sync" "testing" @@ -83,3 +85,126 @@ func Test_logWriter_MarkerInitEnd(t *testing.T) { t.Fatalf("unexpected log content: %s", lw.buf.String()) } } + +func Test_logWriter_SetLogFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.log") + lw := newLogWriterWithSize(logWriterSize) + if err := lw.setLogFile(path); err != nil { + t.Fatalf("setLogFile: %v", err) + } + defer lw.closeLogFile() + + msg := "hello file\n" + lw.Write([]byte(msg)) + + // Verify data in memory buffer. + if lw.buf.String() != msg { + t.Fatalf("buffer: got %q, want %q", lw.buf.String(), msg) + } + // Verify data on disk. + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != msg { + t.Fatalf("file: got %q, want %q", data, msg) + } +} + +func Test_logWriter_FileRotation(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.log") + // Use a tiny max size to trigger rotation quickly. + lw := newLogWriterWithSize(logWriterSize) + if err := lw.setLogFile(path); err != nil { + t.Fatalf("setLogFile: %v", err) + } + defer lw.closeLogFile() + + // Write enough to exceed logFileMaxSize. + chunk := strings.Repeat("X", 1024) + "\n" + written := 0 + for written < logFileMaxSize+1024 { + lw.Write([]byte(chunk)) + written += len(chunk) + } + + // Backup file should exist. + backupPath := path + ".1" + if _, err := os.Stat(backupPath); os.IsNotExist(err) { + t.Fatal("expected backup file to exist after rotation") + } + + // Current file should be smaller than max (it was rotated). + st, err := os.Stat(path) + if err != nil { + t.Fatalf("stat current: %v", err) + } + if st.Size() > logFileMaxSize { + t.Fatalf("current file too large after rotation: %d", st.Size()) + } +} + +func Test_logWriter_FilePaths(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.log") + lw := newLogWriterWithSize(logWriterSize) + + // No file configured. + c, b := lw.logFilePaths() + if c != "" || b != "" { + t.Fatalf("expected empty paths, got %q %q", c, b) + } + + if err := lw.setLogFile(path); err != nil { + t.Fatalf("setLogFile: %v", err) + } + defer lw.closeLogFile() + + // Current exists, no backup yet. + c, b = lw.logFilePaths() + if c != path { + t.Fatalf("current: got %q, want %q", c, path) + } + if b != "" { + t.Fatalf("backup should be empty, got %q", b) + } + + // Create a backup file manually. + os.WriteFile(path+".1", []byte("old"), 0600) + _, b = lw.logFilePaths() + if b != path+".1" { + t.Fatalf("backup: got %q, want %q", b, path+".1") + } +} + +func Test_logWriter_FileAppendOnRestart(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.log") + + // Simulate first run. + lw1 := newLogWriterWithSize(logWriterSize) + if err := lw1.setLogFile(path); err != nil { + t.Fatalf("setLogFile: %v", err) + } + lw1.Write([]byte("run1\n")) + lw1.closeLogFile() + + // Simulate second run (restart) — file should be appended. + lw2 := newLogWriterWithSize(logWriterSize) + if err := lw2.setLogFile(path); err != nil { + t.Fatalf("setLogFile: %v", err) + } + lw2.Write([]byte("run2\n")) + lw2.closeLogFile() + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + want := "run1\nrun2\n" + if string(data) != want { + t.Fatalf("file: got %q, want %q", data, want) + } +} From 8cb383d87e6c2002c17c41088482cadab8842b88 Mon Sep 17 00:00:00 2001 From: CodeScribe Date: Wed, 29 Apr 2026 07:59:56 +0000 Subject: [PATCH 14/16] dns_intercept: add WFP loopback protect for VPN block-outside-dns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When third-party VPN software (e.g., OpenVPN) installs WFP block filters via block-outside-dns, all DNS traffic to non-tunnel interfaces is blocked — including DNS to 127.0.0.1 (ctrld's NRPT target). This breaks DNS mode interception because the NRPT catch-all rule routes queries to loopback, but WFP blocks the connection before it reaches ctrld's listener. Fix: after exhausting all NRPT recovery attempts, activate a minimal WFP session with "hard permit" filters (FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT) for DNS to localhost in a max-priority sublayer (weight 0xFFFF). This overrides the VPN's block for loopback DNS only, while preserving the VPN's DNS leak protection for all other (non-loopback) DNS traffic. The loopback protect is: - Only activated when NRPT probes fail (not preemptively) - Harmless when no conflicting WFP blocks exist (permit-only, no blocks) - Persistent until ctrld shutdown (survives VPN reconnect cycles) - Cleaned up by the existing cleanupWFPFilters path on shutdown --- cmd/cli/dns_intercept_windows.go | 285 ++++++++++++++++++++++++++++--- docs/dns-intercept-mode.md | 33 +++- docs/known-issues.md | 23 +++ docs/wfp-dns-intercept.md | 20 ++- 4 files changed, 330 insertions(+), 31 deletions(-) diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go index fb56782..bfe07e1 100644 --- a/cmd/cli/dns_intercept_windows.go +++ b/cmd/cli/dns_intercept_windows.go @@ -9,6 +9,7 @@ import ( "net" "os/exec" "runtime" + "sync" "sync/atomic" "time" "unsafe" @@ -118,6 +119,14 @@ const ( // DNS port. dnsPort uint16 = 53 + + // FWPM_FILTER_FLAG constants from fwpmtypes.h. + // See: https://learn.microsoft.com/en-us/windows/win32/api/fwpmtypes/ns-fwpmtypes-fwpm_filter0 + // + // FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT (0x08) prevents lower-weight sublayers + // from overriding this filter's PERMIT action ("hard permit"). Used in DNS + // mode to override third-party WFP blocks (e.g., OpenVPN's block-outside-dns). + fwpmFilterFlagClearActionRight uint32 = 0x00000008 ) // WFP API structures. These mirror the C structures from fwpmtypes.h and fwptypes.h. @@ -258,6 +267,17 @@ type wfpState struct { listenerIP string // stopCh is used to shut down the NRPT health monitor goroutine. stopCh chan struct{} + // mu protects loopbackProtectActive, loopbackPermitIDs, and engineHandle + // from concurrent access between nrptProbeAndHeal (goroutine) and + // stopDNSIntercept / cleanupWFPFilters (main goroutine). + mu sync.Mutex + // loopbackProtectActive is true when DNS mode has activated a minimal WFP + // session to permit loopback DNS. This counters third-party WFP block filters + // (e.g., OpenVPN's block-outside-dns) that prevent NRPT from routing queries + // to ctrld's listener on 127.0.0.1. See issue #526. + loopbackProtectActive bool + // loopbackPermitIDs stores the filter IDs for the loopback protect permits. + loopbackPermitIDs []uint64 } // Lazy-loaded WFP DLL procedures. @@ -607,6 +627,17 @@ func (p *prog) startDNSIntercept() error { } } else { mainLog.Load().Info().Msg("DNS intercept: dns mode — NRPT only, no WFP filters (graceful)") + // Proactively add loopback WFP permit filters to protect the NRPT + // → 127.0.0.1 path from third-party DNS block filters (e.g., OpenVPN's + // block-outside-dns). These are narrowly scoped (port 53 to localhost + // only) and use CLEAR_ACTION_RIGHT to override any block from other + // sublayers. Adding them at startup eliminates the DNS outage window + // that would otherwise occur between VPN connect and reactive activation. + if err := p.activateLoopbackWFPProtect(state); err != nil { + // Non-fatal: loopback protect is a defense-in-depth measure. + // NRPT still works when no third-party WFP blocks are present. + mainLog.Load().Warn().Err(err).Msg("DNS intercept: failed to activate proactive loopback WFP protect — will retry on probe failure") + } } p.dnsInterceptState = state @@ -878,14 +909,13 @@ func (p *prog) addWFPPermitLocalhostFilter(engineHandle uintptr, name string, la return filterID, nil } -// addWFPPermitSubnetFilter adds a WFP filter that permits outbound DNS to a given -// IPv4 subnet (addr/mask in host byte order). Used to exempt RFC1918 and CGNAT ranges -// so VPN DNS servers on private IPs are not blocked. -func (p *prog) addWFPPermitSubnetFilter(engineHandle uintptr, name string, proto uint8, addr, mask uint32) (uint64, error) { +// addWFPPermitDNSFilter is the unified helper for adding a WFP permit filter for +// outbound DNS (port 53) with caller-specified address condition, flags, and weight. +// Both subnet permits (RFC1918/CGNAT, flags=0, weight=10) and hard loopback permits +// (CLEAR_ACTION_RIGHT, weight=15) use this to avoid code drift. +func (p *prog) addWFPPermitDNSFilter(engineHandle uintptr, name string, layerKey windows.GUID, proto uint8, addrCond fwpmFilterCondition0, flags uint32, weight uint8) (uint64, error) { filterName, _ := windows.UTF16PtrFromString("ctrld: " + name) - addrMask := fwpV4AddrAndMask{addr: addr, mask: mask} - conditions := make([]fwpmFilterCondition0, 3) conditions[0] = fwpmFilterCondition0{ @@ -902,22 +932,18 @@ func (p *prog) addWFPPermitSubnetFilter(engineHandle uintptr, name string, proto conditions[1].condValue.valueType = fwpUint16 conditions[1].condValue.value = uint64(dnsPort) - conditions[2] = fwpmFilterCondition0{ - fieldKey: fwpmConditionIPRemoteAddress, - matchType: fwpMatchEqual, - } - conditions[2].condValue.valueType = fwpV4AddrMask - conditions[2].condValue.value = uint64(uintptr(unsafe.Pointer(&addrMask))) + conditions[2] = addrCond filter := fwpmFilter0{ - layerKey: fwpmLayerALEAuthConnectV4, + flags: flags, + layerKey: layerKey, subLayerKey: ctrldSubLayerGUID, numFilterConds: 3, filterCondition: &conditions[0], } filter.displayData.name = filterName filter.weight.valueType = fwpUint8 - filter.weight.value = 10 + filter.weight.value = uint64(weight) filter.action.actionType = fwpActionPermit var filterID uint64 @@ -927,7 +953,6 @@ func (p *prog) addWFPPermitSubnetFilter(engineHandle uintptr, name string, proto 0, uintptr(unsafe.Pointer(&filterID)), ) - runtime.KeepAlive(&addrMask) runtime.KeepAlive(conditions) if r1 != 0 { return 0, fmt.Errorf("FwpmFilterAdd0 failed: HRESULT 0x%x", r1) @@ -935,6 +960,24 @@ func (p *prog) addWFPPermitSubnetFilter(engineHandle uintptr, name string, proto return filterID, nil } +// addWFPPermitSubnetFilter adds a WFP filter that permits outbound DNS to a given +// IPv4 subnet (addr/mask in host byte order). Used to exempt RFC1918 and CGNAT ranges +// so VPN DNS servers on private IPs are not blocked. +func (p *prog) addWFPPermitSubnetFilter(engineHandle uintptr, name string, proto uint8, addr, mask uint32) (uint64, error) { + addrMask := fwpV4AddrAndMask{addr: addr, mask: mask} + + addrCond := fwpmFilterCondition0{ + fieldKey: fwpmConditionIPRemoteAddress, + matchType: fwpMatchEqual, + } + addrCond.condValue.valueType = fwpV4AddrMask + addrCond.condValue.value = uint64(uintptr(unsafe.Pointer(&addrMask))) + + filterID, err := p.addWFPPermitDNSFilter(engineHandle, name, fwpmLayerALEAuthConnectV4, proto, addrCond, 0, 10) + runtime.KeepAlive(&addrMask) + return filterID, err +} + // wfpSublayerExists checks whether our WFP sublayer still exists in the engine. // Used by the watchdog to detect if another program removed our filters. func wfpSublayerExists(engineHandle uintptr) bool { @@ -962,6 +1005,21 @@ func (p *prog) cleanupWFPFilters(state *wfpState) { return } + // Clean up loopback protect filters (DNS mode VPN workaround). + state.mu.Lock() + loopbackIDs := state.loopbackPermitIDs + state.loopbackPermitIDs = nil + state.loopbackProtectActive = false + state.mu.Unlock() + for _, filterID := range loopbackIDs { + r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(filterID)) + if r1 != 0 { + mainLog.Load().Warn().Msgf("DNS intercept: failed to remove loopback protect filter (ID: %d, code: 0x%x)", filterID, r1) + } else { + mainLog.Load().Debug().Msgf("DNS intercept: removed loopback protect filter (ID: %d)", filterID) + } + } + for _, filterID := range state.vpnPermitFilterIDs { r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(filterID)) if r1 != 0 { @@ -1024,6 +1082,154 @@ func (p *prog) cleanupWFPFilters(state *wfpState) { } } +// activateLoopbackWFPProtect opens a minimal WFP session and adds "hard permit" +// filters for DNS to localhost. This is used in DNS mode when NRPT probe failures +// are detected, typically caused by third-party VPN software (e.g., OpenVPN) that +// installs WFP block filters via block-outside-dns. The hard permit (with +// FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT) in a max-weight sublayer overrides the +// third-party blocks without affecting their protection for non-loopback DNS. +// +// See: https://gitlab.int.windscribe.com/controld/clients/ctrld/-/issues/526 +func (p *prog) activateLoopbackWFPProtect(state *wfpState) error { + state.mu.Lock() + defer state.mu.Unlock() + + if state.loopbackProtectActive { + mainLog.Load().Debug().Msg("DNS intercept: loopback WFP protect already active") + return nil + } + // Only activate in DNS mode. Hard mode manages its own full WFP state + // (block + permit filters in the same sublayer). Activating loopback + // protect would delete the hard mode sublayer and all its filters. + if hardIntercept { + mainLog.Load().Debug().Msg("DNS intercept: skipping loopback WFP protect in hard mode") + return nil + } + + mainLog.Load().Info().Msg("DNS intercept: activating loopback WFP protect (countering third-party DNS block filters)") + + // Open WFP engine if not already open (DNS mode doesn't open it normally). + if state.engineHandle == 0 { + var engineHandle uintptr + session := fwpmSession0{} + sessionName, _ := windows.UTF16PtrFromString("ctrld DNS Loopback Protect") + session.displayData.name = sessionName + + const rpcCAuthnDefault = 0xFFFFFFFF + r1, _, _ := procFwpmEngineOpen0.Call( + 0, + uintptr(rpcCAuthnDefault), + 0, + uintptr(unsafe.Pointer(&session)), + uintptr(unsafe.Pointer(&engineHandle)), + ) + if r1 != 0 { + return fmt.Errorf("FwpmEngineOpen0 failed: HRESULT 0x%x", r1) + } + mainLog.Load().Info().Msgf("DNS intercept: WFP engine opened for loopback protect (handle: 0x%x)", engineHandle) + state.engineHandle = engineHandle + } + + // Clean up any stale sublayer from a previous session. + procFwpmSubLayerDeleteByKey0.Call( + state.engineHandle, + uintptr(unsafe.Pointer(&ctrldSubLayerGUID)), + ) + + // Create sublayer at maximum priority. + sublayer := fwpmSublayer0{ + subLayerKey: ctrldSubLayerGUID, + weight: 0xFFFF, + } + sublayerName, _ := windows.UTF16PtrFromString("ctrld DNS Loopback Protect Sublayer") + sublayerDesc, _ := windows.UTF16PtrFromString("Permits DNS to localhost, overriding third-party VPN block filters") + sublayer.displayData.name = sublayerName + sublayer.displayData.description = sublayerDesc + + r1, _, _ := procFwpmSubLayerAdd0.Call( + state.engineHandle, + uintptr(unsafe.Pointer(&sublayer)), + 0, + ) + if r1 != 0 { + return fmt.Errorf("FwpmSubLayerAdd0 failed: HRESULT 0x%x", r1) + } + + // Add hard permit filters for loopback DNS (v4+v6, UDP+TCP). + permitFilters := []struct { + name string + layer windows.GUID + proto uint8 + }{ + {"Loopback Protect: Permit DNS to localhost (IPv4/UDP)", fwpmLayerALEAuthConnectV4, ipprotoUDP}, + {"Loopback Protect: Permit DNS to localhost (IPv4/TCP)", fwpmLayerALEAuthConnectV4, ipprotoTCP}, + {"Loopback Protect: Permit DNS to localhost (IPv6/UDP)", fwpmLayerALEAuthConnectV6, ipprotoUDP}, + {"Loopback Protect: Permit DNS to localhost (IPv6/TCP)", fwpmLayerALEAuthConnectV6, ipprotoTCP}, + } + + for _, pf := range permitFilters { + filterID, err := p.addWFPHardPermitLocalhostFilter(state.engineHandle, pf.name, pf.layer, pf.proto, state.listenerIP) + if err != nil { + // Partial failure — clean up what we added (already holding mu). + p.deactivateLoopbackWFPProtectLocked(state) + return fmt.Errorf("failed to add loopback protect filter %q: %w", pf.name, err) + } + state.loopbackPermitIDs = append(state.loopbackPermitIDs, filterID) + mainLog.Load().Debug().Str("filter", pf.name).Uint64("id", filterID).Msg("DNS intercept: added loopback protect filter") + } + + state.loopbackProtectActive = true + mainLog.Load().Info().Int("filters", len(state.loopbackPermitIDs)). + Msg("DNS intercept: loopback WFP protect activated — localhost DNS permitted with CLEAR_ACTION_RIGHT") + return nil +} + +// deactivateLoopbackWFPProtectLocked is the lock-free inner implementation. +// Caller must hold state.mu. +func (p *prog) deactivateLoopbackWFPProtectLocked(state *wfpState) { + if !state.loopbackProtectActive && len(state.loopbackPermitIDs) == 0 { + return + } + + for _, filterID := range state.loopbackPermitIDs { + if state.engineHandle != 0 { + r1, _, _ := procFwpmFilterDeleteById0.Call(state.engineHandle, uintptr(filterID)) + if r1 != 0 { + mainLog.Load().Warn().Msgf("DNS intercept: failed to remove loopback protect filter (ID: %d, code: 0x%x)", filterID, r1) + } + } + } + state.loopbackPermitIDs = nil + state.loopbackProtectActive = false + mainLog.Load().Info().Msg("DNS intercept: loopback WFP protect deactivated") +} + +// addWFPHardPermitLocalhostFilter adds a WFP permit filter for DNS to localhost with +// FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT. This "hard permit" prevents lower-priority +// sublayers (e.g., OpenVPN's block-outside-dns sublayer) from blocking DNS to +// ctrld's loopback listener. Weight is set to 15 (above hard mode's permit=10). +// For IPv4, the address is derived from listenerIP (e.g., 127.0.0.1 or 127.0.0.2). +func (p *prog) addWFPHardPermitLocalhostFilter(engineHandle uintptr, name string, layerKey windows.GUID, proto uint8, listenerIP string) (uint64, error) { + addrCond := fwpmFilterCondition0{ + fieldKey: fwpmConditionIPRemoteAddress, + matchType: fwpMatchEqual, + } + + ipv6Loopback := [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} + + if layerKey == fwpmLayerALEAuthConnectV4 { + addrCond.condValue.valueType = fwpUint32 + addrCond.condValue.value = uint64(parseIPv4AsUint32(listenerIP)) + } else { + addrCond.condValue.valueType = fwpByteArray16Type + addrCond.condValue.value = uint64(uintptr(unsafe.Pointer(&ipv6Loopback))) + } + + filterID, err := p.addWFPPermitDNSFilter(engineHandle, name, layerKey, proto, addrCond, fwpmFilterFlagClearActionRight, 15) + runtime.KeepAlive(&ipv6Loopback) + return filterID, err +} + // stopDNSIntercept removes all WFP filters and shuts down the DNS interception. func (p *prog) stopDNSIntercept() error { if p.dnsInterceptState == nil { @@ -1050,7 +1256,7 @@ func (p *prog) stopDNSIntercept() error { state.nrptActive = false } - // Only clean up WFP if we actually opened the engine (hard mode). + // Clean up WFP if the engine was opened (hard mode or loopback protect). if state.engineHandle != 0 { mainLog.Load().Info().Msg("DNS intercept: shutting down WFP filters") p.cleanupWFPFilters(state) @@ -1079,10 +1285,13 @@ func (p *prog) exemptVPNDNSServers(exemptions []vpnDNSExemption) error { if !ok || state == nil { return fmt.Errorf("DNS intercept state not available") } - // In dns mode (no WFP), VPN DNS exemptions are not needed — there are no - // block filters to exempt from. - if state.engineHandle == 0 { - mainLog.Load().Debug().Msg("DNS intercept: dns mode — skipping VPN DNS exemptions (no WFP filters)") + // In dns mode (no WFP) or loopback-protect-only mode, VPN DNS exemptions + // are not needed — there are no ctrld block filters to exempt from. + // Loopback protect only adds hard-permit filters for localhost DNS; + // VPN DNS traffic uses the tunnel interface and is already permitted by + // the VPN's own WFP rules. + if state.engineHandle == 0 || state.loopbackProtectActive { + mainLog.Load().Debug().Msg("DNS intercept: dns mode — skipping VPN DNS exemptions (no WFP block filters)") return nil } @@ -1634,6 +1843,40 @@ func (p *prog) nrptProbeAndHeal() { } logNRPTParentKeyState("probe-failed-final") - mainLog.Load().Error().Msg("DNS intercept: NRPT verification failed after all retries including two-phase recovery — " + + mainLog.Load().Warn().Msg("DNS intercept: NRPT verification failed after all retries including two-phase recovery") + + // Last resort: activate WFP loopback protection. + // Third-party VPN software (e.g., OpenVPN with block-outside-dns) may have + // installed WFP filters that block DNS to non-tunnel interfaces, including + // loopback. A high-priority "hard permit" for localhost DNS overrides these + // blocks and restores NRPT routing to ctrld's listener. + // See: https://gitlab.int.windscribe.com/controld/clients/ctrld/-/issues/526 + loopbackState, ok := p.dnsInterceptState.(*wfpState) + if !ok || loopbackState == nil { + mainLog.Load().Error().Msg("DNS intercept: no state available for loopback WFP protect") + return + } + + // Bail out if shutdown is in progress — avoid racing with cleanupWFPFilters. + select { + case <-loopbackState.stopCh: + mainLog.Load().Info().Msg("DNS intercept: shutdown in progress, skipping loopback WFP protect activation") + return + default: + } + + if err := p.activateLoopbackWFPProtect(loopbackState); err != nil { + mainLog.Load().Error().Err(err).Msg("DNS intercept: failed to activate loopback WFP protect — " + + "DNS queries may not be routed through ctrld. A network interface toggle may be needed.") + return + } + + // Retry NRPT probe now that loopback DNS is explicitly permitted through WFP. + time.Sleep(500 * time.Millisecond) + if p.probeNRPT() { + mainLog.Load().Info().Msg("DNS intercept: NRPT verified working after loopback WFP protect activation") + return + } + mainLog.Load().Error().Msg("DNS intercept: NRPT probe still failing after loopback WFP protect — " + "DNS queries may not be routed through ctrld. A network interface toggle may be needed.") } diff --git a/docs/dns-intercept-mode.md b/docs/dns-intercept-mode.md index 41dae1f..c089ea1 100644 --- a/docs/dns-intercept-mode.md +++ b/docs/dns-intercept-mode.md @@ -56,7 +56,7 @@ ctrld run --intercept-mode hard --cd Windows DNS intercept uses a two-tier architecture with mode-dependent enforcement: -- **`dns` mode**: NRPT only — graceful DNS routing through the Windows DNS Client service. At worst, a VPN overwrites NRPT and queries bypass ctrld temporarily. DNS never breaks. +- **`dns` mode**: NRPT + loopback WFP protect — graceful DNS routing through the Windows DNS Client service, with proactive WFP permit filters that protect the NRPT → localhost path from third-party DNS block filters (e.g., OpenVPN's `block-outside-dns`). - **`hard` mode**: NRPT + WFP — same NRPT routing, plus WFP kernel-level block filters that prevent any outbound DNS bypass. Equivalent enforcement to macOS pf. #### Why This Design? @@ -70,8 +70,9 @@ Separating them into modes means most users get `dns` mode (safe, can never brea 1. Creates NRPT catch-all registry rule (`.` → `127.0.0.1`) under `HKLM\...\DnsPolicyConfig\CtrldCatchAll` 2. Triggers Group Policy refresh via `RefreshPolicyEx` (userenv.dll) so DNS Client loads NRPT immediately 3. Flushes DNS cache to clear stale entries -4. Starts NRPT health monitor (30s periodic check) -5. Launches async NRPT probe-and-heal to verify NRPT is actually routing queries +4. **Activates loopback WFP protect** — adds 4 permit filters (IPv4/IPv6 × UDP/TCP) for DNS to localhost with `FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT`. These prevent third-party WFP block filters from blocking the NRPT → `127.0.0.1` path (see [Loopback WFP Protect](#loopback-wfp-protect) below). Non-fatal if this fails. +5. Starts NRPT health monitor (30s periodic check) +6. Launches async NRPT probe-and-heal to verify NRPT is actually routing queries #### Startup Sequence (hard mode) @@ -112,6 +113,32 @@ The **Name Resolution Policy Table** is a Windows feature (originally for Direct **VPN coexistence**: VPN software can set DNS to whatever it wants on the interface — for public IPs, the WFP block filter prevents those servers from being reached on port 53. For private IPs, the subnet permits allow it. ctrld handles all DNS routing through NRPT and can forward VPN-specific domains to VPN DNS servers through its own upstream mechanism. +#### Loopback WFP Protect (dns mode) + +Third-party VPN software (e.g., OpenVPN, Securepoint SSL VPN) can install WFP block filters via `block-outside-dns` that block **all** DNS traffic to non-tunnel interfaces — including loopback. This breaks the NRPT → `127.0.0.1:53` path that ctrld depends on, causing DNS resolution to time out. + +ctrld proactively adds 4 WFP "hard permit" filters at startup: + +| Filter | Layer | Protocol | +|---|---|---| +| Permit DNS to localhost (IPv4/UDP) | ALE_AUTH_CONNECT_V4 | UDP | +| Permit DNS to localhost (IPv4/TCP) | ALE_AUTH_CONNECT_V4 | TCP | +| Permit DNS to localhost (IPv6/UDP) | ALE_AUTH_CONNECT_V6 | UDP | +| Permit DNS to localhost (IPv6/TCP) | ALE_AUTH_CONNECT_V6 | TCP | + +**Key properties:** +- **Scope**: Port 53 to `127.0.0.1` (or configured listener IP) and `::1` only +- **Flag**: `FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT` (0x08) — "hard permit" that overrides BLOCK decisions from other sublayers regardless of weight or insertion order +- **Weight**: 15 (above hard mode's permit=10) +- **Sublayer**: ctrld's sublayer at maximum priority (0xFFFF) +- **Lifetime**: Process lifetime — added at startup, removed on shutdown/uninstall + +Because `CLEAR_ACTION_RIGHT` is a cross-sublayer override, the order of filter installation doesn't matter — even if a VPN connects hours later and adds its own WFP block filters, ctrld's hard permit for loopback DNS is never overridden. + +The reactive fallback in `nrptProbeAndHeal()` is preserved as defense-in-depth for edge cases where proactive activation fails at startup. + +See: [Issue #526](https://gitlab.int.windscribe.com/controld/clients/ctrld/-/issues/526) + #### NRPT Probe and Auto-Heal `RefreshPolicyEx` returns immediately — it does NOT wait for the DNS Client service to actually load the NRPT rule. On cold machines (first boot, fresh install), the DNS Client may take several seconds to process the policy refresh. During this window, the NRPT rule exists in the registry but isn't active. diff --git a/docs/known-issues.md b/docs/known-issues.md index e9a897b..77e09ce 100644 --- a/docs/known-issues.md +++ b/docs/known-issues.md @@ -50,6 +50,29 @@ To isolate the signals, avoid running the log viewer in the same window as the d * **Window B:** Open a new SSH connection to run `ctrld log tail`. Because Window B has a different **Session ID** and **Process Group ID**, pressing `Ctrl+C` in Window B will not affect the process in Window A. +## Windows Issues + +### VPN `block-outside-dns` Breaks DNS When Using ctrld in DNS Mode + +**Issue**: VPN software that uses OpenVPN's `block-outside-dns` directive installs WFP (Windows Filtering Platform) block filters that prevent DNS queries from reaching ctrld's loopback listener. + +**Status**: Fixed in v1.5.1 + +**Description**: When a VPN connects with `block-outside-dns` enabled, OpenVPN adds WFP filters that block all DNS traffic to non-tunnel interfaces — including loopback (`127.0.0.1`). Since ctrld's NRPT catch-all rule routes DNS through the Windows DNS Client to `127.0.0.1:53`, the WFP block filters prevent DNS Client from reaching ctrld, causing all DNS queries to time out. + +This affects any VPN client that implements `block-outside-dns` via WFP, including: +- OpenVPN GUI (community) +- Securepoint SSL VPN +- Any OpenVPN-based client that honors the `block-outside-dns` push directive + +**Fix**: ctrld now proactively adds WFP "hard permit" filters for DNS to localhost at startup. These use `FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT` to override block decisions from any other WFP sublayer, ensuring the NRPT → loopback path is always available regardless of VPN state. See `docs/dns-intercept-mode.md` for technical details. + +**Affected Versions**: ctrld ≤ v1.5.0 in `dns` intercept mode on Windows + +**Last Updated**: 04/28/2026 + +--- + ## Contributing to Known Issues If you encounter an issue not listed here, please: diff --git a/docs/wfp-dns-intercept.md b/docs/wfp-dns-intercept.md index 6b9c3b5..2e7ece2 100644 --- a/docs/wfp-dns-intercept.md +++ b/docs/wfp-dns-intercept.md @@ -15,11 +15,15 @@ the same enforcement guarantees as macOS pf. ``` ┌─────────────────────────────────────────────────────────────────┐ -│ dns mode (NRPT only) │ +│ dns mode (NRPT + loopback WFP protect) │ │ │ │ App DNS query → DNS Client service → NRPT lookup │ │ → "." catch-all matches → forward to 127.0.0.1 (ctrld) │ │ │ +│ Loopback WFP protect: 4 hard-permit filters (port 53 to │ +│ localhost, CLEAR_ACTION_RIGHT) prevent third-party VPN WFP │ +│ blocks (e.g., OpenVPN block-outside-dns) from breaking NRPT. │ +│ │ │ If VPN clears NRPT: health monitor re-adds within 30s │ │ Worst case: queries go to VPN DNS until NRPT restored │ │ DNS never breaks — graceful degradation │ @@ -182,8 +186,10 @@ When `vpnDNSManager.Refresh()` discovers VPN DNS servers on public IPs: - Both UDP and TCP for each IP 3. Store new filter IDs for next cleanup cycle -**In `dns` mode, VPN DNS exemptions are skipped** — there are no WFP block -filters to exempt from. +**In `dns` mode, VPN DNS exemptions are skipped** — there are no ctrld WFP block +filters to exempt from. The loopback WFP protect filters only permit localhost +DNS; VPN DNS traffic goes through the tunnel interface and is already permitted +by the VPN's own WFP rules. ### Session Lifecycle @@ -202,8 +208,8 @@ filters to exempt from. **Startup (dns mode):** ``` 1. Add NRPT catch-all rule + GP refresh + DNS flush -2. Start NRPT health monitor goroutine -3. (No WFP — done) +2. Activate loopback WFP protect (4 hard-permit filters for localhost DNS) +3. Start NRPT health monitor goroutine ``` **Shutdown:** @@ -338,9 +344,9 @@ breaking DNS. | Aspect | macOS (pf) | Windows dns mode | Windows hard mode | |--------|-----------|------------------|-------------------| | **Routing** | `rdr` redirect | NRPT policy | NRPT policy | -| **Enforcement** | `route-to` + block rules | None (graceful) | WFP block filters | +| **Enforcement** | `route-to` + block rules | Loopback WFP protect | WFP block filters | | **Can break DNS?** | Yes (pf corruption) | No | Yes (if NRPT lost) | -| **VPN coexistence** | Watchdog + stabilization | NRPT most-specific-match | Same + WFP permits | +| **VPN coexistence** | Watchdog + stabilization | NRPT + loopback hard-permit | Same + WFP permits | | **Bypass protection** | pf catches all packets | None | WFP catches all connections | | **Recovery** | Probe + auto-heal | Health monitor re-adds | Full restart on sublayer loss | From 2b27c148bedb31804680ca3aadc0972a48e35b87 Mon Sep 17 00:00:00 2001 From: Codescribe Date: Wed, 29 Apr 2026 04:01:04 -0400 Subject: [PATCH 15/16] dns: recovery race condition fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes to reduce worst-case recovery from ~30s to <3s: 1. debounceRecovery() for network changes (500ms window) — coalesces rapid consecutive network changes into a single recovery pass, eliminating the cancel-and-restart race. 2. ForceReBootstrap() on recovery entry — closes dead connections and creates fresh transports synchronously before probing, replacing the lazy ReBootstrap() flag that left stale connections. 3. Combined effect: recovery probes never inherit dead connections from a canceled prior recovery attempt. --- cmd/cli/dns_proxy.go | 46 +++++++++++++++++++++++++++++++++++++++++++- cmd/cli/prog.go | 6 ++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index f1aa810..d12ab35 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -1580,7 +1580,7 @@ func (p *prog) monitorNetworkChanges() error { // we only trigger recovery flow for network changes on non router devices if router.Name() == "" { - p.handleRecovery(RecoveryReasonNetworkChange) + p.debounceRecovery() } // After network changes, verify our pf anchor is still active and @@ -1693,6 +1693,35 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro return err } +// recoveryDebounceWindow is the time to wait after the last network change +// before triggering handleRecovery. This coalesces rapid consecutive network +// changes (e.g., hotspot→LAN causing en1 drop + en0 pickup + en1 re-pickup) +// into a single recovery pass, avoiding the cancel-and-restart race that +// leaves DoH transports in a stale state. +const recoveryDebounceWindow = 500 * time.Millisecond + +// debounceRecovery schedules a handleRecovery(NetworkChange) call after a debounce +// window. If called again before the window expires, the timer is reset so that +// recovery runs once with the final network state. All other state updates (IP, +// pf anchor, VPN DNS, tunnel checks) run immediately — only the recovery flow +// with its upstream probing and DHCP bypass logic is debounced. +func (p *prog) debounceRecovery() { + p.recoveryDebounceMu.Lock() + defer p.recoveryDebounceMu.Unlock() + + if p.recoveryDebounceTimer != nil { + p.recoveryDebounceTimer.Stop() + mainLog.Load().Debug().Msg("Recovery debounce: resetting timer (rapid network change)") + } + p.recoveryDebounceTimer = time.AfterFunc(recoveryDebounceWindow, func() { + p.recoveryDebounceMu.Lock() + p.recoveryDebounceTimer = nil + p.recoveryDebounceMu.Unlock() + p.handleRecovery(RecoveryReasonNetworkChange) + }) + mainLog.Load().Debug().Msg("Recovery debounce: scheduled (500ms window)") +} + // handleRecovery performs a unified recovery by removing DNS settings, // canceling existing recovery checks for network changes, but coalescing duplicate // upstream failure recoveries, waiting for recovery to complete (using a cancellable context without timeout), @@ -1720,6 +1749,21 @@ func (p *prog) handleRecovery(reason RecoveryReason) { p.recoveryCancelMu.Unlock() } + // For network changes, force-reset all upstream transports synchronously. + // The lazy ReBootstrap() called earlier in the network change callback only + // sets a flag — the old transport's dead connections can still be used by + // recovery probes, causing context deadline timeouts. ForceReBootstrap() + // closes old connections and creates fresh transports so probes succeed on + // first attempt. + if reason == RecoveryReasonNetworkChange { + for _, uc := range p.cfg.Upstream { + if uc != nil { + uc.ForceReBootstrap() + } + } + mainLog.Load().Info().Msg("Force-reset upstream transports for network change recovery") + } + // Create a new recovery context without a fixed timeout. p.recoveryCancelMu.Lock() recoveryCtx, cancel := context.WithCancel(context.Background()) diff --git a/cmd/cli/prog.go b/cmd/cli/prog.go index c579a80..96fd2fe 100644 --- a/cmd/cli/prog.go +++ b/cmd/cli/prog.go @@ -146,6 +146,12 @@ type prog struct { recoveryCancel context.CancelFunc recoveryRunning atomic.Bool + // recoveryDebounceTimer coalesces rapid NetworkChange recovery triggers + // into a single handleRecovery call. Only handleRecovery is debounced — + // all other state updates (IP, pf anchor, VPN DNS) run immediately. + recoveryDebounceMu sync.Mutex + recoveryDebounceTimer *time.Timer + // recoveryBypass is set when dns-intercept mode enters recovery. // When true, proxy() forwards all queries to OS/DHCP resolver // instead of using the normal upstream flow. From 5dd5846ccaf35d88da6bccb949694b2395299fb2 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Thu, 30 Apr 2026 22:58:30 +0700 Subject: [PATCH 16/16] cmd/cli: skip upstream.os healthcheck when WFP loopback protect enabled Since the check will always be failed in this case, causing unnecessary log spamming. --- cmd/cli/dns_intercept_darwin.go | 4 ++++ cmd/cli/dns_intercept_others.go | 4 ++++ cmd/cli/dns_intercept_windows.go | 20 ++++++++++++++++++++ cmd/cli/dns_proxy.go | 20 ++++++++++++++++---- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/cmd/cli/dns_intercept_darwin.go b/cmd/cli/dns_intercept_darwin.go index 62fc73f..88d7310 100644 --- a/cmd/cli/dns_intercept_darwin.go +++ b/cmd/cli/dns_intercept_darwin.go @@ -1816,3 +1816,7 @@ func (p *prog) forceReloadPFMainRuleset() { mainLog.Load().Info().Msg("DNS intercept: force reload — pf ruleset and anchor reloaded successfully") } + +// osHealthcheckSuppressed always returns false on darwin — WFP loopback +// protect (the trigger for suppression) is Windows-only. +func (p *prog) osHealthcheckSuppressed() bool { return false } diff --git a/cmd/cli/dns_intercept_others.go b/cmd/cli/dns_intercept_others.go index 9f3c903..50c7fd0 100644 --- a/cmd/cli/dns_intercept_others.go +++ b/cmd/cli/dns_intercept_others.go @@ -37,3 +37,7 @@ func (p *prog) scheduleDelayedRechecks() {} // pfInterceptMonitor is a no-op on unsupported platforms. func (p *prog) pfInterceptMonitor() {} + +// osHealthcheckSuppressed always returns false on non-Windows platforms — +// WFP loopback protect (the trigger for suppression) is Windows-only. +func (p *prog) osHealthcheckSuppressed() bool { return false } diff --git a/cmd/cli/dns_intercept_windows.go b/cmd/cli/dns_intercept_windows.go index bfe07e1..a79fa0f 100644 --- a/cmd/cli/dns_intercept_windows.go +++ b/cmd/cli/dns_intercept_windows.go @@ -1184,6 +1184,26 @@ func (p *prog) activateLoopbackWFPProtect(state *wfpState) error { return nil } +// osHealthcheckSuppressed reports whether the upstream.os healthcheck should +// be skipped because DNS intercept mode is active and the WFP loopback protect +// has been engaged. Loopback protect is only activated when an external WFP +// block filter (e.g. OpenVPN's block-outside-dns) is interfering with DNS, +// which is the same condition that makes the OS resolver healthcheck fail +// every 2s with i/o timeout — so suppressing the check avoids the log spam +// described in issue #526. +func (p *prog) osHealthcheckSuppressed() bool { + if !dnsIntercept || p.dnsInterceptState == nil { + return false + } + state, ok := p.dnsInterceptState.(*wfpState) + if !ok || state == nil { + return false + } + state.mu.Lock() + defer state.mu.Unlock() + return state.loopbackProtectActive +} + // deactivateLoopbackWFPProtectLocked is the lock-free inner implementation. // Caller must hold state.mu. func (p *prog) deactivateLoopbackWFPProtectLocked(state *wfpState) { diff --git a/cmd/cli/dns_proxy.go b/cmd/cli/dns_proxy.go index d12ab35..d3ddb04 100644 --- a/cmd/cli/dns_proxy.go +++ b/cmd/cli/dns_proxy.go @@ -744,6 +744,7 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse { var reason RecoveryReason if upstreams[0] == upstreamOS { reason = RecoveryReasonOSFailure + } else { reason = RecoveryReasonRegularFailure } @@ -1657,6 +1658,8 @@ func interfaceIPsEqual(a, b []netip.Prefix) bool { return true } +var errOsHealthcheckSuppressed = errors.New("upstream os health check suppressed") + // checkUpstreamOnce sends a test query to the specified upstream. // Returns nil if the upstream responds successfully. func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) error { @@ -1686,11 +1689,19 @@ func (p *prog) checkUpstreamOnce(upstream string, uc *ctrld.UpstreamConfig) erro duration := time.Since(start) if err != nil { + // Demote upstream.os check failures to debug while WFP loopback + // protect is active: an external WFP block filter is interfering + // with plain DNS so repeated failures here are expected. Other + // upstreams keep error level so real outages stay visible. + if upstream == upstreamOS && p.osHealthcheckSuppressed() { + mainLog.Load().Debug().Err(err).Msgf("Upstream %s check failed after %v (WFP loopback protect active)", upstream, duration) + return errOsHealthcheckSuppressed + } mainLog.Load().Error().Err(err).Msgf("Upstream %s check failed after %v", upstream, duration) - } else { - mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration) + return err } - return err + mainLog.Load().Debug().Msgf("Upstream %s responded successfully in %v", upstream, duration) + return nil } // recoveryDebounceWindow is the time to wait after the last network change @@ -1909,7 +1920,8 @@ func (p *prog) waitForUpstreamRecovery(ctx context.Context, upstreams map[string default: attempts++ // checkUpstreamOnce will reset any failure counters on success. - if err := p.checkUpstreamOnce(name, uc); err == nil { + err := p.checkUpstreamOnce(name, uc) + if err == nil || errors.Is(err, errOsHealthcheckSuppressed) { mainLog.Load().Debug().Msgf("Upstream %s recovered successfully", name) select { case recoveredCh <- name: