mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-04-07 12:32:04 +02:00
fix: use raw IPv6 socket for DNS responses in macOS intercept mode
macOS rejects sendmsg from [::1] to global unicast IPv6 (EINVAL), and nat on lo0 doesn't fire for route-to'd packets (pf skips translation on the second interface pass). ULA addresses on lo0 also fail (EHOSTUNREACH - kernel segregates lo0 routing). Solution: wrap the [::1] UDP listener's ResponseWriter with rawIPv6Writer that sends responses via SOCK_RAW (IPPROTO_UDP) on lo0, bypassing the kernel's routing validation. pf's rdr state reverses the address translation on the response path. Changes: - Add rawipv6_darwin.go: rawIPv6Writer wraps dns.ResponseWriter, sends UDP responses via raw IPv6 socket with proper checksum calculation - Add rawipv6_other.go: no-op wrapIPv6Handler for non-darwin platforms - Remove nat rules from pf anchor (no longer needed) - Block IPv6 TCP DNS (block return) - falls back to IPv4 (~1s, rare) - Remove IPv6 TCP rdr/route-to/pass rules (only UDP intercepted)
This commit is contained in:
committed by
Cuong Manh Le
parent
95dd871e2d
commit
22a796f673
@@ -321,14 +321,13 @@ func (p *prog) startDNSIntercept() error {
|
||||
// options → normalization (scrub) → queueing → translation (nat/rdr) → filtering (pass/block/anchor)
|
||||
//
|
||||
// "pfctl -sr" returns BOTH scrub-anchor (normalization) AND anchor/pass/block (filter) rules.
|
||||
// "pfctl -sn" returns nat-anchor AND rdr-anchor (translation) rules.
|
||||
// "pfctl -sn" returns rdr-anchor (translation) rules.
|
||||
// Both commands emit "No ALTQ support in kernel" warnings on stderr.
|
||||
//
|
||||
// We must reassemble in correct order: scrub → nat/rdr → filter.
|
||||
//
|
||||
// The anchor reference does not survive a reboot, but ctrld re-adds it on every start.
|
||||
func (p *prog) ensurePFAnchorReference() error {
|
||||
natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName)
|
||||
rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName)
|
||||
anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName)
|
||||
|
||||
@@ -347,11 +346,10 @@ func (p *prog) ensurePFAnchorReference() error {
|
||||
natLines := pfFilterRuleLines(string(natOut))
|
||||
filterLines := pfFilterRuleLines(string(filterOut))
|
||||
|
||||
hasNatAnchor := pfContainsRule(natLines, natAnchorRef)
|
||||
hasRdrAnchor := pfContainsRule(natLines, rdrAnchorRef)
|
||||
hasAnchor := pfContainsRule(filterLines, anchorRef)
|
||||
|
||||
if hasNatAnchor && hasRdrAnchor && hasAnchor {
|
||||
if hasRdrAnchor && hasAnchor {
|
||||
// Verify anchor ordering: our anchor should appear before other anchors
|
||||
// for reliable DNS interception priority. Log a warning if out of order,
|
||||
// but don't force a reload (the interface-specific rules in our anchor
|
||||
@@ -380,15 +378,8 @@ func (p *prog) ensurePFAnchorReference() error {
|
||||
// rules in whichever anchor appears first win. By prepending, our DNS
|
||||
// intercept rules match port 53 traffic before a VPN app's broader
|
||||
// "pass out quick on <iface> all" rules in their anchor.
|
||||
if !hasNatAnchor || !hasRdrAnchor {
|
||||
var newRefs []string
|
||||
if !hasNatAnchor {
|
||||
newRefs = append(newRefs, natAnchorRef)
|
||||
}
|
||||
if !hasRdrAnchor {
|
||||
newRefs = append(newRefs, rdrAnchorRef)
|
||||
}
|
||||
natLines = append(newRefs, natLines...)
|
||||
if !hasRdrAnchor {
|
||||
natLines = append([]string{rdrAnchorRef}, natLines...)
|
||||
}
|
||||
if !hasAnchor {
|
||||
pureFilterLines = append([]string{anchorRef}, pureFilterLines...)
|
||||
@@ -590,7 +581,6 @@ func (p *prog) stopDNSIntercept() error {
|
||||
// The anchor itself is already flushed by stopDNSIntercept, so even if removal
|
||||
// fails, the empty anchor is a no-op.
|
||||
func (p *prog) removePFAnchorReference() error {
|
||||
natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName)
|
||||
rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName)
|
||||
anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName)
|
||||
|
||||
@@ -609,7 +599,7 @@ func (p *prog) removePFAnchorReference() error {
|
||||
|
||||
var cleanNat []string
|
||||
for _, line := range natLines {
|
||||
if !strings.Contains(line, rdrAnchorRef) && !strings.Contains(line, natAnchorRef) {
|
||||
if !strings.Contains(line, rdrAnchorRef) {
|
||||
cleanNat = append(cleanNat, line)
|
||||
}
|
||||
}
|
||||
@@ -804,23 +794,13 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string {
|
||||
// a stateful entry that handles response routing. Using "rdr pass" would skip filter
|
||||
// evaluation, and its implicit state alone is insufficient for response delivery —
|
||||
// proven by commit 51cf029 where responses were silently dropped.
|
||||
rules.WriteString("# --- Translation rules (nat + rdr) ---\n")
|
||||
rules.WriteString("# --- Translation rules (rdr) ---\n")
|
||||
|
||||
// NAT source to ::1 for IPv6 DNS on loopback. macOS/BSD rejects sendmsg from
|
||||
// [::1] to a global unicast IPv6 address (EINVAL), unlike IPv4 where sendmsg from
|
||||
// 127.0.0.1 to local private IPs works fine. The rdr rewrites the destination but
|
||||
// preserves the original source (machine's global IPv6). Without nat, ctrld cannot
|
||||
// reply. pf reverses both translations on the response path.
|
||||
// Note: nat must appear before rdr (pf evaluates nat first in translation phase).
|
||||
listenerAddr6 := fmt.Sprintf("::1 port %d", listenerPort)
|
||||
rules.WriteString("nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1\n")
|
||||
rules.WriteString("nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1\n")
|
||||
|
||||
rules.WriteString("# Redirect DNS on loopback to ctrld's listener.\n")
|
||||
rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto udp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr))
|
||||
rules.WriteString(fmt.Sprintf("rdr on lo0 inet proto tcp from any to ! %s port 53 -> %s\n", listenerIP, listenerAddr))
|
||||
rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n", listenerAddr6))
|
||||
rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6))
|
||||
rules.WriteString(fmt.Sprintf("rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> %s\n\n", listenerAddr6))
|
||||
|
||||
// --- Filtering rules ---
|
||||
rules.WriteString("# --- Filtering rules (pass) ---\n\n")
|
||||
@@ -983,7 +963,6 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string {
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto udp from any to ! %s port 53\n", iface, listenerIP))
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet proto tcp from any to ! %s port 53\n", iface, listenerIP))
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto udp from any to ! ::1 port 53\n", iface))
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on %s route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n", iface))
|
||||
}
|
||||
rules.WriteString("\n")
|
||||
}
|
||||
@@ -1003,10 +982,13 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string {
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! %s port 53\n", listenerIP))
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! %s port 53\n\n", listenerIP))
|
||||
|
||||
// Force remaining outbound IPv6 DNS through loopback for interception.
|
||||
rules.WriteString("# Force remaining outbound IPv6 DNS through loopback for interception.\n")
|
||||
// Force remaining outbound IPv6 UDP DNS through loopback for interception.
|
||||
// IPv6 TCP DNS is blocked instead — raw socket response injection only handles UDP,
|
||||
// and TCP DNS is rare (truncated responses, zone transfers). Apps fall back to IPv4 TCP.
|
||||
rules.WriteString("# Force remaining outbound IPv6 UDP DNS through loopback for interception.\n")
|
||||
rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53\n")
|
||||
rules.WriteString("pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53\n\n")
|
||||
rules.WriteString("# Block IPv6 TCP DNS — raw socket can't handle TCP; apps fall back to IPv4.\n")
|
||||
rules.WriteString("block return out quick on ! lo0 inet6 proto tcp from any to ! ::1 port 53\n\n")
|
||||
|
||||
// Allow route-to'd DNS packets to pass outbound on lo0.
|
||||
// Without this, VPN firewalls with "block drop all" (e.g., Windscribe) drop the packet
|
||||
@@ -1018,8 +1000,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string {
|
||||
rules.WriteString("# Pass route-to'd DNS outbound on lo0 — no state to avoid bypassing rdr inbound.\n")
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto udp from any to ! %s port 53 no state\n", listenerIP))
|
||||
rules.WriteString(fmt.Sprintf("pass out quick on lo0 inet proto tcp from any to ! %s port 53 no state\n", listenerIP))
|
||||
rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state\n")
|
||||
rules.WriteString("pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state\n\n")
|
||||
rules.WriteString("pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state\n\n")
|
||||
|
||||
// Allow the redirected traffic through on loopback (inbound after rdr).
|
||||
//
|
||||
@@ -1034,7 +1015,7 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string {
|
||||
// (source 127.0.0.1 → original DNS server IP, e.g., 10.255.255.3).
|
||||
rules.WriteString("# Accept redirected DNS — reply-to lo0 forces response through loopback.\n")
|
||||
rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet proto { udp, tcp } from any to %s\n", listenerAddr))
|
||||
rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to %s\n", listenerAddr6))
|
||||
rules.WriteString(fmt.Sprintf("pass in quick on lo0 reply-to lo0 inet6 proto udp from any to %s\n", listenerAddr6))
|
||||
|
||||
return rules.String()
|
||||
}
|
||||
@@ -1043,12 +1024,11 @@ func (p *prog) buildPFAnchorRules(vpnExemptions []vpnDNSExemption) string {
|
||||
// It verifies both the anchor references in the main ruleset and the rules within
|
||||
// our anchor. Failures are logged at ERROR level to make them impossible to miss.
|
||||
func (p *prog) verifyPFState() {
|
||||
natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName)
|
||||
rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName)
|
||||
anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName)
|
||||
verified := true
|
||||
|
||||
// Check main ruleset for anchor references (nat-anchor + rdr-anchor in translation rules).
|
||||
// Check main ruleset for anchor references (rdr-anchor in translation rules).
|
||||
natOut, err := exec.Command("pfctl", "-sn").CombinedOutput()
|
||||
if err != nil {
|
||||
mainLog.Load().Error().Err(err).Msg("DNS intercept: VERIFICATION FAILED — could not dump NAT rules")
|
||||
@@ -1059,10 +1039,6 @@ func (p *prog) verifyPFState() {
|
||||
mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — rdr-anchor reference missing from running NAT rules")
|
||||
verified = false
|
||||
}
|
||||
if !strings.Contains(natStr, natAnchorRef) {
|
||||
mainLog.Load().Error().Msg("DNS intercept: VERIFICATION FAILED — nat-anchor reference missing from running NAT rules")
|
||||
verified = false
|
||||
}
|
||||
}
|
||||
|
||||
filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput()
|
||||
@@ -1229,6 +1205,7 @@ func stringSlicesEqual(a, b []string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
// pfStartStabilization enters stabilization mode, suppressing all pf restores
|
||||
// until the VPN's ruleset stops changing. This prevents a death spiral where
|
||||
// ctrld and the VPN repeatedly overwrite each other's pf rules.
|
||||
@@ -1347,7 +1324,6 @@ func (p *prog) ensurePFAnchorActive() bool {
|
||||
}
|
||||
}
|
||||
|
||||
natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName)
|
||||
rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName)
|
||||
anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName)
|
||||
needsRestore := false
|
||||
@@ -1363,10 +1339,6 @@ func (p *prog) ensurePFAnchorActive() bool {
|
||||
mainLog.Load().Warn().Msg("DNS intercept watchdog: rdr-anchor reference missing from running ruleset")
|
||||
needsRestore = true
|
||||
}
|
||||
if !strings.Contains(natStr, natAnchorRef) {
|
||||
mainLog.Load().Warn().Msg("DNS intercept watchdog: nat-anchor reference missing from running ruleset")
|
||||
needsRestore = true
|
||||
}
|
||||
|
||||
if !needsRestore {
|
||||
filterOut, err := exec.Command("pfctl", "-sr").CombinedOutput()
|
||||
@@ -1762,7 +1734,6 @@ func (p *prog) pfInterceptMonitor() {
|
||||
// The reload is safe for VPN interop because it reassembles from the current running
|
||||
// ruleset (pfctl -sr/-sn), preserving all existing anchors and rules.
|
||||
func (p *prog) forceReloadPFMainRuleset() {
|
||||
natAnchorRef := fmt.Sprintf("nat-anchor \"%s\"", pfAnchorName)
|
||||
rdrAnchorRef := fmt.Sprintf("rdr-anchor \"%s\"", pfAnchorName)
|
||||
anchorRef := fmt.Sprintf("anchor \"%s\"", pfAnchorName)
|
||||
|
||||
@@ -1793,9 +1764,6 @@ func (p *prog) forceReloadPFMainRuleset() {
|
||||
}
|
||||
|
||||
// Ensure our anchor references are present (they may have been wiped).
|
||||
if !pfContainsRule(natLines, natAnchorRef) {
|
||||
natLines = append([]string{natAnchorRef}, natLines...)
|
||||
}
|
||||
if !pfContainsRule(natLines, rdrAnchorRef) {
|
||||
natLines = append([]string{rdrAnchorRef}, natLines...)
|
||||
}
|
||||
|
||||
@@ -211,7 +211,11 @@ func (p *prog) serveDNS(listenerNum string) error {
|
||||
proto := proto
|
||||
if needLocalIPv6Listener(p.cfg.Service.InterceptMode) {
|
||||
g.Go(func() error {
|
||||
s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, handler)
|
||||
ipv6Handler := handler
|
||||
if proto == "udp" {
|
||||
ipv6Handler = wrapIPv6Handler(handler)
|
||||
}
|
||||
s, errCh := runDNSServer(net.JoinHostPort("::1", strconv.Itoa(listenerConfig.Port)), proto, ipv6Handler)
|
||||
defer s.Shutdown()
|
||||
select {
|
||||
case <-p.stopCh:
|
||||
|
||||
163
cmd/cli/rawipv6_darwin.go
Normal file
163
cmd/cli/rawipv6_darwin.go
Normal file
@@ -0,0 +1,163 @@
|
||||
//go:build darwin
|
||||
|
||||
package cli
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"net"
|
||||
"syscall"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
// wrapIPv6Handler wraps a DNS handler so that UDP responses on the [::1] listener
|
||||
// are sent via raw IPv6 sockets instead of the normal sendmsg path. This is needed
|
||||
// because macOS rejects sendmsg from [::1] to global unicast IPv6 addresses (EINVAL).
|
||||
func wrapIPv6Handler(h dns.Handler) dns.Handler {
|
||||
return dns.HandlerFunc(func(w dns.ResponseWriter, r *dns.Msg) {
|
||||
h.ServeDNS(&rawIPv6Writer{ResponseWriter: w}, r)
|
||||
})
|
||||
}
|
||||
|
||||
// rawIPv6Writer wraps a dns.ResponseWriter for the [::1] IPv6 listener on macOS.
|
||||
// When pf redirects IPv6 DNS traffic via route-to + rdr to [::1]:53, the original
|
||||
// client source address is a global unicast IPv6 (e.g., 2607:f0c8:...). macOS
|
||||
// rejects sendmsg from [::1] to any non-loopback address (EINVAL), so the normal
|
||||
// WriteMsg fails. This wrapper intercepts UDP writes and sends the response via a
|
||||
// raw IPv6 socket on lo0, bypassing the kernel's routing validation.
|
||||
//
|
||||
// TCP is not handled — IPv6 TCP DNS is blocked by pf rules and falls back to IPv4.
|
||||
type rawIPv6Writer struct {
|
||||
dns.ResponseWriter
|
||||
}
|
||||
|
||||
// WriteMsg packs the DNS message and sends it via raw socket.
|
||||
func (w *rawIPv6Writer) WriteMsg(m *dns.Msg) error {
|
||||
data, err := m.Pack()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// Write sends raw DNS response bytes via a raw IPv6/UDP socket on lo0.
|
||||
// It constructs a UDP packet (header + payload) and sends it using
|
||||
// IPPROTO_RAW-like behavior via IPV6_HDRINCL-free raw UDP socket.
|
||||
//
|
||||
// pf's rdr state table will reverse-translate the addresses on the response:
|
||||
// - src [::1]:53 → original DNS server IPv6
|
||||
// - dst [client]:port → unchanged
|
||||
func (w *rawIPv6Writer) Write(payload []byte) (int, error) {
|
||||
localAddr := w.ResponseWriter.LocalAddr()
|
||||
remoteAddr := w.ResponseWriter.RemoteAddr()
|
||||
|
||||
srcIP, srcPort, err := parseAddrPort(localAddr)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("rawIPv6Writer: parse local addr %s: %w", localAddr, err)
|
||||
}
|
||||
dstIP, dstPort, err := parseAddrPort(remoteAddr)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("rawIPv6Writer: parse remote addr %s: %w", remoteAddr, err)
|
||||
}
|
||||
|
||||
// Build UDP packet: 8-byte header + DNS payload.
|
||||
udpLen := 8 + len(payload)
|
||||
udpPacket := make([]byte, udpLen)
|
||||
binary.BigEndian.PutUint16(udpPacket[0:2], uint16(srcPort))
|
||||
binary.BigEndian.PutUint16(udpPacket[2:4], uint16(dstPort))
|
||||
binary.BigEndian.PutUint16(udpPacket[4:6], uint16(udpLen))
|
||||
// Checksum placeholder — filled below.
|
||||
binary.BigEndian.PutUint16(udpPacket[6:8], 0)
|
||||
copy(udpPacket[8:], payload)
|
||||
|
||||
// Compute UDP checksum over IPv6 pseudo-header + UDP packet.
|
||||
// For IPv6, UDP checksum is mandatory (unlike IPv4 where it's optional).
|
||||
csum := udp6Checksum(srcIP, dstIP, udpPacket)
|
||||
binary.BigEndian.PutUint16(udpPacket[6:8], csum)
|
||||
|
||||
// Open raw UDP socket. SOCK_RAW with IPPROTO_UDP lets us send
|
||||
// hand-crafted UDP packets. The kernel adds the IPv6 header.
|
||||
fd, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_UDP)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("rawIPv6Writer: socket: %w", err)
|
||||
}
|
||||
defer syscall.Close(fd)
|
||||
|
||||
// Bind to lo0 interface so the packet exits on loopback where pf can
|
||||
// reverse-translate via its rdr state table.
|
||||
if err := bindToLoopback6(fd); err != nil {
|
||||
return 0, fmt.Errorf("rawIPv6Writer: bind to lo0: %w", err)
|
||||
}
|
||||
|
||||
// Send to the client's address.
|
||||
sa := &syscall.SockaddrInet6{Port: 0} // Port is in the UDP header, not the sockaddr for raw sockets.
|
||||
copy(sa.Addr[:], dstIP.To16())
|
||||
|
||||
if err := syscall.Sendto(fd, udpPacket, 0, sa); err != nil {
|
||||
return 0, fmt.Errorf("rawIPv6Writer: sendto [%s]:%d: %w", dstIP, dstPort, err)
|
||||
}
|
||||
|
||||
return len(payload), nil
|
||||
}
|
||||
|
||||
// parseAddrPort extracts the IP and port from a net.Addr.
//
// A *net.UDPAddr is read directly. Any other implementation is handled by
// parsing its String() form as "host:port" (or "[host]:port" for IPv6).
//
// It returns an error — never panics — when addr is nil (either a nil
// interface or a typed-nil *net.UDPAddr), when the string form cannot be split
// into host and port, when the host is not a literal IP, or when the port
// cannot be resolved. On error the returned IP is nil and the port is 0.
func parseAddrPort(addr net.Addr) (net.IP, int, error) {
	if addr == nil {
		return nil, 0, fmt.Errorf("nil address")
	}
	if ua, ok := addr.(*net.UDPAddr); ok {
		// A typed-nil pointer satisfies the interface but cannot be dereferenced.
		if ua == nil {
			return nil, 0, fmt.Errorf("nil UDP address")
		}
		return ua.IP, ua.Port, nil
	}

	host, portStr, err := net.SplitHostPort(addr.String())
	if err != nil {
		return nil, 0, err
	}
	ip := net.ParseIP(host)
	if ip == nil {
		return nil, 0, fmt.Errorf("invalid IP: %s", host)
	}
	port, err := net.LookupPort("udp", portStr)
	if err != nil {
		return nil, 0, err
	}
	return ip, port, nil
}
|
||||
|
||||
// udp6Checksum computes the UDP checksum over the IPv6 pseudo-header and the
// UDP packet (header + payload). The checksum field inside udpPacket must be
// zero when this is called.
//
// The 40-byte pseudo-header is laid out as:
//
//	Source Address       (16 bytes)
//	Destination Address  (16 bytes)
//	Upper-layer length   (4 bytes, len(udpPacket))
//	Zero (3 bytes) + Next Header (1 byte) = 17 (UDP)
//
// The result is the one's complement of the one's-complement sum of all 16-bit
// big-endian words; a trailing odd byte is padded with a zero low byte. If the
// computation yields 0, 0xFFFF is returned instead, because a zero checksum
// field means "no checksum" and is not valid for UDP over IPv6.
func udp6Checksum(src, dst net.IP, udpPacket []byte) uint16 {
	var psh [40]byte
	copy(psh[0:16], src.To16())
	copy(psh[16:32], dst.To16())
	binary.BigEndian.PutUint32(psh[32:36], uint32(len(udpPacket)))
	psh[39] = 17 // Next Header: UDP

	// The pseudo-header has an even length, so summing it and the packet
	// separately is equivalent to summing their concatenation — without the
	// extra allocation.
	var sum uint32
	accumulate := func(b []byte) {
		for i := 0; i+1 < len(b); i += 2 {
			sum += uint32(binary.BigEndian.Uint16(b[i : i+2]))
		}
		if len(b)%2 == 1 {
			sum += uint32(b[len(b)-1]) << 8
		}
	}
	accumulate(psh[:])
	accumulate(udpPacket)

	// Fold carries back into the low 16 bits.
	for sum > 0xffff {
		sum = (sum >> 16) + (sum & 0xffff)
	}

	csum := ^uint16(sum)
	if csum == 0 {
		// Zero is reserved for "no checksum"; transmit all ones instead.
		return 0xffff
	}
	return csum
}
|
||||
|
||||
// bindToLoopback6 binds the IPv6 socket fd to the source address ::1 (port 0).
//
// ::1 is the address ctrld listens on, and — per the pf rdr setup this is used
// with — the source the response must carry for the redirect state to
// reverse-translate it. Note that this binds an address only; it does not pin
// the socket to a specific interface.
//
// It returns the error from bind(2), if any.
func bindToLoopback6(fd int) error {
	var loopback syscall.SockaddrInet6 // Port stays 0.
	copy(loopback.Addr[:], net.IPv6loopback.To16())
	return syscall.Bind(fd, &loopback)
}
|
||||
12
cmd/cli/rawipv6_other.go
Normal file
12
cmd/cli/rawipv6_other.go
Normal file
@@ -0,0 +1,12 @@
|
||||
//go:build !darwin
|
||||
|
||||
package cli
|
||||
|
||||
import "github.com/miekg/dns"
|
||||
|
||||
// wrapIPv6Handler is a no-op on non-darwin platforms. The raw IPv6 response
|
||||
// writer is only needed on macOS where pf's rdr preserves the original global
|
||||
// unicast source address, and the kernel rejects sendmsg from [::1] to it.
|
||||
func wrapIPv6Handler(h dns.Handler) dns.Handler {
|
||||
return h
|
||||
}
|
||||
@@ -17,7 +17,7 @@ options (set) → normalization (scrub) → queueing → translation (nat/rdr)
|
||||
| Anchor Type | Section | Purpose |
|
||||
|-------------|---------|---------|
|
||||
| `scrub-anchor` | Normalization | Packet normalization |
|
||||
| `nat-anchor` | Translation | NAT rules |
|
||||
| `nat-anchor` | Translation | NAT rules (not used by ctrld) |
|
||||
| `rdr-anchor` | Translation | Redirect rules |
|
||||
| `anchor` | Filtering | Pass/block rules |
|
||||
|
||||
@@ -122,57 +122,60 @@ Three problems prevent a simple "mirror the IPv4 rules" approach:
|
||||
|
||||
3. **sendmsg from `[::1]` to global unicast fails**: Unlike IPv4 where the kernel allows `sendmsg` from `127.0.0.1` to local private IPs (e.g., `10.x.x.x`), macOS/BSD rejects `sendmsg` from `[::1]` to a global unicast IPv6 address with `EINVAL`. Since pf's `rdr` preserves the original source IP (the machine's global IPv6 address), ctrld's reply would fail.
|
||||
|
||||
### Solution: nat + rdr + [::1] Listener
|
||||
### Solution: Raw Socket Response + rdr + [::1] Listener
|
||||
|
||||
**Key insight:** pf's `nat on lo0` doesn't fire for `route-to`'d packets (pf already ran the translation phase on the original outbound interface and skips it on lo0's outbound pass). `rdr` works because it fires on lo0's *inbound* side (a new direction after loopback reflection). So we can't use `nat` to rewrite the source, and any address bound to lo0 (including ULAs like `fd00:53::1`) can't send to global unicast addresses — the kernel segregates lo0's routing.
|
||||
|
||||
Instead, we use a **raw IPv6 socket** to send UDP responses. The `[::1]` listener receives queries normally via `rdr`, but responses are sent via `SOCK_RAW` with `IPPROTO_UDP`, bypassing the kernel's routing validation. The raw socket constructs the UDP packet (header + DNS payload) with correct checksums and sends it on lo0. pf matches the response against the `rdr` state table and reverse-translates the addresses.
|
||||
|
||||
**IPv6 TCP DNS** is blocked (`block return`) and falls back to IPv4 — TCP DNS is rare (truncated responses, zone transfers) and raw socket injection for TCP would require managing the full TCP state machine.
|
||||
|
||||
```
|
||||
# NAT: rewrite source to ::1 so ctrld can reply
|
||||
nat on lo0 inet6 proto udp from ! ::1 to ! ::1 port 53 -> ::1
|
||||
nat on lo0 inet6 proto tcp from ! ::1 to ! ::1 port 53 -> ::1
|
||||
|
||||
# RDR: redirect destination to ctrld's IPv6 listener
|
||||
# RDR: redirect IPv6 UDP DNS to ctrld's listener (no nat needed)
|
||||
rdr on lo0 inet6 proto udp from any to ! ::1 port 53 -> ::1 port 53
|
||||
rdr on lo0 inet6 proto tcp from any to ! ::1 port 53 -> ::1 port 53
|
||||
|
||||
# Filter: route-to forces IPv6 DNS to loopback (mirrors IPv4 rules)
|
||||
# Filter: route-to forces IPv6 UDP DNS to loopback
|
||||
pass out quick on ! lo0 route-to lo0 inet6 proto udp from any to ! ::1 port 53
|
||||
pass out quick on ! lo0 route-to lo0 inet6 proto tcp from any to ! ::1 port 53
|
||||
|
||||
# Block IPv6 TCP DNS — raw socket can't handle TCP; apps fall back to IPv4
|
||||
block return out quick on ! lo0 inet6 proto tcp from any to ! ::1 port 53
|
||||
|
||||
# Pass on lo0 without state (mirrors IPv4)
|
||||
pass out quick on lo0 inet6 proto udp from any to ! ::1 port 53 no state
|
||||
pass out quick on lo0 inet6 proto tcp from any to ! ::1 port 53 no state
|
||||
|
||||
# Accept redirected IPv6 DNS with reply-to (mirrors IPv4)
|
||||
pass in quick on lo0 reply-to lo0 inet6 proto { udp, tcp } from any to ::1 port 53
|
||||
pass in quick on lo0 reply-to lo0 inet6 proto udp from any to ::1 port 53
|
||||
```
|
||||
|
||||
### IPv6 Packet Flow
|
||||
### IPv6 Packet Flow (UDP)
|
||||
|
||||
```
|
||||
Application queries [2607:f0c8:8000:8210::1]:53 (IPv6 DNS server)
|
||||
↓
|
||||
pf filter: "pass out route-to lo0 inet6 ... port 53" → redirects to lo0
|
||||
pf filter: "pass out route-to lo0 inet6 proto udp ... port 53" → redirects to lo0
|
||||
↓
|
||||
pf (outbound lo0): "pass out on lo0 inet6 ... no state" → passes
|
||||
↓
|
||||
Loopback reflects packet inbound on lo0
|
||||
↓
|
||||
pf nat: rewrites source 2607:f0c8:...:ec6e → ::1
|
||||
pf rdr: rewrites dest [2607:f0c8:8000:8210::1]:53 → [::1]:53
|
||||
(source remains: 2607:f0c8:...:ec6e — the machine's global IPv6)
|
||||
↓
|
||||
ctrld receives query from [::1]:port → [::1]:53
|
||||
ctrld receives query from [2607:f0c8:...:ec6e]:port → [::1]:53
|
||||
↓
|
||||
ctrld resolves via DoH, replies to [::1]:port (kernel accepts ::1 → ::1)
|
||||
ctrld resolves via DoH upstream
|
||||
↓
|
||||
pf reverses both translations:
|
||||
- nat reverse: dest ::1 → 2607:f0c8:...:ec6e (original client)
|
||||
- rdr reverse: src ::1 → 2607:f0c8:8000:8210::1 (original DNS server)
|
||||
Raw IPv6 socket sends response: [::1]:53 → [2607:f0c8:...:ec6e]:port
|
||||
(bypasses kernel routing validation — raw socket on lo0)
|
||||
↓
|
||||
pf reverses rdr: src [::1]:53 → [2607:f0c8:8000:8210::1]:53
|
||||
↓
|
||||
Application receives response from [2607:f0c8:8000:8210::1]:53 ✓
|
||||
```
|
||||
|
||||
### Client IP Recovery
|
||||
|
||||
The `nat` rewrites the source to `::1`, so ctrld sees the client as `::1` (loopback). The existing `spoofLoopbackIpInClientInfo()` logic detects this and replaces it with the machine's real RFC1918 IPv4 address (e.g., `10.0.10.211`). This is the same mechanism used when queries arrive from `127.0.0.1` — no client identity is lost.
|
||||
pf's `rdr` preserves the original source (machine's global IPv6), so ctrld sees the real address. The existing `spoofLoopbackIpInClientInfo()` logic replaces loopback IPs with the machine's real RFC1918 IPv4 address for `X-Cd-Ip` reporting. For IPv6 intercepted queries, the source is already the real address — no spoofing needed.
|
||||
|
||||
### IPv6 Listener
|
||||
|
||||
@@ -180,12 +183,10 @@ The `[::1]` listener reuses the existing infrastructure from Windows (where it w
|
||||
- **Windows**: Always (if IPv6 is available)
|
||||
- **macOS**: Only in intercept mode
|
||||
|
||||
On macOS, the UDP handler is wrapped with `rawIPv6Writer` which intercepts `WriteMsg`/`Write` calls and sends responses via a raw IPv6 socket on lo0 instead of the normal `sendmsg` path.
|
||||
|
||||
If the `[::1]` listener fails to bind, it logs a warning and continues — the IPv4 listener is primary.
|
||||
|
||||
### nat-anchor Requirement
|
||||
|
||||
The `nat` rules in our anchor require a `nat-anchor "com.controld.ctrld"` reference in the main pf ruleset, in addition to the existing `rdr-anchor` and `anchor` references. All pf management functions (inject, remove, verify, watchdog, force-reload) handle all three anchor types.
|
||||
|
||||
## Rule Ordering Within the Anchor
|
||||
|
||||
pf requires translation rules before filter rules, even within an anchor:
|
||||
@@ -236,7 +237,7 @@ The trickiest part. macOS only processes anchors declared in the active pf rules
|
||||
|
||||
1. Read `/etc/pf.conf`
|
||||
2. If our anchor reference already exists, reload as-is
|
||||
3. Otherwise, inject `nat-anchor "com.controld.ctrld"` and `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section
|
||||
3. Otherwise, inject `rdr-anchor "com.controld.ctrld"` in the translation section and `anchor "com.controld.ctrld"` in the filter section
|
||||
4. Write to a **temp file** and load with `pfctl -f <tmpfile>`
|
||||
5. **We never modify `/etc/pf.conf` on disk** — changes are runtime-only and don't survive reboot (ctrld re-injects on every start)
|
||||
|
||||
@@ -376,5 +377,6 @@ We chose `route-to + rdr` as the best balance of effectiveness and deployability
|
||||
9. **`pass out quick` exemptions work with route-to** — they fire in the same phase (filter), so `quick` + rule ordering means exempted packets never hit the route-to rule
|
||||
10. **pf cannot cross-AF redirect** — `rdr on lo0 inet6 ... -> 127.0.0.1` is invalid. IPv6 DNS must be handled by an `[::1]` listener.
|
||||
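11. **`block return` doesn't work for IPv6 UDP DNS** — BSD doesn't deliver ICMPv6 unreachable to unconnected UDP sockets (`sendto`). Apps timeout waiting for a response that never comes. (This applies to UDP only; `block return` is still used for IPv6 TCP DNS, where apps fall back to IPv4.)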
|
||||
12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. `nat` on lo0 is required to rewrite the source.
|
||||
13. **`nat-anchor` is separate from `rdr-anchor`** — pf requires both in the main ruleset for nat and rdr rules in an anchor to be evaluated. `rdr-anchor` alone does not cover nat rules.
|
||||
12. **sendmsg from `::1` to global unicast fails on macOS** — unlike IPv4 where `127.0.0.1` can send to any local address, `::1` cannot send to the machine's own global IPv6 address. Solved with raw socket response injection (SOCK_RAW + IPPROTO_UDP on lo0).
|
||||
13. **`nat on lo0` doesn't fire for `route-to`'d packets** — pf runs translation on the original outbound interface (en0), then skips it on lo0's outbound pass. `rdr` works because lo0 inbound is a genuinely new direction. Any lo0 address (including ULAs) can't route to global unicast — the kernel segregates lo0's routing table.
|
||||
14. **Raw IPv6 sockets bypass routing validation** — `SOCK_RAW` with `IPPROTO_UDP` can send from `::1` to global unicast on lo0, unlike normal `SOCK_DGRAM` sockets. The kernel doesn't apply the same routing checks for raw sockets.
|
||||
|
||||
Reference in New Issue
Block a user