feat: add macOS pf DNS interception

Implement DNS interception on macOS using pf (packet filter):
- Anchor injection into running ruleset (not /etc/pf.conf)
- route-to lo0 + rdr rules for locally-originated DNS capture
- _ctrld group exemption so ctrld's own queries bypass interception
- Watchdog to detect and restore wiped anchor rules
- Probe-based auto-heal for Parallels VM pf corruption
- IPv6 DNS blocking and block-return for clean timeouts
- Interface-specific tunnel detection for VPN coexistence
- Port 5354 fallback in intercept mode

Includes pf technical reference docs and test scripts.

Squashed from intercept mode development on v1.0 branch (#497).
This commit is contained in:
Codescribe
2026-03-03 02:07:11 -05:00
committed by Cuong Manh Le
parent 1e8240bd1c
commit 289a46dc2c
13 changed files with 4820 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
#!/bin/bash
# diag-lo0-capture.sh — Capture DNS on lo0 to see where the pf chain breaks
# Usage: sudo bash diag-lo0-capture.sh
# Run while VPN + ctrld are both active, then dig from another terminal
set -u

PCAP="/tmp/lo0-dns-$(date +%s).pcap"

#######################################
# Print pf state-table entries that mention ':53', followed by a total.
# Evaluated at call time, so the BEFORE and AFTER snapshots are independent.
# Arguments: $1 - maximum number of entries to list
# Outputs:   matching state lines and "(total: N)" on stdout
#######################################
show_port53_states() {
  local limit=$1
  local states total
  states=$(pfctl -ss 2>/dev/null | grep ':53')
  if [[ -n "$states" ]]; then
    printf '%s\n' "$states" | head -n "$limit"
    total=$(printf '%s\n' "$states" | grep -c ':53')
  else
    total=0
  fi
  echo "(total: $total)"
}

#######################################
# SIGINT handler: stop both tcpdump processes, show the post-capture state
# table and tell the user where the pcap is.
# Globals: TCPDUMP_PID, LIVE_PID, PCAP (read)
#######################################
on_interrupt() {
  kill "$TCPDUMP_PID" "$LIVE_PID" 2>/dev/null
  # Let the capture process finish writing the pcap before reporting on it.
  wait "$TCPDUMP_PID" "$LIVE_PID" 2>/dev/null
  echo ''
  echo '--- port 53 states AFTER dig ---'
  # The previous inline trap string expanded its $(pfctl ...) count when the
  # trap was installed, so "AFTER" always showed the pre-capture total.
  show_port53_states 20
  echo ''
  echo "Capture saved to: $PCAP"
  echo "Read with: tcpdump -r $PCAP -n -v"
  exit 0
}

echo "=== lo0 DNS Packet Capture ==="
echo "Capturing to: $PCAP"
echo ""

# Show current rules (verify build)
echo "--- ctrld anchor rdr rules ---"
pfctl -a com.controld.ctrld -sn 2>/dev/null
echo ""
echo "--- ctrld anchor filter rules (lo0 only) ---"
pfctl -a com.controld.ctrld -sr 2>/dev/null | grep lo0
echo ""

# Check pf state table for port 53 before
echo "--- port 53 states BEFORE dig ---"
show_port53_states 10
echo ""

# Start capture on lo0
echo "Starting tcpdump on lo0 port 53..."
echo ">>> In another terminal, run: dig example.com"
echo ">>> Then press Ctrl-C here"
echo ""

# Options go before the filter expression so parsing does not depend on
# getopt argument permutation.
tcpdump -i lo0 -n -v -w "$PCAP" port 53 2>&1 &
TCPDUMP_PID=$!

# Also show live output
tcpdump -i lo0 -n port 53 2>&1 &
LIVE_PID=$!

# Wait for Ctrl-C
trap on_interrupt INT
wait

View File

@@ -0,0 +1,62 @@
#!/bin/bash
# diag-pf-poll.sh — Polls pf rules, options, states, and DNS every 2s
# Usage: sudo bash diag-pf-poll.sh | tee /tmp/pf-poll.log
# Steps: 1) Run script 2) Connect VPN 3) Start ctrld 4) Ctrl-C when done
set -u

# Every poll is appended to a per-run log file named after the start time.
LOG="/tmp/pf-poll-$(date +%s).log"

printf '%s\n' \
  "=== PF Poll Diagnostic — logging to $LOG ===" \
  "Press Ctrl-C to stop" \
  ""
#######################################
# Print one diagnostic snapshot: pf skip options, main filter and nat/rdr
# rules, the ctrld anchor contents, state-table counts, two dig probes and
# the addresses of every utun interface.
# Globals:
#   PF_ANCHOR (read, optional) - anchor path to dump; defaults to
#     com.apple.internet-sharing/ctrld.
#     NOTE(review): the sibling scripts query com.controld.ctrld — confirm
#     which anchor this script is meant to inspect.
# Outputs: the snapshot on stdout
#######################################
poll() {
  local anchor="${PF_ANCHOR:-com.apple.internet-sharing/ctrld}"

  local ts
  ts=$(date '+%H:%M:%S.%3N' 2>/dev/null) || ts=""
  # date builds without %N support do not fail; they leave a literal 'N' in
  # the output. Fall back to whole seconds in that case.
  case "$ts" in
    '' | *N*) ts=$(date '+%H:%M:%S') ;;
  esac
  echo "======== [$ts] POLL ========"

  # 1. pf options — looking for "set skip on lo0"
  echo "--- pf options ---"
  pfctl -so 2>/dev/null | grep -i skip || echo "(no skip rules)"

  # 2. Main ruleset anchors — where is ctrld relative to block drop all?
  echo "--- main filter rules (summary) ---"
  pfctl -sr 2>/dev/null | head -30

  # 3. Main NAT/rdr rules
  echo "--- main nat/rdr rules (summary) ---"
  pfctl -sn 2>/dev/null | head -20

  # 4. ctrld anchor content
  echo "--- ctrld anchor (filter) ---"
  pfctl -a "$anchor" -sr 2>/dev/null || echo "(no anchor)"
  echo "--- ctrld anchor (nat/rdr) ---"
  pfctl -a "$anchor" -sn 2>/dev/null || echo "(no anchor)"

  # 5. State count for rdr target (10.255.255.3) and loopback.
  # Read the state table once so the three counts describe the same instant.
  echo "--- states summary ---"
  local states total rdr lo0
  states=$(pfctl -ss 2>/dev/null || true)
  if [[ -n "$states" ]]; then
    total=$(printf '%s\n' "$states" | wc -l | tr -d ' ')
  else
    total=0
  fi
  rdr=$(printf '%s\n' "$states" | grep -c '10\.255\.255\.3' || true)
  lo0=$(printf '%s\n' "$states" | grep -c 'lo0' || true)
  echo "total=$total rdr_target=$rdr lo0=$lo0"

  # 6. Quick DNS test (1s timeout)
  echo "--- DNS tests ---"
  local direct system
  direct=$(dig +short +time=1 +tries=1 example.com @127.0.0.1 2>&1 | head -1)
  system=$(dig +short +time=1 +tries=1 example.com 2>&1 | head -1)
  echo "direct @127.0.0.1: $direct"
  echo "system DNS: $system"

  # 7. VPN tunnel interface
  echo "--- tunnel interfaces ---"
  local iface addrs
  while read -r iface; do
    addrs=$(ifconfig "$iface" 2>/dev/null | awk '/inet /{print $2}')
    # A trailing `|| echo "no ip"` on the pipeline never fires (awk exits 0),
    # so test the captured output instead.
    if [[ -n "$addrs" ]]; then
      printf '%s: %s\n' "$iface" "$addrs"
    else
      printf '%s: no ip\n' "$iface"
    fi
  done < <(ifconfig -l | tr ' ' '\n' | grep -E '^utun')
  echo ""
}
# Main loop: take a snapshot every 2 seconds until the script is interrupted.
# Each snapshot's stdout and stderr are merged, shown on the terminal and
# appended to $LOG.
while true; do
poll 2>&1 | tee -a "$LOG"
sleep 2
done

View File

@@ -0,0 +1,183 @@
#!/bin/bash
# diag-vpn-connect.sh — Diagnostic script for testing ctrld dns-intercept
# during VPN connection on macOS.
#
# Usage: sudo ./diag-vpn-connect.sh
#
# Run this BEFORE connecting VPN. It polls every 0.5s and captures:
# 1. pf anchor state (are ctrld anchors present?)
# 2. pf state table entries (rdr interception working?)
# 3. ctrld log events (watchdog, rebootstrap, errors)
# 4. scutil DNS resolver state
# 5. Active tunnel interfaces
# 6. dig test query results
#
# Output goes to /tmp/diag-vpn-<timestamp>/
# Press Ctrl-C to stop. A summary is printed at the end.
set -e

# The script refuses to run unless invoked as root.
if [[ "$(id -u)" -ne 0 ]]; then
  echo "ERROR: Must run as root (sudo)"
  exit 1
fi

# Log location may be overridden from the environment.
CTRLD_LOG="${CTRLD_LOG:-/tmp/dns.log}"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
OUTDIR="/tmp/diag-vpn-${TIMESTAMP}"
mkdir -p "$OUTDIR"

cat <<EOF
=== VPN + ctrld DNS Intercept Diagnostic ===
Output: $OUTDIR
ctrld log: $CTRLD_LOG

1. Start this script
2. Connect VPN
3. Wait ~30 seconds
4. Try: dig popads.net / dig @127.0.0.1 popads.net
5. Ctrl-C to stop and see summary

Polling every 0.5s... Press Ctrl-C to stop.

EOF

# Track ctrld log position so only lines written during this run are analysed.
LOG_START_LINE=0
if [[ -f "$CTRLD_LOG" ]]; then
  LOG_START_LINE=$(wc -l < "$CTRLD_LOG")
fi

# Counters accumulated by the polling loop and reported on exit.
ITER=0
DIG_OK=0
DIG_FAIL=0
ANCHOR_PRESENT=0
ANCHOR_MISSING=0
PF_WIPE_COUNT=0
FORCE_REBOOT_COUNT=0
LAST_TUNNEL_IFACES=""
#######################################
# Count lines of a file that match a basic regular expression.
# Always prints exactly one integer. `grep -c` prints "0" AND exits 1 when
# nothing matches, so the usual `|| echo 0` idiom would print "0" twice.
# Arguments: $1 - pattern, $2 - file
#######################################
_count_matches() {
  local n
  n=$(grep -c -- "$1" "$2" 2>/dev/null) || true
  echo "${n:-0}"
}

#######################################
# INT/TERM handler: snapshot final pf and resolver state, extract the ctrld
# log lines written during this run into summary files, print the summary
# and exit 0.
# Globals: OUTDIR, CTRLD_LOG, LOG_START_LINE, ITER, ANCHOR_PRESENT,
#          ANCHOR_MISSING, DIG_OK, DIG_FAIL, LAST_TUNNEL_IFACES (read);
#          PF_WIPE_COUNT, FORCE_REBOOT_COUNT, DEADLINE_COUNT,
#          FALLBACK_COUNT (written)
#######################################
cleanup() {
  echo ""
  echo "=== Stopping diagnostic ==="

  # Capture final state
  echo "--- Final pf state ---" > "$OUTDIR/final-pfctl.txt"
  pfctl -sa >> "$OUTDIR/final-pfctl.txt" 2>&1 || true
  echo "--- Final scutil ---" > "$OUTDIR/final-scutil.txt"
  scutil --dns >> "$OUTDIR/final-scutil.txt" 2>&1 || true

  # Extract ctrld log events since start
  if [ -f "$CTRLD_LOG" ]; then
    local events="$OUTDIR/ctrld-events.log"
    tail -n +$((LOG_START_LINE + 1)) "$CTRLD_LOG" > "$events" 2>/dev/null || true

    # Extract key events
    echo "--- Watchdog events ---" > "$OUTDIR/summary-watchdog.txt"
    grep -i "watchdog\|anchor.*missing\|anchor.*restored\|force-reset\|re-bootstrapping\|force re-bootstrapping" "$events" >> "$OUTDIR/summary-watchdog.txt" 2>/dev/null || true
    echo "--- Errors ---" > "$OUTDIR/summary-errors.txt"
    grep '"level":"error"' "$events" >> "$OUTDIR/summary-errors.txt" 2>/dev/null || true
    echo "--- Network changes ---" > "$OUTDIR/summary-network.txt"
    grep -i "Network change\|tunnel interface\|Ignoring interface" "$events" >> "$OUTDIR/summary-network.txt" 2>/dev/null || true
    echo "--- Transport resets ---" > "$OUTDIR/summary-transport.txt"
    grep -i "re-bootstrap\|force.*bootstrap\|dialing to\|connected to" "$events" >> "$OUTDIR/summary-transport.txt" 2>/dev/null || true

    # Count key events
    PF_WIPE_COUNT=$(_count_matches "anchor.*missing\|restoring pf" "$events")
    FORCE_REBOOT_COUNT=$(_count_matches "force re-bootstrapping\|force-reset" "$events")
    DEADLINE_COUNT=$(_count_matches "context deadline exceeded" "$events")
    FALLBACK_COUNT=$(_count_matches "OS resolver retry query successful" "$events")
  fi

  echo ""
  echo "========================================="
  echo " DIAGNOSTIC SUMMARY"
  echo "========================================="
  echo "Duration: $ITER iterations (~$((ITER / 2))s)"
  echo ""
  echo "pf Anchor Status:"
  echo " Present: $ANCHOR_PRESENT times"
  echo " Missing: $ANCHOR_MISSING times"
  echo ""
  echo "dig Tests (popads.net):"
  echo " Success: $DIG_OK"
  echo " Failed: $DIG_FAIL"
  echo ""
  echo "ctrld Log Events:"
  echo " pf wipes detected: $PF_WIPE_COUNT"
  echo " Force rebootstraps: $FORCE_REBOOT_COUNT"
  echo " Context deadline errors: ${DEADLINE_COUNT:-0}"
  echo " OS resolver fallbacks: ${FALLBACK_COUNT:-0}"
  echo ""
  echo "Last tunnel interfaces: ${LAST_TUNNEL_IFACES:-none}"
  echo ""
  echo "Files saved to: $OUTDIR/"
  echo " final-pfctl.txt — full pfctl -sa at exit"
  echo " final-scutil.txt — scutil --dns at exit"
  echo " ctrld-events.log — ctrld log during test"
  echo " summary-watchdog.txt — watchdog events"
  echo " summary-errors.txt — errors"
  echo " summary-network.txt — network change events"
  echo " summary-transport.txt — transport reset events"
  echo " timeline.log — per-iteration state"
  echo "========================================="
  exit 0
}
trap cleanup INT TERM
# Polling loop: one timeline line every 0.5s until interrupted (the INT/TERM
# trap prints the summary and exits).
while true; do
  ITER=$((ITER + 1))

  NOW=$(date '+%H:%M:%S.%3N' 2>/dev/null || date '+%H:%M:%S')
  # date builds without %N support do not fail; they leave a literal 'N' in
  # the output, so the `||` fallback above never fires for them.
  case "$NOW" in
    *N*) NOW=$(date '+%H:%M:%S') ;;
  esac

  # 1. Check pf anchor presence
  ANCHOR_STATUS="MISSING"
  if pfctl -sr 2>/dev/null | grep -q "com.controld.ctrld"; then
    ANCHOR_STATUS="PRESENT"
    ANCHOR_PRESENT=$((ANCHOR_PRESENT + 1))
  else
    ANCHOR_MISSING=$((ANCHOR_MISSING + 1))
  fi

  # 2. Check tunnel interfaces
  TUNNEL_IFACES=$(ifconfig -l 2>/dev/null | tr ' ' '\n' | grep -E '^(utun|ipsec|ppp|tap|tun)' \
    | while read -r iface; do
        # Only list interfaces that have an IPv4 address
        if ifconfig "$iface" 2>/dev/null | grep -q "inet "; then
          echo -n "$iface "
        fi
      done)
  TUNNEL_IFACES=$(echo "$TUNNEL_IFACES" | xargs) # trim
  if [ -n "$TUNNEL_IFACES" ]; then
    LAST_TUNNEL_IFACES="$TUNNEL_IFACES"
  fi

  # 3. Count rdr states (three-part = intercepted).
  # `grep -c` already prints 0 when nothing matches (and exits 1), so use
  # `|| true`; `|| echo 0` would produce a two-line "0\n0" value.
  RDR_COUNT=$(pfctl -ss 2>/dev/null | grep -cF "127.0.0.1:53 <-" || true)
  RDR_COUNT=${RDR_COUNT:-0}

  # 4. Quick dig test (1s timeout, single try)
  DIG_RESULT="SKIP"
  if [ $((ITER % 4)) -eq 0 ]; then # every 2 seconds
    if dig +time=1 +tries=1 popads.net A @127.0.0.1 +short >/dev/null 2>&1; then
      DIG_RESULT="OK"
      DIG_OK=$((DIG_OK + 1))
    else
      DIG_RESULT="FAIL"
      DIG_FAIL=$((DIG_FAIL + 1))
    fi
  fi

  # 5. Check latest ctrld log for recent errors
  RECENT_ERR=""
  if [ -f "$CTRLD_LOG" ]; then
    RECENT_ERR=$(tail -5 "$CTRLD_LOG" 2>/dev/null | grep -o '"message":"[^"]*deadline[^"]*"' | tail -1 || true)
  fi

  # Output timeline
  LINE="[$NOW] anchor=$ANCHOR_STATUS rdr_states=$RDR_COUNT tunnels=[$TUNNEL_IFACES] dig=$DIG_RESULT $RECENT_ERR"
  echo "$LINE"
  echo "$LINE" >> "$OUTDIR/timeline.log"

  sleep 0.5
done

View File

@@ -0,0 +1,556 @@
#!/bin/bash
# =============================================================================
# DNS Intercept Mode Test Script — macOS (pf)
# =============================================================================
# Run as root: sudo bash test-dns-intercept-mac.sh
#
# Tests the dns-intercept feature end-to-end with validation at each step.
# Logs are read from /tmp/dns.log (ctrld log location on test machine).
#
# Manual steps marked with [MANUAL] require human interaction.
# =============================================================================
set -euo pipefail

# ctrld log file on the test machine and the pf anchor the checks look for.
CTRLD_LOG="/tmp/dns.log"
PF_ANCHOR="com.controld.ctrld"

# Result bookkeeping, updated by pass/fail/warn.
PASS=0 FAIL=0 WARN=0
RESULTS=()

# Colors (backslash escapes, expanded later by `echo -e`)
RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m'
CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m'
# Section banner.
header() {
  echo -e "\n${CYAN}${BOLD}━━━ $1 ━━━${NC}"
}

# Informational line; not counted in the results.
info() {
  echo -e " ${BOLD}${NC} $1"
}

# pass/fail/warn print a coloured verdict, append it to RESULTS and bump the
# matching counter.
pass() {
  local msg=$1
  echo -e " ${GREEN}✅ PASS${NC}: ${msg}"
  RESULTS+=("PASS: ${msg}")
  PASS=$(( PASS + 1 ))
}

fail() {
  local msg=$1
  echo -e " ${RED}❌ FAIL${NC}: ${msg}"
  RESULTS+=("FAIL: ${msg}")
  FAIL=$(( FAIL + 1 ))
}

warn() {
  local msg=$1
  echo -e " ${YELLOW}⚠️ WARN${NC}: ${msg}"
  RESULTS+=("WARN: ${msg}")
  WARN=$(( WARN + 1 ))
}

# Instruction the operator has to carry out by hand.
manual() {
  echo -e " ${YELLOW}[MANUAL]${NC} $1"
}

# Horizontal rule between groups of checks.
separator() {
  echo -e "${CYAN}─────────────────────────────────────────────────────${NC}"
}
# Abort with exit status 1 unless the effective UID is 0.
check_root() {
  if (( EUID == 0 )); then
    return 0
  fi
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
}
# Prompt the operator and block until a line is read from stdin.
wait_for_key() {
  # %b expands backslash escapes the same way `echo -e` does.
  printf '%b\n' "\n Press ${BOLD}Enter${NC} to continue..."
  read -r
}
# Case-insensitively search the tail of the ctrld log.
# Arguments: $1 - grep pattern; $2 - how many trailing lines to scan (default 200)
# Outputs:   matching lines on stdout
# Returns:   always 0, including when nothing matches or the log is unreadable
log_grep() {
  local needle=$1
  local window=${2:-200}
  if ! tail -n "$window" "$CTRLD_LOG" 2>/dev/null | grep -i "$needle" 2>/dev/null; then
    return 0
  fi
}
# Count case-insensitive matches in the tail of the ctrld log.
# Arguments: $1 - grep pattern; $2 - how many trailing lines to scan (default 200)
# Outputs:   exactly one integer on stdout ("0" when nothing matches or the
#            log cannot be read)
# Returns:   always 0
log_grep_count() {
  local pattern="$1"
  local lines="${2:-200}"
  local count
  # `grep -c` prints "0" and exits 1 when nothing matches, so `|| echo "0"`
  # would emit a second line; capture the output and default it instead.
  count=$(tail -n "$lines" "$CTRLD_LOG" 2>/dev/null | grep -ci "$pattern" 2>/dev/null) || true
  echo "${count:-0}"
}
# =============================================================================
# TEST SECTIONS
# =============================================================================
# Section 0: verify the tools the suite needs (pfctl, dig) and note whether
# the ctrld log exists, then print the default-route interface and the first
# few configured nameservers for context.
# Exits the script with status 1 when pfctl or dig is missing.
test_prereqs() {
  header "0. Prerequisites"

  if command -v pfctl &>/dev/null; then
    pass "pfctl available"
  else
    fail "pfctl not found"
    exit 1
  fi

  if [[ -f "$CTRLD_LOG" ]]; then
    pass "ctrld log exists at $CTRLD_LOG"
  else
    warn "ctrld log not found at $CTRLD_LOG — log checks will be skipped"
  fi

  if command -v dig &>/dev/null; then
    pass "dig available"
  else
    fail "dig not found — install bind tools"
    exit 1
  fi

  info "Default route interface: $(route -n get default 2>/dev/null | grep interface | awk '{print $2}' || echo 'unknown')"

  info "Current DNS servers:"
  # Purely informational: with pipefail + errexit an empty grep result (or
  # head closing the pipe early) must not abort the whole suite.
  scutil --dns | grep "nameserver\[" | head -5 | sed 's/^/ /' || true
}
# Section 1: inspect the running pf configuration. Checks, in order, that pf
# is enabled, that the main filter and NAT rulesets both mention $PF_ANCHOR,
# that the anchor itself holds filter rules (with route-to) and rdr rules on
# lo0 for port 53, that /etc/pf.anchors/$PF_ANCHOR exists, and that
# /etc/pf.conf does not mention the anchor. Each outcome is recorded through
# pass/fail/warn; the function only reads state.
test_pf_state() {
header "1. PF State Validation"
# Is pf enabled?
local pf_status
# `|| true` keeps a non-matching grep from failing the assignment.
pf_status=$(pfctl -si 2>&1 | grep "Status:" || true)
if echo "$pf_status" | grep -q "Enabled"; then
pass "pf is enabled"
else
fail "pf is NOT enabled (status: $pf_status)"
fi
# Is our anchor referenced in the running ruleset?
local sr_match sn_match
sr_match=$(pfctl -sr 2>&1 | grep "$PF_ANCHOR" || true)
sn_match=$(pfctl -sn 2>&1 | grep "$PF_ANCHOR" || true)
if [[ -n "$sr_match" ]]; then
pass "anchor '$PF_ANCHOR' found in filter rules (pfctl -sr)"
info " $sr_match"
else
fail "anchor '$PF_ANCHOR' NOT in filter rules — main ruleset doesn't reference it"
fi
if [[ -n "$sn_match" ]]; then
pass "rdr-anchor '$PF_ANCHOR' found in NAT rules (pfctl -sn)"
info " $sn_match"
else
fail "rdr-anchor '$PF_ANCHOR' NOT in NAT rules — redirect won't work"
fi
# Check anchor rules
separator
info "Anchor filter rules (pfctl -a '$PF_ANCHOR' -sr):"
local anchor_sr
# Lines mentioning ALTQ are filtered out of the pfctl output before inspection.
anchor_sr=$(pfctl -a "$PF_ANCHOR" -sr 2>&1 | grep -v "ALTQ" || true)
if [[ -n "$anchor_sr" ]]; then
echo "$anchor_sr" | sed 's/^/ /'
# Check for route-to rules
if echo "$anchor_sr" | grep -q "route-to"; then
pass "route-to lo0 rules present (needed for local traffic interception)"
else
warn "No route-to rules found — local DNS may not be intercepted"
fi
else
fail "No filter rules in anchor"
fi
info "Anchor redirect rules (pfctl -a '$PF_ANCHOR' -sn):"
local anchor_sn
anchor_sn=$(pfctl -a "$PF_ANCHOR" -sn 2>&1 | grep -v "ALTQ" || true)
if [[ -n "$anchor_sn" ]]; then
echo "$anchor_sn" | sed 's/^/ /'
# Expect an rdr rule on lo0 that matches destination port 53.
if echo "$anchor_sn" | grep -q "rdr.*lo0.*port = 53"; then
pass "rdr rules on lo0 present (redirect DNS to ctrld)"
else
warn "rdr rules don't match expected pattern"
fi
else
fail "No redirect rules in anchor"
fi
# Check anchor file exists
if [[ -f "/etc/pf.anchors/$PF_ANCHOR" ]]; then
pass "Anchor file exists: /etc/pf.anchors/$PF_ANCHOR"
else
fail "Anchor file missing: /etc/pf.anchors/$PF_ANCHOR"
fi
# Check pf.conf was NOT modified
if grep -q "$PF_ANCHOR" /etc/pf.conf 2>/dev/null; then
warn "pf.conf contains '$PF_ANCHOR' reference — should NOT be modified on disk"
else
pass "pf.conf NOT modified on disk (anchor injected at runtime only)"
fi
}
# Section 2: end-to-end interception checks. Queries external resolvers over
# UDP and TCP, queries 127.0.0.1 directly, resolves through the system
# resolver, and verifies via the ctrld log and dig's SERVER line that the
# external queries were answered locally. Outcomes go through pass/fail/warn.
test_dns_interception() {
  header "2. DNS Interception Tests"

  # Mark position in log
  local log_lines_before=0
  if [[ -f "$CTRLD_LOG" ]]; then
    log_lines_before=$(wc -l < "$CTRLD_LOG")
  fi

  # Test 1: Query to external resolver should be intercepted
  info "Test: dig @8.8.8.8 example.com (should be intercepted by ctrld)"
  local dig_result
  dig_result=$(dig @8.8.8.8 example.com +short +timeout=5 2>&1 || true)
  if [[ -n "$dig_result" ]] && ! echo "$dig_result" | grep -q "timed out"; then
    pass "dig @8.8.8.8 returned result: $dig_result"
  else
    fail "dig @8.8.8.8 failed or timed out"
  fi

  # Check if ctrld logged the query
  sleep 1
  if [[ -f "$CTRLD_LOG" ]]; then
    local intercepted
    # `grep -c` prints "0" and exits 1 on no match; `|| echo "0"` would make
    # this "0\n0" and break the numeric comparison below.
    intercepted=$(tail -n +$((log_lines_before+1)) "$CTRLD_LOG" | grep -c "example.com" || true)
    if [[ "${intercepted:-0}" -gt 0 ]]; then
      pass "ctrld logged the intercepted query for example.com"
    else
      fail "ctrld did NOT log query for example.com — interception may not be working"
    fi
  fi

  # Check dig reports ctrld answered (not 8.8.8.8)
  local full_dig
  full_dig=$(dig @8.8.8.8 example.com +timeout=5 2>&1 || true)
  local server_line
  server_line=$(echo "$full_dig" | grep "SERVER:" || true)
  info "dig SERVER line: $server_line"
  if echo "$server_line" | grep -q "127.0.0.1"; then
    pass "Response came from 127.0.0.1 (ctrld intercepted)"
  elif echo "$server_line" | grep -q "8.8.8.8"; then
    fail "Response came from 8.8.8.8 directly — NOT intercepted"
  else
    warn "Could not determine response server from dig output"
  fi

  separator

  # Test 2: Query to another external resolver
  info "Test: dig @1.1.1.1 cloudflare.com (should also be intercepted)"
  local dig2
  dig2=$(dig @1.1.1.1 cloudflare.com +short +timeout=5 2>&1 || true)
  if [[ -n "$dig2" ]] && ! echo "$dig2" | grep -q "timed out"; then
    pass "dig @1.1.1.1 returned result"
  else
    fail "dig @1.1.1.1 failed or timed out"
  fi

  separator

  # Test 3: Query to localhost should work (not double-redirected)
  info "Test: dig @127.0.0.1 example.org (direct to ctrld, should NOT be redirected)"
  local dig3
  dig3=$(dig @127.0.0.1 example.org +short +timeout=5 2>&1 || true)
  if [[ -n "$dig3" ]] && ! echo "$dig3" | grep -q "timed out"; then
    pass "dig @127.0.0.1 works (no loop)"
  else
    fail "dig @127.0.0.1 failed — possible redirect loop"
  fi

  separator

  # Test 4: System DNS resolution
  info "Test: host example.net (system resolver, should go through ctrld)"
  local host_result
  host_result=$(host example.net 2>&1 || true)
  if echo "$host_result" | grep -q "has address"; then
    pass "System DNS resolution works via host command"
  else
    fail "System DNS resolution failed"
  fi

  separator

  # Test 5: TCP DNS query
  info "Test: dig @9.9.9.9 example.com +tcp (TCP DNS should also be intercepted)"
  local dig_tcp
  dig_tcp=$(dig @9.9.9.9 example.com +tcp +short +timeout=5 2>&1 || true)
  if [[ -n "$dig_tcp" ]] && ! echo "$dig_tcp" | grep -q "timed out"; then
    pass "TCP DNS query intercepted and resolved"
  else
    warn "TCP DNS query failed (may not be critical if UDP works)"
  fi
}
# Section 3: confirm that traffic other than DNS still flows: an HTTPS request
# to example.com and a TCP connect to github.com port 22.
test_non_dns_unaffected() {
  header "3. Non-DNS Traffic Unaffected"

  # HTTPS should work fine
  info "Test: curl https://example.com (HTTPS port 443 should NOT be affected)"
  local curl_result
  # On failure curl's -w already prints "000"; appending another "000" via
  # `|| echo` produced a misleading "000000" status in the report.
  curl_result=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 https://example.com 2>&1 || true)
  curl_result=${curl_result:-000}
  case "$curl_result" in
    200 | 301 | 302)
      pass "HTTPS works (HTTP $curl_result)"
      ;;
    *)
      fail "HTTPS failed (HTTP $curl_result) — pf may be affecting non-DNS traffic"
      ;;
  esac

  # SSH-style connection test (port 22 should be unaffected)
  info "Test: nc -z -w5 github.com 22 (SSH port should NOT be affected)"
  if nc -z -w5 github.com 22 2>/dev/null; then
    pass "SSH port reachable (non-DNS traffic unaffected)"
  else
    warn "SSH port unreachable (may be firewall, not necessarily our fault)"
  fi
}
# Section 4: scan the last 500 lines of the ctrld log (via log_grep) for the
# messages that mark a healthy intercept start-up, then surface recent
# errors, warnings, recovery-bypass activity and VPN DNS activity.
# Returns early with a warning when $CTRLD_LOG does not exist.
test_ctrld_log_health() {
header "4. ctrld Log Health Check"
if [[ ! -f "$CTRLD_LOG" ]]; then
warn "Skipping log checks — $CTRLD_LOG not found"
return
fi
# Check for intercept initialization
# (`grep -q "."` turns "log_grep printed something" into an exit status.)
if log_grep "DNS intercept.*initializing" 500 | grep -q "."; then
pass "DNS intercept initialization logged"
else
fail "No DNS intercept initialization in recent logs"
fi
# Check for successful anchor load
if log_grep "pf anchor.*active" 500 | grep -q "."; then
pass "PF anchor reported as active"
else
fail "PF anchor not reported as active"
fi
# Check for anchor reference injection
if log_grep "anchor reference active" 500 | grep -q "."; then
pass "Anchor reference injected into running ruleset"
else
fail "Anchor reference NOT injected — this is the critical step"
fi
# Check for errors
separator
info "Recent errors/warnings in ctrld log:"
local errors
errors=$(log_grep '"level":"error"' 500)
if [[ -n "$errors" ]]; then
echo "$errors" | tail -5 | sed 's/^/ /'
warn "Errors found in recent logs (see above)"
else
pass "No errors in recent logs"
fi
local warnings
# Warnings containing "skipping self-upgrade" are dropped before display.
warnings=$(log_grep '"level":"warn"' 500 | grep -v "skipping self-upgrade" || true)
if [[ -n "$warnings" ]]; then
echo "$warnings" | tail -5 | sed 's/^/ /'
info "(warnings above may be expected)"
fi
# Check for recovery bypass state
if log_grep "recoveryBypass\|recovery bypass\|prepareForRecovery" 500 | grep -q "."; then
info "Recovery bypass activity detected in logs"
log_grep "recovery" 500 | tail -3 | sed 's/^/ /'
fi
# Check for VPN DNS detection
if log_grep "VPN DNS" 500 | grep -q "."; then
info "VPN DNS activity in logs:"
log_grep "VPN DNS" 500 | tail -5 | sed 's/^/ /'
else
info "No VPN DNS activity (expected if no VPN is connected)"
fi
}
# Section 5: print pf status counters, the number of state-table entries and
# whatever `pfctl -a $PF_ANCHOR -si` reports. Informational only; records no
# pass/fail results.
test_pf_counters() {
  header "5. PF Statistics & Counters"

  info "PF info (pfctl -si):"
  # Display-only pipelines must not abort the suite under pipefail + errexit
  # (empty output makes grep exit 1; head may close the pipe early).
  pfctl -si 2>&1 | grep -v "ALTQ" | head -15 | sed 's/^/ /' || true

  info "PF state table entries:"
  local state_count
  # `grep -c` exits 1 when it counts zero lines; tolerate an empty state table.
  state_count=$(pfctl -ss 2>&1 | grep -c "." || true)
  echo " States: ${state_count:-0}"

  # Count evaluations of our anchor
  info "Anchor-specific stats (if available):"
  local anchor_info
  anchor_info=$(pfctl -a "$PF_ANCHOR" -si 2>&1 | grep -v "ALTQ" || true)
  if [[ -n "$anchor_info" ]]; then
    echo "$anchor_info" | head -10 | sed 's/^/ /'
  else
    info " (no per-anchor stats available)"
  fi
}
# Section 6: after the operator stops ctrld, verify that it cleaned up after
# itself: the anchor holds no filter or rdr rules, the anchor file under
# /etc/pf.anchors is gone, /etc/pf.conf does not mention the anchor, and a
# plain `dig example.com` still resolves.
# Blocks on wait_for_key until the operator confirms ctrld has been stopped.
test_cleanup_on_stop() {
header "6. Cleanup Validation (After ctrld Stop)"
manual "Stop ctrld now (Ctrl+C or 'ctrld stop'), then press Enter"
wait_for_key
# Check anchor is flushed
local anchor_rules_after
# ALTQ lines and blank lines are filtered out so that only real rules count.
anchor_rules_after=$(pfctl -a "$PF_ANCHOR" -sr 2>&1 | grep -v "ALTQ" | grep -v "^$" || true)
if [[ -z "$anchor_rules_after" ]]; then
pass "Anchor filter rules flushed after stop"
else
fail "Anchor filter rules still present after stop"
echo "$anchor_rules_after" | sed 's/^/ /'
fi
local anchor_rdr_after
anchor_rdr_after=$(pfctl -a "$PF_ANCHOR" -sn 2>&1 | grep -v "ALTQ" | grep -v "^$" || true)
if [[ -z "$anchor_rdr_after" ]]; then
pass "Anchor redirect rules flushed after stop"
else
fail "Anchor redirect rules still present after stop"
fi
# Check anchor file removed
if [[ ! -f "/etc/pf.anchors/$PF_ANCHOR" ]]; then
pass "Anchor file removed after stop"
else
fail "Anchor file still exists: /etc/pf.anchors/$PF_ANCHOR"
fi
# Check pf.conf is clean
if ! grep -q "$PF_ANCHOR" /etc/pf.conf 2>/dev/null; then
pass "pf.conf is clean (no ctrld references)"
else
fail "pf.conf still has ctrld references after stop"
fi
# DNS should work normally without ctrld
info "Test: dig example.com (should resolve via system DNS)"
local dig_after
dig_after=$(dig example.com +short +timeout=5 2>&1 || true)
if [[ -n "$dig_after" ]] && ! echo "$dig_after" | grep -q "timed out"; then
pass "DNS works after ctrld stop"
else
fail "DNS broken after ctrld stop — cleanup may have failed"
fi
}
# Section 7: once the operator has restarted ctrld, confirm the anchor is
# referenced again from both the filter and NAT rulesets and that a query to
# an external resolver gets an answer.
test_restart_resilience() {
  header "7. Restart Resilience"
  manual "Start ctrld again with --dns-intercept, then press Enter"
  wait_for_key
  sleep 3

  # Re-run pf state checks
  local filter_ref nat_ref
  filter_ref=$(pfctl -sr 2>&1 | grep "$PF_ANCHOR" || true)
  nat_ref=$(pfctl -sn 2>&1 | grep "$PF_ANCHOR" || true)
  if [[ -z "$filter_ref" || -z "$nat_ref" ]]; then
    fail "Anchor references NOT restored after restart"
  else
    pass "Anchor references restored after restart"
  fi

  # Quick interception test
  local answer
  answer=$(dig @8.8.8.8 example.com +short +timeout=5 2>&1 || true)
  if [[ -z "$answer" || "$answer" == *"timed out"* ]]; then
    fail "DNS interception broken after restart"
  else
    pass "DNS interception works after restart"
  fi
}
# Section 8: after the operator changes networks, confirm the anchor
# references are still present, interception still answers, and show any
# recovery / network-change entries from the last 100 log lines.
test_network_change() {
  header "8. Network Change Recovery"
  info "This test verifies recovery after network changes."
  manual "Switch Wi-Fi networks (or disconnect/reconnect Ethernet), then press Enter"
  wait_for_key
  sleep 5

  # Check pf rules still active
  local filter_ref nat_ref
  filter_ref=$(pfctl -sr 2>&1 | grep "$PF_ANCHOR" || true)
  nat_ref=$(pfctl -sn 2>&1 | grep "$PF_ANCHOR" || true)
  if [[ -z "$filter_ref" || -z "$nat_ref" ]]; then
    fail "Anchor references lost after network change"
  else
    pass "Anchor references survived network change"
  fi

  # Check interception still works
  local answer
  answer=$(dig @8.8.8.8 example.com +short +timeout=10 2>&1 || true)
  if [[ -z "$answer" || "$answer" == *"timed out"* ]]; then
    fail "DNS interception broken after network change"
  else
    pass "DNS interception works after network change"
  fi

  # Check logs for recovery bypass activity
  [[ -f "$CTRLD_LOG" ]] || return 0
  local recent
  recent=$(log_grep "recovery\|network change\|network monitor" 100)
  if [[ -n "$recent" ]]; then
    info "Recovery/network change log entries:"
    echo "$recent" | tail -5 | sed 's/^/ /'
  fi
}
# =============================================================================
# SUMMARY
# =============================================================================
# Print every recorded result, the pass/fail/warn totals, and — when anything
# failed — a short list of debugging commands.
# Globals: RESULTS, PASS, FAIL, WARN, PF_ANCHOR, CTRLD_LOG (read)
print_summary() {
  header "TEST SUMMARY"
  echo ""

  local r
  # ${arr[@]+...} keeps an empty RESULTS from being an unbound-variable error
  # under `set -u` on bash releases older than 4.4.
  for r in ${RESULTS[@]+"${RESULTS[@]}"}; do
    if [[ "$r" == PASS* ]]; then
      echo -e " ${GREEN}${NC} ${r#PASS: }"
    elif [[ "$r" == FAIL* ]]; then
      echo -e " ${RED}${NC} ${r#FAIL: }"
    elif [[ "$r" == WARN* ]]; then
      echo -e " ${YELLOW}⚠️${NC} ${r#WARN: }"
    fi
  done

  echo ""
  separator
  echo -e " ${GREEN}Passed: $PASS${NC} | ${RED}Failed: $FAIL${NC} | ${YELLOW}Warnings: $WARN${NC}"
  separator

  if [[ $FAIL -gt 0 ]]; then
    echo -e "\n ${RED}${BOLD}Some tests failed.${NC} Check output above for details."
    echo -e " Useful debug commands:"
    echo -e " pfctl -a '$PF_ANCHOR' -sr # anchor filter rules"
    echo -e " pfctl -a '$PF_ANCHOR' -sn # anchor redirect rules"
    echo -e " pfctl -sr | grep controld # main ruleset references"
    echo -e " tail -100 $CTRLD_LOG # recent ctrld logs"
  else
    echo -e "\n ${GREEN}${BOLD}All tests passed!${NC}"
  fi
}
# =============================================================================
# MAIN
# =============================================================================
# Entry point: print the banner, require root, then run the automated
# sections in order. The last three sections need the operator to stop and
# restart ctrld and to change networks, so they run after a second prompt.
echo -e "${BOLD}╔═══════════════════════════════════════════════════════╗${NC}"
echo -e "${BOLD}║ ctrld DNS Intercept Mode — macOS Test Suite ║${NC}"
echo -e "${BOLD}║ Tests pf-based DNS interception (route-to + rdr) ║${NC}"
echo -e "${BOLD}╚═══════════════════════════════════════════════════════╝${NC}"
check_root
echo ""
echo "Make sure ctrld is running with --dns-intercept before starting."
echo "Log location: $CTRLD_LOG"
wait_for_key
# Automated sections: no operator action required.
test_prereqs
test_pf_state
test_dns_interception
test_non_dns_unaffected
test_ctrld_log_health
test_pf_counters
separator
echo ""
echo "The next tests require manual steps (stop/start ctrld, network changes)."
echo "Press Enter to continue, or Ctrl+C to skip and see results so far."
wait_for_key
# Manual sections: each one pauses until the operator confirms the step.
test_cleanup_on_stop
test_restart_resilience
test_network_change
print_summary

View File

@@ -0,0 +1,147 @@
#!/bin/bash
# Test: pf group-based exemption for DNS intercept
# Run as root: sudo bash test-pf-group-exemption.sh
set -e

GROUP_NAME="_ctrld"
ANCHOR="com.controld.test"
TEST_DNS="1.1.1.1"
WORKDIR=""

#######################################
# Flush the test anchor, reload /etc/pf.conf and delete scratch files.
# Installed as the EXIT trap so that a mid-script failure (set -e) cannot
# leave DNS redirected to 127.0.0.1:53 with nothing listening there.
# Globals: ANCHOR, WORKDIR, GROUP_NAME (read)
#######################################
cleanup() {
  echo ""
  echo "=== Cleanup ==="
  pfctl -a "$ANCHOR" -F all 2>/dev/null || true
  pfctl -f /etc/pf.conf 2>/dev/null || true
  if [ -n "$WORKDIR" ]; then
    rm -rf -- "$WORKDIR"
  fi
  echo "Cleaned up. Group $GROUP_NAME left in place."
  echo "To remove: sudo dscl . -delete /Groups/$GROUP_NAME"
}

echo "=== Step 1: Create test group ==="
if dscl . -read "/Groups/$GROUP_NAME" PrimaryGroupID &>/dev/null; then
  echo "Group $GROUP_NAME already exists"
else
  # Find an unused GID in 350-450 range
  USED_GIDS=$(dscl . -list /Groups PrimaryGroupID 2>/dev/null | awk '{print $2}' | sort -n)
  GROUP_ID=""
  for gid in $(seq 350 450); do
    if ! echo "$USED_GIDS" | grep -q "^${gid}$"; then
      GROUP_ID=$gid
      break
    fi
  done
  if [ -z "$GROUP_ID" ]; then
    echo "ERROR: Could not find unused GID in 350-450 range"
    exit 1
  fi
  dscl . -create "/Groups/$GROUP_NAME"
  dscl . -create "/Groups/$GROUP_NAME" PrimaryGroupID "$GROUP_ID"
  dscl . -create "/Groups/$GROUP_NAME" RealName "Control D DNS Intercept"
  echo "Created group $GROUP_NAME (GID $GROUP_ID)"
fi
ACTUAL_GID=$(dscl . -read "/Groups/$GROUP_NAME" PrimaryGroupID | awk '{print $2}')
echo "GID: $ACTUAL_GID"
echo ""

echo "=== Step 2: Enable pf ==="
pfctl -e 2>&1 || true
echo ""

# Private scratch directory: avoids predictable /tmp names for files that are
# written, compiled and made setgid while running as root.
WORKDIR=$(mktemp -d /tmp/pf-group-test.XXXXXX)
ANCHOR_CONF="$WORKDIR/pf-group-test-anchor.conf"
TEST_SRC="$WORKDIR/test-dns-group.c"
TEST_BIN="$WORKDIR/test-dns-group"
# From here on pf state is modified, so make sure it is always restored.
trap cleanup EXIT

echo "=== Step 3: Set up pf anchor with group exemption ==="
cat > "$ANCHOR_CONF" << RULES
# Translation: redirect DNS on loopback to our listener
rdr pass on lo0 inet proto udp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53
rdr pass on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53
# Exemption: only group _ctrld can talk to $TEST_DNS directly
pass out quick on ! lo0 inet proto { udp, tcp } from any to $TEST_DNS port 53 group $GROUP_NAME
# Intercept everything else
pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! 127.0.0.1 port 53
pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! 127.0.0.1 port 53
pass in quick on lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53
RULES
pfctl -a "$ANCHOR" -f "$ANCHOR_CONF" 2>/dev/null
echo "Loaded anchor $ANCHOR"

# Inject anchor refs into running ruleset.
# `|| true`: grep exits 1 when a ruleset is empty, which would otherwise
# abort the script silently under set -e.
NAT_RULES=$(pfctl -sn 2>/dev/null | grep -v "ALTQ" | grep -v "^$" || true)
FILTER_RULES=$(pfctl -sr 2>/dev/null | grep -v "ALTQ" | grep -v "^$" || true)
SCRUB_RULES=$(echo "$FILTER_RULES" | grep "^scrub" || true)
PURE_FILTER=$(echo "$FILTER_RULES" | grep -v "^scrub" | grep -v "com.controld.test" || true)
CLEAN_NAT=$(echo "$NAT_RULES" | grep -v "com.controld.test" || true)
{
  [ -n "$SCRUB_RULES" ] && echo "$SCRUB_RULES"
  [ -n "$CLEAN_NAT" ] && echo "$CLEAN_NAT"
  echo "rdr-anchor \"$ANCHOR\""
  echo "anchor \"$ANCHOR\""
  [ -n "$PURE_FILTER" ] && echo "$PURE_FILTER"
} | pfctl -f - 2>/dev/null
echo "Injected anchor references (no duplicates)"
echo ""

echo "=== Step 4: Verify rules ==="
echo "NAT rules:"
pfctl -sn 2>/dev/null | grep -v ALTQ || true
echo ""
echo "Anchor filter rules:"
pfctl -a "$ANCHOR" -sr 2>/dev/null | grep -v ALTQ || true
echo ""
echo "Anchor NAT rules:"
pfctl -a "$ANCHOR" -sn 2>/dev/null | grep -v ALTQ || true
echo ""

echo "=== Step 5: Build setgid test binary ==="
# We need a binary that runs with effective group _ctrld.
# sudo -g doesn't work on macOS, so we use a setgid binary.
# The heredoc is unquoted so the resolver follows TEST_DNS.
cat > "$TEST_SRC" << EOF
#include <unistd.h>
int main() {
char *args[] = {"dig", "+short", "+timeout=3", "+tries=1", "@${TEST_DNS}", "popads.net", NULL};
execvp("dig", args);
return 1;
}
EOF
cc -o "$TEST_BIN" "$TEST_SRC"
chgrp "$GROUP_NAME" "$TEST_BIN"
chmod g+s "$TEST_BIN"
echo "Built setgid binary $TEST_BIN (group: $GROUP_NAME)"
echo ""

echo "=== Step 6: Test as regular user (should be INTERCEPTED) ==="
echo "Running: dig @$TEST_DNS popads.net (as root / group wheel — no group exemption)"
echo "If nothing listens on 127.0.0.1:53, this should timeout."
DIG_RESULT=$(dig +short +timeout=3 +tries=1 "@$TEST_DNS" popads.net 2>&1 || true)
echo "Result: ${DIG_RESULT:-TIMEOUT/INTERCEPTED}"
echo ""

echo "=== Step 7: Test as group _ctrld (should BYPASS) ==="
echo "Running: setgid binary (effective group: $GROUP_NAME)"
BYPASS_RESULT=$("$TEST_BIN" 2>&1 || true)
echo "Result: ${BYPASS_RESULT:-TIMEOUT/BLOCKED}"
echo ""

echo "=== Results ==="
PASS=true
if [[ -z "$DIG_RESULT" || "$DIG_RESULT" == *"timed out"* || "$DIG_RESULT" == *"connection refused"* ]]; then
  echo "✅ Regular query INTERCEPTED (redirected away from $TEST_DNS)"
else
  echo "❌ Regular query NOT intercepted — got: $DIG_RESULT"
  PASS=false
fi
if [[ -n "$BYPASS_RESULT" && "$BYPASS_RESULT" != *"timed out"* && "$BYPASS_RESULT" != *"connection refused"* && "$BYPASS_RESULT" != *"TIMEOUT"* ]]; then
  echo "✅ Group _ctrld query BYPASSED — got: $BYPASS_RESULT"
else
  echo "❌ Group _ctrld query was also intercepted — got: ${BYPASS_RESULT:-TIMEOUT}"
  PASS=false
fi
if $PASS; then
  echo ""
  echo "🎉 GROUP EXEMPTION WORKS — this approach is viable for dns-intercept mode"
fi
# pf restore and scratch-file removal happen in the EXIT trap (cleanup).

View File

@@ -0,0 +1,301 @@
#!/bin/bash
# test-recovery-bypass.sh — Test DNS intercept recovery bypass (captive portal simulation)
#
# Simulates a captive portal by:
# 1. Discovering ctrld's upstream IPs from active connections
# 2. Blackholing ALL of them via route table
# 3. Cycling wifi to trigger network change → recovery flow
# 4. Verifying recovery bypass forwards to OS/DHCP resolver
# 5. Unblocking and verifying normal operation resumes
#
# SAFE: Uses route add/delete + networksetup — cleaned up on exit (including Ctrl+C).
#
# Usage: sudo bash test-recovery-bypass.sh [wifi_interface]
# wifi_interface defaults to en0
#
# Prerequisites:
# - ctrld running with --dns-intercept and -v 1 --log /tmp/dns.log
# - Run as root (sudo)
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any pipeline stage.
set -o errexit -o nounset -o pipefail

# Wifi device to power-cycle: first CLI argument, en0 when omitted.
WIFI_IFACE=${1:-en0}
# Log file ctrld is expected to write to (see Prerequisites above).
CTRLD_LOG=/tmp/dns.log
# IPs for which this script installed a host route; consumed by cleanup().
BLOCKED_IPS=()

# ANSI color escapes, expanded later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
# _tagged COLOR TAG [MESSAGE...]
# Shared formatter: prints "<COLOR>[TAG]<reset> MESSAGE" with escapes expanded.
_tagged() {
  local color=$1 tag=$2
  shift 2
  echo -e "${color}[${tag}]${NC} $*"
}

# log: progress line tagged with the current wall-clock time.
log() { _tagged "$CYAN" "$(date +%H:%M:%S)" "$@"; }
# pass / fail / warn: outcome lines with a fixed tag.
pass() { _tagged "$GREEN" "PASS" "$@"; }
fail() { _tagged "$RED" "FAIL" "$@"; }
warn() { _tagged "$YELLOW" "WARN" "$@"; }
# ── Safety: always clean up on exit ──────────────────────────────────────────
# cleanup — put the machine back into a usable state however the script ends:
# powers the wifi interface on and deletes every host route recorded in
# BLOCKED_IPS. Every step is best-effort so one failure cannot block the rest.
cleanup() {
  local ip
  echo ""
  log "═══ CLEANUP ═══"
  # Ensure wifi is on
  log "Ensuring wifi is on..."
  networksetup -setairportpower "$WIFI_IFACE" on 2>/dev/null || true
  # Remove all blackhole routes.
  # The ${arr[@]+...} form expands to nothing for an empty array; a plain
  # "${BLOCKED_IPS[@]}" trips `set -u` on bash < 4.4 (macOS ships 3.2) and
  # would abort the cleanup half-way.
  for ip in ${BLOCKED_IPS[@]+"${BLOCKED_IPS[@]}"}; do
    if route delete -host "$ip" 2>/dev/null; then
      log "Removed route for $ip"
    fi
  done
  log "Cleanup complete. Internet should be restored."
  log "(If not, run: sudo networksetup -setairportpower $WIFI_IFACE on)"
}
# Run cleanup exactly once, from the EXIT trap. INT/TERM only convert the
# signal into an exit; trapping them with `cleanup` directly would run the
# cleanup and then let the script carry on with the test.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# ── Pre-checks ───────────────────────────────────────────────────────────────
# The script must run as root.
if (( EUID != 0 )); then
  echo "Run as root: sudo bash $0 $*"
  exit 1
fi

# The ctrld log must already exist; later phases read it to detect recovery.
[[ -f "$CTRLD_LOG" ]] || {
  fail "ctrld log not found at $CTRLD_LOG"
  echo "Start ctrld with: ctrld run --dns-intercept --cd <uid> -v 1 --log $CTRLD_LOG"
  exit 1
}

# Check wifi interface exists
networksetup -getairportpower "$WIFI_IFACE" >/dev/null 2>&1 || {
  fail "Wifi interface $WIFI_IFACE not found"
  echo "Try: networksetup -listallhardwareports"
  exit 1
}

# Banner with the effective settings.
log "═══════════════════════════════════════════════════════════"
log " Recovery Bypass Test (Captive Portal Simulation)"
log "═══════════════════════════════════════════════════════════"
log "Wifi interface: $WIFI_IFACE"
log "ctrld log: $CTRLD_LOG"
echo ""
# ── Phase 1: Discover upstream IPs ──────────────────────────────────────────
# Builds UPSTREAM_IPS: the de-duplicated set of public IPv4 addresses seen in
# ctrld's connections plus the A records of the known Control D endpoints.
log "Phase 1: Discovering ctrld upstream IPs from active connections"
# Find ctrld's established connections (DoH uses port 443)
CTRLD_CONNS=$(lsof -i -n -P 2>/dev/null | grep -i ctrld | grep ESTABLISHED || true)
if [[ -z "$CTRLD_CONNS" ]]; then
  warn "No established ctrld connections found via lsof"
  warn "Trying: ss/netstat fallback..."
  # NOTE: this fallback matches any established connection involving port 443,
  # not only ctrld's.
  CTRLD_CONNS=$(netstat -an 2>/dev/null | grep "\.443 " | grep ESTABLISHED || true)
fi
# Show at most 10 connections. Feeding head from a here-string (instead of
# `echo | head`) avoids a SIGPIPE on the writer, which pipefail would turn
# into a script abort.
head -n 10 <<<"$CTRLD_CONNS" | while read -r line; do
  log " $line"
done

# Extract remote IPs from ctrld connections
DISCOVERED_IPS=()
while IFS= read -r ip; do
  [[ -n "$ip" ]] || continue
  # Filter out local/private IPs — we only want the upstream DoH server IPs
  if [[ "$ip" =~ ^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.) ]]; then
    continue
  fi
  DISCOVERED_IPS+=("$ip")
done < <(grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' <<<"$CTRLD_CONNS" | sort -u || true)

# Also try to resolve known Control D DoH endpoints
for host in dns.controld.com freedns.controld.com; do
  while IFS= read -r ip; do
    # dig +short may also print CNAME targets; keep IPv4 literals only.
    if [[ "$ip" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
      DISCOVERED_IPS+=("$ip")
    fi
  done < <(dig +short "$host" 2>/dev/null || true)
done

# Deduplicate. The array is only expanded when it is non-empty: expanding an
# empty array under `set -u` is fatal on bash < 4.4 (macOS ships 3.2) and
# would kill the script before the diagnostic below could be printed.
UPSTREAM_IPS=()
if (( ${#DISCOVERED_IPS[@]} > 0 )); then
  while IFS= read -r ip; do
    UPSTREAM_IPS+=("$ip")
  done < <(printf '%s\n' "${DISCOVERED_IPS[@]}" | sort -u)
fi

if [[ ${#UPSTREAM_IPS[@]} -eq 0 ]]; then
  fail "Could not discover any upstream IPs!"
  echo "Check: lsof -i -n -P | grep ctrld"
  exit 1
fi
log "Found ${#UPSTREAM_IPS[@]} upstream IP(s):"
for ip in "${UPSTREAM_IPS[@]}"; do
  log " $ip"
done
echo ""
# ── Phase 2: Baseline check ─────────────────────────────────────────────────
# DNS through 127.0.0.1 must answer before anything is broken on purpose.
log "Phase 2: Baseline — verify DNS works normally"
BASELINE=$(dig +short +timeout=5 example.com @127.0.0.1 2>/dev/null || true)
[[ -n "$BASELINE" ]] || {
  fail "DNS not working before test!"
  exit 1
}
pass "Baseline: example.com → $BASELINE"

# Remember the current log length so later phases only look at new lines.
LOG_LINES_BEFORE=$(wc -l < "$CTRLD_LOG")
LOG_LINES_BEFORE=${LOG_LINES_BEFORE// /}
log "Log position: line $LOG_LINES_BEFORE"
echo ""
# ── Phase 3: Block all upstream IPs ─────────────────────────────────────────
# Installs a host route via 127.0.0.1 for every upstream IP and records each
# one in BLOCKED_IPS so cleanup() can remove it again.
log "Phase 3: Blackholing all upstream IPs"
for ip in "${UPSTREAM_IPS[@]}"; do
  route delete -host "$ip" 2>/dev/null || true # clean slate
  # Report a failed `route add` explicitly: with stderr discarded, `set -e`
  # alone would end the test without any explanation.
  if ! route add -host "$ip" 127.0.0.1 2>/dev/null; then
    fail "Could not add blackhole route for $ip"
    exit 1
  fi
  BLOCKED_IPS+=("$ip")
  log " Blocked: $ip → 127.0.0.1"
done
pass "All ${#UPSTREAM_IPS[@]} upstream IPs blackholed"
echo ""
# ── Phase 4: Cycle wifi to trigger network change ───────────────────────────
# Powers the wifi interface off and on, then polls once per second (15 tries)
# until the interface has an IPv4 address again.
log "Phase 4: Cycling wifi to trigger network change event"
log " Turning wifi OFF..."
networksetup -setairportpower "$WIFI_IFACE" off
sleep 3
log " Turning wifi ON..."
networksetup -setairportpower "$WIFI_IFACE" on
log " Waiting for wifi to reconnect (up to 15s)..."
WIFI_UP=false
for i in $(seq 1 15); do
  # Check if we have an IP on the wifi interface
  IF_IP=$(ipconfig getifaddr "$WIFI_IFACE" 2>/dev/null || true)
  if [[ -n "$IF_IP" ]]; then
    WIFI_UP=true
    # Separator added: the interface name and address used to be printed
    # glued together.
    pass "Wifi reconnected: $WIFI_IFACE → $IF_IP"
    break
  fi
  sleep 1
done
if [[ "$WIFI_UP" == "false" ]]; then
  fail "Wifi did not reconnect in 15s!"
  warn "Cleaning up and exiting..."
  exit 1
fi
log " Waiting 5s for ctrld network monitor to fire..."
sleep 5
echo ""
# ── Phase 5: Query and watch for recovery ────────────────────────────────────
# Sends up to 30 queries through 127.0.0.1 and, after each one, scans the log
# lines written since Phase 2 for the recovery/bypass messages. Stops at the
# first query that resolves after recovery was seen in the log.
#
# Log text is fed to grep through here-strings: with `echo "$text" | grep -q`,
# grep exits at the first match, the writer can die from SIGPIPE on a large
# log, and pipefail then reports the match as a failure.
log "Phase 5: Sending queries — upstream is blocked, recovery should activate"
log " (ctrld should detect upstream failure → enable recovery bypass → use DHCP DNS)"
echo ""
RECOVERY_DETECTED=false
BYPASS_ACTIVE=false
DNS_DURING_BYPASS=false
QUERY_COUNT=0
for i in $(seq 1 30); do
  QUERY_COUNT=$((QUERY_COUNT + 1))
  RESULT=$(dig +short +timeout=3 "example.com" @127.0.0.1 2>/dev/null || true)
  if [[ -n "$RESULT" ]]; then
    log " Query #$QUERY_COUNT: example.com → $RESULT"
  else
    log " Query #$QUERY_COUNT: example.com → FAIL ✗"
  fi
  # Check logs
  NEW_LOGS=$(tail -n +$((LOG_LINES_BEFORE + 1)) "$CTRLD_LOG" 2>/dev/null || true)
  if [[ "$RECOVERY_DETECTED" == "false" ]] \
    && grep -qiE "enabling DHCP bypass|triggering recovery|No healthy" <<<"$NEW_LOGS"; then
    echo ""
    pass "🎯 Recovery flow triggered!"
    RECOVERY_DETECTED=true
    # Show the last few related entries for context.
    grep -iE "recovery|bypass|DHCP|No healthy|network change" <<<"$NEW_LOGS" | tail -n 8 | while read -r line; do
      echo " 📋 $line"
    done
    echo ""
  fi
  if [[ "$BYPASS_ACTIVE" == "false" ]] && grep -qi "Recovery bypass active" <<<"$NEW_LOGS"; then
    pass "🔄 Recovery bypass is forwarding queries to OS/DHCP resolver"
    BYPASS_ACTIVE=true
  fi
  if [[ "$RECOVERY_DETECTED" == "true" && -n "$RESULT" ]]; then
    pass "✅ DNS resolves during recovery bypass: example.com → $RESULT"
    DNS_DURING_BYPASS=true
    break
  fi
  sleep 2
done
# ── Phase 6: Show all recovery-related log entries ──────────────────────────
# Prints up to 40 recovery-related lines written since Phase 2, or the last 15
# log lines when nothing relevant was logged.
echo ""
log "Phase 6: All recovery-related ctrld log entries"
log "────────────────────────────────────────────────"
NEW_LOGS=$(tail -n +$((LOG_LINES_BEFORE + 1)) "$CTRLD_LOG" 2>/dev/null || true)
RELEVANT=$(grep -iE "recovery|bypass|DHCP|unhealthy|upstream.*fail|No healthy|network change|network monitor|OS resolver" <<<"$NEW_LOGS" || true)
if [[ -n "$RELEVANT" ]]; then
  # head reads from a here-string: with `echo | head -40`, head can exit
  # early, the writer can get SIGPIPE, and pipefail + `set -e` would then
  # abort the whole test at this point.
  head -n 40 <<<"$RELEVANT" | while read -r line; do
    echo " $line"
  done
else
  warn "No recovery-related log entries found!"
  log "Last 15 lines of ctrld log:"
  tail -n 15 "$CTRLD_LOG" | while read -r line; do
    echo " $line"
  done
fi
# ── Phase 7: Unblock and verify full recovery ───────────────────────────────
# Deletes the blackhole routes, then keeps querying (15 rounds, 2s apart)
# until the log reports that ctrld has left recovery mode.
echo ""
log "Phase 7: Unblocking upstream IPs"
for ip in "${BLOCKED_IPS[@]}"; do
  if route delete -host "$ip" 2>/dev/null; then
    log " Unblocked: $ip"
  fi
done
BLOCKED_IPS=() # clear so cleanup doesn't double-delete
pass "All upstream IPs unblocked"
log "Waiting for ctrld to recover (up to 30s)..."
LOG_LINES_UNBLOCK=$(wc -l < "$CTRLD_LOG" | tr -d ' ')
RECOVERY_COMPLETE=false
for i in $(seq 1 15); do
  # Generate traffic so ctrld has queries to try against the upstreams.
  dig +short +timeout=3 example.com @127.0.0.1 >/dev/null 2>&1 || true
  POST_LOGS=$(tail -n +$((LOG_LINES_UNBLOCK + 1)) "$CTRLD_LOG" 2>/dev/null || true)
  # Here-string instead of `echo | grep -q`: grep exiting at the first match
  # could SIGPIPE the writer, which pipefail would report as "no match".
  if grep -qiE "recovery complete|disabling DHCP bypass|Upstream.*recovered" <<<"$POST_LOGS"; then
    RECOVERY_COMPLETE=true
    pass "ctrld recovered — normal operation resumed"
    grep -iE "recovery|recovered|bypass|disabling" <<<"$POST_LOGS" | head -n 5 | while read -r line; do
      echo " 📋 $line"
    done
    break
  fi
  sleep 2
done
if [[ "$RECOVERY_COMPLETE" == "false" ]]; then
  warn "Recovery completion not detected (may need more time)"
fi
# Final check: one more lookup through 127.0.0.1 after a short settle time.
echo ""
log "Phase 8: Final DNS verification"
sleep 2
FINAL=$(dig +short +timeout=5 example.com @127.0.0.1 2>/dev/null || true)
case "$FINAL" in
  "")
    fail "DNS not resolving"
    ;;
  *)
    pass "DNS working: example.com → $FINAL"
    ;;
esac
# ── Summary ──────────────────────────────────────────────────────────────────
# One verdict line per flag collected during the run, then hints for reading
# the part of the ctrld log written during the test.
echo ""
log "═══════════════════════════════════════════════════════════"
log " Test Summary"
log "═══════════════════════════════════════════════════════════"
if [[ "$RECOVERY_DETECTED" == "true" ]]; then
  pass "Recovery bypass activated"
else
  fail "Recovery bypass NOT activated"
fi
if [[ "$BYPASS_ACTIVE" == "true" ]]; then
  pass "Queries forwarded to OS/DHCP resolver"
else
  warn "OS resolver forwarding not confirmed"
fi
if [[ "$DNS_DURING_BYPASS" == "true" ]]; then
  pass "DNS resolved during bypass (proof of OS resolver leak)"
else
  warn "DNS during bypass not confirmed"
fi
if [[ "$RECOVERY_COMPLETE" == "true" ]]; then
  pass "Normal operation resumed after unblock"
else
  warn "Recovery completion not confirmed"
fi
if [[ -n "${FINAL:-}" ]]; then
  pass "DNS functional at end of test"
else
  fail "DNS broken at end of test"
fi
echo ""
# The test's own log lines start one past the recorded position, matching the
# `tail -n +$((LOG_LINES_BEFORE + 1))` reads used during the test.
LOG_FIRST_NEW_LINE=$((LOG_LINES_BEFORE + 1))
log "Full log since test: tail -n +$LOG_FIRST_NEW_LINE $CTRLD_LOG"
log "Recovery entries: tail -n +$LOG_FIRST_NEW_LINE $CTRLD_LOG | grep -i recovery"

View File

@@ -0,0 +1,272 @@
#!/bin/bash
# validate-pf-rules.sh
# Standalone test of the pf redirect rules for dns-intercept mode.
# Does NOT require ctrld. Loads the pf anchor, validates interception, cleans up.
# Run as root (sudo).
# Abort on the first unhandled command failure.
set -o errexit

# ANSI color escapes, expanded later by `echo -e`.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# _report COLOR TAG MESSAGE — prints "<COLOR>[TAG]<reset> MESSAGE".
_report() {
  echo -e "${1}[${2}]${NC} ${3}"
}
# ok / warn: informational verdicts. fail additionally bumps FAILURES.
ok() { _report "$GREEN" "OK" "$1"; }
fail() {
  _report "$RED" "FAIL" "$1"
  FAILURES=$((FAILURES + 1))
}
warn() { _report "$YELLOW" "WARN" "$1"; }

# Number of failed checks so far.
FAILURES=0
# pf anchor used by this test and the file its rules are written to.
ANCHOR="com.controld.ctrld.test"
ANCHOR_FILE="/tmp/pf-dns-intercept-test.conf"
# Use a local DNS listener to prove redirect works (python one-liner)
# PID of that listener; stays empty unless this script starts one.
LISTENER_PID=""
# cleanup — undo what the test changed: flush the test anchor, delete the temp
# files, reload /etc/pf.conf and stop the listener this script started.
# Each step is best-effort and only reports itself when it succeeded.
cleanup() {
  echo ""
  echo -e "${CYAN}--- Cleanup ---${NC}"
  # Remove anchor rules
  if pfctl -a "$ANCHOR" -F all 2>/dev/null; then
    echo " Flushed anchor $ANCHOR"
  fi
  # Remove anchor file
  if rm -f "$ANCHOR_FILE" "/tmp/pf-combined-test.conf"; then
    echo " Removed temp files"
  fi
  # Reload original pf.conf to remove anchor reference
  if pfctl -f /etc/pf.conf 2>/dev/null; then
    echo " Reloaded original pf.conf"
  fi
  # Kill test listener
  if [ -n "$LISTENER_PID" ] && kill "$LISTENER_PID" 2>/dev/null; then
    echo " Stopped test DNS listener"
  fi
  echo " Cleanup complete"
}
trap cleanup EXIT
# resolve SERVER NAME [TIMEOUT]
# Asks SERVER for the A record of NAME and prints the first IPv4 address of
# the answer; prints nothing when the lookup fails.
#   SERVER   resolver address to query
#   NAME     hostname to look up
#   TIMEOUT  per-try timeout in seconds (default 3). Callers already pass this
#            third argument; it used to be silently ignored.
resolve() {
  local server=$1 name=$2 timeout=${3:-3}
  dig "@${server}" "$name" A +short "+timeout=${timeout}" +tries=1 2>/dev/null \
    | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | head -1
}
# Intro banner describing what the run will do.
echo -e "${CYAN}=== pf DNS Redirect Rule Validation ===${NC}"
printf '%s\n' \
  " This loads the exact pf rules from the dns-intercept MR," \
  " starts a tiny DNS listener on 127.0.0.1:53, and verifies" \
  " that queries to external IPs get redirected." \
  ""
# 0. Check we're root
if [ "$(id -u)" -ne 0 ]; then
  fail "Must run as root (sudo)"
  exit 1
fi
# 1. Start a minimal DNS listener on 127.0.0.1:53
# A small Python UDP proxy relays each query to 1.1.1.1 — enough to prove the
# redirect works. If something already uses port 53, that is used instead.
# Sets HAVE_LISTENER (true/false) and, when the proxy is started, LISTENER_PID.
echo "--- Step 1: DNS Listener on 127.0.0.1:53 ---"
if lsof -i :53 -sTCP:LISTEN 2>/dev/null | grep -q "." || lsof -i UDP:53 2>/dev/null | grep -q "."; then
  ok "Something already listening on port 53 (likely mDNSResponder or ctrld)"
  HAVE_LISTENER=true
else
  # Start a simple Python DNS proxy that forwards to 1.1.1.1
  # (the program is single-quoted, so the shell expands nothing inside it)
  python3 -c '
import socket
import threading

UPSTREAM = ("1.1.1.1", 53)


def proxy(data, addr, sock):
    # Relay one query upstream and hand the answer back to the client.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.settimeout(3)
            s.sendto(data, UPSTREAM)
            resp, _ = s.recvfrom(4096)
        sock.sendto(resp, addr)
    except OSError:
        # Best effort: a lost query simply times out on the client side.
        pass


sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("127.0.0.1", 53))
print("READY", flush=True)
while True:
    data, addr = sock.recvfrom(4096)
    threading.Thread(target=proxy, args=(data, addr, sock), daemon=True).start()
' &
  LISTENER_PID=$!
  # Give Python a moment to bind; if the process is already gone, it failed.
  sleep 1
  if kill -0 "$LISTENER_PID" 2>/dev/null; then
    ok "Started test DNS proxy on 127.0.0.1:53 (PID $LISTENER_PID, forwards to 1.1.1.1)"
    HAVE_LISTENER=true
  else
    fail "Could not start DNS listener on port 53 — port may be in use"
    HAVE_LISTENER=false
  fi
fi
echo ""
# 2. Verify baseline: direct query to 8.8.8.8 works (before pf rules)
# A failure here is only a warning.
echo "--- Step 2: Baseline (before pf rules) ---"
IP=$(resolve "8.8.8.8" "example.com")
case "$IP" in
  "")
    warn "Direct DNS to 8.8.8.8 failed — may be blocked by existing firewall"
    ;;
  *)
    ok "Direct DNS to 8.8.8.8 works (baseline): $IP"
    ;;
esac
echo ""
# 3. Write and load the pf anchor (exact rules from MR)
# The heredoc delimiter is unquoted on purpose: $TEST_UPSTREAM is expanded
# while the rules file is written.
echo "--- Step 3: Load pf Anchor Rules ---"
TEST_UPSTREAM="1.1.1.1"
cat > "$ANCHOR_FILE" << PFRULES
# ctrld DNS Intercept Mode (test anchor)
# Two-step: route-to lo0 + rdr on lo0
#
# In production, ctrld uses DoH (port 443) for upstreams so they're not
# affected by port 53 rules. For this test, we exempt our upstream ($TEST_UPSTREAM)
# explicitly — same mechanism ctrld uses for OS resolver exemptions.
# --- Translation rules (rdr) ---
rdr pass on lo0 inet proto udp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53
rdr pass on lo0 inet proto tcp from any to ! 127.0.0.1 port 53 -> 127.0.0.1 port 53
# --- Filtering rules (pass) ---
# Exempt test upstream (in production: ctrld uses DoH, so this isn't needed).
pass out quick on ! lo0 inet proto { udp, tcp } from any to $TEST_UPSTREAM port 53
# Force remaining outbound DNS through loopback for interception.
pass out quick on ! lo0 route-to lo0 inet proto udp from any to ! 127.0.0.1 port 53 no state
pass out quick on ! lo0 route-to lo0 inet proto tcp from any to ! 127.0.0.1 port 53 no state
# Allow redirected traffic through on loopback.
pass in quick on lo0 inet proto { udp, tcp } from any to 127.0.0.1 port 53 no state
PFRULES
ok "Wrote anchor file: $ANCHOR_FILE"
# Echo the generated rules, indented, for the record.
sed 's/^/ /' "$ANCHOR_FILE"
echo ""
# Load anchor
if ! OUTPUT=$(pfctl -a "$ANCHOR" -f "$ANCHOR_FILE" 2>&1); then
  fail "Failed to load anchor: $OUTPUT"
  exit 1
fi
ok "Loaded anchor: $ANCHOR"
# Inject anchor references into running pf config.
# pf enforces strict rule ordering: options, normalization, queueing, translation, filtering.
# We must insert rdr-anchor with other rdr-anchors and anchor with other anchors.
#
# The helper builds a copy of /etc/pf.conf with both references added and
# writes it to $TMPCONF; /etc/pf.conf itself is never modified. Anchor name and
# output path are passed as arguments instead of being spliced into the Python
# source.
TMPCONF="/tmp/pf-combined-test.conf"
python3 - "$ANCHOR" "$TMPCONF" << 'PYEOF' || { fail "Failed to build combined pf config"; exit 1; }
import sys

anchor, out_path = sys.argv[1], sys.argv[2]
rdr_ref = 'rdr-anchor "%s"' % anchor
anchor_ref = 'anchor "%s"' % anchor

# Line prefixes taken to mark the end of the translation section.
# NOTE(review): "dummynet" is listed because the stock macOS pf.conf places
# dummynet-anchor after rdr-anchor — confirm against pfctl's ordering rules.
AFTER_TRANSLATION = ("dummynet", "anchor", "pass", "block", "match", "antispoof")

with open("/etc/pf.conf") as fh:
    lines = fh.read().splitlines()

out = []
rdr_done = False
anc_done = False
for line in lines:
    s = line.strip()
    # Insert our rdr-anchor before the first existing rdr-anchor
    if not rdr_done and s.startswith("rdr-anchor"):
        out.append(rdr_ref)
        rdr_done = True
    # Insert our anchor before the first existing anchor (filter-phase)
    if not anc_done and s.startswith("anchor") and not s.startswith('anchor "com.apple'):
        out.append(anchor_ref)
        anc_done = True
    out.append(line)

# Fallback if no existing anchors found
if not rdr_done:
    # No rdr-anchor to sit next to: place ours directly before the first
    # post-translation line, so it follows any set/scrub/queue lines instead
    # of being put at the very top of the file. Appended if no such line exists.
    pos = next(
        (i for i, l in enumerate(out) if l.strip().startswith(AFTER_TRANSLATION)),
        len(out),
    )
    out.insert(pos, rdr_ref)
if not anc_done:
    out.append(anchor_ref)

with open(out_path, "w") as fh:
    fh.write("\n".join(out) + "\n")
PYEOF
INJECT_OUT=$(pfctl -f "$TMPCONF" 2>&1) || {
  fail "Failed to inject anchor reference: $INJECT_OUT"
  rm -f "$TMPCONF"
  exit 1
}
rm -f "$TMPCONF"
ok "Injected anchor references into running pf ruleset"
# Enable pf; a non-zero status from pfctl here is ignored.
pfctl -e 2>/dev/null || true
# Show loaded rules
echo ""
# Each entry is "<label>:<pfctl show flag>".
for section in "NAT:-sn" "filter:-sr"; do
  echo " Active ${section%%:*} rules:"
  pfctl -a "$ANCHOR" "${section##*:}" 2>/dev/null | sed 's/^/ /'
done
echo ""
# 4. Test: DNS to 8.8.8.8 should now be redirected to 127.0.0.1:53
# Two external resolvers are queried; each must still produce an answer.
echo "--- Step 4: Redirect Test ---"
case "$HAVE_LISTENER" in
  true)
    IP=$(resolve "8.8.8.8" "example.com" 5)
    if [ -z "$IP" ]; then
      fail "DNS to 8.8.8.8 failed — redirect may not be working"
    else
      ok "DNS to 8.8.8.8 redirected through 127.0.0.1:53: $IP"
    fi
    # Also test another random IP
    IP2=$(resolve "9.9.9.9" "example.com" 5)
    if [ -z "$IP2" ]; then
      fail "DNS to 9.9.9.9 failed"
    else
      ok "DNS to 9.9.9.9 also redirected: $IP2"
    fi
    ;;
  *)
    warn "No listener on port 53 — cannot test redirect"
    ;;
esac
echo ""
# 5. Test: DNS to 127.0.0.1 still works (not double-redirected)
# Skipped silently when no listener is available.
echo "--- Step 5: Localhost DNS (no loop) ---"
if [ "$HAVE_LISTENER" = true ]; then
  IP=$(resolve "127.0.0.1" "example.com" 5)
  case "$IP" in
    "")
      fail "DNS to 127.0.0.1 failed — possible redirect loop"
      ;;
    *)
      ok "DNS to 127.0.0.1 works normally (not caught by redirect): $IP"
      ;;
  esac
fi
echo ""
# 6. Simulate VPN DNS override
# Points the first available network service at a fake VPN resolver
# (10.50.10.77), checks that a query sent to it is still answered, then puts
# the original DNS servers back.
echo "--- Step 6: VPN DNS Override Simulation ---"
# NOTE(review): IFACE is not referenced anywhere in the visible part of this
# script — kept in case later code relies on it.
IFACE=$(route -n get default 2>/dev/null | awk '/interface:/{print $2}')
SVC=""
for try_svc in "Wi-Fi" "Ethernet" "Thunderbolt Ethernet"; do
  if networksetup -getdnsservers "$try_svc" 2>/dev/null >/dev/null; then
    SVC="$try_svc"
    break
  fi
done

# restore_dns — put back the DNS servers saved in ORIG_DNS for $SVC.
# Does nothing unless an override is active, so calling it twice is safe.
restore_dns() {
  [ "${DNS_OVERRIDDEN:-false}" = true ] || return 0
  DNS_OVERRIDDEN=false
  # An empty saved value (read failed) is handled like "no servers configured".
  if [ -z "$ORIG_DNS" ] || echo "$ORIG_DNS" | grep -q "There aren't any DNS Servers"; then
    networksetup -setdnsservers "$SVC" Empty
  else
    # ORIG_DNS is deliberately unquoted: every word in it must become its own
    # argument.
    # shellcheck disable=SC2086
    networksetup -setdnsservers "$SVC" $ORIG_DNS
  fi
}

if [ -n "$SVC" ] && [ "$HAVE_LISTENER" = true ]; then
  ORIG_DNS=$(networksetup -getdnsservers "$SVC" 2>/dev/null || echo "")
  echo " Service: $SVC"
  echo " Current DNS: $ORIG_DNS"
  # Arm the restore before touching the system: if the script dies between
  # here and the explicit restore below, the EXIT trap still puts the user's
  # DNS back instead of leaving it on the fake resolver.
  DNS_OVERRIDDEN=true
  trap 'restore_dns || true; cleanup' EXIT
  networksetup -setdnsservers "$SVC" 10.50.10.77
  dscacheutil -flushcache 2>/dev/null || true
  killall -HUP mDNSResponder 2>/dev/null || true
  echo " Set DNS to 10.50.10.77 (simulating F5 VPN)"
  sleep 2
  IP=$(resolve "10.50.10.77" "google.com" 5)
  if [ -n "$IP" ]; then
    ok "Query to fake VPN DNS (10.50.10.77) redirected to ctrld: $IP"
  else
    fail "Query to fake VPN DNS failed"
  fi
  # Restore
  restore_dns
  trap cleanup EXIT
  echo " Restored DNS"
else
  warn "Skipping VPN simulation (no service found or no listener)"
fi
echo ""
# Final verdict, based on the number of failed checks counted in FAILURES.
if [ "$FAILURES" -eq 0 ]; then
  echo -e "${GREEN}=== All tests passed ===${NC}"
else
  echo -e "${RED}=== $FAILURES test(s) failed ===${NC}"
  # Report the failure through the exit status as well, so callers can tell
  # a failed validation from a clean one. The EXIT trap still runs cleanup.
  exit 1
fi