From 49eb152d02e5d482bc3925754486cb4b6c3eb475 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 20 Feb 2025 23:37:44 -0500 Subject: [PATCH] transport should try ipv4 then ipv6 explicitly client list panic guards and debug logging --- cmd/cli/commands.go | 3 +- cmd/cli/control_server.go | 54 +++++++++++++++++++++++++-- cmd/cli/library.go | 59 ++++++++++++++++++++++++++++++ cmd/cli/service.go | 7 +++- internal/clientinfo/client_info.go | 26 +++++++++---- internal/controld/config.go | 52 ++++++++++++++++++++++---- 6 files changed, 180 insertions(+), 21 deletions(-) diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index 693e6e8..31be3bf 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -1273,7 +1273,8 @@ func initUpgradeCmd() *cobra.Command { } dlUrl := upgradeUrl(baseUrl) mainLog.Load().Debug().Msgf("Downloading binary: %s", dlUrl) - resp, err := http.Get(dlUrl) + + resp, err := getWithRetry(dlUrl) if err != nil { mainLog.Load().Fatal().Err(err).Msg("failed to download binary") } diff --git a/cmd/cli/control_server.go b/cmd/cli/control_server.go index b50d333..17f585d 100644 --- a/cmd/cli/control_server.go +++ b/cmd/cli/control_server.go @@ -79,33 +79,81 @@ func (s *controlServer) register(pattern string, handler http.Handler) { func (p *prog) registerControlServerHandler() { p.cs.register(listClientsPath, http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) { + mainLog.Load().Debug().Msg("handling list clients request") + clients := p.ciTable.ListClients() + mainLog.Load().Debug().Int("client_count", len(clients)).Msg("retrieved clients list") + sort.Slice(clients, func(i, j int) bool { return clients[i].IP.Less(clients[j].IP) }) + mainLog.Load().Debug().Msg("sorted clients by IP address") + if p.metricsQueryStats.Load() { - for _, client := range clients { + mainLog.Load().Debug().Msg("metrics query stats enabled, collecting query counts") + + for idx, client := range clients { + mainLog.Load().Debug(). + Int("index", idx). + Str("ip", client.IP.String()). + Str("mac", client.Mac). + Str("hostname", client.Hostname). + Msg("processing client metrics") + client.IncludeQueryCount = true dm := &dto.Metric{} + + if statsClientQueriesCount.MetricVec == nil { + mainLog.Load().Debug(). + Str("client_ip", client.IP.String()). + Msg("skipping metrics collection: MetricVec is nil") + continue + } + m, err := statsClientQueriesCount.MetricVec.GetMetricWithLabelValues( client.IP.String(), client.Mac, client.Hostname, ) if err != nil { - mainLog.Load().Debug().Err(err).Msgf("could not get metrics for client: %v", client) + mainLog.Load().Debug(). + Err(err). + Str("client_ip", client.IP.String()). + Str("mac", client.Mac). + Str("hostname", client.Hostname). + Msg("failed to get metrics for client") continue } - if err := m.Write(dm); err == nil { + + if err := m.Write(dm); err == nil && dm.Counter != nil { client.QueryCount = int64(dm.Counter.GetValue()) + mainLog.Load().Debug(). + Str("client_ip", client.IP.String()). + Int64("query_count", client.QueryCount). + Msg("successfully collected query count") + } else if err != nil { + mainLog.Load().Debug(). + Err(err). + Str("client_ip", client.IP.String()). + Msg("failed to write metric") } } + } else { + mainLog.Load().Debug().Msg("metrics query stats disabled, skipping query counts") } if err := json.NewEncoder(w).Encode(&clients); err != nil { + mainLog.Load().Error(). + Err(err). + Int("client_count", len(clients)). + Msg("failed to encode clients response") http.Error(w, err.Error(), http.StatusInternalServerError) return } + + mainLog.Load().Debug(). + Int("client_count", len(clients)). + Msg("successfully sent clients list response") })) p.cs.register(startedPath, http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) { select { diff --git a/cmd/cli/library.go b/cmd/cli/library.go index d302644..a5ba389 100644 --- a/cmd/cli/library.go +++ b/cmd/cli/library.go @@ -1,5 +1,12 @@ package cli +import ( + "fmt" + "net" + "net/http" + "time" +) + // AppCallback provides hooks for injecting certain functionalities // from mobile platforms to main ctrld cli. type AppCallback struct { @@ -17,3 +24,55 @@ type AppConfig struct { Verbose int LogPath string } + +const ( + defaultHTTPTimeout = 30 * time.Second + defaultMaxRetries = 3 +) + +// httpClientWithFallback returns an HTTP client configured with timeout and IPv4 fallback +func httpClientWithFallback(timeout time.Duration) *http.Client { + return &http.Client{ + Timeout: timeout, + Transport: &http.Transport{ + // Prefer IPv4 over IPv6 + DialContext: (&net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 30 * time.Second, + FallbackDelay: 1 * time.Millisecond, // Very small delay to prefer IPv4 + }).DialContext, + }, + } +} + +// doWithRetry performs an HTTP request with retries +func doWithRetry(req *http.Request, maxRetries int) (*http.Response, error) { + var lastErr error + client := httpClientWithFallback(defaultHTTPTimeout) + + for attempt := 0; attempt < maxRetries; attempt++ { + if attempt > 0 { + time.Sleep(time.Second * time.Duration(attempt+1)) // Exponential backoff + } + + resp, err := client.Do(req) + if err == nil { + return resp, nil + } + lastErr = err + mainLog.Load().Debug().Err(err). + Str("method", req.Method). + Str("url", req.URL.String()). + Msgf("HTTP request attempt %d/%d failed", attempt+1, maxRetries) + } + return nil, fmt.Errorf("failed after %d attempts to %s %s: %v", maxRetries, req.Method, req.URL, lastErr) +} + +// Helper for making GET requests with retries +func getWithRetry(url string) (*http.Response, error) { + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + return doWithRetry(req, defaultMaxRetries) +} diff --git a/cmd/cli/service.go b/cmd/cli/service.go index 0567b88..f03146d 100644 --- a/cmd/cli/service.go +++ b/cmd/cli/service.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "os/exec" + "runtime" "github.com/kardianos/service" @@ -168,7 +169,11 @@ func doTasks(tasks []task) bool { mainLog.Load().Error().Msgf("error running task %s: %v", task.Name, err) return false } - mainLog.Load().Debug().Msgf("error running task %s: %v", task.Name, err) + // if this is darwin stop command, dont print debug + // since launchctl complains on every start + if runtime.GOOS != "darwin" || task.Name != "Stop" { + mainLog.Load().Debug().Msgf("error running task %s: %v", task.Name, err) + } } } return true diff --git a/internal/clientinfo/client_info.go b/internal/clientinfo/client_info.go index 582f661..06449e1 100644 --- a/internal/clientinfo/client_info.go +++ b/internal/clientinfo/client_info.go @@ -422,17 +422,27 @@ func (t *Table) ListClients() []*Client { t.Refresh() ipMap := make(map[string]*Client) il := []ipLister{t.dhcp, t.arp, t.ndp, t.ptr, t.mdns, t.vni} + for _, ir := range il { + if ir == nil { + continue + } + for _, ip := range ir.List() { - c, ok := ipMap[ip] - if !ok { - c = &Client{ - IP: netip.MustParseAddr(ip), - Source: map[string]struct{}{ir.String(): {}}, + // Validate IP before using MustParseAddr + if addr, err := netip.ParseAddr(ip); err == nil { + c, ok := ipMap[ip] + if !ok { + c = &Client{ + IP: addr, + Source: map[string]struct{}{}, + } + ipMap[ip] = c + } + // Safely get source name + if src := ir.String(); src != "" { + c.Source[src] = struct{}{} } - ipMap[ip] = c - } else { - c.Source[ir.String()] = struct{}{} } } } diff --git a/internal/controld/config.go b/internal/controld/config.go index 5bedb04..5e65fdb 100644 --- a/internal/controld/config.go +++ b/internal/controld/config.go @@ -216,19 +216,55 @@ func apiTransport(cdDev bool) *http.Transport { if cdDev { apiDomain = apiDomainDev } + + // First try IPv4 + dialer := &net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 30 * time.Second, + } + ips := ctrld.LookupIP(apiDomain) if len(ips) == 0 { - ctrld.ProxyLogger.Load().Warn().Msgf("No IPs found for %s, connecting to %s", apiDomain, addr) - return ctrldnet.Dialer.DialContext(ctx, network, addr) + ctrld.ProxyLogger.Load().Warn().Msgf("No IPs found for %s, falling back to direct connection to %s", apiDomain, addr) + return dialer.DialContext(ctx, network, addr) } - ctrld.ProxyLogger.Load().Debug().Msgf("API IPs: %v", ips) + + // Separate IPv4 and IPv6 addresses + var ipv4s, ipv6s []string + for _, ip := range ips { + if strings.Contains(ip, ":") { + ipv6s = append(ipv6s, ip) + } else { + ipv4s = append(ipv4s, ip) + } + } + _, port, _ := net.SplitHostPort(addr) - addrs := make([]string, len(ips)) - for i := range ips { - addrs[i] = net.JoinHostPort(ips[i], port) + + // Try IPv4 first + if len(ipv4s) > 0 { + addrs := make([]string, len(ipv4s)) + for i, ip := range ipv4s { + addrs[i] = net.JoinHostPort(ip, port) + } + d := &ctrldnet.ParallelDialer{} + if conn, err := d.DialContext(ctx, "tcp4", addrs, ctrld.ProxyLogger.Load()); err == nil { + return conn, nil + } } - d := &ctrldnet.ParallelDialer{} - return d.DialContext(ctx, network, addrs, ctrld.ProxyLogger.Load()) + + // Fall back to IPv6 if available + if len(ipv6s) > 0 { + addrs := make([]string, len(ipv6s)) + for i, ip := range ipv6s { + addrs[i] = net.JoinHostPort(ip, port) + } + d := &ctrldnet.ParallelDialer{} + return d.DialContext(ctx, "tcp6", addrs, ctrld.ProxyLogger.Load()) + } + + // Final fallback to direct connection + return dialer.DialContext(ctx, network, addr) } if router.Name() == ddwrt.Name || runtime.GOOS == "android" { transport.TLSClientConfig = &tls.Config{RootCAs: certs.CACertPool()}