transport should try ipv4 then ipv6 explicitly

client list panic guards and debug logging
This commit is contained in:
Alex
2025-02-20 23:37:44 -05:00
committed by Cuong Manh Le
parent b05056423a
commit 49eb152d02
6 changed files with 180 additions and 21 deletions

View File

@@ -1273,7 +1273,8 @@ func initUpgradeCmd() *cobra.Command {
}
dlUrl := upgradeUrl(baseUrl)
mainLog.Load().Debug().Msgf("Downloading binary: %s", dlUrl)
resp, err := http.Get(dlUrl)
resp, err := getWithRetry(dlUrl)
if err != nil {
mainLog.Load().Fatal().Err(err).Msg("failed to download binary")
}

View File

@@ -79,33 +79,81 @@ func (s *controlServer) register(pattern string, handler http.Handler) {
func (p *prog) registerControlServerHandler() {
p.cs.register(listClientsPath, http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) {
mainLog.Load().Debug().Msg("handling list clients request")
clients := p.ciTable.ListClients()
mainLog.Load().Debug().Int("client_count", len(clients)).Msg("retrieved clients list")
sort.Slice(clients, func(i, j int) bool {
return clients[i].IP.Less(clients[j].IP)
})
mainLog.Load().Debug().Msg("sorted clients by IP address")
if p.metricsQueryStats.Load() {
for _, client := range clients {
mainLog.Load().Debug().Msg("metrics query stats enabled, collecting query counts")
for idx, client := range clients {
mainLog.Load().Debug().
Int("index", idx).
Str("ip", client.IP.String()).
Str("mac", client.Mac).
Str("hostname", client.Hostname).
Msg("processing client metrics")
client.IncludeQueryCount = true
dm := &dto.Metric{}
if statsClientQueriesCount.MetricVec == nil {
mainLog.Load().Debug().
Str("client_ip", client.IP.String()).
Msg("skipping metrics collection: MetricVec is nil")
continue
}
m, err := statsClientQueriesCount.MetricVec.GetMetricWithLabelValues(
client.IP.String(),
client.Mac,
client.Hostname,
)
if err != nil {
mainLog.Load().Debug().Err(err).Msgf("could not get metrics for client: %v", client)
mainLog.Load().Debug().
Err(err).
Str("client_ip", client.IP.String()).
Str("mac", client.Mac).
Str("hostname", client.Hostname).
Msg("failed to get metrics for client")
continue
}
if err := m.Write(dm); err == nil {
if err := m.Write(dm); err == nil && dm.Counter != nil {
client.QueryCount = int64(dm.Counter.GetValue())
mainLog.Load().Debug().
Str("client_ip", client.IP.String()).
Int64("query_count", client.QueryCount).
Msg("successfully collected query count")
} else if err != nil {
mainLog.Load().Debug().
Err(err).
Str("client_ip", client.IP.String()).
Msg("failed to write metric")
}
}
} else {
mainLog.Load().Debug().Msg("metrics query stats disabled, skipping query counts")
}
if err := json.NewEncoder(w).Encode(&clients); err != nil {
mainLog.Load().Error().
Err(err).
Int("client_count", len(clients)).
Msg("failed to encode clients response")
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
mainLog.Load().Debug().
Int("client_count", len(clients)).
Msg("successfully sent clients list response")
}))
p.cs.register(startedPath, http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) {
select {

View File

@@ -1,5 +1,12 @@
package cli
import (
"fmt"
"net"
"net/http"
"time"
)
// AppCallback provides hooks for injecting certain functionalities
// from mobile platforms to main ctrld cli.
type AppCallback struct {
@@ -17,3 +24,55 @@ type AppConfig struct {
Verbose int
LogPath string
}
const (
defaultHTTPTimeout = 30 * time.Second
defaultMaxRetries = 3
)
// httpClientWithFallback returns an HTTP client configured with timeout and IPv4 fallback
func httpClientWithFallback(timeout time.Duration) *http.Client {
return &http.Client{
Timeout: timeout,
Transport: &http.Transport{
// Prefer IPv4 over IPv6
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
KeepAlive: 30 * time.Second,
FallbackDelay: 1 * time.Millisecond, // Very small delay to prefer IPv4
}).DialContext,
},
}
}
// doWithRetry performs an HTTP request with retries
func doWithRetry(req *http.Request, maxRetries int) (*http.Response, error) {
var lastErr error
client := httpClientWithFallback(defaultHTTPTimeout)
for attempt := 0; attempt < maxRetries; attempt++ {
if attempt > 0 {
time.Sleep(time.Second * time.Duration(attempt+1)) // Exponential backoff
}
resp, err := client.Do(req)
if err == nil {
return resp, nil
}
lastErr = err
mainLog.Load().Debug().Err(err).
Str("method", req.Method).
Str("url", req.URL.String()).
Msgf("HTTP request attempt %d/%d failed", attempt+1, maxRetries)
}
return nil, fmt.Errorf("failed after %d attempts to %s %s: %v", maxRetries, req.Method, req.URL, lastErr)
}
// Helper for making GET requests with retries
func getWithRetry(url string) (*http.Response, error) {
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return nil, err
}
return doWithRetry(req, defaultMaxRetries)
}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"os"
"os/exec"
"runtime"
"github.com/kardianos/service"
@@ -168,7 +169,11 @@ func doTasks(tasks []task) bool {
mainLog.Load().Error().Msgf("error running task %s: %v", task.Name, err)
return false
}
mainLog.Load().Debug().Msgf("error running task %s: %v", task.Name, err)
// if this is darwin stop command, dont print debug
// since launchctl complains on every start
if runtime.GOOS != "darwin" || task.Name != "Stop" {
mainLog.Load().Debug().Msgf("error running task %s: %v", task.Name, err)
}
}
}
return true

View File

@@ -422,17 +422,27 @@ func (t *Table) ListClients() []*Client {
t.Refresh()
ipMap := make(map[string]*Client)
il := []ipLister{t.dhcp, t.arp, t.ndp, t.ptr, t.mdns, t.vni}
for _, ir := range il {
if ir == nil {
continue
}
for _, ip := range ir.List() {
c, ok := ipMap[ip]
if !ok {
c = &Client{
IP: netip.MustParseAddr(ip),
Source: map[string]struct{}{ir.String(): {}},
// Validate IP before using MustParseAddr
if addr, err := netip.ParseAddr(ip); err == nil {
c, ok := ipMap[ip]
if !ok {
c = &Client{
IP: addr,
Source: map[string]struct{}{},
}
ipMap[ip] = c
}
// Safely get source name
if src := ir.String(); src != "" {
c.Source[src] = struct{}{}
}
ipMap[ip] = c
} else {
c.Source[ir.String()] = struct{}{}
}
}
}

View File

@@ -216,19 +216,55 @@ func apiTransport(cdDev bool) *http.Transport {
if cdDev {
apiDomain = apiDomainDev
}
// First try IPv4
dialer := &net.Dialer{
Timeout: 10 * time.Second,
KeepAlive: 30 * time.Second,
}
ips := ctrld.LookupIP(apiDomain)
if len(ips) == 0 {
ctrld.ProxyLogger.Load().Warn().Msgf("No IPs found for %s, connecting to %s", apiDomain, addr)
return ctrldnet.Dialer.DialContext(ctx, network, addr)
ctrld.ProxyLogger.Load().Warn().Msgf("No IPs found for %s, falling back to direct connection to %s", apiDomain, addr)
return dialer.DialContext(ctx, network, addr)
}
ctrld.ProxyLogger.Load().Debug().Msgf("API IPs: %v", ips)
// Separate IPv4 and IPv6 addresses
var ipv4s, ipv6s []string
for _, ip := range ips {
if strings.Contains(ip, ":") {
ipv6s = append(ipv6s, ip)
} else {
ipv4s = append(ipv4s, ip)
}
}
_, port, _ := net.SplitHostPort(addr)
addrs := make([]string, len(ips))
for i := range ips {
addrs[i] = net.JoinHostPort(ips[i], port)
// Try IPv4 first
if len(ipv4s) > 0 {
addrs := make([]string, len(ipv4s))
for i, ip := range ipv4s {
addrs[i] = net.JoinHostPort(ip, port)
}
d := &ctrldnet.ParallelDialer{}
if conn, err := d.DialContext(ctx, "tcp4", addrs, ctrld.ProxyLogger.Load()); err == nil {
return conn, nil
}
}
d := &ctrldnet.ParallelDialer{}
return d.DialContext(ctx, network, addrs, ctrld.ProxyLogger.Load())
// Fall back to IPv6 if available
if len(ipv6s) > 0 {
addrs := make([]string, len(ipv6s))
for i, ip := range ipv6s {
addrs[i] = net.JoinHostPort(ip, port)
}
d := &ctrldnet.ParallelDialer{}
return d.DialContext(ctx, "tcp6", addrs, ctrld.ProxyLogger.Load())
}
// Final fallback to direct connection
return dialer.DialContext(ctx, network, addr)
}
if router.Name() == ddwrt.Name || runtime.GOOS == "android" {
transport.TLSClientConfig = &tls.Config{RootCAs: certs.CACertPool()}