diff --git a/cmd/ctrld/cli.go b/cmd/ctrld/cli.go index e24911a..f9543aa 100644 --- a/cmd/ctrld/cli.go +++ b/cmd/ctrld/cli.go @@ -435,13 +435,8 @@ func initCLI() { mainLog.Load().Warn().Err(err).Msg("post installation failed, please check system/service log for details error") return } - status, err := s.Status() - if err != nil { - mainLog.Load().Warn().Err(err).Msg("could not get service status") - return - } - status = selfCheckStatus(status) + status := selfCheckStatus(s) switch status { case service.StatusRunning: mainLog.Load().Notice().Msg("Service started") @@ -969,7 +964,19 @@ func processNoConfigFlags(noConfigStart bool) { func processCDFlags() { logger := mainLog.Load().With().Str("mode", "cd").Logger() logger.Info().Msgf("fetching Controld D configuration from API: %s", cdUID) + bo := backoff.NewBackoff("processCDFlags", logf, 30*time.Second) + bo.LogLongerThan = 30 * time.Second + ctx := context.Background() resolverConfig, err := controld.FetchResolverConfig(cdUID, rootCmd.Version, cdDev) + for { + if errUrlNetworkError(err) { + bo.BackOff(ctx, err) + logger.Warn().Msg("could not fetch resolver using bootstrap DNS, retrying...") + resolverConfig, err = controld.FetchResolverConfig(cdUID, rootCmd.Version, cdDev) + continue + } + break + } if uer, ok := err.(*controld.UtilityErrorResponse); ok && uer.ErrorField.Code == controld.InvalidConfigCode { s, err := newService(&prog{}, svcConfig) if err != nil { @@ -1114,7 +1121,16 @@ func defaultIfaceName() string { return dri } -func selfCheckStatus(status service.Status) service.Status { +func selfCheckStatus(s service.Service) service.Status { + status, err := s.Status() + if err != nil { + mainLog.Load().Warn().Err(err).Msg("could not get service status") + return status + } + // If ctrld is not running, do nothing, just return the status as-is. + if status != service.StatusRunning { + return status + } dir, err := userHomeDir() if err != nil { mainLog.Load().Error().Err(err).Msg("failed to check ctrld listener status: could not get home directory") @@ -1124,17 +1140,30 @@ func selfCheckStatus(status service.Status) service.Status { bo := backoff.NewBackoff("self-check", logf, 10*time.Second) bo.LogLongerThan = 10 * time.Second ctx := context.Background() - maxAttempts := 20 mainLog.Load().Debug().Msg("waiting for ctrld listener to be ready") cc := newControlClient(filepath.Join(dir, ctrldControlUnixSock)) // The socket control server may not start yet, so attempt to ping - // it until we got a response, or maxAttempts reached. - for i := 0; i < maxAttempts; i++ { + // it until we got a response. For each iteration, check ctrld status + // to make sure ctrld is still running. + for { + curStatus, err := s.Status() + if err != nil { + mainLog.Load().Warn().Err(err).Msg("could not get service status while doing self-check") + return status + } + if curStatus != service.StatusRunning { + return curStatus + } if _, err := cc.post("/", nil); err != nil { - bo.BackOff(ctx, err) - continue + // Do not count attempt if the server is not ready yet. + if errUrlConnRefused(err) { + bo.BackOff(ctx, err) + continue + } + mainLog.Load().Warn().Err(err).Msg("could not ping socket control server") + return service.StatusUnknown } break } @@ -1153,6 +1182,7 @@ func selfCheckStatus(status service.Status) service.Status { mainLog.Load().Debug().Msg("performing self-check") bo = backoff.NewBackoff("self-check", logf, 10*time.Second) bo.LogLongerThan = 500 * time.Millisecond + maxAttempts := 20 c := new(dns.Client) var ( lcChanged map[string]*ctrld.ListenerConfig diff --git a/cmd/ctrld/prog.go b/cmd/ctrld/prog.go index 47e3b92..530a7c2 100644 --- a/cmd/ctrld/prog.go +++ b/cmd/ctrld/prog.go @@ -5,6 +5,7 @@ import ( "fmt" "math/rand" "net" + "net/url" "os" "strconv" "sync" @@ -310,11 +311,41 @@ func runLogServer(sockPath string) net.Conn { } func errAddrInUse(err error) bool { - opErr, ok := err.(*net.OpError) - if !ok { - return false + var opErr *net.OpError + if errors.As(err, &opErr) { + return errors.Is(opErr.Err, syscall.EADDRINUSE) } - return errors.Is(opErr.Err, syscall.EADDRINUSE) + return false +} + +func errUrlConnRefused(err error) bool { + var urlErr *url.Error + if errors.As(err, &urlErr) { + var opErr *net.OpError + if errors.As(urlErr.Err, &opErr) { + return errors.Is(opErr.Err, syscall.ECONNREFUSED) + } + } + return false +} + +func errUrlNetworkError(err error) bool { + var urlErr *url.Error + if errors.As(err, &urlErr) { + var opErr *net.OpError + if errors.As(urlErr.Err, &opErr) { + if opErr.Temporary() { + return true + } + switch { + case errors.Is(opErr.Err, syscall.ECONNREFUSED), + errors.Is(opErr.Err, syscall.EINVAL), + errors.Is(opErr.Err, syscall.ENETUNREACH): + return true + } + } + } + return false } // defaultRouteIP returns IP string of the default route if present, prefer IPv4 over IPv6.