From 5007a87d3a3de4d169b0824b3a983ce408256a90 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Sat, 8 Feb 2025 08:17:43 +0700 Subject: [PATCH] cmd/cli: better error message when doing restart In case of remote config validation error during start, it's likely that there's problem with connecting to ControlD API. The ctrld daemon was restarted in this case, but may not ready to receive requests yet. This commit changes the error message to explicitly state that instead of a mis-leading "could not complete service restart". --- cmd/cli/cli.go | 13 ++++++++++--- cmd/cli/commands.go | 24 +++++++++++++++++------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/cmd/cli/cli.go b/cmd/cli/cli.go index 07abf3c..9d01206 100644 --- a/cmd/cli/cli.go +++ b/cmd/cli/cli.go @@ -1478,6 +1478,12 @@ func removeOrgFlagsFromArgs(sc *service.Config) { // newSocketControlClient returns new control client after control server was started. func newSocketControlClient(ctx context.Context, s service.Service, dir string) *controlClient { + return newSocketControlClientWithTimeout(ctx, s, dir, dialSocketControlServerTimeout) +} + +// newSocketControlClientWithTimeout returns new control client after control server was started. +// The timeoutDuration controls how long to wait for the server. +func newSocketControlClientWithTimeout(ctx context.Context, s service.Service, dir string, timeoutDuration time.Duration) *controlClient { // Return early if service is not running. if status, err := s.Status(); err != nil || status != service.StatusRunning { return nil @@ -1486,7 +1492,7 @@ func newSocketControlClient(ctx context.Context, s service.Service, dir string) bo.LogLongerThan = 10 * time.Second cc := newControlClient(filepath.Join(dir, ctrldControlUnixSock)) - timeout := time.NewTimer(30 * time.Second) + timeout := time.NewTimer(timeoutDuration) defer timeout.Stop() // The socket control server may not start yet, so attempt to ping @@ -1807,7 +1813,7 @@ func resetDnsTask(p *prog, s service.Service, isCtrldInstalled bool, ir *ifaceRe } // doValidateCdRemoteConfig fetches and validates custom config for cdUID. -func doValidateCdRemoteConfig(cdUID string, fatal bool) { +func doValidateCdRemoteConfig(cdUID string, fatal bool) error { rc, err := controld.FetchResolverConfig(cdUID, rootCmd.Version, cdDev) if err != nil { logger := mainLog.Load().Fatal() @@ -1816,7 +1822,7 @@ func doValidateCdRemoteConfig(cdUID string, fatal bool) { } logger.Err(err).Err(err).Msgf("failed to fetch resolver uid: %s", cdUID) if !fatal { - return + return err } } // validateCdRemoteConfig clobbers v, saving it here to restore later. @@ -1847,6 +1853,7 @@ func doValidateCdRemoteConfig(cdUID string, fatal bool) { mainLog.Load().Warn().Msg("disregarding invalid custom config") } v = oldV + return nil } // uninstallInvalidCdUID performs self-uninstallation because the ControlD device does not exist. diff --git a/cmd/cli/commands.go b/cmd/cli/commands.go index d340574..49dfb8f 100644 --- a/cmd/cli/commands.go +++ b/cmd/cli/commands.go @@ -30,6 +30,9 @@ import ( "github.com/Control-D-Inc/ctrld/internal/router" ) +// dialSocketControlServerTimeout is the default timeout to wait when ping control server. +const dialSocketControlServerTimeout = 30 * time.Second + func initLogCmd() *cobra.Command { warnRuntimeLoggingNotEnabled := func() { mainLog.Load().Warn().Msg("runtime debug logging is not enabled") @@ -373,7 +376,7 @@ NOTE: running "ctrld start" without any arguments will start already installed c } if cdUID != "" { - doValidateCdRemoteConfig(cdUID, true) + _ = doValidateCdRemoteConfig(cdUID, true) } else if uid := cdUIDFromProvToken(); uid != "" { cdUID = uid mainLog.Load().Debug().Msg("using uid from provision token") @@ -697,8 +700,9 @@ func initRestartCmd() *cobra.Command { initInteractiveLogging() + var validateConfigErr error if cdMode { - doValidateCdRemoteConfig(cdUID, false) + validateConfigErr = doValidateCdRemoteConfig(cdUID, false) } if ir := runningIface(s); ir != nil { @@ -752,12 +756,18 @@ func initRestartCmd() *cobra.Command { if doRestart() { if dir, err := socketDir(); err == nil { - cc := newSocketControlClient(context.TODO(), s, dir) - if cc == nil { - mainLog.Load().Error().Msg("Could not complete service restart") - os.Exit(1) + timeout := dialSocketControlServerTimeout + // If we failed to validate remote config above, it's likely that + // we are having problem with network connection. So using a shorter + // timeout than default one for better UX. + if validateConfigErr != nil { + timeout = 5 * time.Second + } + if cc := newSocketControlClientWithTimeout(context.TODO(), s, dir, timeout); cc != nil { + _, _ = cc.post(ifacePath, nil) + } else { + mainLog.Load().Warn().Err(err).Msg("Service was restarted, but ctrld process may not be ready yet") } - _, _ = cc.post(ifacePath, nil) } else { mainLog.Load().Warn().Err(err).Msg("Service was restarted, but could not ping the control server") }