mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-02-03 22:18:39 +00:00
cmd/cli: make self-check process faster
The "ctrld start" command is running slow, and using much CPU than necessary. The problem was made because of several things: 1. ctrld process is waiting for 5 seconds before marking listeners up. That ends up adding those seconds to the self-check process, even though the listeners may have been already available. 2. While creating socket control client, "s.Status()" is called to obtain ctrld service status, so we could terminate early if the service failed to run. However, that would make a lot of syscall in a hot loop, eating the CPU constantly while the command is running. On Windows, that call would become slower after each calls. The same effect could be seen using Windows services manager GUI, by pressing start/stop/restart button fast enough, we could see a timeout raised. 3. The socket control server is started lately, after all the listeners up. That would make the loop for creating socket control client run longer and use much resources than necessary. Fixes for these problems are quite obvious: 1. Removing hard code 5 seconds waiting. NotifyStartedFunc is enough to ensure that listeners are ready for accepting requests. 2. Check "s.Status()" only once before the loop. There has been already 30 seconds timeout, so if anything went wrong, the self-check process could be terminated, and won't hang forever. 3. Starting socket control server earlier, so newSocketControlClient can connect to server with fewest attempts, then querying "/started" endpoint to ensure the listeners have been ready. With these fixes, "ctrld start" now run much faster on modern machines, taking ~1-2 seconds (previously ~5-8 seconds) to finish. On dual cores VM, it takes ~5-8 seconds (previously a few dozen seconds or timeout). --- While at it, there are two refactoring for making the code easier to read/maintain: - PersistentPreRun is now used in root command to init console logging, so we don't have to initialize them in sub-commands. - NotifyStartedFunc now use channel for synchronization, instead of a mutex, making the ugly asymetric calls to lock goes away, making the code more idiom, and theoretically have better performance.
This commit is contained in:
committed by
Cuong Manh Le
parent
f499770d45
commit
a1fda2c0de
@@ -1598,7 +1598,7 @@ func selfCheckStatus(s service.Service) (bool, service.Status, error) {
|
||||
|
||||
dir, err := socketDir()
|
||||
if err != nil {
|
||||
mainLog.Load().Error().Err(err).Msg("failed to check ctrld listener status: could not get home directory")
|
||||
mainLog.Load().Error().Err(err).Msg("failed to get ctrld listener status: could not get home directory")
|
||||
return false, status, err
|
||||
}
|
||||
mainLog.Load().Debug().Msg("waiting for ctrld listener to be ready")
|
||||
@@ -1607,17 +1607,6 @@ func selfCheckStatus(s service.Service) (bool, service.Status, error) {
|
||||
return false, status, errors.New("could not connect to control server")
|
||||
}
|
||||
|
||||
resp, err := cc.post(startedPath, nil)
|
||||
if err != nil {
|
||||
mainLog.Load().Error().Err(err).Msg("failed to connect to control server")
|
||||
return false, status, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
mainLog.Load().Error().Msg("ctrld listener is not ready")
|
||||
return false, status, errors.New("ctrld listener is not ready")
|
||||
}
|
||||
|
||||
// Not a ctrld upstream, return status as-is.
|
||||
if cfg.FirstUpstream().VerifyDomain() == "" {
|
||||
return true, status, nil
|
||||
@@ -2261,6 +2250,10 @@ func removeProvTokenFromArgs(sc *service.Config) {
|
||||
|
||||
// newSocketControlClient returns new control client after control server was started.
|
||||
func newSocketControlClient(s service.Service, dir string) *controlClient {
|
||||
// Return early if service is not running.
|
||||
if status, err := s.Status(); err != nil || status != service.StatusRunning {
|
||||
return nil
|
||||
}
|
||||
bo := backoff.NewBackoff("self-check", logf, 10*time.Second)
|
||||
bo.LogLongerThan = 10 * time.Second
|
||||
ctx := context.Background()
|
||||
@@ -2270,28 +2263,21 @@ func newSocketControlClient(s service.Service, dir string) *controlClient {
|
||||
defer timeout.Stop()
|
||||
|
||||
// The socket control server may not start yet, so attempt to ping
|
||||
// it until we got a response. For each iteration, check ctrld status
|
||||
// to make sure ctrld is still running.
|
||||
// it until we got a response.
|
||||
for {
|
||||
curStatus, err := s.Status()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if curStatus != service.StatusRunning {
|
||||
return nil
|
||||
}
|
||||
if _, err := cc.post("/", nil); err == nil {
|
||||
_, err := cc.post(startedPath, nil)
|
||||
if err == nil {
|
||||
// Server was started, stop pinging.
|
||||
break
|
||||
}
|
||||
// The socket control server is not ready yet, backoff for waiting it to be ready.
|
||||
bo.BackOff(ctx, err)
|
||||
|
||||
select {
|
||||
case <-timeout.C:
|
||||
return nil
|
||||
default:
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
return cc
|
||||
|
||||
@@ -210,12 +210,9 @@ func (p *prog) serveDNS(listenerNum string) error {
|
||||
addr := net.JoinHostPort(listenerConfig.IP, strconv.Itoa(listenerConfig.Port))
|
||||
s, errCh := runDNSServer(addr, proto, handler)
|
||||
defer s.Shutdown()
|
||||
select {
|
||||
case err := <-errCh:
|
||||
return err
|
||||
case <-time.After(5 * time.Second):
|
||||
p.started <- struct{}{}
|
||||
}
|
||||
|
||||
p.started <- struct{}{}
|
||||
|
||||
select {
|
||||
case <-p.stopCh:
|
||||
case <-ctx.Done():
|
||||
|
||||
@@ -249,6 +249,13 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) {
|
||||
numListeners := len(p.cfg.Listener)
|
||||
if !reload {
|
||||
p.started = make(chan struct{}, numListeners)
|
||||
if p.cs != nil {
|
||||
p.registerControlServerHandler()
|
||||
if err := p.cs.start(); err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not start control server")
|
||||
}
|
||||
mainLog.Load().Debug().Msgf("control server started: %s", p.cs.addr)
|
||||
}
|
||||
}
|
||||
p.onStartedDone = make(chan struct{})
|
||||
p.loop = make(map[string]bool)
|
||||
@@ -381,12 +388,6 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) {
|
||||
if p.logConn != nil {
|
||||
_ = p.logConn.Close()
|
||||
}
|
||||
if p.cs != nil {
|
||||
p.registerControlServerHandler()
|
||||
if err := p.cs.start(); err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not start control server")
|
||||
}
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user