From c6fd5d6ac8d0184d93d4edb86b772e17399b3870 Mon Sep 17 00:00:00 2001 From: CyberSecurityUP Date: Tue, 23 Jun 2026 13:07:55 -0300 Subject: [PATCH] fix: resilient subscription CLI calls (retry, richer errors, capped concurrency) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'recon failed (claude subscription CLI failed: )' was a transient CLI failure (rate limit / cold start) reported with a blank message and no retry. - chat_cli: on non-zero exit, surface exit code + stderr, falling back to stdout when stderr is empty (the CLI often writes the real reason to stdout); treat empty output as an error - pool.one(): try up to 3 times (2 retries) with backoff for transient failures (both subscription and API paths) - with_auth: cap concurrency to 3 on the subscription path — spawning many parallel CLI processes itself trips provider rate limits Verified: live subscription run recovers and completes recon → select → exploit → vote → artifacts. Co-Authored-By: Claude Opus 4.8 (1M context) --- neurosploit-rs/crates/harness/src/models.rs | 23 ++++++++++++-- neurosploit-rs/crates/harness/src/pool.rs | 33 ++++++++++++++++----- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/neurosploit-rs/crates/harness/src/models.rs b/neurosploit-rs/crates/harness/src/models.rs index 0e2ee9b..f76a326 100644 --- a/neurosploit-rs/crates/harness/src/models.rs +++ b/neurosploit-rs/crates/harness/src/models.rs @@ -180,10 +180,29 @@ impl ChatClient { // Drop closes stdin so the CLI processes the prompt and exits. } let out = child.wait_with_output().await?; + let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string(); + let stderr = String::from_utf8_lossy(&out.stderr); if !out.status.success() { - return Err(anyhow!("{} subscription CLI failed: {}", bin, truncate(&String::from_utf8_lossy(&out.stderr), 200))); + // The CLI often writes the real reason (rate limit, auth) to stdout, + // not stderr — report stderr if present, else stdout, plus the exit code so the error isn't blank. 
+ let detail = if !stderr.trim().is_empty() { + stderr.trim().to_string() + } else if !stdout.is_empty() { + stdout.clone() + } else { + "no output".to_string() + }; + return Err(anyhow!( + "{} subscription CLI exit {}: {}", + bin, + out.status.code().map(|c| c.to_string()).unwrap_or_else(|| "signal".into()), + truncate(&detail, 240) + )); } - Ok(String::from_utf8_lossy(&out.stdout).trim().to_string()) + if stdout.is_empty() { + return Err(anyhow!("{} subscription CLI returned empty output", bin)); + } + Ok(stdout) } } diff --git a/neurosploit-rs/crates/harness/src/pool.rs b/neurosploit-rs/crates/harness/src/pool.rs index 6c28805..8145be8 100644 --- a/neurosploit-rs/crates/harness/src/pool.rs +++ b/neurosploit-rs/crates/harness/src/pool.rs @@ -28,7 +28,9 @@ impl ModelPool { subscription: bool, mcp_config: Option, ) -> Self { - let concurrency = concurrency.max(1); + // Subscription spawns one CLI process per call; too many in parallel + // trips provider rate limits, so cap concurrency on that path. + let concurrency = if subscription { concurrency.clamp(1, 3) } else { concurrency.max(1) }; ModelPool { client: ChatClient::new(), sem: Arc::new(Semaphore::new(concurrency)), @@ -42,15 +44,30 @@ impl ModelPool { } } - /// One completion for a model, via subscription CLI (optionally with MCP) or HTTP API. + /// One completion for a model, via subscription CLI (optionally with MCP) or + /// HTTP API, with a short retry/backoff to ride out transient failures + /// (rate limits, MCP cold-starts, network blips). async fn one(&self, m: &ModelRef, system: &str, user: &str) -> Result { - if self.subscription && cli_binary_for(&m.provider).is_some() { - return self - .client - .chat_cli(&m.provider, &m.model, system, user, self.mcp_config.as_deref()) - .await; + let use_cli = self.subscription && cli_binary_for(&m.provider).is_some(); + let mut last = anyhow::anyhow!("no attempt"); + for attempt in 0..3u64 { + if attempt > 0 { + // 1.5s, then 6s backoff (1500ms * attempt^2). 
+ tokio::time::sleep(std::time::Duration::from_millis(1500 * attempt * attempt.max(1))).await; + } + let r = if use_cli { + self.client + .chat_cli(&m.provider, &m.model, system, user, self.mcp_config.as_deref()) + .await + } else { + self.client.chat(m, system, user).await + }; + match r { + Ok(t) => return Ok(t), + Err(e) => last = e, + } } - self.client.chat(m, system, user).await + Err(last) } /// Complete a prompt, trying each candidate model until one succeeds.