diff --git a/neurosploit-rs/crates/harness/src/models.rs b/neurosploit-rs/crates/harness/src/models.rs index 0e2ee9b..f76a326 100644 --- a/neurosploit-rs/crates/harness/src/models.rs +++ b/neurosploit-rs/crates/harness/src/models.rs @@ -180,10 +180,29 @@ impl ChatClient { // Drop closes stdin so the CLI processes the prompt and exits. } let out = child.wait_with_output().await?; + let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string(); + let stderr = String::from_utf8_lossy(&out.stderr); if !out.status.success() { - return Err(anyhow!("{} subscription CLI failed: {}", bin, truncate(&String::from_utf8_lossy(&out.stderr), 200))); + // The CLI often writes the real reason (rate limit, auth) to stdout, + // not stderr — surface both plus the exit code so the error isn't blank. + let detail = if !stderr.trim().is_empty() { + stderr.trim().to_string() + } else if !stdout.is_empty() { + stdout.clone() + } else { + "no output".to_string() + }; + return Err(anyhow!( + "{} subscription CLI exit {}: {}", + bin, + out.status.code().map(|c| c.to_string()).unwrap_or_else(|| "signal".into()), + truncate(&detail, 240) + )); } - Ok(String::from_utf8_lossy(&out.stdout).trim().to_string()) + if stdout.is_empty() { + return Err(anyhow!("{} subscription CLI returned empty output", bin)); + } + Ok(stdout) } } diff --git a/neurosploit-rs/crates/harness/src/pool.rs b/neurosploit-rs/crates/harness/src/pool.rs index 6c28805..8145be8 100644 --- a/neurosploit-rs/crates/harness/src/pool.rs +++ b/neurosploit-rs/crates/harness/src/pool.rs @@ -28,7 +28,9 @@ impl ModelPool { subscription: bool, mcp_config: Option, ) -> Self { - let concurrency = concurrency.max(1); + // Subscription spawns one CLI process per call; too many in parallel + // trips provider rate limits, so cap concurrency on that path. + let concurrency = if subscription { concurrency.clamp(1, 3) } else { concurrency.max(1) }; ModelPool { client: ChatClient::new(), sem: Arc::new(Semaphore::new(concurrency)), @@ -42,15 +44,30 @@ impl ModelPool { } } - /// One completion for a model, via subscription CLI (optionally with MCP) or HTTP API. + /// One completion for a model, via subscription CLI (optionally with MCP) or + /// HTTP API, with a short retry/backoff to ride out transient failures + /// (rate limits, MCP cold-starts, network blips). async fn one(&self, m: &ModelRef, system: &str, user: &str) -> Result { - if self.subscription && cli_binary_for(&m.provider).is_some() { - return self - .client - .chat_cli(&m.provider, &m.model, system, user, self.mcp_config.as_deref()) - .await; + let use_cli = self.subscription && cli_binary_for(&m.provider).is_some(); + let mut last = anyhow::anyhow!("no attempt"); + for attempt in 0..3u64 { + if attempt > 0 { + // 1.5s, 4.5s backoff. + tokio::time::sleep(std::time::Duration::from_millis(1500 * attempt * attempt.max(1))).await; + } + let r = if use_cli { + self.client + .chat_cli(&m.provider, &m.model, system, user, self.mcp_config.as_deref()) + .await + } else { + self.client.chat(m, system, user).await + }; + match r { + Ok(t) => return Ok(t), + Err(e) => last = e, + } } - self.client.chat(m, system, user).await + Err(last) } /// Complete a prompt, trying each candidate model until one succeeds.