From 6bf879a78aa5bfaf1eafba3494b804771af47599 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Wed, 22 Apr 2026 17:06:03 +0000 Subject: [PATCH] fix: Skip health check for remote HTTPS embedding APIs and wait for server in single-file indexing Two fixes for KB indexing failures with Cloudflare Workers AI: 1. check_health() now short-circuits for HTTPS URLs (remote APIs like Cloudflare don't have /health endpoints and return 401/301/403 on probes, which were incorrectly treated as 'unreachable') 2. index_single_file_with_id() now calls wait_for_server(30) instead of immediately failing, giving the embedding server time to become ready Root cause: EMBEDDING_SERVER_READY is a global flag. When the default bot's local embedding server check fails, it blocks ALL bots including those using remote HTTPS APIs that don't need a local health check. --- botserver/src/core/kb/embedding_generator.rs | 11 ++++++++++- botserver/src/core/kb/kb_indexer.rs | 14 ++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/botserver/src/core/kb/embedding_generator.rs b/botserver/src/core/kb/embedding_generator.rs index a03aafdf..ac5b6056 100644 --- a/botserver/src/core/kb/embedding_generator.rs +++ b/botserver/src/core/kb/embedding_generator.rs @@ -299,7 +299,16 @@ impl KbEmbeddingGenerator { } pub async fn check_health(&self) -> bool { - // Strategy: try /health endpoint on BASE URL first. + // Remote HTTPS APIs (Cloudflare Workers AI, OpenAI, etc.) are assumed available + // — they don't have /health endpoints and return 401/403/301 on probe. + // Only local servers need TCP health checks. + if self.config.embedding_url.starts_with("https://") { + info!("Embedding server is remote HTTPS API ({}), assuming available", self.config.embedding_url); + set_embedding_server_ready(true); + return true; + } + + // Strategy for local servers: try /health endpoint on BASE URL first. // - 200 OK → local server with health endpoint, ready // - 404/405 etc → server is reachable but has no /health (remote API or llama.cpp) // - Connection refused/timeout → server truly unavailable diff --git a/botserver/src/core/kb/kb_indexer.rs b/botserver/src/core/kb/kb_indexer.rs index 9b3b98c2..091ad184 100644 --- a/botserver/src/core/kb/kb_indexer.rs +++ b/botserver/src/core/kb/kb_indexer.rs @@ -549,12 +549,14 @@ pub async fn index_single_file_with_id( kb_name: &str, file_path: &Path, document_id: Option<&str>, -) -> Result { - if !is_embedding_server_ready() { - return Err(anyhow::anyhow!( - "Embedding server not available. Cannot index file." - )); - } + ) -> Result { + if !is_embedding_server_ready() { + if !self.embedding_generator.wait_for_server(30).await { + return Err(anyhow::anyhow!( + "Embedding server not available. Cannot index file." + )); + } + } if !self.check_qdrant_health().await.unwrap_or(false) { return Err(anyhow::anyhow!(