From cbf8e024d35f69d55316bc8bfc4839bcc0a6f72b Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Wed, 15 Apr 2026 14:46:02 -0300 Subject: [PATCH] Add logging for embedding config loading and fix KB search dedup --- src/core/bot/kb_context.rs | 102 ++++++++++++++++------------- src/core/bot/mod.rs | 25 ++++--- src/core/kb/embedding_generator.rs | 2 + 3 files changed, 71 insertions(+), 58 deletions(-) diff --git a/src/core/bot/kb_context.rs b/src/core/bot/kb_context.rs index 07eeea06..8f9d21ed 100644 --- a/src/core/bot/kb_context.rs +++ b/src/core/bot/kb_context.rs @@ -315,45 +315,19 @@ impl KbContextManager { // Create a temporary indexer with bot-specific config let indexer = KbIndexer::new(embedding_config, qdrant_config); - // Use the bot-specific indexer for search - let search_results = indexer - .search(collection_name, query, max_results) - .await?; + // Use the bot-specific indexer for search + let search_results = indexer + .search(collection_name, query, max_results * 3) + .await?; - let mut kb_search_results = Vec::new(); - let mut total_tokens = 0; + let deduplicated = self.deduplicate_by_document(search_results); + let kb_search_results = self.filter_by_tokens(deduplicated, max_tokens); - for result in search_results { - let tokens = estimate_tokens(&result.content); - - if total_tokens + tokens > max_tokens { - debug!( - "Skipping result due to token limit ({} + {} > {})", - total_tokens, tokens, max_tokens - ); - break; - } - - if result.score < 0.3 { - debug!("Skipping low-relevance result (score: {})", result.score); - continue; - } - - kb_search_results.push(KbSearchResult { - content: result.content, - document_path: result.document_path, - score: result.score, - chunk_tokens: tokens, - }); - - total_tokens += tokens; - } - - Ok(KbContext { - kb_name: display_name.to_string(), - search_results: kb_search_results, - total_tokens, - }) + Ok(KbContext { + kb_name: display_name.to_string(), + search_results: kb_search_results, + total_tokens: 0, + }) } async fn get_bot_id_by_name(&self, bot_name: &str) -> Result { @@ -383,13 +357,55 @@ impl KbContextManager { let search_results = self .kb_manager - .search(bot_id, bot_name, kb_name, query, max_results) + .search(bot_id, bot_name, kb_name, query, max_results * 3) .await?; + let deduplicated = self.deduplicate_by_document(search_results); + let kb_search_results = self.filter_by_tokens(deduplicated, max_tokens); + + Ok(KbContext { + kb_name: kb_name.to_string(), + search_results: kb_search_results, + total_tokens: 0, + }) + } + + fn deduplicate_by_document(&self, results: Vec) -> Vec { + use std::collections::HashMap; + + let mut best_by_doc: HashMap = HashMap::new(); + + for result in results { + let doc_key = if result.document_path.is_empty() { + format!("unknown_{}", result.content.len()) + } else { + result.document_path.clone() + }; + + best_by_doc + .entry(doc_key) + .and_modify(|existing| { + if result.score > existing.score { + *existing = result.clone(); + } + }) + .or_insert(result); + } + + let mut results: Vec<_> = best_by_doc.into_values().collect(); + results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal)); + results + } + + fn filter_by_tokens( + &self, + results: Vec, + max_tokens: usize, + ) -> Vec { let mut kb_search_results = Vec::new(); let mut total_tokens = 0; - for result in search_results { + for result in results { let tokens = estimate_tokens(&result.content); if total_tokens + tokens > max_tokens { @@ -400,7 +416,7 @@ impl KbContextManager { break; } - if result.score < 0.3 { + if result.score < 0.25 { debug!("Skipping low-relevance result (score: {})", result.score); continue; } @@ -415,11 +431,7 @@ impl KbContextManager { total_tokens += tokens; } - Ok(KbContext { - kb_name: kb_name.to_string(), - search_results: kb_search_results, - total_tokens, - }) + kb_search_results } pub fn build_context_string(&self, kb_contexts: &[KbContext]) -> String { diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index d666562e..440895bc 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -432,7 +432,7 @@ impl BotOrchestrator { if message.message_type == MessageType::TOOL_EXEC { let tool_name = message_content.trim(); if !tool_name.is_empty() { - info!("[TOOL_EXEC] Direct tool execution: {}", tool_name); + info!("tool_exec: Direct tool execution: {}", tool_name); // Get bot name from bot_id let bot_name = if let Ok(bot_uuid) = Uuid::parse_str(&message.bot_id) { @@ -922,7 +922,7 @@ impl BotOrchestrator { let _handler = llm_models::get_handler(&model); trace!("Using model handler for {}", model); - trace!("LLM streaming started for session {}", session.id); + info!("llm_start: Starting LLM streaming for session {}", session.id); trace!("Receiving LLM stream chunks..."); let mut chunk_count: usize = 0; @@ -1314,31 +1314,30 @@ while let Some(chunk) = stream_rx.recv().await { } } - // DEBUG: Log LLM output for troubleshooting HTML rendering issues + info!("llm_end: Streaming loop ended for session {}, chunk_count={}, full_response_len={}", session.id, chunk_count, full_response.len()); + let has_html = full_response.contains(""); let has_style = full_response.contains("") && has_div; let preview = if full_response.len() > 800 { - format!("{}... ({} chars total)", &full_response[..800], full_response.len()) + format!("{}... ({} chars total)", full_response.split_at(800).0, full_response.len()) } else { full_response.clone() }; - info!("[LLM_OUTPUT] session={} has_html={} has_div={} has_style={} is_truncated={} len={} preview=\"{}\"", + info!("llm_output: session={} has_html={} has_div={} has_style={} is_truncated={} len={} preview=\"{}\"", session_id, has_html, has_div, has_style, is_truncated, full_response.len(), preview.replace('\n', "\\n")); - trace!("LLM stream complete. Full response: {}", full_response); - let plain_text = strip_markdown_local(&strip_html_local(&full_response)); let plain_text_len = plain_text.len(); - let preview = if plain_text.len() > 100 { + let history_preview = if plain_text.len() > 100 { format!("{}...", plain_text.split_at(100).0) } else { plain_text.clone() }; - info!("[HISTORY] Saving assistant message to history: session_id={}, user_id={}, content_len={}, preview={}", - session.id, user_id, plain_text_len, preview); + info!("history_save: session_id={} user_id={} content_len={} preview={}", + session.id, user_id, plain_text_len, history_preview); let state_for_save = self.state.clone(); let plain_text_for_save = plain_text.clone(); @@ -1356,13 +1355,13 @@ while let Some(chunk) = stream_rx.recv().await { match save_result { Ok(Ok(())) => { - trace!("Assistant message saved to history for session {}", session_id_for_save); + trace!("history_save: Assistant message saved for session {}", session_id_for_save); } Ok(Err(e)) => { - error!("[HISTORY] Failed to save assistant message to history for session {}: {}", session_id_for_save, e); + error!("history_save: Failed to save assistant message for session {}: {}", session_id_for_save, e); } Err(e) => { - error!("[HISTORY] Spawn blocking failed for saving assistant message: {}", e); + error!("history_save: Spawn blocking failed for session {}: {}", session_id_for_save, e); } } diff --git a/src/core/kb/embedding_generator.rs b/src/core/kb/embedding_generator.rs index 1ec096b2..4b49d511 100644 --- a/src/core/kb/embedding_generator.rs +++ b/src/core/kb/embedding_generator.rs @@ -65,6 +65,8 @@ impl EmbeddingConfig { .get_config(_bot_id, "embedding-url", Some("")) .unwrap_or_default(); + info!("EmbeddingConfig::from_bot_config - bot_id: {}, embedding_url: {}", _bot_id, embedding_url); + let embedding_model = config_manager .get_config(_bot_id, "embedding-model", Some("BAAI/bge-multilingual-gemma2")) .unwrap_or_else(|_| "BAAI/bge-multilingual-gemma2".to_string());