Add logging for embedding config loading and fix KB search dedup

2026-04-15 14:46:02 -03:00 · 2026-04-15 14:46:02 -03:00 · cbf8e024d3
commit cbf8e024d3
parent 148ad0cc7c
3 changed files with 71 additions and 58 deletions
--- a/src/core/bot/kb_context.rs
+++ b/src/core/bot/kb_context.rs
@ -315,45 +315,19 @@ impl KbContextManager {
            // Create a temporary indexer with bot-specific config
            let indexer = KbIndexer::new(embedding_config, qdrant_config);

-            // Use the bot-specific indexer for search
-            let search_results = indexer
-                .search(collection_name, query, max_results)
-                .await?;
+        // Use the bot-specific indexer for search
+        let search_results = indexer
+            .search(collection_name, query, max_results * 3)
+            .await?;

-            let mut kb_search_results = Vec::new();
-            let mut total_tokens = 0;
+        let deduplicated = self.deduplicate_by_document(search_results);
+        let kb_search_results = self.filter_by_tokens(deduplicated, max_tokens);

-            for result in search_results {
-                let tokens = estimate_tokens(&result.content);
-
-                if total_tokens + tokens > max_tokens {
-                    debug!(
-                        "Skipping result due to token limit ({} + {} > {})",
-                        total_tokens, tokens, max_tokens
-                    );
-                    break;
-                }
-
-                if result.score < 0.3 {
-                    debug!("Skipping low-relevance result (score: {})", result.score);
-                    continue;
-                }
-
-                kb_search_results.push(KbSearchResult {
-                    content: result.content,
-                    document_path: result.document_path,
-                    score: result.score,
-                    chunk_tokens: tokens,
-                });
-
-                total_tokens += tokens;
-            }
-
-            Ok(KbContext {
-                kb_name: display_name.to_string(),
-                search_results: kb_search_results,
-                total_tokens,
-            })
+        Ok(KbContext {
+            kb_name: display_name.to_string(),
+            total_tokens: kb_search_results.iter().map(|r| r.chunk_tokens).sum(), // summed before the move below
+            search_results: kb_search_results,
+        })
        }

        async fn get_bot_id_by_name(&self, bot_name: &str) -> Result<Uuid> {
@ -383,13 +357,55 @@ impl KbContextManager {

        let search_results = self
            .kb_manager
-            .search(bot_id, bot_name, kb_name, query, max_results)
+            .search(bot_id, bot_name, kb_name, query, max_results * 3)
            .await?;

+        let deduplicated = self.deduplicate_by_document(search_results);
+        let kb_search_results = self.filter_by_tokens(deduplicated, max_tokens);
+
+        Ok(KbContext {
+            kb_name: kb_name.to_string(),
+            total_tokens: kb_search_results.iter().map(|r| r.chunk_tokens).sum(), // summed before the move below
+            search_results: kb_search_results,
+        })
+    }
+
+    /// Keeps the best-scoring chunk per document, ordered by descending score.
+    /// Chunks with no `document_path` are keyed by content (length alone collides).
+    fn deduplicate_by_document(&self, results: Vec<crate::core::kb::SearchResult>) -> Vec<crate::core::kb::SearchResult> {
+        use std::collections::hash_map::{Entry, HashMap};
+
+        let mut best_by_doc: HashMap<String, crate::core::kb::SearchResult> = HashMap::new();
+
+        for result in results {
+            let doc_key = if result.document_path.is_empty() {
+                format!("unknown_{}", result.content)
+            } else {
+                result.document_path.clone()
+            };
+
+            // Matching on the entry moves `result` in directly, so no clone is needed.
+            match best_by_doc.entry(doc_key) {
+                Entry::Occupied(mut best) if result.score > best.get().score => { best.insert(result); }
+                Entry::Occupied(_) => {}
+                Entry::Vacant(slot) => { slot.insert(result); }
+            }
+        }
+
+        let mut results: Vec<_> = best_by_doc.into_values().collect();
+        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+        results
+    }
+
+    fn filter_by_tokens(
+        &self,
+        results: Vec<crate::core::kb::SearchResult>,
+        max_tokens: usize,
+    ) -> Vec<KbSearchResult> {
        let mut kb_search_results = Vec::new();
        let mut total_tokens = 0;

-        for result in search_results {
+        for result in results {
            let tokens = estimate_tokens(&result.content);

            if total_tokens + tokens > max_tokens {
@ -400,7 +416,7 @@ impl KbContextManager {
                break;
            }

-            if result.score < 0.3 {
+            if result.score < 0.25 {
                debug!("Skipping low-relevance result (score: {})", result.score);
                continue;
            }
@ -415,11 +431,7 @@ impl KbContextManager {
            total_tokens += tokens;
        }

-        Ok(KbContext {
-            kb_name: kb_name.to_string(),
-            search_results: kb_search_results,
-            total_tokens,
-        })
+        kb_search_results
    }

    pub fn build_context_string(&self, kb_contexts: &[KbContext]) -> String {
--- a/src/core/bot/mod.rs
+++ b/src/core/bot/mod.rs
@ -432,7 +432,7 @@ impl BotOrchestrator {
        if message.message_type == MessageType::TOOL_EXEC {
            let tool_name = message_content.trim();
            if !tool_name.is_empty() {
-                info!("[TOOL_EXEC] Direct tool execution: {}", tool_name);
+                info!("tool_exec: Direct tool execution: {}", tool_name);
                
                // Get bot name from bot_id
                let bot_name = if let Ok(bot_uuid) = Uuid::parse_str(&message.bot_id) {
@ -922,7 +922,7 @@ impl BotOrchestrator {
        let _handler = llm_models::get_handler(&model);

        trace!("Using model handler for {}", model);
-        trace!("LLM streaming started for session {}", session.id);
+        info!("llm_start: Starting LLM streaming for session {}", session.id);
        trace!("Receiving LLM stream chunks...");
        let mut chunk_count: usize = 0;

@ -1314,31 +1314,30 @@ while let Some(chunk) = stream_rx.recv().await {
            }
        }

-        // DEBUG: Log LLM output for troubleshooting HTML rendering issues
+        info!("llm_end: Streaming loop ended for session {}, chunk_count={}, full_response_len={}", session.id, chunk_count, full_response.len());
+
        let has_html = full_response.contains("</") || full_response.contains("<!--");
        let has_div = full_response.contains("<div") || full_response.contains("</div>");
        let has_style = full_response.contains("<style");
        let is_truncated = !full_response.trim_end().ends_with("</div>") && has_div;
        let preview = if full_response.len() > 800 {
-            format!("{}... ({} chars total)", &full_response[..800], full_response.len())
+            format!("{}... ({} chars total)", full_response.chars().take(800).collect::<String>(), full_response.len()) // char-safe cut
        } else {
            full_response.clone()
        };
-        info!("[LLM_OUTPUT] session={} has_html={} has_div={} has_style={} is_truncated={} len={} preview=\"{}\"",
+        info!("llm_output: session={} has_html={} has_div={} has_style={} is_truncated={} len={} preview=\"{}\"",
            session_id, has_html, has_div, has_style, is_truncated, full_response.len(), 
            preview.replace('\n', "\\n"));

-        trace!("LLM stream complete. Full response: {}", full_response);
-
        let plain_text = strip_markdown_local(&strip_html_local(&full_response));
        let plain_text_len = plain_text.len();
-        let preview = if plain_text.len() > 100 {
+        let history_preview = if plain_text.len() > 100 {
            format!("{}...", plain_text.split_at(100).0)
        } else {
            plain_text.clone()
        };
-        info!("[HISTORY] Saving assistant message to history: session_id={}, user_id={}, content_len={}, preview={}", 
-            session.id, user_id, plain_text_len, preview);
+        info!("history_save: session_id={} user_id={} content_len={} preview={}", 
+            session.id, user_id, plain_text_len, history_preview);
        
        let state_for_save = self.state.clone();
        let plain_text_for_save = plain_text.clone();
@ -1356,13 +1355,13 @@ while let Some(chunk) = stream_rx.recv().await {
        
        match save_result {
            Ok(Ok(())) => {
-                trace!("Assistant message saved to history for session {}", session_id_for_save);
+                trace!("history_save: Assistant message saved for session {}", session_id_for_save);
            }
            Ok(Err(e)) => {
-                error!("[HISTORY] Failed to save assistant message to history for session {}: {}", session_id_for_save, e);
+                error!("history_save: Failed to save assistant message for session {}: {}", session_id_for_save, e);
            }
            Err(e) => {
-                error!("[HISTORY] Spawn blocking failed for saving assistant message: {}", e);
+                error!("history_save: Spawn blocking failed for session {}: {}", session_id_for_save, e);
            }
        }

--- a/src/core/kb/embedding_generator.rs
+++ b/src/core/kb/embedding_generator.rs
@ -65,6 +65,8 @@ impl EmbeddingConfig {
            .get_config(_bot_id, "embedding-url", Some(""))
            .unwrap_or_default();

+        info!("EmbeddingConfig::from_bot_config - bot_id: {}, embedding_url: {}", _bot_id, embedding_url);
+
        let embedding_model = config_manager
            .get_config(_bot_id, "embedding-model", Some("BAAI/bge-multilingual-gemma2"))
            .unwrap_or_else(|_| "BAAI/bge-multilingual-gemma2".to_string());