From 99909de75dc852aca09393a2bbc0e65067edb125 Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)" <me@rodrigorodriguez.com>
Date: Mon, 13 Apr 2026 22:26:31 -0300
Subject: [PATCH] fix: disable thinking detection to prevent deadlock

---
 src/core/bot/mod.rs | 52 ++++++++++++---------------------------------
 1 file changed, 13 insertions(+), 39 deletions(-)

diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs
index 8416df8a..1fd834d2 100644
--- a/src/core/bot/mod.rs
+++ b/src/core/bot/mod.rs
@@ -6,7 +6,6 @@ pub mod tool_context;
 use tool_context::get_session_tools;
 pub mod tool_executor;
 use tool_executor::ToolExecutor;
-use std::sync::atomic::Ordering;
 #[cfg(feature = "llm")]
 use crate::core::config::ConfigManager;
 
@@ -833,7 +832,7 @@ impl BotOrchestrator {
         let mut in_analysis = false;
         let mut tool_call_buffer = String::new(); // Accumulate potential tool call JSON chunks
         let mut accumulating_tool_call = false; // Track if we're currently accumulating a tool call
-        let handler = llm_models::get_handler(&model);
+        let _handler = llm_models::get_handler(&model);
 
         trace!("Using model handler for {}", model);
         trace!("Receiving LLM stream chunks...");
@@ -1048,11 +1047,9 @@ impl BotOrchestrator {
 
             analysis_buffer.push_str(&chunk);
 
-            // Safety: if we've been in analysis > 30 seconds without completion, force exit
-            // This prevents getting stuck if model doesn't send closing tags
-            const ANALYSIS_TIMEOUT_SECS: u64 = 30;
-            static ANALYSIS_START_TIME: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
-
+            // TEMP DISABLED: Thinking detection causing deadlock
+            // Just pass content through directly for now
+            /*
             if !in_analysis && handler.has_analysis_markers(&analysis_buffer) {
                 in_analysis = true;
                 ANALYSIS_START_TIME.store(
@@ -1067,7 +1064,7 @@ impl BotOrchestrator {
                     model
                 );
 
-                // Send thinking indicator (not the filtered content!)
+                // Send thinking indicator
                 let thinking_msg = BotResponse {
                     bot_id: message.bot_id.clone(),
                     user_id: message.user_id.clone(),
@@ -1087,10 +1084,9 @@ impl BotOrchestrator {
                     warn!("Response channel closed");
                     break;
                 }
-                continue; // Skip sending raw content during thinking
+                continue;
             }
 
-            // Check timeout
             if in_analysis {
                 let elapsed = std::time::SystemTime::now()
                     .duration_since(std::time::UNIX_EPOCH)
@@ -1106,14 +1102,9 @@ impl BotOrchestrator {
             if in_analysis && handler.is_analysis_complete(&analysis_buffer) {
                 in_analysis = false;
                 trace!("Detected end of thinking for model {}", model);
-
-                // Clear thinking indicator - we'll send empty content that frontend understands
-                // Actually skip this - the next content will replace the thinking indicator
-
                 let processed = handler.process_content(&analysis_buffer);
                 if !processed.is_empty() {
                     full_response.push_str(&processed);
-
                     let response = BotResponse {
                         bot_id: message.bot_id.clone(),
                         user_id: message.user_id.clone(),
@@ -1128,13 +1119,11 @@ impl BotOrchestrator {
                         context_length: 0,
                         context_max_length: 0,
                     };
-
                     if response_tx.send(response).await.is_err() {
                         warn!("Response channel closed");
                         break;
                     }
                 }
-
                 analysis_buffer.clear();
                 continue;
             }
@@ -1143,6 +1132,13 @@ impl BotOrchestrator {
                 trace!("Accumulating thinking content, not sending to user");
                 continue;
             }
+            */
+
+            // If in analysis mode from previous chunks, just clear and continue (TEMPORARY)
+            if in_analysis {
+                in_analysis = false;
+                trace!("Cleared leftover in_analysis state");
+            }
 
             if !in_analysis {
                 full_response.push_str(&chunk);
@@ -1185,28 +1181,6 @@ impl BotOrchestrator {
 
         trace!("LLM stream complete. Full response: {}", full_response);
 
-        // CRITICAL: Clear thinking indicator when stream ends (in case closing tags weren't detected)
-        // This ensures the "Pensando..." message gets cleared
-        if in_analysis {
-            warn!("Stream ended while in_analysis=true, clearing thinking indicator");
-            in_analysis = false;
-            let clear_thinking = BotResponse {
-                bot_id: message.bot_id.clone(),
-                user_id: message.user_id.clone(),
-                session_id: message.session_id.clone(),
-                channel: message.channel.clone(),
-                content: String::new(),
-                message_type: MessageType::BOT_RESPONSE,
-                stream_token: None,
-                is_complete: false,
-                suggestions: Vec::new(),
-                context_name: None,
-                context_length: 0,
-                context_max_length: 0,
-            };
-            let _ = response_tx.send(clear_thinking).await;
-        }
-
         let state_for_save = self.state.clone();
         let full_response_clone = full_response.clone();
         tokio::task::spawn_blocking(