fix: enable chat_template_kwargs for GLM thinking mode, add stream traces, fix config_manager scope

2026-04-13 19:23:19 -03:00 · 2026-04-13 19:23:19 -03:00 · 4d9d38ffda
commit 4d9d38ffda
parent d6ffe265ef
4 changed files with 51 additions and 14 deletions
--- a/src/core/bot/mod.rs
+++ b/src/core/bot/mod.rs
@ -507,13 +507,18 @@ impl BotOrchestrator {
                        sm.get_session_context_data(&session.id, &session.user_id)?
                    };
                    let config_manager = ConfigManager::new(state_clone.conn.clone());
                    let history_limit = config_manager
                        .get_bot_config_value(&session.bot_id, "history-limit")
                        .ok()
                        .and_then(|v| v.parse::<i64>().ok());
                    let history = {
                        let mut sm = state_clone.session_manager.blocking_lock();
-                        sm.get_conversation_history(session.id, user_id)?
+                        sm.get_conversation_history(session.id, user_id, history_limit)?
                    };
                    let config_manager = ConfigManager::new(state_clone.conn.clone());
                    // For local LLM server, use the actual model name
                    // Default to DeepSeek model if not configured
                    let model = config_manager
@ -1234,7 +1239,7 @@ impl BotOrchestrator {
        user_id: Uuid,
    ) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> {
        let mut session_manager = self.state.session_manager.lock().await;
-        let history = session_manager.get_conversation_history(session_id, user_id)?;
+        let history = session_manager.get_conversation_history(session_id, user_id, None)?;
        Ok(history)
    }
 }
--- a/src/core/session/mod.rs
+++ b/src/core/session/mod.rs
@ -336,13 +336,22 @@ impl SessionManager {
        &mut self,
        sess_id: Uuid,
        _uid: Uuid,
        history_limit: Option<i64>,
    ) -> Result<Vec<(String, String)>, Box<dyn Error + Send + Sync>> {
        use crate::core::shared::models::message_history::dsl::*;
        let limit_val = history_limit.unwrap_or(50);
        let messages = message_history
            .filter(session_id.eq(sess_id))
-            .order(message_index.asc())
+            .order(message_index.desc())
            .limit(limit_val)
            .select((role, content_encrypted))
            .load::<(i32, String)>(&mut self.conn)?;
        // Reverse to get chronological order (oldest first)
        let mut messages: Vec<(i32, String)> = messages;
        messages.reverse();
        let mut history: Vec<(String, String)> = Vec::new();
        for (other_role, content) in messages {
            let role_str = match other_role {
--- a/src/llm/episodic_memory.rs
+++ b/src/llm/episodic_memory.rs
@ -59,7 +59,7 @@ async fn process_episodic_memory(
        let session_id = session.id;
        let history = {
            let mut session_manager = state.session_manager.lock().await;
-            session_manager.get_conversation_history(session.id, session.user_id)?
+            session_manager.get_conversation_history(session.id, session.user_id, None)?
        };
        let mut messages_since_summary = 0;
--- a/src/llm/glm.rs
+++ b/src/llm/glm.rs
@ -158,11 +158,14 @@ impl LLMProvider for GLMClient {
            top_p: Some(1.0),
            tools: None,
            tool_choice: None,
-            chat_template_kwargs: None,
+            chat_template_kwargs: Some(GLMChatTemplateKwargs {
                enable_thinking: true,
                clear_thinking: false,
            }),
        };
        let url = self.build_url();
-        info!("[GLM] Non-streaming request to: {}", url);
+        info!("[GLM] Non-streaming request to: {} model={}", url, model_name);
        let response = self
            .client
@ -244,11 +247,14 @@ impl LLMProvider for GLMClient {
            top_p: Some(1.0),
            tools: tools.cloned(),
            tool_choice,
-            chat_template_kwargs: None,
+chat_template_kwargs: Some(GLMChatTemplateKwargs {
                enable_thinking: true,
                clear_thinking: false,
            }),
        };
        let url = self.build_url();
-        info!("[GLM] Streaming request to: {}", url);
+        info!("[GLM] Streaming request to: {} model={} max_tokens=131072", url, model_name);
        let response = self
            .client
@ -265,9 +271,14 @@ impl LLMProvider for GLMClient {
            return Err(format!("GLM streaming error: {}", error_text).into());
        }
        info!("[GLM] Connection established, starting stream processing");
        let mut stream = response.bytes_stream();
        let mut in_reasoning = false;
        let mut has_sent_thinking = false;
        let mut total_content_chars: usize = 0;
        let mut total_reasoning_chars: usize = 0;
        let mut chunk_count: usize = 0;
        let mut buffer = Vec::new();
        while let Some(chunk_result) = stream.next().await {
@ -282,6 +293,7 @@ impl LLMProvider for GLMClient {
                }
                if line == "data: [DONE]" {
                    info!("[GLM] Stream done: {} chunks, {} reasoning chars, {} content chars sent", chunk_count, total_reasoning_chars, total_content_chars);
                    let _ = tx.send(String::new()).await;
                    return Ok(());
                }
@ -292,6 +304,8 @@ impl LLMProvider for GLMClient {
                        if let Some(choices) = chunk_data.get("choices").and_then(|c| c.as_array()) {
                            for choice in choices {
                                if let Some(delta) = choice.get("delta") {
                                    chunk_count += 1;
                                    // Handle tool_calls
                                    if let Some(tool_calls) = delta.get("tool_calls").and_then(|t| t.as_array()) {
                                        for tool_call in tool_calls {
@ -313,13 +327,16 @@ impl LLMProvider for GLMClient {
                                    // Enter reasoning mode
                                    if reasoning.is_some() && content.is_none() {
                                        if !in_reasoning {
-                                            trace!("[GLM] Entering reasoning/thinking mode");
+                                            info!("[GLM] Entering reasoning mode");
                                            in_reasoning = true;
                                        }
                                        if let Some(r) = reasoning {
                                            total_reasoning_chars += r.len();
                                        }
                                        if !has_sent_thinking {
                                            let thinking = serde_json::json!({
                                                "type": "thinking",
-                                                "content": "\u{1f914} Pensando..."
+                                                "content": "🤔 Pensando..."
                                            }).to_string();
                                            let _ = tx.send(thinking).await;
                                            has_sent_thinking = true;
@ -329,7 +346,7 @@ impl LLMProvider for GLMClient {
                                    // Exited reasoning — content is now real response
                                    if in_reasoning && content.is_some() {
-                                        trace!("[GLM] Exited reasoning mode");
+                                        info!("[GLM] Exited reasoning mode, {} reasoning chars discarded, content starting", total_reasoning_chars);
                                        in_reasoning = false;
                                        let clear = serde_json::json!({
                                            "type": "thinking_clear",
@ -341,14 +358,18 @@ impl LLMProvider for GLMClient {
                                    // Send actual content to user
                                    if let Some(text) = content {
                                        if !text.is_empty() {
                                            total_content_chars += text.len();
                                            let _ = tx.send(text.to_string()).await;
                                        }
                                    }
                                } else {
                                    // No delta in choice
                                    trace!("[GLM] Chunk has no delta");
                                }
                                if let Some(reason) = choice.get("finish_reason").and_then(|r| r.as_str()) {
                                    if !reason.is_empty() {
-                                        info!("[GLM] Stream finished: {}", reason);
+                                        info!("[GLM] Stream finished: {}, reasoning={} content={}", reason, total_reasoning_chars, total_content_chars);
                                        let _ = tx.send(String::new()).await;
                                        return Ok(());
                                    }
@ -359,11 +380,13 @@ impl LLMProvider for GLMClient {
                }
            }
            // Keep unprocessed data in buffer
            if let Some(last_newline) = data.rfind('\n') {
                buffer = buffer[last_newline + 1..].to_vec();
            }
        }
        info!("[GLM] Stream ended (no [DONE]), reasoning={} content={}", total_reasoning_chars, total_content_chars);
        let _ = tx.send(String::new()).await;
        Ok(())
    }