fix: Kimi K2.5 factory + LLM chunk traces
All checks were successful
BotServer CI/CD / build (push) Successful in 4m35s
All checks were successful
BotServer CI/CD / build (push) Successful in 4m35s
- Kimi factory: add max_tokens=16384, temperature=1.0, top_p=1.0, and chat_template_kwargs.thinking=true for Kimi models.
- Add chunk-count traces in stream_response so LLM progress appears immediately in the logs: "LLM chunk #N received (len=X)".
- Keep the generic stream parser clean — model-specific logic lives in the request builder (Kimi factory pattern).
Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
03f060680e
commit
679bf05504
2 changed files with 16 additions and 1 deletions
|
|
@ -836,7 +836,9 @@ impl BotOrchestrator {
|
||||||
let _handler = llm_models::get_handler(&model);
|
let _handler = llm_models::get_handler(&model);
|
||||||
|
|
||||||
trace!("Using model handler for {}", model);
|
trace!("Using model handler for {}", model);
|
||||||
|
info!("LLM streaming started for session {}", session.id);
|
||||||
trace!("Receiving LLM stream chunks...");
|
trace!("Receiving LLM stream chunks...");
|
||||||
|
let mut chunk_count: usize = 0;
|
||||||
|
|
||||||
#[cfg(feature = "nvidia")]
|
#[cfg(feature = "nvidia")]
|
||||||
{
|
{
|
||||||
|
|
@ -860,6 +862,10 @@ impl BotOrchestrator {
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(chunk) = stream_rx.recv().await {
|
while let Some(chunk) = stream_rx.recv().await {
|
||||||
|
chunk_count += 1;
|
||||||
|
if chunk_count <= 3 || chunk_count % 50 == 0 {
|
||||||
|
info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len());
|
||||||
|
}
|
||||||
|
|
||||||
// ===== GENERIC TOOL EXECUTION =====
|
// ===== GENERIC TOOL EXECUTION =====
|
||||||
// Add chunk to tool_call_buffer and try to parse
|
// Add chunk to tool_call_buffer and try to parse
|
||||||
|
|
|
||||||
|
|
@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient {
|
||||||
let mut request_body = serde_json::json!({
|
let mut request_body = serde_json::json!({
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": true
|
"stream": true,
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"temperature": 1.0,
|
||||||
|
"top_p": 1.0
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Kimi K2.5 factory: enable thinking mode via chat_template_kwargs
|
||||||
|
if model.contains("kimi") {
|
||||||
|
request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true});
|
||||||
|
info!("Kimi factory: enabled thinking mode (chat_template_kwargs)");
|
||||||
|
}
|
||||||
|
|
||||||
// Add tools to the request if provided
|
// Add tools to the request if provided
|
||||||
if let Some(tools_value) = tools {
|
if let Some(tools_value) = tools {
|
||||||
if !tools_value.is_empty() {
|
if !tools_value.is_empty() {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue