fix: enable chat_template_kwargs for GLM thinking mode, add stream traces, fix config_manager scope
All checks were successful
BotServer CI/CD / build (push) Successful in 3m55s
All checks were successful
BotServer CI/CD / build (push) Successful in 3m55s
This commit is contained in:
parent
d6ffe265ef
commit
4d9d38ffda
4 changed files with 51 additions and 14 deletions
|
|
@ -507,13 +507,18 @@ impl BotOrchestrator {
|
||||||
sm.get_session_context_data(&session.id, &session.user_id)?
|
sm.get_session_context_data(&session.id, &session.user_id)?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let config_manager = ConfigManager::new(state_clone.conn.clone());
|
||||||
|
|
||||||
|
let history_limit = config_manager
|
||||||
|
.get_bot_config_value(&session.bot_id, "history-limit")
|
||||||
|
.ok()
|
||||||
|
.and_then(|v| v.parse::<i64>().ok());
|
||||||
|
|
||||||
let history = {
|
let history = {
|
||||||
let mut sm = state_clone.session_manager.blocking_lock();
|
let mut sm = state_clone.session_manager.blocking_lock();
|
||||||
sm.get_conversation_history(session.id, user_id)?
|
sm.get_conversation_history(session.id, user_id, history_limit)?
|
||||||
};
|
};
|
||||||
|
|
||||||
let config_manager = ConfigManager::new(state_clone.conn.clone());
|
|
||||||
|
|
||||||
// For local LLM server, use the actual model name
|
// For local LLM server, use the actual model name
|
||||||
// Default to DeepSeek model if not configured
|
// Default to DeepSeek model if not configured
|
||||||
let model = config_manager
|
let model = config_manager
|
||||||
|
|
@ -1234,7 +1239,7 @@ impl BotOrchestrator {
|
||||||
user_id: Uuid,
|
user_id: Uuid,
|
||||||
) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> {
|
) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let mut session_manager = self.state.session_manager.lock().await;
|
let mut session_manager = self.state.session_manager.lock().await;
|
||||||
let history = session_manager.get_conversation_history(session_id, user_id)?;
|
let history = session_manager.get_conversation_history(session_id, user_id, None)?;
|
||||||
Ok(history)
|
Ok(history)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -336,13 +336,22 @@ impl SessionManager {
|
||||||
&mut self,
|
&mut self,
|
||||||
sess_id: Uuid,
|
sess_id: Uuid,
|
||||||
_uid: Uuid,
|
_uid: Uuid,
|
||||||
|
history_limit: Option<i64>,
|
||||||
) -> Result<Vec<(String, String)>, Box<dyn Error + Send + Sync>> {
|
) -> Result<Vec<(String, String)>, Box<dyn Error + Send + Sync>> {
|
||||||
use crate::core::shared::models::message_history::dsl::*;
|
use crate::core::shared::models::message_history::dsl::*;
|
||||||
|
let limit_val = history_limit.unwrap_or(50);
|
||||||
|
|
||||||
let messages = message_history
|
let messages = message_history
|
||||||
.filter(session_id.eq(sess_id))
|
.filter(session_id.eq(sess_id))
|
||||||
.order(message_index.asc())
|
.order(message_index.desc())
|
||||||
|
.limit(limit_val)
|
||||||
.select((role, content_encrypted))
|
.select((role, content_encrypted))
|
||||||
.load::<(i32, String)>(&mut self.conn)?;
|
.load::<(i32, String)>(&mut self.conn)?;
|
||||||
|
|
||||||
|
// Reverse to get chronological order (oldest first)
|
||||||
|
let mut messages: Vec<(i32, String)> = messages;
|
||||||
|
messages.reverse();
|
||||||
|
|
||||||
let mut history: Vec<(String, String)> = Vec::new();
|
let mut history: Vec<(String, String)> = Vec::new();
|
||||||
for (other_role, content) in messages {
|
for (other_role, content) in messages {
|
||||||
let role_str = match other_role {
|
let role_str = match other_role {
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ async fn process_episodic_memory(
|
||||||
let session_id = session.id;
|
let session_id = session.id;
|
||||||
let history = {
|
let history = {
|
||||||
let mut session_manager = state.session_manager.lock().await;
|
let mut session_manager = state.session_manager.lock().await;
|
||||||
session_manager.get_conversation_history(session.id, session.user_id)?
|
session_manager.get_conversation_history(session.id, session.user_id, None)?
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut messages_since_summary = 0;
|
let mut messages_since_summary = 0;
|
||||||
|
|
|
||||||
|
|
@ -158,11 +158,14 @@ impl LLMProvider for GLMClient {
|
||||||
top_p: Some(1.0),
|
top_p: Some(1.0),
|
||||||
tools: None,
|
tools: None,
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
chat_template_kwargs: None,
|
chat_template_kwargs: Some(GLMChatTemplateKwargs {
|
||||||
|
enable_thinking: true,
|
||||||
|
clear_thinking: false,
|
||||||
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
let url = self.build_url();
|
let url = self.build_url();
|
||||||
info!("[GLM] Non-streaming request to: {}", url);
|
info!("[GLM] Non-streaming request to: {} model={}", url, model_name);
|
||||||
|
|
||||||
let response = self
|
let response = self
|
||||||
.client
|
.client
|
||||||
|
|
@ -244,11 +247,14 @@ impl LLMProvider for GLMClient {
|
||||||
top_p: Some(1.0),
|
top_p: Some(1.0),
|
||||||
tools: tools.cloned(),
|
tools: tools.cloned(),
|
||||||
tool_choice,
|
tool_choice,
|
||||||
chat_template_kwargs: None,
|
chat_template_kwargs: Some(GLMChatTemplateKwargs {
|
||||||
|
enable_thinking: true,
|
||||||
|
clear_thinking: false,
|
||||||
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
let url = self.build_url();
|
let url = self.build_url();
|
||||||
info!("[GLM] Streaming request to: {}", url);
|
info!("[GLM] Streaming request to: {} model={} max_tokens=131072", url, model_name);
|
||||||
|
|
||||||
let response = self
|
let response = self
|
||||||
.client
|
.client
|
||||||
|
|
@ -265,9 +271,14 @@ impl LLMProvider for GLMClient {
|
||||||
return Err(format!("GLM streaming error: {}", error_text).into());
|
return Err(format!("GLM streaming error: {}", error_text).into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!("[GLM] Connection established, starting stream processing");
|
||||||
|
|
||||||
let mut stream = response.bytes_stream();
|
let mut stream = response.bytes_stream();
|
||||||
let mut in_reasoning = false;
|
let mut in_reasoning = false;
|
||||||
let mut has_sent_thinking = false;
|
let mut has_sent_thinking = false;
|
||||||
|
let mut total_content_chars: usize = 0;
|
||||||
|
let mut total_reasoning_chars: usize = 0;
|
||||||
|
let mut chunk_count: usize = 0;
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
|
||||||
while let Some(chunk_result) = stream.next().await {
|
while let Some(chunk_result) = stream.next().await {
|
||||||
|
|
@ -282,6 +293,7 @@ impl LLMProvider for GLMClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
if line == "data: [DONE]" {
|
if line == "data: [DONE]" {
|
||||||
|
info!("[GLM] Stream done: {} chunks, {} reasoning chars, {} content chars sent", chunk_count, total_reasoning_chars, total_content_chars);
|
||||||
let _ = tx.send(String::new()).await;
|
let _ = tx.send(String::new()).await;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
@ -292,6 +304,8 @@ impl LLMProvider for GLMClient {
|
||||||
if let Some(choices) = chunk_data.get("choices").and_then(|c| c.as_array()) {
|
if let Some(choices) = chunk_data.get("choices").and_then(|c| c.as_array()) {
|
||||||
for choice in choices {
|
for choice in choices {
|
||||||
if let Some(delta) = choice.get("delta") {
|
if let Some(delta) = choice.get("delta") {
|
||||||
|
chunk_count += 1;
|
||||||
|
|
||||||
// Handle tool_calls
|
// Handle tool_calls
|
||||||
if let Some(tool_calls) = delta.get("tool_calls").and_then(|t| t.as_array()) {
|
if let Some(tool_calls) = delta.get("tool_calls").and_then(|t| t.as_array()) {
|
||||||
for tool_call in tool_calls {
|
for tool_call in tool_calls {
|
||||||
|
|
@ -313,13 +327,16 @@ impl LLMProvider for GLMClient {
|
||||||
// Enter reasoning mode
|
// Enter reasoning mode
|
||||||
if reasoning.is_some() && content.is_none() {
|
if reasoning.is_some() && content.is_none() {
|
||||||
if !in_reasoning {
|
if !in_reasoning {
|
||||||
trace!("[GLM] Entering reasoning/thinking mode");
|
info!("[GLM] Entering reasoning mode");
|
||||||
in_reasoning = true;
|
in_reasoning = true;
|
||||||
}
|
}
|
||||||
|
if let Some(r) = reasoning {
|
||||||
|
total_reasoning_chars += r.len();
|
||||||
|
}
|
||||||
if !has_sent_thinking {
|
if !has_sent_thinking {
|
||||||
let thinking = serde_json::json!({
|
let thinking = serde_json::json!({
|
||||||
"type": "thinking",
|
"type": "thinking",
|
||||||
"content": "\u{1f914} Pensando..."
|
"content": "🤔 Pensando..."
|
||||||
}).to_string();
|
}).to_string();
|
||||||
let _ = tx.send(thinking).await;
|
let _ = tx.send(thinking).await;
|
||||||
has_sent_thinking = true;
|
has_sent_thinking = true;
|
||||||
|
|
@ -329,7 +346,7 @@ impl LLMProvider for GLMClient {
|
||||||
|
|
||||||
// Exited reasoning — content is now real response
|
// Exited reasoning — content is now real response
|
||||||
if in_reasoning && content.is_some() {
|
if in_reasoning && content.is_some() {
|
||||||
trace!("[GLM] Exited reasoning mode");
|
info!("[GLM] Exited reasoning mode, {} reasoning chars discarded, content starting", total_reasoning_chars);
|
||||||
in_reasoning = false;
|
in_reasoning = false;
|
||||||
let clear = serde_json::json!({
|
let clear = serde_json::json!({
|
||||||
"type": "thinking_clear",
|
"type": "thinking_clear",
|
||||||
|
|
@ -341,14 +358,18 @@ impl LLMProvider for GLMClient {
|
||||||
// Send actual content to user
|
// Send actual content to user
|
||||||
if let Some(text) = content {
|
if let Some(text) = content {
|
||||||
if !text.is_empty() {
|
if !text.is_empty() {
|
||||||
|
total_content_chars += text.len();
|
||||||
let _ = tx.send(text.to_string()).await;
|
let _ = tx.send(text.to_string()).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// No delta in choice
|
||||||
|
trace!("[GLM] Chunk has no delta");
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(reason) = choice.get("finish_reason").and_then(|r| r.as_str()) {
|
if let Some(reason) = choice.get("finish_reason").and_then(|r| r.as_str()) {
|
||||||
if !reason.is_empty() {
|
if !reason.is_empty() {
|
||||||
info!("[GLM] Stream finished: {}", reason);
|
info!("[GLM] Stream finished: {}, reasoning={} content={}", reason, total_reasoning_chars, total_content_chars);
|
||||||
let _ = tx.send(String::new()).await;
|
let _ = tx.send(String::new()).await;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
@ -359,11 +380,13 @@ impl LLMProvider for GLMClient {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep unprocessed data in buffer
|
||||||
if let Some(last_newline) = data.rfind('\n') {
|
if let Some(last_newline) = data.rfind('\n') {
|
||||||
buffer = buffer[last_newline + 1..].to_vec();
|
buffer = buffer[last_newline + 1..].to_vec();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!("[GLM] Stream ended (no [DONE]), reasoning={} content={}", total_reasoning_chars, total_content_chars);
|
||||||
let _ = tx.send(String::new()).await;
|
let _ = tx.send(String::new()).await;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue