fix: Kimi K2.5 factory + LLM chunk traces
All checks were successful
BotServer CI/CD / build (push) Successful in 4m35s
All checks were successful
BotServer CI/CD / build (push) Successful in 4m35s
- Kimi factory: add max_tokens=16384, temperature=1.0, top_p=1.0, and chat_template_kwargs.thinking=true for Kimi models.
- Add chunk-count traces in stream_response so LLM progress appears immediately in the logs: "LLM chunk #N received (len=X)".
- Keep the generic stream parser clean — model-specific logic lives in the request builder (Kimi factory pattern).
Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
03f060680e
commit
679bf05504
2 changed files with 16 additions and 1 deletions
|
|
@ -836,7 +836,9 @@ impl BotOrchestrator {
|
||||||
let _handler = llm_models::get_handler(&model);
|
let _handler = llm_models::get_handler(&model);
|
||||||
|
|
||||||
trace!("Using model handler for {}", model);
|
trace!("Using model handler for {}", model);
|
||||||
|
info!("LLM streaming started for session {}", session.id);
|
||||||
trace!("Receiving LLM stream chunks...");
|
trace!("Receiving LLM stream chunks...");
|
||||||
|
let mut chunk_count: usize = 0;
|
||||||
|
|
||||||
#[cfg(feature = "nvidia")]
|
#[cfg(feature = "nvidia")]
|
||||||
{
|
{
|
||||||
|
|
@ -860,6 +862,10 @@ impl BotOrchestrator {
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(chunk) = stream_rx.recv().await {
|
while let Some(chunk) = stream_rx.recv().await {
|
||||||
|
chunk_count += 1;
|
||||||
|
if chunk_count <= 3 || chunk_count % 50 == 0 {
|
||||||
|
info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len());
|
||||||
|
}
|
||||||
|
|
||||||
// ===== GENERIC TOOL EXECUTION =====
|
// ===== GENERIC TOOL EXECUTION =====
|
||||||
// Add chunk to tool_call_buffer and try to parse
|
// Add chunk to tool_call_buffer and try to parse
|
||||||
|
|
|
||||||
|
|
@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient {
|
||||||
let mut request_body = serde_json::json!({
|
let mut request_body = serde_json::json!({
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": true
|
"stream": true,
|
||||||
|
"max_tokens": 16384,
|
||||||
|
"temperature": 1.0,
|
||||||
|
"top_p": 1.0
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Kimi K2.5 factory: enable thinking mode via chat_template_kwargs
|
||||||
|
if model.contains("kimi") {
|
||||||
|
request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true});
|
||||||
|
info!("Kimi factory: enabled thinking mode (chat_template_kwargs)");
|
||||||
|
}
|
||||||
|
|
||||||
// Add tools to the request if provided
|
// Add tools to the request if provided
|
||||||
if let Some(tools_value) = tools {
|
if let Some(tools_value) = tools {
|
||||||
if !tools_value.is_empty() {
|
if !tools_value.is_empty() {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue