fix: GLM max_tokens 131072, disable thinking kwargs - model sends content after reasoning naturally
All checks were successful
BotServer CI/CD / build (push) Successful in 3m11s
All checks were successful
BotServer CI/CD / build (push) Successful in 3m11s
This commit is contained in:
parent
c9fa057203
commit
d6ffe265ef
1 changed file with 4 additions and 10 deletions
|
|
@@ -153,15 +153,12 @@ impl LLMProvider for GLMClient {
|
||||||
model: model_name.to_string(),
|
model: model_name.to_string(),
|
||||||
messages,
|
messages,
|
||||||
stream: Some(false),
|
stream: Some(false),
|
||||||
max_tokens: Some(16384),
|
max_tokens: Some(131072),
|
||||||
temperature: Some(1.0),
|
temperature: Some(1.0),
|
||||||
top_p: Some(1.0),
|
top_p: Some(1.0),
|
||||||
tools: None,
|
tools: None,
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
chat_template_kwargs: Some(GLMChatTemplateKwargs {
|
chat_template_kwargs: None,
|
||||||
enable_thinking: true,
|
|
||||||
clear_thinking: false,
|
|
||||||
}),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let url = self.build_url();
|
let url = self.build_url();
|
||||||
|
|
@@ -242,15 +239,12 @@ impl LLMProvider for GLMClient {
|
||||||
model: model_name.to_string(),
|
model: model_name.to_string(),
|
||||||
messages,
|
messages,
|
||||||
stream: Some(true),
|
stream: Some(true),
|
||||||
max_tokens: Some(16384),
|
max_tokens: Some(131072),
|
||||||
temperature: Some(1.0),
|
temperature: Some(1.0),
|
||||||
top_p: Some(1.0),
|
top_p: Some(1.0),
|
||||||
tools: tools.cloned(),
|
tools: tools.cloned(),
|
||||||
tool_choice,
|
tool_choice,
|
||||||
chat_template_kwargs: Some(GLMChatTemplateKwargs {
|
chat_template_kwargs: None,
|
||||||
enable_thinking: true,
|
|
||||||
clear_thinking: false,
|
|
||||||
}),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let url = self.build_url();
|
let url = self.build_url();
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue