fix: remove hardcoded salesianos, strip think tags globally, block reasoning_content leak
All checks were successful
BotServer CI/CD / build (push) Successful in 6m38s

- drive_monitor: replace hardcoded salesianos.gbot with dynamic bot_name
- llm/mod.rs: stop falling back to reasoning_content as content
- llm/claude.rs: same fix for Claude handler
- deepseek_r3: export strip_think_tags for reuse
- gpt_oss_20b: use strip_think_tags so all models strip tags
- gpt_oss_120b: use strip_think_tags so all models strip tags
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-04-13 09:04:22 -03:00
parent dbec0df923
commit dd68cdbe6c
6 changed files with 49 additions and 25 deletions

View file

@@ -1096,8 +1096,8 @@ let file_state = FileState {
if parts.len() >= 2 { if parts.len() >= 2 {
let prompt_filename = parts[1].trim(); let prompt_filename = parts[1].trim();
if !prompt_filename.is_empty() { if !prompt_filename.is_empty() {
// Get prompt file from MinIO let bot_name = self.bucket_name.strip_suffix(".gbai").unwrap_or(&self.bucket_name);
let prompt_key = format!("salesianos.gbot/{}", prompt_filename); let prompt_key = format!("{}.gbot/{}", bot_name, prompt_filename);
if let Ok(prompt_response) = client if let Ok(prompt_response) = client
.get_object() .get_object()
.bucket(&self.bucket_name) .bucket(&self.bucket_name)
@@ -1110,7 +1110,6 @@ let file_state = FileState {
.map_err(|_e| format!("UTF-8 error in {}", prompt_filename))?; .map_err(|_e| format!("UTF-8 error in {}", prompt_filename))?;
// Save to work directory // Save to work directory
let bot_name = self.bucket_name.strip_suffix(".gbai").unwrap_or(&self.bucket_name);
let gbot_dir = self.work_root.join(format!("{}.gbai/{}.gbot", bot_name, bot_name)); let gbot_dir = self.work_root.join(format!("{}.gbai/{}.gbot", bot_name, bot_name));
if let Err(e) = tokio::task::spawn_blocking({ if let Err(e) = tokio::task::spawn_blocking({

View file

@@ -332,7 +332,6 @@ impl ClaudeClient {
let text = delta let text = delta
.content .content
.as_deref() .as_deref()
.or(delta.reasoning_content.as_deref())
.unwrap_or(""); .unwrap_or("");
if !text.is_empty() { if !text.is_empty() {

View file

@@ -1,24 +1,31 @@
use super::ModelHandler; use super::ModelHandler;
use std::sync::LazyLock; use std::sync::LazyLock;
use regex::Regex;
static THINK_TAG_REGEX: LazyLock<Result<regex::Regex, regex::Error>> = LazyLock::new(|| { static THINK_TAG_REGEX: LazyLock<Result<Regex, regex::Error>> = LazyLock::new(|| {
regex::Regex::new(r"(?s)<think>.*?</think>") Regex::new(r"(?s)<think>.*?</think>")
}); });
pub fn strip_think_tags(content: &str) -> String {
if let Ok(re) = &*THINK_TAG_REGEX {
re.replace_all(content, "").to_string()
} else {
content.to_string()
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct DeepseekR3Handler; pub struct DeepseekR3Handler;
impl ModelHandler for DeepseekR3Handler { impl ModelHandler for DeepseekR3Handler {
fn is_analysis_complete(&self, buffer: &str) -> bool { fn is_analysis_complete(&self, buffer: &str) -> bool {
buffer.contains("</think>") buffer.contains("</think>")
} }
fn process_content(&self, content: &str) -> String { fn process_content(&self, content: &str) -> String {
if let Ok(re) = &*THINK_TAG_REGEX { strip_think_tags(content)
re.replace_all(content, "").to_string()
} else {
content.to_string()
}
} }
fn has_analysis_markers(&self, buffer: &str) -> bool { fn has_analysis_markers(&self, buffer: &str) -> bool {
buffer.contains("<think>") buffer.contains("<think>")
} }

View file

@@ -1,4 +1,4 @@
use super::deepseek_r3::strip_think_tags;
use super::ModelHandler; use super::ModelHandler;
#[derive(Debug)] #[derive(Debug)]
pub struct GptOss120bHandler {} pub struct GptOss120bHandler {}
@@ -15,12 +15,14 @@ impl GptOss120bHandler {
} }
impl ModelHandler for GptOss120bHandler { impl ModelHandler for GptOss120bHandler {
fn is_analysis_complete(&self, buffer: &str) -> bool { fn is_analysis_complete(&self, buffer: &str) -> bool {
buffer.contains("**end**") buffer.contains("**end**") || buffer.contains("</think>")
} }
fn process_content(&self, content: &str) -> String { fn process_content(&self, content: &str) -> String {
content.replace("**start**", "").replace("**end**", "") strip_think_tags(content)
.replace("**start**", "")
.replace("**end**", "")
} }
fn has_analysis_markers(&self, buffer: &str) -> bool { fn has_analysis_markers(&self, buffer: &str) -> bool {
buffer.contains("**start**") buffer.contains("**start**") || buffer.contains("<think>")
} }
} }

View file

@@ -1,16 +1,36 @@
use super::deepseek_r3::strip_think_tags;
use super::ModelHandler; use super::ModelHandler;
use std::sync::LazyLock;
use regex::Regex;
static ANALYSIS_MARKER_REGEX: LazyLock<Result<Regex, regex::Error>> = LazyLock::new(|| {
Regex::new(r"analysis<\|message\|>")
});
#[derive(Debug)] #[derive(Debug)]
pub struct GptOss20bHandler; pub struct GptOss20bHandler;
impl ModelHandler for GptOss20bHandler { impl ModelHandler for GptOss20bHandler {
fn is_analysis_complete(&self, buffer: &str) -> bool { fn is_analysis_complete(&self, buffer: &str) -> bool {
buffer.ends_with("final") buffer.contains("final") || buffer.contains("</think>")
} }
fn process_content(&self, content: &str) -> String { fn process_content(&self, content: &str) -> String {
content let without_think = strip_think_tags(content);
.find("final") if without_think.is_empty() {
.map_or_else(|| content.to_string(), |pos| content[..pos].to_string()) return String::new();
}
match without_think.find("final") {
Some(pos) => without_think[..pos].to_string(),
None => without_think,
}
} }
fn has_analysis_markers(&self, buffer: &str) -> bool { fn has_analysis_markers(&self, buffer: &str) -> bool {
buffer.contains("analysis<|message|>") (if let Ok(re) = &*ANALYSIS_MARKER_REGEX {
re.is_match(buffer)
} else {
buffer.contains("analysis<|message|>")
}) || buffer.contains("<think>")
} }
} }

View file

@@ -458,10 +458,7 @@ impl LLMProvider for OpenAIClient {
for line in chunk_str.lines() { for line in chunk_str.lines() {
if line.starts_with("data: ") && !line.contains("[DONE]") { if line.starts_with("data: ") && !line.contains("[DONE]") {
if let Ok(data) = serde_json::from_str::<Value>(&line[6..]) { if let Ok(data) = serde_json::from_str::<Value>(&line[6..]) {
// Handle content (standard) or reasoning/reasoning_content (NVIDIA reasoning models) let content = data["choices"][0]["delta"]["content"].as_str();
let content = data["choices"][0]["delta"]["content"].as_str()
.or_else(|| data["choices"][0]["delta"]["reasoning_content"].as_str())
.or_else(|| data["choices"][0]["delta"]["reasoning"].as_str());
if let Some(content) = content { if let Some(content) = content {
let processed = handler.process_content(content); let processed = handler.process_content(content);
if !processed.is_empty() { if !processed.is_empty() {