Fix PDF tabular parsing spacing
All checks were successful
Botlib CI / build (push) Successful in 5s
BotServer CI / build (push) Successful in 3m10s
Bottest CI / build (push) Successful in 21s
BotUI CI / build (push) Successful in 23s

This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-04-22 21:19:18 +00:00
parent 8069fbab28
commit 058000cd1f

View file

@ -482,20 +482,20 @@ impl DocumentProcessor {
}
/// Normalizes extracted text line by line while keeping line boundaries.
///
/// For every line of `text`:
/// 1. control characters that are not whitespace are removed (tabs and other
///    whitespace controls are kept so they can still act as separators),
/// 2. runs of whitespace are collapsed into a single space and leading or
///    trailing whitespace is dropped.
///
/// Lines that end up empty are discarded, and the remaining lines are joined
/// with `\n`. Cleaning per line (rather than over the whole text) means the
/// newline between two rows is never folded into a space.
fn clean_text(text: &str) -> String {
    text.lines()
        .map(|line| {
            // Strip control characters first: removing one must not introduce
            // a word break, so this has to happen before splitting.
            let cleaned_line: String = line
                .chars()
                .filter(|c| !c.is_control() || c.is_whitespace())
                .collect();
            // Collapse any whitespace run (spaces, tabs, ...) to one space.
            cleaned_line
                .split_whitespace()
                .collect::<Vec<_>>()
                .join(" ")
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}
fn create_chunks(&self, text: &str, file_path: &Path) -> Vec<TextChunk> {