Fix backoff logic to be per KB folder instead of global
Some checks failed
BotServer CI/CD / build (push) Has been cancelled

- Filter states by kb_folder_pattern (e.g. 'cartas/', 'proc/')
- Only apply backoff based on files in that specific KB folder
- Each KB folder has independent retry timing
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-04-12 09:15:32 -03:00
parent ee273256fb
commit 789789e313

View file

@@ -1294,31 +1294,40 @@ impl DriveMonitor {
// Check fail_count for this KB folder - implement backoff // Check fail_count for this KB folder - implement backoff
{ {
let states = self.file_states.read().await; let states = self.file_states.read().await;
let _kb_prefix = format!("{}/", gbkb_prefix); let kb_folder_pattern = format!("{}/", kb_name);
let max_fail_count = states.values() // Filter only files in this KB folder
.map(|s| s.fail_count) let kb_states: Vec<_> = states.iter()
.max() .filter(|(path, _)| path.contains(&kb_folder_pattern))
.unwrap_or(0); .collect();
// Backoff: wait longer based on fail count if kb_states.is_empty() {
// fail_count 0: no wait, 1: 5min, 2: 15min, 3+: 1h // No files in file_states yet for this KB - proceed with indexing
if max_fail_count > 0 { } else {
let wait_seconds = match max_fail_count { let max_fail_count = kb_states.iter()
1 => 300, // 5 min .map(|(_, s)| s.fail_count)
2 => 900, // 15 min .max()
_ => 3600, // 1 hour .unwrap_or(0);
};
if let Some(last_failed) = states.values() // Backoff: wait longer based on fail count
.filter_map(|s| s.last_failed_at) // fail_count 0: no wait, 1: 5min, 2: 15min, 3+: 1h
.max() if max_fail_count > 0 {
{ let wait_seconds = match max_fail_count {
let elapsed = chrono::Utc::now() - last_failed; 1 => 300, // 5 min
if elapsed.num_seconds() < wait_seconds { 2 => 900, // 15 min
trace!("[DRIVE_MONITOR] KB folder {} in backoff (fail_count={}, elapsed={}s < {}s), skipping", _ => 3600, // 1 hour
kb_key, max_fail_count, elapsed.num_seconds(), wait_seconds); };
continue;
if let Some(last_failed) = kb_states.iter()
.filter_map(|(_, s)| s.last_failed_at)
.max()
{
let elapsed = chrono::Utc::now() - last_failed;
if elapsed.num_seconds() < wait_seconds {
trace!("[DRIVE_MONITOR] KB folder {} in backoff (fail_count={}, elapsed={}s < {}s), skipping",
kb_key, max_fail_count, elapsed.num_seconds(), wait_seconds);
continue;
}
} }
} }
} }