fix: prevent KB re-download loop when file_states fails to load
All checks were successful
BotServer CI/CD / build (push) Successful in 3m28s
All checks were successful
BotServer CI/CD / build (push) Successful in 3m28s
- Add fallback: skip files from indexed KB folders even when file_states is empty
- Add file_states_count to the debug log to detect load failures
- Add indexed_kb_names set for quick KB folder lookup
- This prevents the infinite download loop when file_states.json fails to deserialize

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent 60fd3dbbc4
commit e48b5610db
1 changed file with 22 additions and 2 deletions
|
|
@@ -1567,17 +1567,37 @@ let file_state = FileState {
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut file_states = self.file_states.write().await;
|
let mut file_states = self.file_states.write().await;
|
||||||
debug!("[GBKB] file_states lock acquired, processing {} files (all_indexed={})", current_files.len(), all_indexed);
|
debug!("[GBKB] file_states lock acquired, processing {} files (all_indexed={}, file_states_count={})", current_files.len(), all_indexed, file_states.len());
|
||||||
|
|
||||||
|
// Build set of already-indexed KB folder names for quick lookup
|
||||||
|
let indexed_kb_names: HashSet<String> = {
|
||||||
|
let indexed = self.kb_indexed_folders.read().await;
|
||||||
|
kb_folders.iter()
|
||||||
|
.filter(|kb| indexed.contains(&format!("{}_{}", bot_name, kb)))
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
for (path, current_state) in current_files.iter() {
|
for (path, current_state) in current_files.iter() {
|
||||||
let is_new = !file_states.contains_key(path);
|
let is_new = !file_states.contains_key(path);
|
||||||
debug!("[GBKB] DEBUG: path={} in_file_states={}", path, !is_new);
|
debug!("[GBKB] DEBUG: path={} in_file_states={}", path, !is_new);
|
||||||
|
|
||||||
// When all KBs are indexed, skip files that are already tracked (not new)
|
// Skip files from already-indexed KB folders that are not new
|
||||||
|
// This prevents re-download loop when file_states fails to load
|
||||||
|
let kb_name_from_path = path.split('/').nth(1).map(|s| s.to_string());
|
||||||
if all_indexed && !is_new {
|
if all_indexed && !is_new {
|
||||||
trace!("[GBKB] Skipping already indexed file: {}", path);
|
trace!("[GBKB] Skipping already indexed file: {}", path);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// Extra safety: if file_states is empty but KB is indexed, skip non-new files
|
||||||
|
if file_states.is_empty() && all_indexed {
|
||||||
|
if let Some(kb) = &kb_name_from_path {
|
||||||
|
if indexed_kb_names.contains(kb) {
|
||||||
|
trace!("[GBKB] Skipping file from indexed KB (empty file_states): {}", path);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Use last_modified as primary change detector (more stable than ETag)
|
// Use last_modified as primary change detector (more stable than ETag)
|
||||||
// ETags can change due to metadata updates even when content is identical
|
// ETags can change due to metadata updates even when content is identical
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue