fix: prevent KB re-download loop when file_states fails to load

- Add fallback: skip files from indexed KB folders even when file_states is empty
- Add file_states_count to the debug log to detect load failures
- Add indexed_kb_names set for quick KB folder lookup
- This prevents the infinite download loop when file_states.json fails to deserialize

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-04-13 16:26:12 -03:00 · 2026-04-13 16:26:12 -03:00 · e48b5610db
commit e48b5610db
parent 60fd3dbbc4
1 changed files with 22 additions and 2 deletions
--- a/src/drive/drive_monitor/mod.rs
+++ b/src/drive/drive_monitor/mod.rs
@@ -1567,17 +1567,37 @@ let file_state = FileState {
        }
        let mut file_states = self.file_states.write().await;
-        debug!("[GBKB] file_states lock acquired, processing {} files (all_indexed={})", current_files.len(), all_indexed);
+        debug!("[GBKB] file_states lock acquired, processing {} files (all_indexed={}, file_states_count={})", current_files.len(), all_indexed, file_states.len());
        // Build set of already-indexed KB folder names for quick lookup
        let indexed_kb_names: HashSet<String> = {
            let indexed = self.kb_indexed_folders.read().await;
            kb_folders.iter()
                .filter(|kb| indexed.contains(&format!("{}_{}", bot_name, kb)))
                .cloned()
                .collect()
        };
        for (path, current_state) in current_files.iter() {
            let is_new = !file_states.contains_key(path);
            debug!("[GBKB] DEBUG: path={} in_file_states={}", path, !is_new);
-            // When all KBs are indexed, skip files that are already tracked (not new)
+            // Skip files from already-indexed KB folders that are not new
            // This prevents re-download loop when file_states fails to load
            let kb_name_from_path = path.split('/').nth(1).map(|s| s.to_string());
            if all_indexed && !is_new {
                trace!("[GBKB] Skipping already indexed file: {}", path);
                continue;
            }
            // Extra safety: if file_states is empty (e.g. it failed to load), every file looks
            // new, so skip files whose KB folder is already indexed instead of re-downloading them
            if file_states.is_empty() && all_indexed {
                if let Some(kb) = &kb_name_from_path {
                    if indexed_kb_names.contains(kb) {
                        trace!("[GBKB] Skipping file from indexed KB (empty file_states): {}", path);
                        continue;
                    }
                }
            }
            // Use last_modified as primary change detector (more stable than ETag)
            // ETags can change due to metadata updates even when content is identical