fix: DriveMonitor skips unchanged files on rescan, skips directory entries

- Only upsert drive_files when a file is new or its ETag has actually changed (previously every file was re-processed on each 60s scan cycle)
- Skip S3 directory entries (keys ending with '/') to avoid storing stale directory markers
- Add debug-level logging for unchanged file skips
- Fixes noisy 'Added/updated drive_files' spam on every scan cycle
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-04-21 16:16:39 +00:00
parent c70fbba099
commit 1ae0ad7051

View file

@ -35,6 +35,11 @@ impl DriveMonitor {
let current_keys: Vec<String> = objects.iter().map(|o| o.key.clone()).collect(); let current_keys: Vec<String> = objects.iter().map(|o| o.key.clone()).collect();
for obj in &objects { for obj in &objects {
if obj.key.ends_with('/') {
log::debug!("Skipping directory entry: {}", obj.key);
continue;
}
let file_type = classify_file(&obj.key); let file_type = classify_file(&obj.key);
let full_key = format!("{}.gbai/{}", bot_name, obj.key); let full_key = format!("{}.gbai/{}", bot_name, obj.key);
let etag = obj.etag.as_deref().map(normalize_etag); let etag = obj.etag.as_deref().map(normalize_etag);
@ -46,10 +51,17 @@ impl DriveMonitor {
log::info!("ETag changed for {}, will reindex", full_key); log::info!("ETag changed for {}, will reindex", full_key);
true true
} }
Some(_) => !existing.as_ref().map_or(false, |f| f.indexed), Some(prev) if !prev.indexed && prev.etag.as_deref() == etag.as_deref() => {
log::debug!("{} unchanged but not yet indexed, will index", full_key);
true
}
Some(_) => true,
None => true, None => true,
}; };
let etag_changed = existing.as_ref().map_or(true, |prev| prev.etag.as_deref() != etag.as_deref());
if etag_changed || existing.is_none() {
match self.file_repo.upsert_file( match self.file_repo.upsert_file(
self.bot_id, self.bot_id,
&full_key, &full_key,
@ -60,6 +72,9 @@ impl DriveMonitor {
Ok(_) => log::info!("Added/updated drive_files for: {} ({})", full_key, file_type), Ok(_) => log::info!("Added/updated drive_files for: {} ({})", full_key, file_type),
Err(e) => log::error!("Failed to upsert {}: {}", full_key, e), Err(e) => log::error!("Failed to upsert {}: {}", full_key, e),
} }
} else {
log::debug!("{} unchanged, skipping upsert", full_key);
}
if needs_reindex && file_type == "kb" { if needs_reindex && file_type == "kb" {
#[cfg(any(feature = "research", feature = "llm"))] #[cfg(any(feature = "research", feature = "llm"))]