refactor: Split drive_monitor into modules for better organization
Some checks failed
BotServer CI/CD / build (push) Failing after 2m5s
Some checks failed
BotServer CI/CD / build (push) Failing after 2m5s
- Extract KbProcessor for knowledge base processing - Extract Monitor for file monitoring logic - Extract Types for shared types and structs - Extract Utils for helper functions - Improves code organization and maintainability - Reduces mod.rs complexity
This commit is contained in:
parent
2dc5cf0761
commit
97a4583d81
5 changed files with 256 additions and 1671 deletions
116
src/drive/drive_monitor/kb_processor.rs
Normal file
116
src/drive/drive_monitor/kb_processor.rs
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use crate::core::kb::KnowledgeBaseManager;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use log::{error, info, trace, warn};
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use std::collections::HashSet;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use std::path::PathBuf;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use std::sync::Arc;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use tokio::sync::RwLock as TokioRwLock;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use tokio::time::Duration;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use crate::drive::drive_files::DriveFileRepository;
|
||||
|
||||
/// Spawns a detached background task that drains `pending_kb_index`,
/// indexing one knowledge-base folder at a time via
/// `KnowledgeBaseManager::handle_gbkb_change`.
///
/// The loop runs until `is_processing` is cleared. Each pending key is
/// expected to look like `{prefix}_{folder}`; malformed keys are dropped so
/// they cannot wedge the queue. Per-folder outcomes are mirrored into the
/// drive-file repository (`mark_indexed_by_pattern` on success,
/// `mark_failed_by_pattern` on failure/timeout), and successfully indexed
/// keys are recorded in `kb_indexed_folders`.
#[cfg(any(feature = "research", feature = "llm"))]
pub fn start_kb_processor(
    kb_manager: Arc<KnowledgeBaseManager>,
    bot_id: uuid::Uuid,
    bot_name: String,
    work_root: PathBuf,
    pending_kb_index: Arc<TokioRwLock<HashSet<String>>>,
    files_being_indexed: Arc<TokioRwLock<HashSet<String>>>,
    kb_indexed_folders: Arc<TokioRwLock<HashSet<String>>>,
    file_repo: Arc<DriveFileRepository>,
    is_processing: Arc<AtomicBool>,
) {
    // Hard cap on a single folder's indexing run.
    const INDEX_TIMEOUT: Duration = Duration::from_secs(120);
    // Poll interval while the pending queue is empty.
    const IDLE_SLEEP: Duration = Duration::from_secs(5);

    tokio::spawn(async move {
        while is_processing.load(Ordering::SeqCst) {
            // Peek one pending key. Fix: previously this took a *write*
            // lock for a read-only peek, needlessly blocking producers;
            // a read lock is sufficient.
            let kb_key = {
                let pending = pending_kb_index.read().await;
                pending.iter().next().cloned()
            };

            let Some(kb_key) = kb_key else {
                tokio::time::sleep(IDLE_SLEEP).await;
                continue;
            };

            // Keys are "{prefix}_{folder}"; anything else is unprocessable.
            let parts: Vec<&str> = kb_key.splitn(2, '_').collect();
            if parts.len() < 2 {
                let mut pending = pending_kb_index.write().await;
                pending.remove(&kb_key);
                continue;
            }

            let kb_folder_name = parts[1];
            let kb_folder_path =
                work_root.join(&bot_name).join(format!("{}.gbkb/", bot_name)).join(kb_folder_name);

            // If this key is already being indexed, drop the pending entry.
            // NOTE(review): this discards a re-index request that arrives
            // while indexing is in flight — confirm that is intended.
            {
                let indexing = files_being_indexed.read().await;
                if indexing.contains(&kb_key) {
                    let mut pending = pending_kb_index.write().await;
                    pending.remove(&kb_key);
                    continue;
                }
            }

            {
                let mut indexing = files_being_indexed.write().await;
                indexing.insert(kb_key.clone());
            }

            trace!("Indexing KB: {} for bot: {}", kb_key, bot_name);

            let result = tokio::time::timeout(
                INDEX_TIMEOUT,
                kb_manager.handle_gbkb_change(bot_id, &bot_name, kb_folder_path.as_path()),
            )
            .await;

            // Always clear the in-flight and pending markers, regardless of
            // outcome, so a failure cannot leave the key stuck forever.
            {
                let mut indexing = files_being_indexed.write().await;
                indexing.remove(&kb_key);
            }

            {
                let mut pending = pending_kb_index.write().await;
                pending.remove(&kb_key);
            }

            match result {
                Ok(Ok(_)) => {
                    info!("Successfully indexed KB: {}", kb_key);
                    {
                        let mut indexed = kb_indexed_folders.write().await;
                        indexed.insert(kb_key.clone());
                    }
                    let pattern = format!("{}/", kb_folder_name);
                    if let Err(e) = file_repo.mark_indexed_by_pattern(bot_id, &pattern) {
                        warn!("Failed to mark files indexed for {}: {}", kb_key, e);
                    }
                }
                Ok(Err(e)) => {
                    warn!("Failed to index KB {}: {}", kb_key, e);
                    let pattern = format!("{}/", kb_folder_name);
                    if let Err(e) = file_repo.mark_failed_by_pattern(bot_id, &pattern) {
                        warn!("Failed to mark files failed for {}: {}", kb_key, e);
                    }
                }
                Err(_) => {
                    error!("KB indexing timed out after 120s for {}", kb_key);
                    let pattern = format!("{}/", kb_folder_name);
                    if let Err(e) = file_repo.mark_failed_by_pattern(bot_id, &pattern) {
                        warn!("Failed to mark files failed for {}: {}", kb_key, e);
                    }
                }
            }
        }

        trace!("Stopping for bot {}", bot_name);
    });
}
|
||||
File diff suppressed because it is too large
Load diff
15
src/drive/drive_monitor/monitor.rs
Normal file
15
src/drive/drive_monitor/monitor.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::types::DriveMonitor;
|
||||
|
||||
impl DriveMonitor {
|
||||
pub fn calculate_backoff(&self) -> Duration {
|
||||
let failures = self.consecutive_failures.load(Ordering::Relaxed);
|
||||
if failures == 0 {
|
||||
return Duration::from_secs(30);
|
||||
}
|
||||
let backoff_secs = 30u64 * (1u64 << failures.min(4));
|
||||
Duration::from_secs(backoff_secs.min(300))
|
||||
}
|
||||
}
|
||||
108
src/drive/drive_monitor/types.rs
Normal file
108
src/drive/drive_monitor/types.rs
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use crate::core::kb::KnowledgeBaseManager;
|
||||
use crate::core::shared::state::AppState;
|
||||
#[cfg(not(any(feature = "research", feature = "llm")))]
|
||||
use std::collections::HashMap;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
use tokio::sync::RwLock as TokioRwLock;
|
||||
|
||||
use crate::drive::drive_files::DriveFileRepository;
|
||||
|
||||
/// Process-wide flag recording whether an LLM response is currently being
/// streamed. Only compiled when a KB-related feature is enabled.
#[cfg(any(feature = "research", feature = "llm"))]
static LLM_STREAMING: AtomicBool = AtomicBool::new(false);

/// Sets the global LLM-streaming flag.
#[cfg(any(feature = "research", feature = "llm"))]
pub fn set_llm_streaming(streaming: bool) {
    LLM_STREAMING.store(streaming, Ordering::SeqCst);
}

/// Returns `true` while an LLM response is being streamed.
#[cfg(any(feature = "research", feature = "llm"))]
pub fn is_llm_streaming() -> bool {
    LLM_STREAMING.load(Ordering::SeqCst)
}
|
||||
|
||||
// Backoff/retry tuning knobs.
// NOTE(review): `calculate_backoff` in monitor.rs hardcodes 30/300 rather
// than using MAX_BACKOFF_SECS/INITIAL_BACKOFF_SECS — consider unifying.
const MAX_BACKOFF_SECS: u64 = 300;
const INITIAL_BACKOFF_SECS: u64 = 30;
const RETRY_BACKOFF_SECS: i64 = 3600;
const MAX_FAIL_COUNT: i32 = 3;

/// Strips any leading/trailing double quotes from an ETag so that quoted
/// and unquoted forms compare equal.
pub fn normalize_etag(etag: &str) -> String {
    let bare = etag.trim_matches('"');
    String::from(bare)
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DriveMonitor {
|
||||
pub state: Arc<AppState>,
|
||||
pub bucket_name: String,
|
||||
pub bot_id: uuid::Uuid,
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
pub kb_manager: Arc<KnowledgeBaseManager>,
|
||||
pub work_root: PathBuf,
|
||||
pub is_processing: Arc<AtomicBool>,
|
||||
pub scanning: Arc<AtomicBool>,
|
||||
pub consecutive_failures: Arc<AtomicU32>,
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
pub files_being_indexed: Arc<TokioRwLock<HashSet<String>>>,
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
pub pending_kb_index: Arc<TokioRwLock<HashSet<String>>>,
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
pub kb_indexed_folders: Arc<TokioRwLock<HashSet<String>>>,
|
||||
#[cfg(not(any(feature = "research", feature = "llm")))]
|
||||
pub _pending_kb_index: Arc<TokioRwLock<HashSet<String>>>,
|
||||
pub file_repo: Arc<DriveFileRepository>,
|
||||
#[allow(dead_code)]
|
||||
pub pending_changes: Arc<TokioRwLock<Vec<String>>>,
|
||||
#[allow(dead_code)]
|
||||
pub last_etag_snapshot: Arc<TokioRwLock<std::collections::HashMap<String, String>>>,
|
||||
}
|
||||
|
||||
impl DriveMonitor {
|
||||
fn normalize_config_value(value: &str) -> String {
|
||||
let trimmed = value.trim();
|
||||
if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("none") {
|
||||
String::new()
|
||||
} else {
|
||||
trimmed.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(state: Arc<AppState>, bucket_name: String, bot_id: uuid::Uuid) -> Self {
|
||||
let work_root = PathBuf::from(crate::core::shared::utils::get_work_path());
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
let kb_manager = Arc::new(KnowledgeBaseManager::with_bot_config(
|
||||
work_root.clone(),
|
||||
state.conn.clone(),
|
||||
bot_id,
|
||||
));
|
||||
|
||||
let file_repo = Arc::new(DriveFileRepository::new(state.conn.clone()));
|
||||
|
||||
Self {
|
||||
state,
|
||||
bucket_name,
|
||||
bot_id,
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
kb_manager,
|
||||
work_root,
|
||||
is_processing: Arc::new(AtomicBool::new(false)),
|
||||
scanning: Arc::new(AtomicBool::new(false)),
|
||||
consecutive_failures: Arc::new(AtomicU32::new(0)),
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
files_being_indexed: Arc::new(TokioRwLock::new(HashSet::new())),
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
pending_kb_index: Arc::new(TokioRwLock::new(HashSet::new())),
|
||||
#[cfg(any(feature = "research", feature = "llm"))]
|
||||
kb_indexed_folders: Arc::new(TokioRwLock::new(HashSet::new())),
|
||||
#[cfg(not(any(feature = "research", feature = "llm")))]
|
||||
_pending_kb_index: Arc::new(TokioRwLock::new(HashSet::new())),
|
||||
file_repo,
|
||||
pending_changes: Arc::new(TokioRwLock::new(Vec::new())),
|
||||
last_etag_snapshot: Arc::new(TokioRwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
}
|
||||
12
src/drive/drive_monitor/utils.rs
Normal file
12
src/drive/drive_monitor/utils.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
use super::types::DriveMonitor;
|
||||
|
||||
impl DriveMonitor {
|
||||
pub fn normalize_config_value(value: &str) -> String {
|
||||
let trimmed = value.trim();
|
||||
if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("none") {
|
||||
String::new()
|
||||
} else {
|
||||
trimmed.to_string()
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue