Add smart sleep based on fail_count to prevent excessive monitoring cycles
All checks were successful
BotServer CI/CD / build (push) Successful in 3m9s
All checks were successful
BotServer CI/CD / build (push) Successful in 3m9s
- fail_count >= 3: sleep 1 hour
- fail_count >= 2: sleep 15 min
- fail_count >= 1: sleep 5 min
- fail_count = 0: sleep 10 sec (default)
This commit is contained in:
parent
789789e313
commit
256d55fc93
1 changed file with 25 additions and 2 deletions
|
|
@@ -291,8 +291,31 @@ impl DriveMonitor {
|
||||||
{
|
{
|
||||||
debug!("[DRIVE_MONITOR] Inside monitoring loop for bot {}", self_clone.bot_id);
|
debug!("[DRIVE_MONITOR] Inside monitoring loop for bot {}", self_clone.bot_id);
|
||||||
debug!("[DRIVE_MONITOR] Periodic check starting for bot {}", self_clone.bot_id);
|
debug!("[DRIVE_MONITOR] Periodic check starting for bot {}", self_clone.bot_id);
|
||||||
// Use fixed 10 second interval instead of backoff calculation
|
|
||||||
tokio::time::sleep(Duration::from_secs(10)).await;
|
// Smart sleep based on fail_count - prevent excessive retries
|
||||||
|
{
    // Pick the delay before the next monitoring pass from the highest
    // fail_count among all tracked file states, so repeated failures back
    // off instead of retrying every few seconds.
    //
    // The read guard is confined to this inner scope on purpose: it must be
    // released BEFORE the sleep below. Keeping it alive across the sleep would
    // hold the lock for the whole delay (up to an hour).
    // NOTE(review): assumes `file_states` is an async RwLock (e.g.
    // tokio::sync::RwLock), in which case a long-held read guard stalls every
    // writer for that time — confirm against the field's declaration.
    let max_fail_count = {
        let states = self_clone.file_states.read().await;
        states.values().map(|s| s.fail_count).max().unwrap_or(0)
    };

    // Delay in seconds; an empty state map or no failures keeps the 10 s default.
    let base_sleep = if max_fail_count >= 3 {
        3600 // 1 hour for fail_count >= 3
    } else if max_fail_count >= 2 {
        900 // 15 min for fail_count >= 2
    } else if max_fail_count >= 1 {
        300 // 5 min for fail_count >= 1
    } else {
        10 // 10 sec default
    };

    // Only log when backing off beyond the default interval, to keep the
    // debug log quiet on the healthy path.
    if base_sleep > 10 {
        debug!("[DRIVE_MONITOR] Sleep {}s based on fail_count={}", base_sleep, max_fail_count);
    }

    tokio::time::sleep(Duration::from_secs(base_sleep)).await;
}
|
||||||
|
|
||||||
debug!("[DRIVE_MONITOR] Checking drive health for bot {}", self_clone.bot_id);
|
debug!("[DRIVE_MONITOR] Checking drive health for bot {}", self_clone.bot_id);
|
||||||
// Skip drive health check - just proceed with monitoring
|
// Skip drive health check - just proceed with monitoring
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue