#![deny(warnings)]
#![allow(dead_code)]
//! [TSM.ID].[11031972] -- Platform X Ecosystem
//! xcu-sentinel -- System Watchdog with Resource Monitoring
//! CPU/RAM/disk monitoring, threshold alerts, SLA enforcement

use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use std::time::SystemTime;

/// Errors reported by [`Sentinel`] operations.
#[derive(Debug)]
pub enum SentinelError {
    /// A monitored value crossed a limit. Not constructed anywhere in this file.
    ThresholdExceeded(String),
    /// Internal monitor state could not be accessed (e.g. a poisoned lock).
    MonitorFailed(String),
    /// The supplied arguments or configuration are not usable.
    ConfigError(String),
}

impl std::fmt::Display for SentinelError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ThresholdExceeded(e) => write!(f, "Threshold exceeded: {e}"),
            Self::MonitorFailed(e) => write!(f, "Monitor failed: {e}"),
            Self::ConfigError(e) => write!(f, "Config error: {e}"),
        }
    }
}

impl std::error::Error for SentinelError {}

/// One sample of system resource usage.
#[derive(Debug, Clone)]
pub struct ResourceSnapshot {
    /// CPU usage in percent; compared against the CPU thresholds.
    pub cpu_percent: f64,
    /// Memory in use, in megabytes.
    pub memory_used_mb: u64,
    /// Total memory, in megabytes. A value of `0` makes the memory usage evaluate to 0%.
    pub memory_total_mb: u64,
    /// Disk usage in percent; compared against the disk threshold.
    pub disk_used_percent: f64,
    /// Number of open connections. Stored only; no threshold in this file inspects it.
    pub open_connections: u32,
    /// Sample time, copied verbatim into every alert raised for this snapshot.
    /// NOTE(review): presumably seconds since the Unix epoch (see `Sentinel::now_epoch`) -- confirm.
    pub timestamp: u64,
}

/// Limits that decide when [`Sentinel::record`] raises an alert.
#[derive(Debug, Clone)]
pub struct AlertThreshold {
    /// CPU percent at or above which a `Critical` alert is raised.
    pub cpu_critical: f64,
    /// CPU percent at or above which a `Warning` alert is raised (when below critical).
    pub cpu_warning: f64,
    /// Memory percent at or above which a `Critical` alert is raised.
    pub memory_critical_percent: f64,
    /// Memory percent at or above which a `Warning` alert is raised (when below critical).
    pub memory_warning_percent: f64,
    /// Disk percent at or above which a `Critical` alert is raised.
    pub disk_critical_percent: f64,
    /// Response-time limit in milliseconds. Not evaluated by any code in this file.
    pub response_time_ms_critical: u64,
}

impl Default for AlertThreshold {
    fn default() -> Self {
        Self {
            cpu_critical: 90.0,
            cpu_warning: 70.0,
            memory_critical_percent: 85.0,
            memory_warning_percent: 70.0,
            disk_critical_percent: 90.0,
            response_time_ms_critical: 5000,
        }
    }
}

/// Severity of an [`Alert`]. `record` only ever produces `Warning` and `Critical`.
#[derive(Debug, Clone)]
pub enum AlertLevel {
    Info,
    Warning,
    Critical,
    Fatal,
}

/// A threshold violation detected while recording a snapshot.
#[derive(Debug, Clone)]
pub struct Alert {
    /// Severity of the violation.
    pub level: AlertLevel,
    /// Resource name: `"cpu"`, `"memory"` or `"disk"`.
    pub resource: String,
    /// Human-readable description of the violation.
    pub message: String,
    /// The observed value, in percent.
    pub value: f64,
    /// The limit that was reached or exceeded, in percent.
    pub threshold: f64,
    /// Timestamp copied from the offending snapshot.
    pub timestamp: u64,
}

/// Watchdog that keeps a bounded history of snapshots and a log of raised alerts.
pub struct Sentinel {
    thresholds: AlertThreshold,
    history: Arc<Mutex<VecDeque<ResourceSnapshot>>>,
    alerts: Arc<Mutex<Vec<Alert>>>,
    max_history: usize,
}

impl Sentinel {
    /// Relative deviation from the moving average above which a CPU reading is an anomaly.
    const ANOMALY_DEVIATION_RATIO: f64 = 0.5;

    /// Creates a sentinel that retains at most `max_history` snapshots.
    pub fn new(thresholds: AlertThreshold, max_history: usize) -> Self {
        Self {
            thresholds,
            history: Arc::new(Mutex::new(VecDeque::with_capacity(max_history))),
            alerts: Arc::new(Mutex::new(Vec::new())),
            max_history,
        }
    }

    /// Error returned whenever one of the internal mutexes is poisoned.
    fn lock_poisoned() -> SentinelError {
        SentinelError::MonitorFailed("Lock poisoned".into())
    }

    /// Maps a value onto the level and limit it reaches, checking critical before warning.
    fn classify(value: f64, critical: f64, warning: f64) -> Option<(AlertLevel, f64)> {
        if value >= critical {
            Some((AlertLevel::Critical, critical))
        } else if value >= warning {
            Some((AlertLevel::Warning, warning))
        } else {
            None
        }
    }

    /// Builds the alerts a snapshot triggers, in the order cpu, memory, disk.
    fn evaluate(&self, snapshot: &ResourceSnapshot) -> Vec<Alert> {
        let limits = &self.thresholds;
        let timestamp = snapshot.timestamp;
        let mut raised = Vec::new();

        let cpu = snapshot.cpu_percent;
        if let Some((level, limit)) = Self::classify(cpu, limits.cpu_critical, limits.cpu_warning)
        {
            raised.push(Alert {
                level,
                resource: "cpu".into(),
                message: format!("CPU {}% >= {}%", cpu, limit),
                value: cpu,
                threshold: limit,
                timestamp,
            });
        }

        // A zero total would divide by zero, so it is treated as 0% used.
        let mem_percent = if snapshot.memory_total_mb > 0 {
            (snapshot.memory_used_mb as f64 / snapshot.memory_total_mb as f64) * 100.0
        } else {
            0.0
        };
        if let Some((level, limit)) = Self::classify(
            mem_percent,
            limits.memory_critical_percent,
            limits.memory_warning_percent,
        ) {
            raised.push(Alert {
                level,
                resource: "memory".into(),
                message: format!("Memory {:.1}% >= {}%", mem_percent, limit),
                value: mem_percent,
                threshold: limit,
                timestamp,
            });
        }

        // Disk has a critical limit only; there is no warning tier.
        let disk = snapshot.disk_used_percent;
        if disk >= limits.disk_critical_percent {
            raised.push(Alert {
                level: AlertLevel::Critical,
                resource: "disk".into(),
                message: format!("Disk {:.1}% >= {}%", disk, limits.disk_critical_percent),
                value: disk,
                threshold: limits.disk_critical_percent,
                timestamp,
            });
        }

        raised
    }

    /// Record a resource snapshot and check thresholds.
    ///
    /// The snapshot is appended to the bounded history (evicting the oldest entries
    /// once `max_history` is reached) and every raised alert is appended to the
    /// alert log. The alerts raised by this snapshot are returned.
    ///
    /// # Errors
    ///
    /// Returns [`SentinelError::MonitorFailed`] when the history or alert lock is
    /// poisoned, instead of silently dropping the data.
    pub fn record(&self, snapshot: ResourceSnapshot) -> Result<Vec<Alert>, SentinelError> {
        let new_alerts = self.evaluate(&snapshot);

        {
            let mut hist = self.history.lock().map_err(|_| Self::lock_poisoned())?;
            // With a zero bound nothing may be retained at all.
            if self.max_history > 0 {
                while hist.len() >= self.max_history {
                    hist.pop_front();
                }
                hist.push_back(snapshot);
            }
        }

        {
            let mut alert_log = self.alerts.lock().map_err(|_| Self::lock_poisoned())?;
            alert_log.extend(new_alerts.iter().cloned());
        }

        Ok(new_alerts)
    }

    /// Calculate moving average of CPU over the last `window` recorded samples.
    ///
    /// Returns `0.0` when no samples are available (empty history or `window == 0`).
    ///
    /// # Errors
    ///
    /// Returns [`SentinelError::MonitorFailed`] when the history lock is poisoned.
    pub fn cpu_moving_average(&self, window: usize) -> Result<f64, SentinelError> {
        let hist = self.history.lock().map_err(|_| Self::lock_poisoned())?;
        // Newest samples sit at the back, so walk the deque in reverse.
        let (sum, count) = hist
            .iter()
            .rev()
            .take(window)
            .fold((0.0_f64, 0_usize), |(sum, count), sample| {
                (sum + sample.cpu_percent, count + 1)
            });
        if count == 0 {
            return Ok(0.0);
        }
        Ok(sum / count as f64)
    }

    /// Detect anomaly: sudden spike (or drop) compared to the moving average.
    ///
    /// Returns `true` when `current_cpu` deviates from the average of the last
    /// `window` samples by more than 50% of that average. A non-positive average
    /// (including an empty history) never counts as an anomaly.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sentinel::cpu_moving_average`].
    pub fn detect_anomaly(&self, current_cpu: f64, window: usize) -> Result<bool, SentinelError> {
        let avg = self.cpu_moving_average(window)?;
        if avg > 0.0 {
            let deviation = (current_cpu - avg).abs() / avg;
            Ok(deviation > Self::ANOMALY_DEVIATION_RATIO)
        } else {
            Ok(false)
        }
    }

    /// SLA check: uptime percentage, i.e. the share of checks that did not fail.
    ///
    /// # Errors
    ///
    /// Returns [`SentinelError::ConfigError`] when `total_checks` is zero or when
    /// `failed_checks` exceeds `total_checks` (which would otherwise underflow).
    pub fn calculate_uptime(
        &self,
        total_checks: u64,
        failed_checks: u64,
    ) -> Result<f64, SentinelError> {
        if total_checks == 0 {
            return Err(SentinelError::ConfigError("No checks recorded".into()));
        }
        let passed = total_checks.checked_sub(failed_checks).ok_or_else(|| {
            SentinelError::ConfigError("Failed checks exceed total checks".into())
        })?;
        Ok((passed as f64 / total_checks as f64) * 100.0)
    }

    /// Get current epoch timestamp in seconds; `0` if the clock is before the Unix epoch.
    pub fn now_epoch() -> u64 {
        SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0)
    }

    /// Returns a copy of every alert logged so far; empty if the lock is poisoned.
    pub fn get_alerts(&self) -> Vec<Alert> {
        self.alerts.lock().map(|a| a.clone()).unwrap_or_default()
    }

    /// Returns a copy of the retained snapshots, oldest first; empty if the lock is poisoned.
    pub fn get_history(&self) -> Vec<ResourceSnapshot> {
        self.history
            .lock()
            .map(|h| h.iter().cloned().collect())
            .unwrap_or_default()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cpu_critical_alert() {
        let sentinel = Sentinel::new(AlertThreshold::default(), 100);
        let snap = ResourceSnapshot {
            cpu_percent: 95.0,
            memory_used_mb: 4000,
            memory_total_mb: 8000,
            disk_used_percent: 50.0,
            open_connections: 100,
            timestamp: 1000,
        };
        let alerts = sentinel.record(snap).unwrap();
        assert!(!alerts.is_empty());
        assert!(matches!(alerts[0].level, AlertLevel::Critical));
    }

    #[test]
    fn test_moving_average() {
        let sentinel = Sentinel::new(AlertThreshold::default(), 100);
        for i in 0..10 {
            let snap = ResourceSnapshot {
                cpu_percent: 30.0 + i as f64,
                memory_used_mb: 2000,
                memory_total_mb: 8000,
                disk_used_percent: 40.0,
                open_connections: 50,
                timestamp: i as u64,
            };
            let _ = sentinel.record(snap);
        }
        let avg = sentinel.cpu_moving_average(5).unwrap();
        assert!(avg > 30.0 && avg < 40.0);
    }

    #[test]
    fn test_anomaly_detection() {
        let sentinel = Sentinel::new(AlertThreshold::default(), 100);
        for i in 0..20 {
            let snap = ResourceSnapshot {
                cpu_percent: 30.0,
                memory_used_mb: 2000,
                memory_total_mb: 8000,
                disk_used_percent: 40.0,
                open_connections: 50,
                timestamp: i,
            };
            let _ = sentinel.record(snap);
        }
        let is_anomaly = sentinel.detect_anomaly(80.0, 10).unwrap();
        assert!(is_anomaly);
    }

    #[test]
    fn test_sla_uptime() {
        let sentinel = Sentinel::new(AlertThreshold::default(), 100);
        let uptime = sentinel.calculate_uptime(1000, 1).unwrap();
        assert!(uptime > 99.8);
    }
}