khora_control/
analysis.rs

1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Heuristic analysis for the DCC.
16//!
17//! The `HeuristicEngine` is the analytical core that evaluates the full
18//! situational model (hardware state, execution phase, metric trends) to
19//! decide whether a GORNA renegotiation is necessary and what the global
20//! performance target should be.
21
22use crate::context::{Context, ExecutionPhase};
23use crate::metrics::MetricStore;
24use khora_core::platform::{BatteryLevel, ThermalStatus};
25use khora_core::telemetry::MetricId;
26
// ── Frame-time thresholds ───────────────────────────────────────────────

/// Average frame time (ms) beyond which performance counts as degraded.
const FRAME_TIME_WARN_THRESHOLD_MS: f32 = 18.0;
/// Average frame time (ms) beyond which performance counts as critical.
const FRAME_TIME_CRITICAL_THRESHOLD_MS: f32 = 25.0;
/// Frame-time variance beyond which the frame pacing is treated as stutter.
const FRAME_TIME_VARIANCE_THRESHOLD: f32 = 4.0;
/// Upward frame-time trend (ms per sample window) beyond which the engine
/// reacts preemptively.
const FRAME_TIME_TREND_THRESHOLD: f32 = 2.0;

// ── Load thresholds ─────────────────────────────────────────────────────

/// GPU load beyond which a warning-level response is issued.
const GPU_LOAD_WARN: f32 = 0.90;
/// GPU load beyond which a negotiation is triggered.
const GPU_LOAD_CRITICAL: f32 = 0.95;
/// CPU load beyond which a negotiation is triggered.
const CPU_LOAD_CRITICAL: f32 = 0.95;
41
/// Analysis results and alerts produced by the `HeuristicEngine`.
///
/// All fields are public so callers can read the verdict directly. The type
/// implements `PartialEq` so two reports can be compared as values (note that
/// `suggested_latency_ms` is an `f32`, so a `NaN` latency never compares equal).
#[derive(Debug, Clone, PartialEq)]
pub struct AnalysisReport {
    /// `true` if a resource conflict or performance drop is detected and GORNA
    /// should run a full negotiation round.
    pub needs_negotiation: bool,
    /// Suggested global target latency (in ms) derived from analysis.
    pub suggested_latency_ms: f32,
    /// `true` if the engine is in a "death spiral" — multiple subsystems are
    /// simultaneously failing to meet budgets and an emergency stop is required.
    pub death_spiral_detected: bool,
    /// Human-readable summary of analysis findings for telemetry/logging,
    /// one entry per finding.
    pub alerts: Vec<String>,
}
56
57impl Default for AnalysisReport {
58    fn default() -> Self {
59        Self {
60            needs_negotiation: false,
61            suggested_latency_ms: 16.66,
62            death_spiral_detected: false,
63            alerts: Vec::new(),
64        }
65    }
66}
67
/// Analyzes metrics and context to determine engine-wide strategy changes.
///
/// The engine is a stateless unit struct, so it is cheap to copy and can be
/// created with `HeuristicEngine` or `HeuristicEngine::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct HeuristicEngine;
70
71impl HeuristicEngine {
72    /// Analyzes the current situational model.
73    ///
74    /// Evaluates the full set of heuristics:
75    /// 1. **Phase heuristics**: Adjust target FPS for the current execution phase.
76    /// 2. **Thermal analysis**: Detect throttling / critical and reduce budgets.
77    /// 3. **Battery analysis**: Conserve power on low/critical battery.
78    /// 4. **Frame time analysis**: Detect sustained performance drops.
79    /// 5. **Stutter analysis**: Detect high frame time variance.
80    /// 6. **Trend analysis**: Preempt worsening performance via slope detection.
81    /// 7. **CPU/GPU pressure**: Detect resource saturation.
82    pub fn analyze(&self, context: &Context, store: &MetricStore) -> AnalysisReport {
83        let mut report = AnalysisReport::default();
84        let mut pressure_count: u32 = 0;
85
86        // ── 1. Phase-Based Target ────────────────────────────────────────
87        report.suggested_latency_ms = match context.phase {
88            ExecutionPhase::Boot => 33.33, // Loading — no frame budget needed
89            ExecutionPhase::Menu => 33.33, // 30 FPS in menus is sufficient
90            ExecutionPhase::Simulation => 16.66, // 60 FPS target
91            ExecutionPhase::Background => 200.0, // 5 FPS — absolute minimum
92        };
93
94        // Background phase always triggers negotiation so agents can throttle down.
95        if context.phase == ExecutionPhase::Background {
96            report.needs_negotiation = true;
97            report
98                .alerts
99                .push("Phase: Background — reducing all agents to minimum.".into());
100            return report;
101        }
102
103        // ── 2. Thermal Analysis ──────────────────────────────────────────
104        match context.hardware.thermal {
105            ThermalStatus::Critical => {
106                log::warn!("Heuristic: CRITICAL thermal state — emergency budget reduction.");
107                report.needs_negotiation = true;
108                report.suggested_latency_ms = f32::max(report.suggested_latency_ms, 50.0); // ~20 FPS cap
109                report
110                    .alerts
111                    .push("Thermal: CRITICAL — emergency load reduction.".into());
112                pressure_count += 1;
113            }
114            ThermalStatus::Throttling => {
115                log::warn!("Heuristic: Device is throttling. Recommending load reduction.");
116                report.needs_negotiation = true;
117                report.suggested_latency_ms = f32::max(report.suggested_latency_ms, 33.33); // 30 FPS cap
118                report
119                    .alerts
120                    .push("Thermal: Throttling — capping to 30 FPS.".into());
121                pressure_count += 1;
122            }
123            ThermalStatus::Warm => {
124                log::debug!("Heuristic: Device is warm. Monitoring.");
125            }
126            ThermalStatus::Cool => {}
127        }
128
129        // ── 3. Battery Analysis ──────────────────────────────────────────
130        match context.hardware.battery {
131            BatteryLevel::Critical => {
132                log::warn!("Heuristic: Battery CRITICAL — mandatory power saving.");
133                report.needs_negotiation = true;
134                report.suggested_latency_ms = f32::max(report.suggested_latency_ms, 50.0); // ~20 FPS
135                report
136                    .alerts
137                    .push("Battery: CRITICAL — mandatory power saving.".into());
138                pressure_count += 1;
139            }
140            BatteryLevel::Low => {
141                log::info!("Heuristic: Battery low — reducing target to 30 FPS.");
142                report.needs_negotiation = true;
143                report.suggested_latency_ms = f32::max(report.suggested_latency_ms, 33.33);
144                report
145                    .alerts
146                    .push("Battery: Low — capping to 30 FPS.".into());
147            }
148            BatteryLevel::High | BatteryLevel::Mains => {}
149        }
150
151        // ── 4. Frame Time Analysis ───────────────────────────────────────
152        let frame_time_id = MetricId::new("renderer", "frame_time");
153        let avg_frame_time = store.get_average(&frame_time_id);
154        let has_enough_samples = store.get_sample_count(&frame_time_id) >= 10;
155
156        if has_enough_samples {
157            if avg_frame_time > FRAME_TIME_CRITICAL_THRESHOLD_MS {
158                log::warn!(
159                    "Heuristic: Frame time critically high ({:.2}ms). Forcing negotiation.",
160                    avg_frame_time
161                );
162                report.needs_negotiation = true;
163                report.alerts.push(format!(
164                    "FrameTime: CRITICAL — avg {:.2}ms exceeds {:.0}ms.",
165                    avg_frame_time, FRAME_TIME_CRITICAL_THRESHOLD_MS
166                ));
167                pressure_count += 1;
168            } else if avg_frame_time > FRAME_TIME_WARN_THRESHOLD_MS {
169                log::debug!(
170                    "Heuristic: Frame time elevated ({:.2}ms). Triggering negotiation.",
171                    avg_frame_time
172                );
173                report.needs_negotiation = true;
174                report.alerts.push(format!(
175                    "FrameTime: Elevated — avg {:.2}ms above {:.0}ms threshold.",
176                    avg_frame_time, FRAME_TIME_WARN_THRESHOLD_MS
177                ));
178            }
179
180            // ── 5. Stutter Detection (variance) ─────────────────────────
181            let variance = store.get_variance(&frame_time_id);
182            if variance > FRAME_TIME_VARIANCE_THRESHOLD {
183                log::info!(
184                    "Heuristic: High frame time variance ({:.2}). Stutter detected.",
185                    variance
186                );
187                report.needs_negotiation = true;
188                report.alerts.push(format!(
189                    "Stutter: Variance {:.2} exceeds threshold {:.1}.",
190                    variance, FRAME_TIME_VARIANCE_THRESHOLD
191                ));
192            }
193
194            // ── 6. Trend Analysis (preemptive) ──────────────────────────
195            let trend = store.get_trend(&frame_time_id);
196            if trend > FRAME_TIME_TREND_THRESHOLD {
197                log::info!(
198                    "Heuristic: Frame time rising ({:+.2}ms trend). Preemptive negotiation.",
199                    trend
200                );
201                report.needs_negotiation = true;
202                report.alerts.push(format!(
203                    "Trend: Frame time rising at {:+.2}ms/window.",
204                    trend
205                ));
206            }
207        }
208
209        // ── 7. CPU Pressure ──────────────────────────────────────────────
210        if context.hardware.cpu_load > CPU_LOAD_CRITICAL {
211            log::warn!(
212                "Heuristic: CPU load critical ({:.2}). Triggering negotiation.",
213                context.hardware.cpu_load
214            );
215            report.needs_negotiation = true;
216            report.alerts.push(format!(
217                "CPU: Load {:.0}% exceeds critical threshold.",
218                context.hardware.cpu_load * 100.0
219            ));
220            pressure_count += 1;
221        }
222
223        // ── 8. GPU Pressure ──────────────────────────────────────────────
224        if context.hardware.gpu_load > GPU_LOAD_CRITICAL {
225            log::warn!(
226                "Heuristic: GPU load critical ({:.2}). Triggering negotiation.",
227                context.hardware.gpu_load
228            );
229            report.needs_negotiation = true;
230            report.alerts.push(format!(
231                "GPU: Load {:.0}% exceeds critical threshold.",
232                context.hardware.gpu_load * 100.0
233            ));
234            pressure_count += 1;
235        } else if context.hardware.gpu_load > GPU_LOAD_WARN {
236            log::debug!(
237                "Heuristic: GPU load elevated ({:.2}).",
238                context.hardware.gpu_load
239            );
240            report.needs_negotiation = true;
241            report.alerts.push(format!(
242                "GPU: Load {:.0}% above warning threshold.",
243                context.hardware.gpu_load * 100.0
244            ));
245        }
246
247        // ── 9. Death Spiral Detection ────────────────────────────────────
248        // If 3+ independent pressure sources are active simultaneously,
249        // the engine is likely in a cascading failure ("death spiral").
250        if pressure_count >= 3 {
251            log::error!(
252                "Heuristic: DEATH SPIRAL detected ({} simultaneous pressure sources). \
253                 Emergency stop required.",
254                pressure_count
255            );
256            report.death_spiral_detected = true;
257            report.needs_negotiation = true;
258            report.alerts.push(format!(
259                "DEATH SPIRAL: {} simultaneous pressures.",
260                pressure_count
261            ));
262        }
263
264        report
265    }
266}
267
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::MetricStore;

    /// Context with every field left at its `Default` value.
    fn default_context() -> Context {
        Context::default()
    }

    /// Default context switched to the `Simulation` phase.
    fn simulation_context() -> Context {
        Context {
            phase: ExecutionPhase::Simulation,
            ..Default::default()
        }
    }

    /// Identifier of the metric read by the frame-time heuristics.
    fn frame_time_id() -> MetricId {
        MetricId::new("renderer", "frame_time")
    }

    /// Returns `true` if any alert in `report` contains `needle`.
    fn has_alert(report: &AnalysisReport, needle: &str) -> bool {
        report.alerts.iter().any(|alert| alert.contains(needle))
    }

    // ── Phase Heuristics ─────────────────────────────────────────────

    #[test]
    fn test_normal_simulation_no_negotiation() {
        let engine = HeuristicEngine;
        let ctx = simulation_context();
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(!report.needs_negotiation);
        assert!((report.suggested_latency_ms - 16.66).abs() < 0.1);
        assert!(!report.death_spiral_detected);
        assert!(report.alerts.is_empty());
    }

    #[test]
    fn test_background_phase_triggers_negotiation() {
        let engine = HeuristicEngine;
        let mut ctx = default_context();
        ctx.phase = ExecutionPhase::Background;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(report.suggested_latency_ms >= 200.0);
        assert!(has_alert(&report, "Background"));
    }

    #[test]
    fn test_background_phase_skips_other_heuristics() {
        let engine = HeuristicEngine;
        let mut ctx = default_context();
        ctx.phase = ExecutionPhase::Background;
        // These would add up to a death spiral in any other phase.
        ctx.hardware.thermal = ThermalStatus::Critical;
        ctx.hardware.cpu_load = 0.98;
        ctx.hardware.gpu_load = 0.97;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(!report.death_spiral_detected);
        assert_eq!(report.alerts.len(), 1);
    }

    #[test]
    fn test_menu_phase_targets_30fps() {
        let engine = HeuristicEngine;
        let mut ctx = default_context();
        ctx.phase = ExecutionPhase::Menu;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!((report.suggested_latency_ms - 33.33).abs() < 0.1);
    }

    // ── Thermal Heuristics ───────────────────────────────────────────

    #[test]
    fn test_thermal_throttling_triggers_negotiation() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.thermal = ThermalStatus::Throttling;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(report.suggested_latency_ms >= 33.33);
        assert!(has_alert(&report, "Thermal: Throttling"));
    }

    #[test]
    fn test_thermal_critical_emergency() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.thermal = ThermalStatus::Critical;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(report.suggested_latency_ms >= 50.0);
        assert!(has_alert(&report, "Thermal: CRITICAL"));
    }

    // ── Battery Heuristics ───────────────────────────────────────────

    #[test]
    fn test_battery_low_caps_fps() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.battery = BatteryLevel::Low;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(report.suggested_latency_ms >= 33.33);
        assert!(has_alert(&report, "Battery: Low"));
    }

    #[test]
    fn test_battery_critical_aggressive_cap() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.battery = BatteryLevel::Critical;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(report.suggested_latency_ms >= 50.0);
        assert!(has_alert(&report, "Battery: CRITICAL"));
    }

    // ── Frame Time Heuristics ────────────────────────────────────────

    #[test]
    fn test_high_frame_time_triggers_negotiation() {
        let engine = HeuristicEngine;
        let ctx = simulation_context();
        let mut store = MetricStore::new();

        let id = frame_time_id();
        for _ in 0..20 {
            store.push(id.clone(), 22.0); // 22ms > 18ms warn threshold
        }

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "FrameTime: Elevated"));
        assert!(!report.death_spiral_detected);
    }

    #[test]
    fn test_critical_frame_time_pressure() {
        let engine = HeuristicEngine;
        let ctx = simulation_context();
        let mut store = MetricStore::new();

        let id = frame_time_id();
        for _ in 0..20 {
            store.push(id.clone(), 30.0); // 30ms > 25ms critical threshold
        }

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "FrameTime: CRITICAL"));
        // A single pressure source must not be reported as a death spiral.
        assert!(!report.death_spiral_detected);
    }

    #[test]
    fn test_frame_time_ignored_without_enough_samples() {
        let engine = HeuristicEngine;
        let ctx = simulation_context();
        let mut store = MetricStore::new();

        let id = frame_time_id();
        for _ in 0..5 {
            store.push(id.clone(), 30.0); // Critical value, but fewer than 10 samples
        }

        let report = engine.analyze(&ctx, &store);
        assert!(!report.needs_negotiation);
        assert!(report.alerts.is_empty());
    }

    // ── Stutter Detection ────────────────────────────────────────────

    #[test]
    fn test_high_variance_stutter_detection() {
        let engine = HeuristicEngine;
        let ctx = simulation_context();
        let mut store = MetricStore::new();

        let id = frame_time_id();
        // Alternating between 5ms and 30ms = extreme stutter
        for i in 0..20 {
            store.push(id.clone(), if i % 2 == 0 { 5.0 } else { 30.0 });
        }

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "Variance"));
    }

    // ── CPU Pressure ─────────────────────────────────────────────────

    #[test]
    fn test_cpu_pressure_triggers_negotiation() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.cpu_load = 0.96;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "CPU"));
        assert!(!report.death_spiral_detected);
    }

    // ── GPU Pressure ─────────────────────────────────────────────────

    #[test]
    fn test_gpu_pressure_triggers_negotiation() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.gpu_load = 0.96;
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "GPU"));
        assert!(has_alert(&report, "exceeds critical threshold"));
    }

    #[test]
    fn test_gpu_warning_triggers_negotiation() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.gpu_load = 0.92; // Above warn (0.90), below critical (0.95)
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "above warning threshold"));
        assert!(!has_alert(&report, "exceeds critical threshold"));
    }

    // ── Death Spiral ─────────────────────────────────────────────────

    #[test]
    fn test_death_spiral_detection() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.thermal = ThermalStatus::Critical; // +1 pressure
        ctx.hardware.cpu_load = 0.98; // +1 pressure
        ctx.hardware.gpu_load = 0.97; // +1 pressure
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.death_spiral_detected);
        assert!(report.needs_negotiation);
        assert!(has_alert(&report, "DEATH SPIRAL"));
    }

    #[test]
    fn test_no_death_spiral_with_single_pressure() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.thermal = ThermalStatus::Throttling; // Only 1 pressure
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(!report.death_spiral_detected);
    }

    #[test]
    fn test_no_death_spiral_with_two_pressures() {
        let engine = HeuristicEngine;
        let mut ctx = simulation_context();
        ctx.hardware.thermal = ThermalStatus::Critical; // +1 pressure
        ctx.hardware.cpu_load = 0.98; // +1 pressure — one short of the limit
        let store = MetricStore::new();

        let report = engine.analyze(&ctx, &store);
        assert!(report.needs_negotiation);
        assert!(!report.death_spiral_detected);
        assert!(!has_alert(&report, "DEATH SPIRAL"));
    }
}