khora_telemetry/
service.rs

1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Service for managing telemetry data and resource monitoring.
16
17use crate::metrics::registry::MetricsRegistry;
18use crate::monitoring::registry::MonitorRegistry;
19use crossbeam_channel::Sender;
20use khora_core::telemetry::event::TelemetryEvent;
21use std::time::{Duration, Instant};
22
/// Central service for collecting and managing engine-wide telemetry.
///
/// The `TelemetryService` acts as a central registry for all metrics and
/// resource monitors. It periodically triggers monitor updates and,
/// if configured, forwards the results to the DCC for higher-level analysis.
#[derive(Debug)]
pub struct TelemetryService {
    /// Registry of metrics; its backend's metrics are forwarded as
    /// `MetricUpdate` events when a DCC sender is configured.
    metrics: MetricsRegistry,
    /// Registry of resource monitors refreshed on every effective tick.
    monitors: MonitorRegistry,
    /// Moment the monitors were last refreshed (initialised to construction time).
    last_update: Instant,
    /// Minimum time that must elapse between two monitor refreshes.
    update_interval: Duration,
    /// Optional sender to forward events to the DCC; `None` means nothing is forwarded.
    dcc_sender: Option<Sender<TelemetryEvent>>,
}
37
38impl TelemetryService {
39    /// Creates a new `TelemetryService` with the specified update interval.
40    pub fn new(update_interval: Duration) -> Self {
41        Self {
42            metrics: MetricsRegistry::new(),
43            monitors: MonitorRegistry::new(),
44            last_update: Instant::now(),
45            update_interval,
46            dcc_sender: None,
47        }
48    }
49
50    /// Sets the sender for forwarding events to the DCC.
51    pub fn with_dcc_sender(mut self, sender: Sender<TelemetryEvent>) -> Self {
52        self.dcc_sender = Some(sender);
53        self
54    }
55
56    /// Updates all registered monitors if the update interval has passed.
57    ///
58    /// Returns `true` if monitors were updated, `false` otherwise.
59    pub fn tick(&mut self) -> bool {
60        if self.last_update.elapsed() >= self.update_interval {
61            log::trace!("Updating all resource monitors...");
62            self.monitors.update_all();
63
64            // Forward monitor reports to DCC if sender is configured.
65            if let Some(sender) = &self.dcc_sender {
66                // 1. Forward monitor reports.
67                for monitor in self.monitors.get_all_monitors() {
68                    // Standard ResourceUsageReport (bytes)
69                    let report = monitor.get_usage_report();
70                    let _ = sender.send(TelemetryEvent::ResourceReport(report));
71
72                    // GPU Performance Report (timings)
73                    if let Some(gpu_report) = monitor.get_gpu_report() {
74                        let _ = sender.send(TelemetryEvent::GpuReport(gpu_report));
75                    }
76
77                    // Hardware Health Report (thermal, load)
78                    if let Some(hw_report) = monitor.get_hardware_report() {
79                        let _ = sender.send(TelemetryEvent::HardwareReport(hw_report));
80                    }
81
82                    // Discrete Metrics
83                    for (id, value) in monitor.get_metrics() {
84                        let _ = sender.send(TelemetryEvent::MetricUpdate { id, value });
85                    }
86                }
87
88                // 2. Forward metric updates.
89                for metric in self.metrics.backend().list_all_metrics() {
90                    let _ = sender.send(TelemetryEvent::MetricUpdate {
91                        id: metric.metadata.id,
92                        value: metric.value,
93                    });
94                }
95            }
96
97            self.last_update = Instant::now();
98            true
99        } else {
100            false
101        }
102    }
103
104    /// Returns a reference to the metrics registry.
105    pub fn metrics_registry(&self) -> &MetricsRegistry {
106        &self.metrics
107    }
108
109    /// Returns a reference to the monitor registry.
110    pub fn monitor_registry(&self) -> &MonitorRegistry {
111        &self.monitors
112    }
113}
114
115impl Default for TelemetryService {
116    fn default() -> Self {
117        Self::new(Duration::from_secs(1))
118    }
119}