khora_telemetry/service.rs
1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Service for managing telemetry data and resource monitoring.
16
17use crate::metrics::registry::MetricsRegistry;
18use crate::monitoring::registry::MonitorRegistry;
19use crossbeam_channel::Sender;
20use khora_core::telemetry::event::TelemetryEvent;
21use std::time::{Duration, Instant};
22
23/// Central service for collecting and managing engine-wide telemetry.
24///
25/// The `TelemetryService` acts as a central registry for all metrics and
26/// resource monitors. It periodically triggers monitor updates and,
27/// if configured, forwards the results to the DCC for higher-level analysis.
28#[derive(Debug)]
29pub struct TelemetryService {
30 metrics: MetricsRegistry,
31 monitors: MonitorRegistry,
32 last_update: Instant,
33 update_interval: Duration,
34 /// Optional sender to forward events to the DCC.
35 dcc_sender: Option<Sender<TelemetryEvent>>,
36}
37
38impl TelemetryService {
39 /// Creates a new `TelemetryService` with the specified update interval.
40 pub fn new(update_interval: Duration) -> Self {
41 Self {
42 metrics: MetricsRegistry::new(),
43 monitors: MonitorRegistry::new(),
44 last_update: Instant::now(),
45 update_interval,
46 dcc_sender: None,
47 }
48 }
49
50 /// Sets the sender for forwarding events to the DCC.
51 pub fn with_dcc_sender(mut self, sender: Sender<TelemetryEvent>) -> Self {
52 self.dcc_sender = Some(sender);
53 self
54 }
55
56 /// Updates all registered monitors if the update interval has passed.
57 ///
58 /// Returns `true` if monitors were updated, `false` otherwise.
59 pub fn tick(&mut self) -> bool {
60 if self.last_update.elapsed() >= self.update_interval {
61 log::trace!("Updating all resource monitors...");
62 self.monitors.update_all();
63
64 // Forward monitor reports to DCC if sender is configured.
65 if let Some(sender) = &self.dcc_sender {
66 // 1. Forward monitor reports.
67 for monitor in self.monitors.get_all_monitors() {
68 // Standard ResourceUsageReport (bytes)
69 let report = monitor.get_usage_report();
70 let _ = sender.send(TelemetryEvent::ResourceReport(report));
71
72 // GPU Performance Report (timings)
73 if let Some(gpu_report) = monitor.get_gpu_report() {
74 let _ = sender.send(TelemetryEvent::GpuReport(gpu_report));
75 }
76
77 // Hardware Health Report (thermal, load)
78 if let Some(hw_report) = monitor.get_hardware_report() {
79 let _ = sender.send(TelemetryEvent::HardwareReport(hw_report));
80 }
81
82 // Discrete Metrics
83 for (id, value) in monitor.get_metrics() {
84 let _ = sender.send(TelemetryEvent::MetricUpdate { id, value });
85 }
86 }
87
88 // 2. Forward metric updates.
89 for metric in self.metrics.backend().list_all_metrics() {
90 let _ = sender.send(TelemetryEvent::MetricUpdate {
91 id: metric.metadata.id,
92 value: metric.value,
93 });
94 }
95 }
96
97 self.last_update = Instant::now();
98 true
99 } else {
100 false
101 }
102 }
103
104 /// Returns a reference to the metrics registry.
105 pub fn metrics_registry(&self) -> &MetricsRegistry {
106 &self.metrics
107 }
108
109 /// Returns a reference to the monitor registry.
110 pub fn monitor_registry(&self) -> &MonitorRegistry {
111 &self.monitors
112 }
113}
114
115impl Default for TelemetryService {
116 fn default() -> Self {
117 Self::new(Duration::from_secs(1))
118 }
119}