khora_core/telemetry/
monitoring.rs

1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Provides traits and data structures for active resource monitoring.
16//!
17//! "Monitoring" is distinct from "metrics" in that it involves actively polling
18//! a system resource (like VRAM or a GPU) to get a snapshot of its state, whereas
19//! metrics are typically discrete, event-based measurements.
20
21use std::borrow::Cow;
22use std::fmt::Debug;
23
24use crate::renderer::GpuHook;
25
26/// The core trait for a resource monitor.
27///
28/// A `ResourceMonitor` is a stateful object, typically living in the `khora-infra`
29/// crate, that knows how to query a specific system resource. The `khora-telemetry`
30/// service will hold a collection of these monitors and periodically call `update`
31/// and `get_usage_report` on them.
32pub trait ResourceMonitor: Send + Sync + Debug + 'static {
33    /// Returns a unique, human-readable identifier for this monitor instance.
34    fn monitor_id(&self) -> Cow<'static, str>;
35
36    /// Returns the general type of resource being monitored.
37    fn resource_type(&self) -> MonitoredResourceType;
38
39    /// Returns a snapshot of the current usage data for the monitored resource.
40    fn get_usage_report(&self) -> ResourceUsageReport;
41
42    /// Allows downcasting to a concrete `ResourceMonitor` type.
43    fn as_any(&self) -> &dyn std::any::Any;
44
45    /// Triggers the monitor to update its internal state by polling the resource.
46    /// This default implementation does nothing, for monitors that update passively.
47    fn update(&self) {
48        // Default: no-op
49    }
50}
51
/// The categories of resource a monitor can report on.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum MonitoredResourceType {
    /// Memory located on the GPU (video RAM).
    Vram,
    /// The machine's main system memory.
    SystemRam,
    /// Overall GPU performance, such as execution timing.
    Gpu,
}
62
/// Resource-agnostic usage snapshot, with every quantity expressed in bytes.
///
/// The derived [`Default`] yields zero bytes in use and `None` for both
/// optional figures.
#[derive(Clone, Copy, Debug, Default)]
pub struct ResourceUsageReport {
    /// Bytes in use at the moment the report was taken.
    pub current_bytes: u64,
    /// Highest number of bytes in use at any one time; `None` when the peak is
    /// not tracked.
    pub peak_bytes: Option<u64>,
    /// Total size of the resource in bytes; `None` when the capacity is unknown.
    pub total_capacity_bytes: Option<u64>,
}
73
/// Per-frame GPU performance timings.
///
/// The derived [`Default`] produces a report for frame `0` in which no timing
/// has been recorded yet.
#[derive(Clone, Copy, Debug, Default)]
pub struct GpuReport {
    /// Number of the frame these timings belong to.
    pub frame_number: u64,
    /// Raw timestamp query result for each GPU hook, in microseconds.
    ///
    /// Slots are indexed by the `GpuHook` variant cast to `usize`, so their
    /// order follows the `GpuHook` enum definition. A slot is `None` when no
    /// timestamp is available for that hook.
    pub hook_timings_us: [Option<u32>; 4],
    /// CPU time spent preparing the frame, in microseconds.
    pub cpu_preparation_time_us: Option<u32>,
    /// CPU time spent submitting the frame's commands, in microseconds.
    pub cpu_submission_time_us: Option<u32>,
}
87
/// In-depth snapshot of system memory (RAM) usage and allocation behaviour.
///
/// The derived [`Default`] sets every counter and ratio to zero.
#[derive(Clone, Copy, Debug, Default)]
pub struct MemoryReport {
    /// Bytes of system RAM the application is using right now.
    pub current_usage_bytes: usize,
    /// Largest number of bytes of system RAM in use at any one time.
    pub peak_usage_bytes: usize,
    /// Bytes allocated since the previous monitor update.
    pub allocation_delta_bytes: usize,
    /// How many times memory usage has been sampled so far.
    pub sample_count: u64,

    // Extended statistics (often sourced from a tracking allocator).
    /// Count of allocation calls since the start.
    pub total_allocations: u64,
    /// Count of deallocation calls since the start.
    pub total_deallocations: u64,
    /// Count of reallocation calls since the start.
    pub total_reallocations: u64,
    /// Running total of every byte ever allocated.
    pub bytes_allocated_lifetime: u64,
    /// Running total of every byte ever deallocated.
    pub bytes_deallocated_lifetime: u64,
    /// How many allocations were classified as "large" (e.g., >= 1MB).
    pub large_allocations: u64,
    /// Combined size in bytes of all "large" allocations.
    pub large_allocation_bytes: u64,
    /// How many allocations were classified as "small" (e.g., < 1KB).
    pub small_allocations: u64,
    /// Combined size in bytes of all "small" allocations.
    pub small_allocation_bytes: u64,
    /// Computed ratio hinting at how fragmented memory may be.
    pub fragmentation_ratio: f64,
    /// Computed ratio of memory still in use to the total ever allocated.
    pub allocation_efficiency: f64,
    /// Computed mean size, in bytes, of a single allocation.
    pub average_allocation_size: f64,
}
126
/// Snapshot of video memory (VRAM) usage, expressed in bytes.
///
/// The derived [`Default`] reports zero bytes in use with no peak or capacity
/// information.
#[derive(Clone, Copy, Debug, Default)]
pub struct VramReport {
    /// Bytes of VRAM in use right now.
    pub current_usage_bytes: usize,
    /// Highest number of VRAM bytes ever in use; `None` when the peak is not
    /// tracked.
    pub peak_usage_bytes: Option<usize>,
    /// Physical VRAM capacity in bytes; `None` when it is not available.
    pub total_capacity_bytes: Option<usize>,
}
137
/// Source of VRAM usage statistics, reported in megabytes.
///
/// Typically implemented by a `GraphicsDevice` or by a dedicated monitor living
/// in `khora-infra`. The `Send + Sync` bounds let a provider be shared across
/// threads.
pub trait VramProvider: Send + Sync {
    /// Amount of VRAM currently in use, in megabytes.
    fn get_vram_usage_mb(&self) -> f32;
    /// Peak amount of VRAM in use, in megabytes.
    fn get_vram_peak_mb(&self) -> f32;
    /// Total VRAM capacity in megabytes, or `None` when it is not available.
    fn get_vram_capacity_mb(&self) -> Option<f32>;
}
148
149impl MemoryReport {
150    /// Returns the current memory usage in megabytes (MB).
151    pub fn current_usage_mb(&self) -> f64 {
152        self.current_usage_bytes as f64 / (1024.0 * 1024.0)
153    }
154
155    /// Returns the peak memory usage in megabytes (MB).
156    pub fn peak_usage_mb(&self) -> f64 {
157        self.peak_usage_bytes as f64 / (1024.0 * 1024.0)
158    }
159
160    /// Returns the change in allocated bytes since the last update, in kilobytes (KB).
161    pub fn allocation_delta_kb(&self) -> f64 {
162        self.allocation_delta_bytes as f64 / 1024.0
163    }
164
165    /// Calculates the memory turnover rate (allocations + deallocations per sample).
166    pub fn memory_turnover_rate(&self) -> f64 {
167        if self.sample_count > 0 {
168            (self.total_allocations + self.total_deallocations) as f64 / self.sample_count as f64
169        } else {
170            0.0
171        }
172    }
173
174    /// Calculates the percentage of total allocations that were classified as "large".
175    pub fn large_allocation_percentage(&self) -> f64 {
176        if self.total_allocations > 0 {
177            (self.large_allocations as f64 / self.total_allocations as f64) * 100.0
178        } else {
179            0.0
180        }
181    }
182
183    /// Returns the memory allocation efficiency as a percentage.
184    pub fn memory_utilization_efficiency(&self) -> f64 {
185        self.allocation_efficiency * 100.0
186    }
187
188    /// Returns the average allocation size in megabytes (MB).
189    pub fn average_allocation_size_mb(&self) -> f64 {
190        self.average_allocation_size / (1024.0 * 1024.0)
191    }
192
193    /// Returns a descriptive string for the current fragmentation status.
194    pub fn fragmentation_status(&self) -> &'static str {
195        match self.fragmentation_ratio {
196            r if r < 0.1 => "Low",
197            r if r < 0.3 => "Moderate",
198            r if r < 0.6 => "High",
199            _ => "Critical",
200        }
201    }
202}
203
204impl GpuReport {
205    /// Gets the timing for a specific GPU performance hook, in microseconds.
206    pub fn get_hook_timing_us(&self, hook: GpuHook) -> Option<u32> {
207        self.hook_timings_us[hook as usize]
208    }
209
210    /// Calculates the duration of the main render pass, in microseconds.
211    pub fn main_pass_duration_us(&self) -> Option<u32> {
212        match (
213            self.get_hook_timing_us(GpuHook::MainPassBegin),
214            self.get_hook_timing_us(GpuHook::MainPassEnd),
215        ) {
216            (Some(begin), Some(end)) if end >= begin => Some(end - begin),
217            _ => None,
218        }
219    }
220
221    /// Calculates the total GPU duration for the frame, in microseconds.
222    pub fn frame_total_duration_us(&self) -> Option<u32> {
223        match (
224            self.get_hook_timing_us(GpuHook::FrameStart),
225            self.get_hook_timing_us(GpuHook::FrameEnd),
226        ) {
227            (Some(start), Some(end)) if end >= start => Some(end - start),
228            _ => None,
229        }
230    }
231
232    /// Sets the timing for a specific hook, in microseconds.
233    pub fn set_hook_timing_us(&mut self, hook: GpuHook, timing_us: Option<u32>) {
234        self.hook_timings_us[hook as usize] = timing_us;
235    }
236}