khora_core/telemetry/monitoring.rs
1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Provides traits and data structures for active resource monitoring.
16//!
17//! "Monitoring" is distinct from "metrics" in that it involves actively polling
18//! a system resource (like VRAM or a GPU) to get a snapshot of its state, whereas
19//! metrics are typically discrete, event-based measurements.
20
21use std::borrow::Cow;
22use std::fmt::Debug;
23
24use crate::renderer::GpuHook;
25
26/// The core trait for a resource monitor.
27///
28/// A `ResourceMonitor` is a stateful object, typically living in the `khora-infra`
29/// crate, that knows how to query a specific system resource. The `khora-telemetry`
30/// service will hold a collection of these monitors and periodically call `update`
31/// and `get_usage_report` on them.
32pub trait ResourceMonitor: Send + Sync + Debug + 'static {
33 /// Returns a unique, human-readable identifier for this monitor instance.
34 fn monitor_id(&self) -> Cow<'static, str>;
35
36 /// Returns the general type of resource being monitored.
37 fn resource_type(&self) -> MonitoredResourceType;
38
39 /// Returns a snapshot of the current usage data for the monitored resource.
40 fn get_usage_report(&self) -> ResourceUsageReport;
41
42 /// Allows downcasting to a concrete `ResourceMonitor` type.
43 fn as_any(&self) -> &dyn std::any::Any;
44
45 /// Triggers the monitor to update its internal state by polling the resource.
46 /// This default implementation does nothing, for monitors that update passively.
47 fn update(&self) {
48 // Default: no-op
49 }
50}
51
/// The categories of resource that a monitor can report on.
///
/// The type is a plain, field-less enum: it is `Copy`, comparable for equality
/// and hashable, so it can be used directly as a map or set key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum MonitoredResourceType {
    /// Video memory (VRAM) on a GPU.
    Vram,
    /// Main system memory (RAM).
    SystemRam,
    /// General GPU performance, such as execution timing.
    Gpu,
}
62
/// A generic, resource-agnostic usage snapshot, expressed in bytes.
///
/// The derived `Default` yields a report with zero bytes in use and no peak
/// or capacity information.
#[derive(Clone, Copy, Debug, Default)]
pub struct ResourceUsageReport {
    /// Bytes currently in use.
    pub current_bytes: u64,
    /// Highest number of bytes ever in use at the same time, or `None` when
    /// the peak is not tracked.
    pub peak_bytes: Option<u64>,
    /// Total capacity of the resource in bytes, or `None` when it is unknown.
    pub total_capacity_bytes: Option<u64>,
}
73
/// GPU performance timings collected for a single frame.
///
/// The derived `Default` produces a report for frame `0` in which every
/// timing is `None`.
#[derive(Clone, Copy, Debug, Default)]
pub struct GpuReport {
    /// Number of the frame this report describes.
    pub frame_number: u64,
    /// Raw timestamp query results, in microseconds, one slot per GPU hook.
    ///
    /// Slots are ordered like the variants of the `GpuHook` enum; the accessor
    /// methods on this type index the array with `hook as usize`.
    pub hook_timings_us: [Option<u32>; 4],
    /// CPU time spent preparing the frame, in microseconds.
    pub cpu_preparation_time_us: Option<u32>,
    /// CPU time spent submitting the frame's commands, in microseconds.
    pub cpu_submission_time_us: Option<u32>,
}
87
/// Detailed snapshot of system memory (RAM) usage and allocation patterns.
///
/// The first group of fields describes the sampled usage itself; the second
/// group holds extended statistics that often come from a tracking allocator.
/// The derived `Default` sets every counter and ratio to zero.
#[derive(Clone, Copy, Debug, Default)]
pub struct MemoryReport {
    /// Bytes of system RAM the application is currently using.
    pub current_usage_bytes: usize,
    /// Highest number of bytes of system RAM ever used at the same time.
    pub peak_usage_bytes: usize,
    /// Bytes allocated since the previous monitor update.
    pub allocation_delta_bytes: usize,
    /// How many times memory usage has been sampled in total.
    pub sample_count: u64,

    // Extended statistics (often from a tracking allocator)
    /// Number of allocation calls since the start.
    pub total_allocations: u64,
    /// Number of deallocation calls since the start.
    pub total_deallocations: u64,
    /// Number of reallocation calls since the start.
    pub total_reallocations: u64,
    /// Running total of all bytes ever allocated.
    pub bytes_allocated_lifetime: u64,
    /// Running total of all bytes ever deallocated.
    pub bytes_deallocated_lifetime: u64,
    /// Count of allocations classified as "large" (e.g., >= 1MB).
    pub large_allocations: u64,
    /// Combined byte size of all "large" allocations.
    pub large_allocation_bytes: u64,
    /// Count of allocations classified as "small" (e.g., < 1KB).
    pub small_allocations: u64,
    /// Combined byte size of all "small" allocations.
    pub small_allocation_bytes: u64,
    /// Calculated ratio indicating potential memory fragmentation.
    pub fragmentation_ratio: f64,
    /// Calculated ratio of memory still in use versus the total ever allocated.
    pub allocation_efficiency: f64,
    /// Calculated average size of a single allocation, in bytes.
    pub average_allocation_size: f64,
}
126
/// Snapshot of Video RAM (VRAM) usage.
///
/// The derived `Default` reports zero bytes in use with no peak or capacity
/// information.
#[derive(Clone, Copy, Debug, Default)]
pub struct VramReport {
    /// Bytes of VRAM currently in use.
    pub current_usage_bytes: usize,
    /// Highest number of bytes of VRAM ever in use, or `None` when the peak is
    /// not tracked.
    pub peak_usage_bytes: Option<usize>,
    /// Total physical VRAM capacity in bytes, or `None` when it is unavailable.
    pub total_capacity_bytes: Option<usize>,
}
137
/// Implemented by types that can supply VRAM usage statistics.
///
/// Typical implementors are a `GraphicsDevice` or a dedicated monitor in
/// `khora-infra`. Implementors must be `Send + Sync`.
pub trait VramProvider: Send + Sync {
    /// Current VRAM usage, in megabytes.
    fn get_vram_usage_mb(&self) -> f32;

    /// Peak VRAM usage, in megabytes.
    fn get_vram_peak_mb(&self) -> f32;

    /// Total VRAM capacity in megabytes, or `None` when it is not available.
    fn get_vram_capacity_mb(&self) -> Option<f32>;
}
148
149impl MemoryReport {
150 /// Returns the current memory usage in megabytes (MB).
151 pub fn current_usage_mb(&self) -> f64 {
152 self.current_usage_bytes as f64 / (1024.0 * 1024.0)
153 }
154
155 /// Returns the peak memory usage in megabytes (MB).
156 pub fn peak_usage_mb(&self) -> f64 {
157 self.peak_usage_bytes as f64 / (1024.0 * 1024.0)
158 }
159
160 /// Returns the change in allocated bytes since the last update, in kilobytes (KB).
161 pub fn allocation_delta_kb(&self) -> f64 {
162 self.allocation_delta_bytes as f64 / 1024.0
163 }
164
165 /// Calculates the memory turnover rate (allocations + deallocations per sample).
166 pub fn memory_turnover_rate(&self) -> f64 {
167 if self.sample_count > 0 {
168 (self.total_allocations + self.total_deallocations) as f64 / self.sample_count as f64
169 } else {
170 0.0
171 }
172 }
173
174 /// Calculates the percentage of total allocations that were classified as "large".
175 pub fn large_allocation_percentage(&self) -> f64 {
176 if self.total_allocations > 0 {
177 (self.large_allocations as f64 / self.total_allocations as f64) * 100.0
178 } else {
179 0.0
180 }
181 }
182
183 /// Returns the memory allocation efficiency as a percentage.
184 pub fn memory_utilization_efficiency(&self) -> f64 {
185 self.allocation_efficiency * 100.0
186 }
187
188 /// Returns the average allocation size in megabytes (MB).
189 pub fn average_allocation_size_mb(&self) -> f64 {
190 self.average_allocation_size / (1024.0 * 1024.0)
191 }
192
193 /// Returns a descriptive string for the current fragmentation status.
194 pub fn fragmentation_status(&self) -> &'static str {
195 match self.fragmentation_ratio {
196 r if r < 0.1 => "Low",
197 r if r < 0.3 => "Moderate",
198 r if r < 0.6 => "High",
199 _ => "Critical",
200 }
201 }
202}
203
204impl GpuReport {
205 /// Gets the timing for a specific GPU performance hook, in microseconds.
206 pub fn get_hook_timing_us(&self, hook: GpuHook) -> Option<u32> {
207 self.hook_timings_us[hook as usize]
208 }
209
210 /// Calculates the duration of the main render pass, in microseconds.
211 pub fn main_pass_duration_us(&self) -> Option<u32> {
212 match (
213 self.get_hook_timing_us(GpuHook::MainPassBegin),
214 self.get_hook_timing_us(GpuHook::MainPassEnd),
215 ) {
216 (Some(begin), Some(end)) if end >= begin => Some(end - begin),
217 _ => None,
218 }
219 }
220
221 /// Calculates the total GPU duration for the frame, in microseconds.
222 pub fn frame_total_duration_us(&self) -> Option<u32> {
223 match (
224 self.get_hook_timing_us(GpuHook::FrameStart),
225 self.get_hook_timing_us(GpuHook::FrameEnd),
226 ) {
227 (Some(start), Some(end)) if end >= start => Some(end - start),
228 _ => None,
229 }
230 }
231
232 /// Sets the timing for a specific hook, in microseconds.
233 pub fn set_hook_timing_us(&mut self, hook: GpuHook, timing_us: Option<u32>) {
234 self.hook_timings_us[hook as usize] = timing_us;
235 }
236}