khora_core/renderer/forward_plus.rs
1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Defines data structures for Forward+ (Tiled Forward) rendering.
16//!
17//! Forward+ is an advanced rendering technique that optimizes multi-light
18//! scenarios by dividing the screen into tiles and pre-computing which lights
19//! affect each tile using a compute shader pass.
20//!
21//! # SAA Integration
22//!
23//! The `ForwardPlusLane` is a **strategy** of the `RenderAgent` ISA. The agent
24//! can select between `LitForwardLane` and `ForwardPlusLane` based on:
25//! - Scene light count (Forward+ typically wins when > 20 lights)
26//! - GORNA budget allocation
27//!
28//! # Performance Characteristics
29//!
30//! - **Complexity**: O(meshes × lights_per_tile) vs O(meshes × lights) for Forward
31//! - **Overhead**: Fixed compute pass cost for light culling (~0.5ms)
32//! - **Memory**: Light grid and index buffers scale with screen resolution
33
34use bytemuck::{Pod, Zeroable};
35
/// Edge length of the square screen tiles used by Forward+ light culling.
///
/// The choice is a trade-off: smaller tiles cull lights more precisely but
/// increase compute overhead, while larger tiles are cheaper to process but
/// may end up with more lights per tile.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum TileSize {
    /// Tiles of 16×16 pixels: the default, with the more precise culling.
    #[default]
    X16,
    /// Tiles of 32×32 pixels: coarser culling for less overhead.
    X32,
}

impl TileSize {
    /// Edge length of a single tile, in pixels.
    #[inline]
    pub const fn pixels(&self) -> u32 {
        match *self {
            Self::X16 => 16,
            Self::X32 => 32,
        }
    }

    /// Number of tiles required to span `screen_size` pixels along one axis.
    ///
    /// The division rounds up, so a trailing partial tile counts as a whole
    /// tile; a `screen_size` of zero yields zero tiles.
    #[inline]
    pub const fn tile_count(&self, screen_size: u32) -> u32 {
        let tile_edge = self.pixels();
        screen_size.div_ceil(tile_edge)
    }
}
65
66/// Configuration for Forward+ tiled rendering.
67///
68/// This configuration is **adaptive** and can be adjusted by GORNA or the
69/// `RenderAgent` based on runtime conditions.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct ForwardPlusTileConfig {
72 /// The tile size for light culling.
73 pub tile_size: TileSize,
74 /// Maximum number of lights per tile.
75 /// Higher values handle dense light clusters but use more memory.
76 pub max_lights_per_tile: u32,
77 /// Whether to use a depth pre-pass to improve light culling.
78 /// Adds ~0.5ms but improves culling by 20-30% for scenes with depth variation.
79 pub use_depth_prepass: bool,
80}
81
82impl Default for ForwardPlusTileConfig {
83 fn default() -> Self {
84 Self {
85 tile_size: TileSize::X16,
86 max_lights_per_tile: 128,
87 use_depth_prepass: false,
88 }
89 }
90}
91
92impl ForwardPlusTileConfig {
93 /// Creates a new configuration with default values.
94 pub const fn new() -> Self {
95 Self {
96 tile_size: TileSize::X16,
97 max_lights_per_tile: 128,
98 use_depth_prepass: false,
99 }
100 }
101
102 /// Creates a configuration optimized for many lights.
103 pub const fn high_light_count() -> Self {
104 Self {
105 tile_size: TileSize::X16,
106 max_lights_per_tile: 256,
107 use_depth_prepass: true,
108 }
109 }
110
111 /// Creates a configuration optimized for low overhead.
112 pub const fn low_overhead() -> Self {
113 Self {
114 tile_size: TileSize::X32,
115 max_lights_per_tile: 64,
116 use_depth_prepass: false,
117 }
118 }
119
120 /// Calculates the tile grid dimensions for a given screen size.
121 #[inline]
122 pub const fn tile_dimensions(&self, screen_width: u32, screen_height: u32) -> (u32, u32) {
123 (
124 self.tile_size.tile_count(screen_width),
125 self.tile_size.tile_count(screen_height),
126 )
127 }
128
129 /// Calculates the total number of tiles for a given screen size.
130 #[inline]
131 pub fn total_tiles(&self, screen_width: u32, screen_height: u32) -> u32 {
132 let (tiles_x, tiles_y) = self.tile_dimensions(screen_width, screen_height);
133 tiles_x * tiles_y
134 }
135
136 /// Calculates the required light index buffer size in bytes.
137 pub fn light_index_buffer_size(&self, screen_width: u32, screen_height: u32) -> u64 {
138 let total_tiles = self.total_tiles(screen_width, screen_height) as u64;
139 total_tiles * self.max_lights_per_tile as u64 * std::mem::size_of::<u32>() as u64
140 }
141
142 /// Calculates the required light grid buffer size in bytes.
143 /// Each tile stores (offset: u32, count: u32).
144 pub fn light_grid_buffer_size(&self, screen_width: u32, screen_height: u32) -> u64 {
145 let total_tiles = self.total_tiles(screen_width, screen_height) as u64;
146 total_tiles * 2 * std::mem::size_of::<u32>() as u64
147 }
148}
149
150/// GPU-friendly representation of a light source for compute shader processing.
151///
152/// This structure is designed for efficient GPU transfer and compute shader access.
153/// It uses a unified layout that can represent all light types.
154///
155/// # Memory Layout
156///
157/// Total size: 72 bytes (18 × 4-byte fields), padded from 64 after shadow fields were added.
158#[repr(C)]
159#[derive(Debug, Clone, Copy, PartialEq, Pod, Zeroable)]
160pub struct GpuLight {
161 /// Light position in world space (ignored for directional lights).
162 pub position: [f32; 3],
163 /// Maximum range of the light (point/spot lights only).
164 pub range: f32,
165
166 /// Light color (RGB, linear space).
167 pub color: [f32; 3],
168 /// Light intensity multiplier.
169 pub intensity: f32,
170
171 /// Light direction (normalized, for directional/spot lights).
172 pub direction: [f32; 3],
173 /// Light type: 0 = directional, 1 = point, 2 = spot.
174 pub light_type: u32,
175
176 /// Cosine of inner cone angle (spot lights only).
177 pub inner_cone_cos: f32,
178 /// Cosine of outer cone angle (spot lights only).
179 pub outer_cone_cos: f32,
180
181 /// Index into the shadow texture array, or -1 if no shadow.
182 pub shadow_map_index: i32,
183 /// Shadow bias.
184 pub shadow_bias: f32,
185 /// Shadow normal bias.
186 pub shadow_normal_bias: f32,
187 /// Padding/Reserved.
188 pub _unused: f32,
189}
190
191impl GpuLight {
192 /// Light type constant for directional lights.
193 pub const TYPE_DIRECTIONAL: u32 = 0;
194 /// Light type constant for point lights.
195 pub const TYPE_POINT: u32 = 1;
196 /// Light type constant for spot lights.
197 pub const TYPE_SPOT: u32 = 2;
198
199 /// Creates a `GpuLight` from world-space position, direction, and light properties.
200 pub fn from_parts(
201 position: [f32; 3],
202 direction: [f32; 3],
203 ty: &super::light::LightType,
204 ) -> Self {
205 match ty {
206 super::light::LightType::Directional(l) => Self {
207 position: [0.0; 3],
208 range: 0.0,
209 color: [l.color.r, l.color.g, l.color.b],
210 intensity: l.intensity,
211 direction,
212 light_type: Self::TYPE_DIRECTIONAL,
213 inner_cone_cos: 0.0,
214 outer_cone_cos: 0.0,
215 shadow_map_index: -1,
216 shadow_bias: l.shadow_bias,
217 shadow_normal_bias: l.shadow_normal_bias,
218 _unused: 0.0,
219 },
220 super::light::LightType::Point(l) => Self {
221 position,
222 range: l.range,
223 color: [l.color.r, l.color.g, l.color.b],
224 intensity: l.intensity,
225 direction: [0.0; 3],
226 light_type: Self::TYPE_POINT,
227 inner_cone_cos: 0.0,
228 outer_cone_cos: 0.0,
229 shadow_map_index: -1,
230 shadow_bias: l.shadow_bias,
231 shadow_normal_bias: l.shadow_normal_bias,
232 _unused: 0.0,
233 },
234 super::light::LightType::Spot(l) => Self {
235 position,
236 range: l.range,
237 color: [l.color.r, l.color.g, l.color.b],
238 intensity: l.intensity,
239 direction,
240 light_type: Self::TYPE_SPOT,
241 inner_cone_cos: l.inner_cone_angle.cos(),
242 outer_cone_cos: l.outer_cone_angle.cos(),
243 shadow_map_index: -1,
244 shadow_bias: l.shadow_bias,
245 shadow_normal_bias: l.shadow_normal_bias,
246 _unused: 0.0,
247 },
248 }
249 }
250}
251
252impl Default for GpuLight {
253 fn default() -> Self {
254 Self {
255 position: [0.0, 0.0, 0.0],
256 range: 10.0,
257 color: [1.0, 1.0, 1.0],
258 intensity: 1.0,
259 direction: [0.0, -1.0, 0.0],
260 light_type: Self::TYPE_POINT,
261 inner_cone_cos: 0.9, // ~25 degrees
262 outer_cone_cos: 0.7, // ~45 degrees
263 shadow_map_index: -1,
264 shadow_bias: 0.01,
265 shadow_normal_bias: 0.0,
266 _unused: 0.0,
267 }
268 }
269}
270
271/// Uniforms for the light culling compute shader.
272///
273/// This structure is uploaded to GPU each frame with the current camera
274/// and screen state for the light culling pass.
275#[repr(C)]
276#[derive(Debug, Clone, Copy, PartialEq, Pod, Zeroable)]
277pub struct LightCullingUniforms {
278 /// View-projection matrix for frustum calculations.
279 pub view_projection: [[f32; 4]; 4],
280 /// Inverse projection matrix for reconstructing view-space positions.
281 pub inverse_projection: [[f32; 4]; 4],
282
283 /// Screen dimensions in pixels (width, height).
284 pub screen_dimensions: [f32; 2],
285 /// Tile grid dimensions (tiles_x, tiles_y).
286 pub tile_count: [u32; 2],
287
288 /// Number of active lights in the light buffer.
289 pub num_lights: u32,
290 /// Tile size in pixels.
291 pub tile_size: u32,
292 /// Index of the first directional light's shadow map.
293 pub shadow_atlas_index: i32,
294 /// Padding for 16-byte alignment.
295 pub _padding: [f32; 1],
296}
297
298impl Default for LightCullingUniforms {
299 fn default() -> Self {
300 Self {
301 view_projection: [[0.0; 4]; 4],
302 inverse_projection: [[0.0; 4]; 4],
303 screen_dimensions: [1920.0, 1080.0],
304 tile_count: [120, 68], // 1920/16, 1080/16 rounded up
305 num_lights: 0,
306 tile_size: 16,
307 shadow_atlas_index: -1,
308 _padding: [0.0; 1],
309 }
310 }
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    /// Each variant reports its tile edge length in pixels.
    #[test]
    fn test_tile_size_pixels() {
        assert_eq!(TileSize::X16.pixels(), 16);
        assert_eq!(TileSize::X32.pixels(), 32);
    }

    /// Tile counts round up so partial tiles are included.
    #[test]
    fn test_tile_count_calculation() {
        // 1920 / 16 = 120 tiles exactly
        assert_eq!(TileSize::X16.tile_count(1920), 120);
        // 1080 / 16 = 67.5 -> 68 tiles (rounded up)
        assert_eq!(TileSize::X16.tile_count(1080), 68);
        // 1920 / 32 = 60 tiles exactly
        assert_eq!(TileSize::X32.tile_count(1920), 60);
    }

    /// The default configuration uses 16-pixel tiles, 128 lights per tile and
    /// no depth pre-pass.
    #[test]
    fn test_forward_plus_tile_config_default() {
        let config = ForwardPlusTileConfig::default();
        assert_eq!(config.tile_size, TileSize::X16);
        assert_eq!(config.max_lights_per_tile, 128);
        assert!(!config.use_depth_prepass);
    }

    /// A 1920×1080 screen with 16-pixel tiles gives a 120×68 grid.
    #[test]
    fn test_tile_dimensions() {
        let config = ForwardPlusTileConfig::default();
        let (tiles_x, tiles_y) = config.tile_dimensions(1920, 1080);
        assert_eq!(tiles_x, 120);
        assert_eq!(tiles_y, 68);
    }

    /// Pins both the size and the alignment of `GpuLight`.
    #[test]
    fn test_gpu_light_size_and_alignment() {
        // GpuLight should be exactly 72 bytes (18 x 4-byte fields).
        // Updated from 64 after the shadow fields (shadow_map_index,
        // shadow_bias, shadow_normal_bias, _unused) were added.
        assert_eq!(std::mem::size_of::<GpuLight>(), 72);
        // Every field is a 4-byte scalar or an array of them, so the
        // `#[repr(C)]` struct is 4-byte aligned.
        assert_eq!(std::mem::align_of::<GpuLight>(), 4);
    }

    /// The uniform block size must be a multiple of 16 bytes.
    #[test]
    fn test_light_culling_uniforms_size() {
        let size = std::mem::size_of::<LightCullingUniforms>();
        assert_eq!(
            size % 16,
            0,
            "LightCullingUniforms size should be a multiple of 16 bytes"
        );
    }

    /// The default light is a white point light.
    #[test]
    fn test_gpu_light_default() {
        let light = GpuLight::default();
        assert_eq!(light.light_type, GpuLight::TYPE_POINT);
        assert_eq!(light.color, [1.0, 1.0, 1.0]);
    }

    /// Buffer sizes follow from the tile count of the default configuration.
    #[test]
    fn test_buffer_size_calculation() {
        let config = ForwardPlusTileConfig::default();
        // 120 * 68 = 8160 tiles
        // Light index buffer: 8160 * 128 * 4 = 4,177,920 bytes
        let index_size = config.light_index_buffer_size(1920, 1080);
        assert_eq!(index_size, 8160 * 128 * 4);

        // Light grid buffer: 8160 * 2 * 4 = 65,280 bytes
        let grid_size = config.light_grid_buffer_size(1920, 1080);
        assert_eq!(grid_size, 8160 * 2 * 4);
    }
}