khora_core/renderer/
forward_plus.rs

1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Defines data structures for Forward+ (Tiled Forward) rendering.
16//!
17//! Forward+ is an advanced rendering technique that optimizes multi-light
18//! scenarios by dividing the screen into tiles and pre-computing which lights
19//! affect each tile using a compute shader pass.
20//!
21//! # SAA Integration
22//!
23//! The `ForwardPlusLane` is a **strategy** of the `RenderAgent` ISA. The agent
24//! can select between `LitForwardLane` and `ForwardPlusLane` based on:
25//! - Scene light count (Forward+ typically wins when > 20 lights)
26//! - GORNA budget allocation
27//!
28//! # Performance Characteristics
29//!
30//! - **Complexity**: O(meshes × lights_per_tile) vs O(meshes × lights) for Forward
31//! - **Overhead**: Fixed compute pass cost for light culling (~0.5ms)
32//! - **Memory**: Light grid and index buffers scale with screen resolution
33
34use bytemuck::{Pod, Zeroable};
35
/// Edge length of the square screen-space tiles used by Forward+ light culling.
///
/// Picking a size is a trade-off: small tiles cull lights more precisely but
/// cost more compute work, whereas large tiles are cheaper to process but tend
/// to collect more lights each.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum TileSize {
    /// Tiles of 16×16 pixels: the standard choice, giving precise culling.
    #[default]
    X16,
    /// Tiles of 32×32 pixels: lower overhead in exchange for coarser culling.
    X32,
}
48
49impl TileSize {
50    /// Returns the tile size in pixels.
51    #[inline]
52    pub const fn pixels(&self) -> u32 {
53        match self {
54            TileSize::X16 => 16,
55            TileSize::X32 => 32,
56        }
57    }
58
59    /// Calculates the number of tiles needed for a given screen dimension.
60    #[inline]
61    pub const fn tile_count(&self, screen_size: u32) -> u32 {
62        screen_size.div_ceil(self.pixels())
63    }
64}
65
66/// Configuration for Forward+ tiled rendering.
67///
68/// This configuration is **adaptive** and can be adjusted by GORNA or the
69/// `RenderAgent` based on runtime conditions.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct ForwardPlusTileConfig {
72    /// The tile size for light culling.
73    pub tile_size: TileSize,
74    /// Maximum number of lights per tile.
75    /// Higher values handle dense light clusters but use more memory.
76    pub max_lights_per_tile: u32,
77    /// Whether to use a depth pre-pass to improve light culling.
78    /// Adds ~0.5ms but improves culling by 20-30% for scenes with depth variation.
79    pub use_depth_prepass: bool,
80}
81
82impl Default for ForwardPlusTileConfig {
83    fn default() -> Self {
84        Self {
85            tile_size: TileSize::X16,
86            max_lights_per_tile: 128,
87            use_depth_prepass: false,
88        }
89    }
90}
91
92impl ForwardPlusTileConfig {
93    /// Creates a new configuration with default values.
94    pub const fn new() -> Self {
95        Self {
96            tile_size: TileSize::X16,
97            max_lights_per_tile: 128,
98            use_depth_prepass: false,
99        }
100    }
101
102    /// Creates a configuration optimized for many lights.
103    pub const fn high_light_count() -> Self {
104        Self {
105            tile_size: TileSize::X16,
106            max_lights_per_tile: 256,
107            use_depth_prepass: true,
108        }
109    }
110
111    /// Creates a configuration optimized for low overhead.
112    pub const fn low_overhead() -> Self {
113        Self {
114            tile_size: TileSize::X32,
115            max_lights_per_tile: 64,
116            use_depth_prepass: false,
117        }
118    }
119
120    /// Calculates the tile grid dimensions for a given screen size.
121    #[inline]
122    pub const fn tile_dimensions(&self, screen_width: u32, screen_height: u32) -> (u32, u32) {
123        (
124            self.tile_size.tile_count(screen_width),
125            self.tile_size.tile_count(screen_height),
126        )
127    }
128
129    /// Calculates the total number of tiles for a given screen size.
130    #[inline]
131    pub fn total_tiles(&self, screen_width: u32, screen_height: u32) -> u32 {
132        let (tiles_x, tiles_y) = self.tile_dimensions(screen_width, screen_height);
133        tiles_x * tiles_y
134    }
135
136    /// Calculates the required light index buffer size in bytes.
137    pub fn light_index_buffer_size(&self, screen_width: u32, screen_height: u32) -> u64 {
138        let total_tiles = self.total_tiles(screen_width, screen_height) as u64;
139        total_tiles * self.max_lights_per_tile as u64 * std::mem::size_of::<u32>() as u64
140    }
141
142    /// Calculates the required light grid buffer size in bytes.
143    /// Each tile stores (offset: u32, count: u32).
144    pub fn light_grid_buffer_size(&self, screen_width: u32, screen_height: u32) -> u64 {
145        let total_tiles = self.total_tiles(screen_width, screen_height) as u64;
146        total_tiles * 2 * std::mem::size_of::<u32>() as u64
147    }
148}
149
150/// GPU-friendly representation of a light source for compute shader processing.
151///
152/// This structure is designed for efficient GPU transfer and compute shader access.
153/// It uses a unified layout that can represent all light types.
154///
155/// # Memory Layout
156///
157/// Total size: 72 bytes (18 × 4-byte fields), padded from 64 after shadow fields were added.
158#[repr(C)]
159#[derive(Debug, Clone, Copy, PartialEq, Pod, Zeroable)]
160pub struct GpuLight {
161    /// Light position in world space (ignored for directional lights).
162    pub position: [f32; 3],
163    /// Maximum range of the light (point/spot lights only).
164    pub range: f32,
165
166    /// Light color (RGB, linear space).
167    pub color: [f32; 3],
168    /// Light intensity multiplier.
169    pub intensity: f32,
170
171    /// Light direction (normalized, for directional/spot lights).
172    pub direction: [f32; 3],
173    /// Light type: 0 = directional, 1 = point, 2 = spot.
174    pub light_type: u32,
175
176    /// Cosine of inner cone angle (spot lights only).
177    pub inner_cone_cos: f32,
178    /// Cosine of outer cone angle (spot lights only).
179    pub outer_cone_cos: f32,
180
181    /// Index into the shadow texture array, or -1 if no shadow.
182    pub shadow_map_index: i32,
183    /// Shadow bias.
184    pub shadow_bias: f32,
185    /// Shadow normal bias.
186    pub shadow_normal_bias: f32,
187    /// Padding/Reserved.
188    pub _unused: f32,
189}
190
191impl GpuLight {
192    /// Light type constant for directional lights.
193    pub const TYPE_DIRECTIONAL: u32 = 0;
194    /// Light type constant for point lights.
195    pub const TYPE_POINT: u32 = 1;
196    /// Light type constant for spot lights.
197    pub const TYPE_SPOT: u32 = 2;
198
199    /// Creates a `GpuLight` from world-space position, direction, and light properties.
200    pub fn from_parts(
201        position: [f32; 3],
202        direction: [f32; 3],
203        ty: &super::light::LightType,
204    ) -> Self {
205        match ty {
206            super::light::LightType::Directional(l) => Self {
207                position: [0.0; 3],
208                range: 0.0,
209                color: [l.color.r, l.color.g, l.color.b],
210                intensity: l.intensity,
211                direction,
212                light_type: Self::TYPE_DIRECTIONAL,
213                inner_cone_cos: 0.0,
214                outer_cone_cos: 0.0,
215                shadow_map_index: -1,
216                shadow_bias: l.shadow_bias,
217                shadow_normal_bias: l.shadow_normal_bias,
218                _unused: 0.0,
219            },
220            super::light::LightType::Point(l) => Self {
221                position,
222                range: l.range,
223                color: [l.color.r, l.color.g, l.color.b],
224                intensity: l.intensity,
225                direction: [0.0; 3],
226                light_type: Self::TYPE_POINT,
227                inner_cone_cos: 0.0,
228                outer_cone_cos: 0.0,
229                shadow_map_index: -1,
230                shadow_bias: l.shadow_bias,
231                shadow_normal_bias: l.shadow_normal_bias,
232                _unused: 0.0,
233            },
234            super::light::LightType::Spot(l) => Self {
235                position,
236                range: l.range,
237                color: [l.color.r, l.color.g, l.color.b],
238                intensity: l.intensity,
239                direction,
240                light_type: Self::TYPE_SPOT,
241                inner_cone_cos: l.inner_cone_angle.cos(),
242                outer_cone_cos: l.outer_cone_angle.cos(),
243                shadow_map_index: -1,
244                shadow_bias: l.shadow_bias,
245                shadow_normal_bias: l.shadow_normal_bias,
246                _unused: 0.0,
247            },
248        }
249    }
250}
251
252impl Default for GpuLight {
253    fn default() -> Self {
254        Self {
255            position: [0.0, 0.0, 0.0],
256            range: 10.0,
257            color: [1.0, 1.0, 1.0],
258            intensity: 1.0,
259            direction: [0.0, -1.0, 0.0],
260            light_type: Self::TYPE_POINT,
261            inner_cone_cos: 0.9, // ~25 degrees
262            outer_cone_cos: 0.7, // ~45 degrees
263            shadow_map_index: -1,
264            shadow_bias: 0.01,
265            shadow_normal_bias: 0.0,
266            _unused: 0.0,
267        }
268    }
269}
270
271/// Uniforms for the light culling compute shader.
272///
273/// This structure is uploaded to GPU each frame with the current camera
274/// and screen state for the light culling pass.
275#[repr(C)]
276#[derive(Debug, Clone, Copy, PartialEq, Pod, Zeroable)]
277pub struct LightCullingUniforms {
278    /// View-projection matrix for frustum calculations.
279    pub view_projection: [[f32; 4]; 4],
280    /// Inverse projection matrix for reconstructing view-space positions.
281    pub inverse_projection: [[f32; 4]; 4],
282
283    /// Screen dimensions in pixels (width, height).
284    pub screen_dimensions: [f32; 2],
285    /// Tile grid dimensions (tiles_x, tiles_y).
286    pub tile_count: [u32; 2],
287
288    /// Number of active lights in the light buffer.
289    pub num_lights: u32,
290    /// Tile size in pixels.
291    pub tile_size: u32,
292    /// Index of the first directional light's shadow map.
293    pub shadow_atlas_index: i32,
294    /// Padding for 16-byte alignment.
295    pub _padding: [f32; 1],
296}
297
298impl Default for LightCullingUniforms {
299    fn default() -> Self {
300        Self {
301            view_projection: [[0.0; 4]; 4],
302            inverse_projection: [[0.0; 4]; 4],
303            screen_dimensions: [1920.0, 1080.0],
304            tile_count: [120, 68], // 1920/16, 1080/16 rounded up
305            num_lights: 0,
306            tile_size: 16,
307            shadow_atlas_index: -1,
308            _padding: [0.0; 1],
309        }
310    }
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tile_size_pixels() {
        assert_eq!(TileSize::X16.pixels(), 16);
        assert_eq!(TileSize::X32.pixels(), 32);
    }

    #[test]
    fn test_tile_count_calculation() {
        // 1920 / 16 = 120 tiles exactly
        assert_eq!(TileSize::X16.tile_count(1920), 120);
        // 1080 / 16 = 67.5 -> 68 tiles (rounded up)
        assert_eq!(TileSize::X16.tile_count(1080), 68);
        // 1920 / 32 = 60 tiles exactly
        assert_eq!(TileSize::X32.tile_count(1920), 60);
        // 1080 / 32 = 33.75 -> 34 tiles (rounded up)
        assert_eq!(TileSize::X32.tile_count(1080), 34);
    }

    #[test]
    fn test_tile_count_edge_cases() {
        for size in [TileSize::X16, TileSize::X32] {
            // An empty axis needs no tiles; any non-empty axis needs at least one.
            assert_eq!(size.tile_count(0), 0);
            assert_eq!(size.tile_count(1), 1);
            // Exactly one tile wide, then one pixel over.
            assert_eq!(size.tile_count(size.pixels()), 1);
            assert_eq!(size.tile_count(size.pixels() + 1), 2);
        }
    }

    #[test]
    fn test_forward_plus_tile_config_default() {
        let config = ForwardPlusTileConfig::default();
        assert_eq!(config.tile_size, TileSize::X16);
        assert_eq!(config.max_lights_per_tile, 128);
        assert!(!config.use_depth_prepass);
    }

    #[test]
    fn test_new_matches_default() {
        // `new()` and `Default` spell out the same values; keep them in sync.
        assert_eq!(ForwardPlusTileConfig::new(), ForwardPlusTileConfig::default());
    }

    #[test]
    fn test_presets() {
        let dense = ForwardPlusTileConfig::high_light_count();
        assert_eq!(dense.tile_size, TileSize::X16);
        assert_eq!(dense.max_lights_per_tile, 256);
        assert!(dense.use_depth_prepass);

        let cheap = ForwardPlusTileConfig::low_overhead();
        assert_eq!(cheap.tile_size, TileSize::X32);
        assert_eq!(cheap.max_lights_per_tile, 64);
        assert!(!cheap.use_depth_prepass);
    }

    #[test]
    fn test_tile_dimensions() {
        let config = ForwardPlusTileConfig::default();
        let (tiles_x, tiles_y) = config.tile_dimensions(1920, 1080);
        assert_eq!(tiles_x, 120);
        assert_eq!(tiles_y, 68);
        assert_eq!(config.total_tiles(1920, 1080), 120 * 68);

        // 32-pixel tiles: 60 x 34
        let coarse = ForwardPlusTileConfig::low_overhead();
        assert_eq!(coarse.tile_dimensions(1920, 1080), (60, 34));
        assert_eq!(coarse.total_tiles(1920, 1080), 60 * 34);
    }

    #[test]
    fn test_gpu_light_size_and_alignment() {
        // GpuLight should be exactly 72 bytes (18 x 4-byte fields).
        // Updated from 64 after the shadow fields (shadow_map_index, shadow_bias,
        // shadow_normal_bias, _unused) were added.
        assert_eq!(std::mem::size_of::<GpuLight>(), 72);
        // Every field is a 4-byte scalar, so the struct is 4-byte aligned.
        assert_eq!(std::mem::align_of::<GpuLight>(), 4);
    }

    #[test]
    fn test_light_culling_uniforms_size() {
        // 2 matrices x 64 + 2 pairs x 8 + 4 scalars x 4 = 160 bytes
        let size = std::mem::size_of::<LightCullingUniforms>();
        assert_eq!(size, 160);
        // The size must stay a multiple of 16 bytes for GPU uniform layout.
        assert_eq!(
            size % 16,
            0,
            "LightCullingUniforms size should be a multiple of 16 bytes"
        );
    }

    #[test]
    fn test_gpu_light_default() {
        let light = GpuLight::default();
        assert_eq!(light.light_type, GpuLight::TYPE_POINT);
        assert_eq!(light.color, [1.0, 1.0, 1.0]);
        // No shadow map is assigned by default.
        assert_eq!(light.shadow_map_index, -1);
    }

    #[test]
    fn test_light_culling_uniforms_default_matches_tile_math() {
        // The hard-coded defaults must agree with the tile math for 1920x1080 @ 16px.
        let uniforms = LightCullingUniforms::default();
        let tile = TileSize::X16;
        assert_eq!(uniforms.screen_dimensions, [1920.0, 1080.0]);
        assert_eq!(uniforms.tile_size, tile.pixels());
        assert_eq!(
            uniforms.tile_count,
            [tile.tile_count(1920), tile.tile_count(1080)]
        );
        assert_eq!(uniforms.num_lights, 0);
        assert_eq!(uniforms.shadow_atlas_index, -1);
    }

    #[test]
    fn test_buffer_size_calculation() {
        let config = ForwardPlusTileConfig::default();
        // 120 * 68 = 8160 tiles
        // Light index buffer: 8160 * 128 * 4 = 4,177,920 bytes
        let index_size = config.light_index_buffer_size(1920, 1080);
        assert_eq!(index_size, 8160 * 128 * 4);

        // Light grid buffer: 8160 * 2 * 4 = 65,280 bytes
        let grid_size = config.light_grid_buffer_size(1920, 1080);
        assert_eq!(grid_size, 8160 * 2 * 4);
    }
}