khora_lanes/render_lane/
forward_plus_lane.rs

1// Copyright 2025 eraflo
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Forward+ (Tiled Forward) rendering lane implementation.
16//!
17//! This module implements a Forward+ rendering strategy that uses a compute shader
18//! to perform per-tile light culling before the main render pass. This approach
19//! significantly reduces the number of lights processed per fragment, making it
20//! ideal for scenes with many lights (>20).
21//!
22//! # Architecture
23//!
24//! The Forward+ pipeline works in two stages:
25//!
26//! 1. **Light Culling (Compute Pass)**: The screen is divided into tiles (16x16 pixels).
27//!    For each tile, the compute shader determines which lights intersect the tile's
28//!    frustum and builds a list of affecting light indices.
29//!
30//! 2. **Rendering (Render Pass)**: Each fragment looks up its tile's light list and
31//!    only evaluates lighting for those specific lights, rather than all lights in the scene.
32//!
33//! # Performance Characteristics
34//!
35//! - **O(tiles × lights)** for light culling (compute pass)
36//! - **O(fragments × lights_per_tile)** for shading (render pass)
37//! - **Suitable for**: Scenes with many lights (>20)
38//! - **Break-even point**: ~20 lights (vs standard forward rendering)
39//!
40//! # SAA Compliance (Symbiotic Adaptive Architecture)
41//!
42//! This lane integrates with GORNA through:
43//! - `estimate_cost()`: Provides accurate cost estimation including compute overhead
44//! - Configurable tile size and max lights per tile
45//! - Runtime-adjustable configuration via `ForwardPlusTileConfig`
46
47use super::RenderWorld;
48use crate::render_lane::ShaderComplexity;
49
50use khora_core::renderer::api::{
51    command::BindGroupLayoutId,
52    util::{
53        dynamic_uniform_buffer::DynamicUniformRingBuffer, uniform_ring_buffer::UniformRingBuffer,
54    },
55};
56use khora_core::{
57    asset::Material,
58    renderer::{
59        api::{
60            command::{
61                BindGroupId, ComputePassDescriptor, ComputePipelineId, LoadOp, Operations,
62                RenderPassColorAttachment, RenderPassDepthStencilAttachment, RenderPassDescriptor,
63                StoreOp,
64            },
65            core::RenderContext,
66            pipeline::enums::PrimitiveTopology,
67            pipeline::RenderPipelineId,
68            resource::{BufferId, CameraUniformData},
69            scene::GpuMesh,
70        },
71        traits::CommandEncoder,
72        ForwardPlusTileConfig,
73    },
74};
75use khora_data::assets::Assets;
76use std::sync::RwLock;
77
78// --- Cost Estimation Constants ---
79
/// Estimated cost contributed by every rendered triangle.
const TRIANGLE_COST: f32 = 1.0e-3;

/// Estimated cost contributed by every issued draw call.
const DRAW_CALL_COST: f32 = 1.0e-1;

/// Flat cost charged for running the light-culling compute pass at all.
const COMPUTE_PASS_OVERHEAD: f32 = 0.5;

/// Cost charged for each screen tile handled by the light-culling pass.
const PER_TILE_COST: f32 = 1.0e-4;

/// Cost charged for each (tile, light) pair tested by the light-culling pass.
const LIGHT_TILE_TEST_COST: f32 = 1.0e-5;
94
95// --- ForwardPlusLane ---
96
97/// GPU resource handles for the Forward+ compute pass.
98///
99/// These are created during lane initialization and used each frame
100/// for light culling and rendering.
101#[derive(Debug, Default)]
102pub struct ForwardPlusGpuResources {
103    /// Buffer containing all GpuLight instances.
104    pub light_buffer: Option<BufferId>,
105    /// Buffer containing per-tile light index lists.
106    pub light_index_buffer: Option<BufferId>,
107    /// Buffer containing (offset, count) pairs per tile.
108    pub light_grid_buffer: Option<BufferId>,
109    /// Buffer containing tile info for the fragment shader.
110    pub tile_info_buffer: Option<BufferId>,
111    /// Uniform buffer for culling parameters.
112    pub culling_uniforms_buffer: Option<BufferId>,
113
114    /// Bind group layout for Group 0 (Camera).
115    pub camera_layout: Option<BindGroupLayoutId>,
116    /// Bind group layout for Group 1 (Model).
117    pub model_layout: Option<BindGroupLayoutId>,
118    /// Bind group layout for Group 2 (Material).
119    pub material_layout: Option<BindGroupLayoutId>,
120    /// Bind group layout for Group 3 (Forward Light Data).
121    pub forward_layout: Option<BindGroupLayoutId>,
122    /// Bind group layout for Culling compute pass.
123    pub culling_layout: Option<BindGroupLayoutId>,
124
125    /// Ring buffer for camera uniforms.
126    pub camera_ring: Option<UniformRingBuffer>,
127    /// Ring buffer for model uniforms.
128    pub model_ring: Option<DynamicUniformRingBuffer>,
129    /// Ring buffer for material uniforms.
130    pub material_ring: Option<DynamicUniformRingBuffer>,
131
132    /// Bind group for the culling compute shader.
133    pub culling_bind_group: Option<BindGroupId>,
134    /// Bind group for the forward pass (light data).
135    pub forward_bind_group: Option<BindGroupId>,
136    /// Compute pipeline for light culling.
137    pub culling_pipeline: Option<ComputePipelineId>,
138    /// Render pipeline for the Forward+ pass.
139    pub render_pipeline: Option<RenderPipelineId>,
140}
141
142impl ForwardPlusGpuResources {
143    /// Returns true if all required resources are initialized.
144    pub fn is_initialized(&self) -> bool {
145        self.light_buffer.is_some()
146            && self.light_index_buffer.is_some()
147            && self.light_grid_buffer.is_some()
148            && self.culling_uniforms_buffer.is_some()
149            && self.culling_bind_group.is_some()
150            && self.culling_pipeline.is_some()
151    }
152}
153
154/// A rendering lane that implements Forward+ (Tiled Forward) rendering.
155///
156/// Forward+ divides the screen into tiles and uses a compute shader to determine
157/// which lights affect each tile before the main render pass. This significantly
158/// reduces per-fragment lighting cost for scenes with many lights.
159///
160/// # Configuration
161///
162/// The lane is configured via `ForwardPlusTileConfig`, which controls:
163/// - **Tile size**: 16x16 or 32x32 pixels (trade-off between culling granularity and overhead)
164/// - **Max lights per tile**: Memory budget for per-tile light lists
165/// - **Depth pre-pass**: Optional optimization for depth-bounded light culling
166#[derive(Debug)]
167pub struct ForwardPlusLane {
168    /// Tile configuration for light culling.
169    pub tile_config: ForwardPlusTileConfig,
170
171    /// Shader complexity for cost estimation.
172    pub shader_complexity: ShaderComplexity,
173
174    /// Current screen dimensions (for tile count calculation).
175    screen_size: (u32, u32),
176
177    /// GPU resources for compute and render passes.
178    pub gpu_resources: std::sync::Mutex<ForwardPlusGpuResources>,
179}
180
181impl Default for ForwardPlusLane {
182    fn default() -> Self {
183        Self {
184            tile_config: ForwardPlusTileConfig::default(),
185            shader_complexity: ShaderComplexity::SimpleLit,
186            screen_size: (1920, 1080),
187            gpu_resources: std::sync::Mutex::new(ForwardPlusGpuResources::default()),
188        }
189    }
190}
191
192impl ForwardPlusLane {
193    /// Creates a new `ForwardPlusLane` with default settings.
194    ///
195    /// Default configuration:
196    /// - Tile size: 16x16 pixels
197    /// - Max lights per tile: 128
198    /// - No depth pre-pass
199    pub fn new() -> Self {
200        Self::default()
201    }
202
203    /// Creates a new `ForwardPlusLane` with the specified configuration.
204    ///
205    /// # Arguments
206    ///
207    /// * `config` - The tile configuration for light culling
208    pub fn with_config(config: ForwardPlusTileConfig) -> Self {
209        Self {
210            tile_config: config,
211            ..Default::default()
212        }
213    }
214
215    /// Creates a new `ForwardPlusLane` with the specified shader complexity.
216    pub fn with_complexity(complexity: ShaderComplexity) -> Self {
217        Self {
218            shader_complexity: complexity,
219            ..Default::default()
220        }
221    }
222
223    /// Updates the screen size used for tile calculations.
224    ///
225    /// This should be called when the window is resized to recalculate
226    /// tile counts and buffer sizes.
227    pub fn set_screen_size(&mut self, width: u32, height: u32) {
228        self.screen_size = (width, height);
229    }
230
231    /// Calculates the number of tiles in each dimension.
232    pub fn tile_count(&self) -> (u32, u32) {
233        self.tile_config
234            .tile_dimensions(self.screen_size.0, self.screen_size.1)
235    }
236
237    /// Calculates the total number of tiles on screen.
238    pub fn total_tiles(&self) -> u32 {
239        let (tiles_x, tiles_y) = self.tile_count();
240        tiles_x * tiles_y
241    }
242
243    /// Returns the effective number of lights in the scene.
244    ///
245    /// This counts all light types (directional, point, spot) that will be
246    /// processed by the light culling pass.
247    pub fn effective_light_count(&self, render_world: &RenderWorld) -> usize {
248        render_world.directional_light_count()
249            + render_world.point_light_count()
250            + render_world.spot_light_count()
251    }
252
253    /// Estimates the cost of the compute pass (light culling).
254    fn compute_pass_cost(&self, render_world: &RenderWorld) -> f32 {
255        let total_tiles = self.total_tiles() as f32;
256        let light_count = self.effective_light_count(render_world) as f32;
257
258        // Compute pass cost = overhead + per-tile cost + light-tile tests
259        COMPUTE_PASS_OVERHEAD
260            + (total_tiles * PER_TILE_COST)
261            + (total_tiles * light_count * LIGHT_TILE_TEST_COST)
262    }
263
264    /// Calculates the per-fragment light cost factor.
265    ///
266    /// For Forward+, this uses sqrt(total_lights) instead of linear scaling
267    /// because lights are culled per-tile, so each fragment only processes
268    /// a subset of lights.
269    fn fragment_light_factor(&self, render_world: &RenderWorld) -> f32 {
270        let total_lights = self.effective_light_count(render_world) as f32;
271
272        if total_lights == 0.0 {
273            return 1.0;
274        }
275
276        // Sublinear scaling: sqrt(lights) because of tile culling
277        // Clamped to max_lights_per_tile
278        let effective_lights = total_lights
279            .sqrt()
280            .min(self.tile_config.max_lights_per_tile as f32);
281
282        1.0 + (effective_lights * 0.02)
283    }
284}
285
286impl khora_core::lane::Lane for ForwardPlusLane {
287    fn strategy_name(&self) -> &'static str {
288        "ForwardPlus"
289    }
290
291    fn lane_kind(&self) -> khora_core::lane::LaneKind {
292        khora_core::lane::LaneKind::Render
293    }
294
295    fn estimate_cost(&self, ctx: &khora_core::lane::LaneContext) -> f32 {
296        let render_world =
297            match ctx.get::<khora_core::lane::Slot<crate::render_lane::RenderWorld>>() {
298                Some(slot) => slot.get_ref(),
299                None => return 1.0,
300            };
301        let gpu_meshes = match ctx.get::<std::sync::Arc<
302            std::sync::RwLock<
303                khora_data::assets::Assets<khora_core::renderer::api::scene::GpuMesh>,
304            >,
305        >>() {
306            Some(arc) => arc,
307            None => return 1.0,
308        };
309        self.estimate_render_cost(render_world, gpu_meshes)
310    }
311
312    fn on_initialize(
313        &self,
314        ctx: &mut khora_core::lane::LaneContext,
315    ) -> Result<(), khora_core::lane::LaneError> {
316        let device = ctx
317            .get::<std::sync::Arc<dyn khora_core::renderer::GraphicsDevice>>()
318            .ok_or(khora_core::lane::LaneError::missing(
319                "Arc<dyn GraphicsDevice>",
320            ))?;
321        self.on_gpu_init(device.as_ref())
322            .map_err(|e| khora_core::lane::LaneError::InitializationFailed(Box::new(e)))
323    }
324
325    fn execute(
326        &self,
327        ctx: &mut khora_core::lane::LaneContext,
328    ) -> Result<(), khora_core::lane::LaneError> {
329        use khora_core::lane::{LaneError, Slot};
330        let device = ctx
331            .get::<std::sync::Arc<dyn khora_core::renderer::GraphicsDevice>>()
332            .ok_or(LaneError::missing("Arc<dyn GraphicsDevice>"))?
333            .clone();
334        let gpu_meshes = ctx
335            .get::<std::sync::Arc<
336                std::sync::RwLock<
337                    khora_data::assets::Assets<khora_core::renderer::api::scene::GpuMesh>,
338                >,
339            >>()
340            .ok_or(LaneError::missing("Arc<RwLock<Assets<GpuMesh>>>"))?
341            .clone();
342        let encoder = ctx
343            .get::<Slot<dyn khora_core::renderer::traits::CommandEncoder>>()
344            .ok_or(LaneError::missing("Slot<dyn CommandEncoder>"))?
345            .get();
346        let render_world = ctx
347            .get::<Slot<crate::render_lane::RenderWorld>>()
348            .ok_or(LaneError::missing("Slot<RenderWorld>"))?
349            .get_ref();
350        let color_target = ctx
351            .get::<khora_core::lane::ColorTarget>()
352            .ok_or(LaneError::missing("ColorTarget"))?
353            .0;
354        let depth_target = ctx
355            .get::<khora_core::lane::DepthTarget>()
356            .ok_or(LaneError::missing("DepthTarget"))?
357            .0;
358        let clear_color = ctx
359            .get::<khora_core::lane::ClearColor>()
360            .ok_or(LaneError::missing("ClearColor"))?
361            .0;
362        let shadow_atlas = ctx.get::<khora_core::lane::ShadowAtlasView>().map(|v| v.0);
363        let shadow_sampler = ctx
364            .get::<khora_core::lane::ShadowComparisonSampler>()
365            .map(|v| v.0);
366
367        let mut render_ctx = khora_core::renderer::api::core::RenderContext::new(
368            &color_target,
369            Some(&depth_target),
370            clear_color,
371        );
372        render_ctx.shadow_atlas = shadow_atlas.as_ref();
373        render_ctx.shadow_sampler = shadow_sampler.as_ref();
374
375        self.render(
376            render_world,
377            device.as_ref(),
378            encoder,
379            &render_ctx,
380            &gpu_meshes,
381        );
382        Ok(())
383    }
384
385    fn on_shutdown(&self, ctx: &mut khora_core::lane::LaneContext) {
386        if let Some(device) = ctx.get::<std::sync::Arc<dyn khora_core::renderer::GraphicsDevice>>()
387        {
388            self.on_gpu_shutdown(device.as_ref());
389        }
390    }
391
392    fn as_any(&self) -> &dyn std::any::Any {
393        self
394    }
395
396    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
397        self
398    }
399}
400
401impl ForwardPlusLane {
402    /// Returns the render pipeline for the given material (or default).
403    pub fn get_pipeline_for_material(
404        &self,
405        _material: Option<&khora_core::asset::AssetHandle<Box<dyn Material>>>,
406    ) -> RenderPipelineId {
407        // Return the stored pipeline. Fallback to pipeline 0 if on_gpu_init hasn't run yet.
408        self.gpu_resources
409            .lock()
410            .unwrap()
411            .render_pipeline
412            .unwrap_or(RenderPipelineId(0))
413    }
414
415    fn render(
416        &self,
417        render_world: &RenderWorld,
418        device: &dyn khora_core::renderer::GraphicsDevice,
419        encoder: &mut dyn CommandEncoder,
420        render_ctx: &RenderContext,
421        gpu_meshes: &RwLock<Assets<GpuMesh>>,
422    ) {
423        let mut resources = self.gpu_resources.lock().unwrap();
424
425        // 1. Get Active Camera View
426        let view = if let Some(first_view) = render_world.views.first() {
427            first_view
428        } else {
429            return; // No camera, nothing to render
430        };
431
432        // 2. Prepare Camera Uniforms (Group 0)
433        let camera_uniforms = CameraUniformData {
434            view_projection: view.view_proj.to_cols_array_2d(),
435            camera_position: [view.position.x, view.position.y, view.position.z, 1.0],
436        };
437
438        let camera_bind_group = if let Some(ref mut ring) = resources.camera_ring {
439            ring.advance();
440            if let Err(e) = ring.write(device, bytemuck::bytes_of(&camera_uniforms)) {
441                log::error!("Failed to write camera ring buffer: {:?}", e);
442                return;
443            }
444            *ring.current_bind_group()
445        } else {
446            return;
447        };
448
449        // 3. Prepare Tile Info (sent to Group 3)
450        if let Some(tile_buffer) = resources.tile_info_buffer {
451            let config = self.tile_config;
452            let (width, height) = self.screen_size;
453            let num_tiles_x = width.div_ceil(config.tile_size.pixels());
454            let num_tiles_y = height.div_ceil(config.tile_size.pixels());
455            let tile_info = [
456                num_tiles_x,
457                num_tiles_y,
458                config.tile_size.pixels(),
459                config.max_lights_per_tile,
460            ];
461            let _ = device.write_buffer(tile_buffer, 0, bytemuck::cast_slice(&tile_info));
462        }
463
464        // 4. Update Light Data
465        let lights: Vec<_> = render_world
466            .lights
467            .iter()
468            .map(|l| {
469                khora_core::renderer::GpuLight::from_parts(
470                    [l.position.x, l.position.y, l.position.z],
471                    [l.direction.x, l.direction.y, l.direction.z],
472                    &l.light_type,
473                )
474            })
475            .collect();
476
477        if let Some(light_buffer) = resources.light_buffer {
478            let _ = device.write_buffer(light_buffer, 0, bytemuck::cast_slice(&lights));
479        }
480
481        // Prepare and write Culling Uniforms
482        if let Some(culling_buffer) = resources.culling_uniforms_buffer {
483            let config = self.tile_config;
484            let (width, height) = self.screen_size;
485            let num_tiles_x = width.div_ceil(config.tile_size.pixels());
486            let num_tiles_y = height.div_ceil(config.tile_size.pixels());
487
488            let inv_vp = view.view_proj.inverse().unwrap_or_default();
489
490            let culling_data = khora_core::renderer::api::scene::CullingUniformsData {
491                view_projection: view.view_proj.to_cols_array_2d(),
492                inverse_projection: inv_vp.to_cols_array_2d(),
493                screen_dimensions: [width as f32, height as f32],
494                tile_count: [num_tiles_x, num_tiles_y],
495                num_lights: lights.len() as u32,
496                tile_size: config.tile_size.pixels(),
497                _padding: [0.0; 2],
498            };
499
500            let _ = device.write_buffer(culling_buffer, 0, bytemuck::bytes_of(&culling_data));
501        }
502
503        // Run Culling Compute Pass
504        if let (Some(culling_pipeline), Some(culling_bg)) =
505            (resources.culling_pipeline, resources.culling_bind_group)
506        {
507            let mut compute_pass = encoder.begin_compute_pass(&ComputePassDescriptor {
508                label: Some("Forward+ Light Culling Pass"),
509                timestamp_writes: None,
510            });
511            compute_pass.set_pipeline(&culling_pipeline);
512            compute_pass.set_bind_group(0, &culling_bg, &[]);
513
514            let config = self.tile_config;
515            let (width, height) = self.screen_size;
516            let num_tiles_x = width.div_ceil(config.tile_size.pixels());
517            let num_tiles_y = height.div_ceil(config.tile_size.pixels());
518            compute_pass.dispatch_workgroups(num_tiles_x, num_tiles_y, 1);
519        }
520
521        // 5. Prepare Per-Mesh Data (Dynamic Uniforms)
522        let mut draw_commands = Vec::new();
523
524        if let Some(ref mut ring) = resources.model_ring {
525            ring.advance();
526        }
527        if let Some(ref mut ring) = resources.material_ring {
528            ring.advance();
529        }
530
531        let gpu_mesh_assets = gpu_meshes.read().unwrap();
532        for extracted_mesh in &render_world.meshes {
533            if let Some(gpu_mesh_handle) = gpu_mesh_assets.get(&extracted_mesh.cpu_mesh_uuid) {
534                // Compute Matrices
535                let model_mat = extracted_mesh.transform.to_matrix();
536                let normal_mat = model_mat.inverse().unwrap_or_default().transpose();
537
538                let mut base_color = khora_core::math::LinearRgba::WHITE;
539                let mut emissive = khora_core::math::LinearRgba::BLACK;
540                let mut specular_power = 32.0;
541
542                if let Some(mat_handle) = &extracted_mesh.material {
543                    base_color = mat_handle.base_color();
544                    emissive = mat_handle.emissive_color();
545                    specular_power = mat_handle.specular_power();
546                }
547
548                let model_uniforms = khora_core::renderer::api::scene::ModelUniforms {
549                    model_matrix: model_mat.to_cols_array_2d(),
550                    normal_matrix: normal_mat.to_cols_array_2d(),
551                };
552
553                let material_uniforms = khora_core::renderer::api::scene::MaterialUniforms {
554                    base_color,
555                    emissive: emissive.with_alpha(specular_power),
556                    ambient: khora_core::math::LinearRgba::new(0.05, 0.05, 0.05, 1.0),
557                };
558
559                // Push to rings and get offsets/ids
560                let (model_bg, model_offset) = if let Some(ref mut ring) = resources.model_ring {
561                    let offset = match ring.push(device, bytemuck::bytes_of(&model_uniforms)) {
562                        Ok(off) => off,
563                        Err(_) => continue,
564                    };
565                    (*ring.current_bind_group(), offset)
566                } else {
567                    continue;
568                };
569
570                let (material_bg, material_offset) = if let Some(ref mut ring) =
571                    resources.material_ring
572                {
573                    let offset = match ring.push(device, bytemuck::bytes_of(&material_uniforms)) {
574                        Ok(off) => off,
575                        Err(_) => continue,
576                    };
577                    (*ring.current_bind_group(), offset)
578                } else {
579                    continue;
580                };
581
582                draw_commands.push(khora_core::renderer::api::command::DrawCommand {
583                    pipeline: resources.render_pipeline.unwrap_or(RenderPipelineId(0)),
584                    vertex_buffer: gpu_mesh_handle.vertex_buffer,
585                    index_buffer: gpu_mesh_handle.index_buffer,
586                    index_count: gpu_mesh_handle.index_count,
587                    index_format: gpu_mesh_handle.index_format,
588                    model_bind_group: Some(model_bg),
589                    model_offset,
590                    material_bind_group: Some(material_bg),
591                    material_offset,
592                });
593            }
594        }
595
596        // 6. Render Pass
597        let color_attachment = RenderPassColorAttachment {
598            view: render_ctx.color_target,
599            resolve_target: None,
600            ops: Operations {
601                load: LoadOp::Clear(render_ctx.clear_color),
602                store: StoreOp::Store,
603            },
604            base_array_layer: 0,
605        };
606
607        let render_pass_desc = RenderPassDescriptor {
608            label: Some("ForwardPlus Render Pass"),
609            color_attachments: &[color_attachment],
610            depth_stencil_attachment: render_ctx.depth_target.map(|depth_view| {
611                RenderPassDepthStencilAttachment {
612                    view: depth_view,
613                    depth_ops: Some(Operations {
614                        load: LoadOp::Clear(1.0),
615                        store: StoreOp::Store,
616                    }),
617                    stencil_ops: None,
618                    base_array_layer: 0,
619                }
620            }),
621        };
622
623        let mut render_pass = encoder.begin_render_pass(&render_pass_desc);
624
625        // Bind Group 0: Camera
626        render_pass.set_bind_group(0, &camera_bind_group, &[]);
627
628        // Bind Group 3: Forward Light Data
629        if let Some(ref forward_bg) = resources.forward_bind_group {
630            render_pass.set_bind_group(3, forward_bg, &[]);
631        }
632
633        // Set Render Pipeline
634        if let Some(ref pipeline) = resources.render_pipeline {
635            render_pass.set_pipeline(pipeline);
636        } else {
637            return;
638        }
639
640        // Draw Cached Commands
641        for cmd in &draw_commands {
642            if let Some(ref bg) = cmd.model_bind_group {
643                render_pass.set_bind_group(1, bg, &[cmd.model_offset]);
644            }
645            if let Some(ref bg) = cmd.material_bind_group {
646                render_pass.set_bind_group(2, bg, &[cmd.material_offset]);
647            }
648
649            render_pass.set_vertex_buffer(0, &cmd.vertex_buffer, 0);
650            render_pass.set_index_buffer(&cmd.index_buffer, 0, cmd.index_format);
651            render_pass.draw_indexed(0..cmd.index_count, 0, 0..1);
652        }
653    }
654
655    fn estimate_render_cost(
656        &self,
657        render_world: &RenderWorld,
658        gpu_meshes: &RwLock<Assets<GpuMesh>>,
659    ) -> f32 {
660        let gpu_mesh_assets = gpu_meshes.read().unwrap();
661
662        let mut total_triangles = 0u32;
663        let mut draw_call_count = 0u32;
664
665        for extracted_mesh in &render_world.meshes {
666            if let Some(gpu_mesh) = gpu_mesh_assets.get(&extracted_mesh.cpu_mesh_uuid) {
667                let triangle_count = match gpu_mesh.primitive_topology {
668                    PrimitiveTopology::TriangleList => gpu_mesh.index_count / 3,
669                    PrimitiveTopology::TriangleStrip => {
670                        if gpu_mesh.index_count >= 3 {
671                            gpu_mesh.index_count - 2
672                        } else {
673                            0
674                        }
675                    }
676                    PrimitiveTopology::LineList
677                    | PrimitiveTopology::LineStrip
678                    | PrimitiveTopology::PointList => 0,
679                };
680
681                total_triangles += triangle_count;
682                draw_call_count += 1;
683            }
684        }
685
686        // Base geometry cost
687        let geometry_cost =
688            (total_triangles as f32 * TRIANGLE_COST) + (draw_call_count as f32 * DRAW_CALL_COST);
689
690        // Shader complexity multiplier
691        let shader_multiplier = self.shader_complexity.cost_multiplier();
692
693        // Compute pass overhead
694        let compute_cost = self.compute_pass_cost(render_world);
695
696        // Per-fragment light factor (sublinear for Forward+)
697        let light_factor = self.fragment_light_factor(render_world);
698
699        // Total cost
700        compute_cost + (geometry_cost * shader_multiplier * light_factor)
701    }
702
703    fn on_gpu_init(
704        &self,
705        device: &dyn khora_core::renderer::GraphicsDevice,
706    ) -> Result<(), khora_core::renderer::error::RenderError> {
707        use crate::render_lane::shaders::FORWARD_PLUS_WGSL;
708        use khora_core::renderer::api::{
709            command::{
710                BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
711                BindGroupLayoutEntry, BindingType, BufferBindingType,
712            },
713            core::{ShaderModuleDescriptor, ShaderSourceData},
714            pipeline::enums::{CompareFunction, VertexFormat, VertexStepMode},
715            pipeline::state::{ColorWrites, DepthBiasState, StencilFaceState},
716            pipeline::{
717                ColorTargetStateDescriptor, DepthStencilStateDescriptor,
718                MultisampleStateDescriptor, PrimitiveStateDescriptor, RenderPipelineDescriptor,
719                VertexAttributeDescriptor, VertexBufferLayoutDescriptor,
720            },
721            resource::CameraUniformData,
722            scene::{MaterialUniforms, ModelUniforms},
723            util::{SampleCount, ShaderStageFlags},
724        };
725        use std::borrow::Cow;
726
727        log::info!("ForwardPlusLane: Initializing GPU resources...");
728
729        // 1. Create Bind Group Layouts
730
731        // Group 0: Camera
732        let camera_layout = device
733            .create_bind_group_layout(&BindGroupLayoutDescriptor {
734                label: Some("forward_plus_camera_layout"),
735                entries: &[BindGroupLayoutEntry {
736                    binding: 0,
737                    visibility: ShaderStageFlags::VERTEX | ShaderStageFlags::FRAGMENT,
738                    ty: BindingType::Buffer {
739                        ty: BufferBindingType::Uniform,
740                        has_dynamic_offset: false,
741                        min_binding_size: None,
742                    },
743                }],
744            })
745            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
746
747        // Group 1: Model
748        let model_layout = device
749            .create_bind_group_layout(&BindGroupLayoutDescriptor {
750                label: Some("forward_plus_model_layout"),
751                entries: &[BindGroupLayoutEntry {
752                    binding: 0,
753                    visibility: ShaderStageFlags::VERTEX,
754                    ty: BindingType::Buffer {
755                        ty: BufferBindingType::Uniform,
756                        has_dynamic_offset: true,
757                        min_binding_size: std::num::NonZeroU64::new(
758                            std::mem::size_of::<ModelUniforms>() as u64,
759                        ),
760                    },
761                }],
762            })
763            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
764
765        // Group 2: Material
766        let material_layout = device
767            .create_bind_group_layout(&BindGroupLayoutDescriptor {
768                label: Some("forward_plus_material_layout"),
769                entries: &[BindGroupLayoutEntry {
770                    binding: 0,
771                    visibility: ShaderStageFlags::FRAGMENT,
772                    ty: BindingType::Buffer {
773                        ty: BufferBindingType::Uniform,
774                        has_dynamic_offset: true,
775                        min_binding_size: std::num::NonZeroU64::new(std::mem::size_of::<
776                            MaterialUniforms,
777                        >()
778                            as u64),
779                    },
780                }],
781            })
782            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
783
784        // Group 3: Forward Light Data (Render Pass side)
785        let forward_layout = device
786            .create_bind_group_layout(&BindGroupLayoutDescriptor {
787                label: Some("Forward+ Render Pass Light Layout"),
788                entries: &[
789                    // 0: Lights
790                    BindGroupLayoutEntry::buffer(
791                        0,
792                        ShaderStageFlags::FRAGMENT,
793                        BufferBindingType::Storage { read_only: true },
794                        false,
795                        None,
796                    ),
797                    // 1: Light Index List
798                    BindGroupLayoutEntry::buffer(
799                        1,
800                        ShaderStageFlags::FRAGMENT,
801                        BufferBindingType::Storage { read_only: true },
802                        false,
803                        None,
804                    ),
805                    // 2: Light Grid
806                    BindGroupLayoutEntry::buffer(
807                        2,
808                        ShaderStageFlags::FRAGMENT,
809                        BufferBindingType::Storage { read_only: true },
810                        false,
811                        None,
812                    ),
813                    // 3: Tile Info
814                    BindGroupLayoutEntry::buffer(
815                        3,
816                        ShaderStageFlags::FRAGMENT,
817                        BufferBindingType::Uniform,
818                        false,
819                        None,
820                    ),
821                ],
822            })
823            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
824
825        // Culling Layout (Compute Pass side)
826        let culling_layout = device
827            .create_bind_group_layout(&BindGroupLayoutDescriptor {
828                label: Some("Forward+ Culling Layout"),
829                entries: &[
830                    // 0: Uniforms
831                    BindGroupLayoutEntry::buffer(
832                        0,
833                        ShaderStageFlags::COMPUTE,
834                        BufferBindingType::Uniform,
835                        false,
836                        None,
837                    ),
838                    // 1: Lights (Storage read-only)
839                    BindGroupLayoutEntry::buffer(
840                        1,
841                        ShaderStageFlags::COMPUTE,
842                        BufferBindingType::Storage { read_only: true },
843                        false,
844                        None,
845                    ),
846                    // 2: Light Index List (Storage read-write)
847                    BindGroupLayoutEntry::buffer(
848                        2,
849                        ShaderStageFlags::COMPUTE,
850                        BufferBindingType::Storage { read_only: false },
851                        false,
852                        None,
853                    ),
854                    // 3: Light Grid (Storage read-write)
855                    BindGroupLayoutEntry::buffer(
856                        3,
857                        ShaderStageFlags::COMPUTE,
858                        BufferBindingType::Storage { read_only: false },
859                        false,
860                        None,
861                    ),
862                ],
863            })
864            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
865
866        // 2. Create Pipelines
867
868        // Render Pipeline
869        let shader_module = device
870            .create_shader_module(&ShaderModuleDescriptor {
871                label: Some("forward_plus_render_shader"),
872                source: ShaderSourceData::Wgsl(Cow::Borrowed(FORWARD_PLUS_WGSL)),
873            })
874            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
875
876        let vertex_attributes = vec![
877            VertexAttributeDescriptor {
878                format: VertexFormat::Float32x3,
879                offset: 0,
880                shader_location: 0,
881            },
882            VertexAttributeDescriptor {
883                format: VertexFormat::Float32x3,
884                offset: 12,
885                shader_location: 1,
886            },
887            VertexAttributeDescriptor {
888                format: VertexFormat::Float32x2,
889                offset: 24,
890                shader_location: 2,
891            },
892        ];
893
894        let vertex_layout = VertexBufferLayoutDescriptor {
895            array_stride: 32,
896            step_mode: VertexStepMode::Vertex,
897            attributes: Cow::Owned(vertex_attributes),
898        };
899
900        // Explicit Render Pipeline Layout
901        let render_pipeline_layout = device
902            .create_pipeline_layout(
903                &khora_core::renderer::api::pipeline::PipelineLayoutDescriptor {
904                    label: Some(Cow::Borrowed("Forward+ Render Pipeline Layout")),
905                    bind_group_layouts: &[
906                        camera_layout,
907                        model_layout,
908                        material_layout,
909                        forward_layout,
910                    ],
911                },
912            )
913            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
914
915        let pipeline_desc = RenderPipelineDescriptor {
916            label: Some(Cow::Borrowed("ForwardPlus Pipeline")),
917            layout: Some(render_pipeline_layout),
918            vertex_shader_module: shader_module,
919            vertex_entry_point: Cow::Borrowed("vs_main"),
920            fragment_shader_module: Some(shader_module),
921            fragment_entry_point: Some(Cow::Borrowed("fs_main")),
922            vertex_buffers_layout: Cow::Owned(vec![vertex_layout]),
923            primitive_state: PrimitiveStateDescriptor {
924                topology: PrimitiveTopology::TriangleList,
925                ..Default::default()
926            },
927            depth_stencil_state: Some(DepthStencilStateDescriptor {
928                format: khora_core::renderer::api::util::TextureFormat::Depth32Float,
929                depth_write_enabled: true,
930                depth_compare: CompareFunction::Less,
931                stencil_front: StencilFaceState::default(),
932                stencil_back: StencilFaceState::default(),
933                stencil_read_mask: 0,
934                stencil_write_mask: 0,
935                bias: DepthBiasState::default(),
936            }),
937            color_target_states: Cow::Owned(vec![ColorTargetStateDescriptor {
938                format: device
939                    .get_surface_format()
940                    .unwrap_or(khora_core::renderer::api::util::TextureFormat::Rgba8UnormSrgb),
941                blend: None,
942                write_mask: ColorWrites::ALL,
943            }]),
944            multisample_state: MultisampleStateDescriptor {
945                count: SampleCount::X1,
946                mask: !0,
947                alpha_to_coverage_enabled: false,
948            },
949        };
950
951        let pipeline_id = device
952            .create_render_pipeline(&pipeline_desc)
953            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
954
955        // Compute Pipeline for Culling
956        let culling_pipeline_layout = device
957            .create_pipeline_layout(
958                &khora_core::renderer::api::pipeline::PipelineLayoutDescriptor {
959                    label: Some(Cow::Borrowed("Forward+ Culling Pipeline Layout")),
960                    bind_group_layouts: &[culling_layout],
961                },
962            )
963            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
964
965        let culling_shader_module = device
966            .create_shader_module(&ShaderModuleDescriptor {
967                label: Some("Forward+ Culling Shader"),
968                source: ShaderSourceData::Wgsl(Cow::Borrowed(
969                    crate::render_lane::shaders::LIGHT_CULLING_WGSL,
970                )),
971            })
972            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
973
974        let culling_pipeline = device
975            .create_compute_pipeline(
976                &khora_core::renderer::api::command::ComputePipelineDescriptor {
977                    label: Some(Cow::Borrowed("Forward+ Culling Pipeline")),
978                    layout: Some(culling_pipeline_layout),
979                    shader_module: culling_shader_module,
980                    entry_point: Cow::Borrowed("cs_main"),
981                },
982            )
983            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
984
985        // 3. Create Buffers and Rings
986
987        // Light Data Buffer
988        let light_buffer = device
989            .create_buffer(&khora_core::renderer::api::resource::BufferDescriptor {
990                label: Some(Cow::Borrowed("Forward+ Light Buffer")),
991                size: 64 * 1024,
992                usage: khora_core::renderer::api::resource::BufferUsage::STORAGE
993                    | khora_core::renderer::api::resource::BufferUsage::COPY_DST,
994                mapped_at_creation: false,
995            })
996            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
997
998        // Light Index List
999        let light_index_buffer = device
1000            .create_buffer(&khora_core::renderer::api::resource::BufferDescriptor {
1001                label: Some(Cow::Borrowed("Forward+ Light Index Buffer")),
1002                size: 120 * 68 * 256 * 4,
1003                usage: khora_core::renderer::api::resource::BufferUsage::STORAGE
1004                    | khora_core::renderer::api::resource::BufferUsage::COPY_DST,
1005                mapped_at_creation: false,
1006            })
1007            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1008
1009        // Light Grid
1010        let light_grid_buffer = device
1011            .create_buffer(&khora_core::renderer::api::resource::BufferDescriptor {
1012                label: Some(Cow::Borrowed("Forward+ Light Grid Buffer")),
1013                size: 120 * 68 * 2 * 4,
1014                usage: khora_core::renderer::api::resource::BufferUsage::STORAGE
1015                    | khora_core::renderer::api::resource::BufferUsage::COPY_DST,
1016                mapped_at_creation: false,
1017            })
1018            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1019
1020        // Tile Info Buffer
1021        let tile_info_buffer = device
1022            .create_buffer(&khora_core::renderer::api::resource::BufferDescriptor {
1023                label: Some(Cow::Borrowed("Forward+ Tile Info")),
1024                size: 256,
1025                usage: khora_core::renderer::api::resource::BufferUsage::UNIFORM
1026                    | khora_core::renderer::api::resource::BufferUsage::COPY_DST,
1027                mapped_at_creation: false,
1028            })
1029            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1030
1031        // Culling Uniforms
1032        let culling_uniforms_buffer = device
1033            .create_buffer(&khora_core::renderer::api::resource::BufferDescriptor {
1034                label: Some(Cow::Borrowed("Forward+ Culling Uniforms")),
1035                size: 256,
1036                usage: khora_core::renderer::api::resource::BufferUsage::UNIFORM
1037                    | khora_core::renderer::api::resource::BufferUsage::COPY_DST,
1038                mapped_at_creation: false,
1039            })
1040            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1041
1042        // Ring Buffers
1043        let camera_ring = UniformRingBuffer::new(
1044            device,
1045            camera_layout,
1046            0,
1047            std::mem::size_of::<CameraUniformData>() as u64,
1048            "Forward+ Camera Ring",
1049        )
1050        .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1051
1052        let model_ring = DynamicUniformRingBuffer::new(
1053            device,
1054            model_layout,
1055            0,
1056            std::mem::size_of::<ModelUniforms>() as u32,
1057            khora_core::renderer::api::util::dynamic_uniform_buffer::DEFAULT_MAX_ELEMENTS,
1058            khora_core::renderer::api::util::dynamic_uniform_buffer::MIN_UNIFORM_ALIGNMENT,
1059            "Forward+ Model Ring",
1060        )
1061        .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1062
1063        let material_ring = DynamicUniformRingBuffer::new(
1064            device,
1065            material_layout,
1066            0,
1067            std::mem::size_of::<MaterialUniforms>() as u32,
1068            khora_core::renderer::api::util::dynamic_uniform_buffer::DEFAULT_MAX_ELEMENTS,
1069            khora_core::renderer::api::util::dynamic_uniform_buffer::MIN_UNIFORM_ALIGNMENT,
1070            "Forward+ Material Ring",
1071        )
1072        .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1073
1074        // 4. Bind Groups
1075
1076        let culling_bg = device
1077            .create_bind_group(&BindGroupDescriptor {
1078                label: Some("Forward+ Culling Bind Group"),
1079                layout: culling_layout,
1080                entries: &[
1081                    BindGroupEntry::buffer(0, culling_uniforms_buffer, 0, None),
1082                    BindGroupEntry::buffer(1, light_buffer, 0, None),
1083                    BindGroupEntry::buffer(2, light_index_buffer, 0, None),
1084                    BindGroupEntry::buffer(3, light_grid_buffer, 0, None),
1085                ],
1086            })
1087            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1088
1089        let forward_bg = device
1090            .create_bind_group(&BindGroupDescriptor {
1091                label: Some("Forward+ Render Pass Bind Group"),
1092                layout: forward_layout,
1093                entries: &[
1094                    BindGroupEntry::buffer(0, light_buffer, 0, None),
1095                    BindGroupEntry::buffer(1, light_index_buffer, 0, None),
1096                    BindGroupEntry::buffer(2, light_grid_buffer, 0, None),
1097                    BindGroupEntry::buffer(3, tile_info_buffer, 0, None),
1098                ],
1099            })
1100            .map_err(khora_core::renderer::error::RenderError::ResourceError)?;
1101
1102        // 5. Store all resources
1103        let mut res = self.gpu_resources.lock().unwrap();
1104        res.light_buffer = Some(light_buffer);
1105        res.light_index_buffer = Some(light_index_buffer);
1106        res.light_grid_buffer = Some(light_grid_buffer);
1107        res.tile_info_buffer = Some(tile_info_buffer);
1108        res.culling_uniforms_buffer = Some(culling_uniforms_buffer);
1109        res.camera_layout = Some(camera_layout);
1110        res.model_layout = Some(model_layout);
1111        res.material_layout = Some(material_layout);
1112        res.forward_layout = Some(forward_layout);
1113        res.culling_layout = Some(culling_layout);
1114        res.camera_ring = Some(camera_ring);
1115        res.model_ring = Some(model_ring);
1116        res.material_ring = Some(material_ring);
1117        res.culling_bind_group = Some(culling_bg);
1118        res.forward_bind_group = Some(forward_bg);
1119        res.culling_pipeline = Some(culling_pipeline);
1120        res.render_pipeline = Some(pipeline_id);
1121
1122        Ok(())
1123    }
1124
1125    fn on_gpu_shutdown(&self, device: &dyn khora_core::renderer::GraphicsDevice) {
1126        let mut resources = self.gpu_resources.lock().unwrap();
1127
1128        if let Some(ring) = resources.camera_ring.take() {
1129            ring.destroy(device);
1130        }
1131        if let Some(ring) = resources.model_ring.take() {
1132            ring.destroy(device);
1133        }
1134        if let Some(ring) = resources.material_ring.take() {
1135            ring.destroy(device);
1136        }
1137
1138        if let Some(id) = resources.light_buffer.take() {
1139            device.destroy_buffer(id).ok();
1140        }
1141        if let Some(id) = resources.light_index_buffer.take() {
1142            device.destroy_buffer(id).ok();
1143        }
1144        if let Some(id) = resources.light_grid_buffer.take() {
1145            let _ = device.destroy_buffer(id);
1146        }
1147        if let Some(id) = resources.culling_uniforms_buffer.take() {
1148            let _ = device.destroy_buffer(id);
1149        }
1150    }
1151}
1152
#[cfg(test)]
mod tests {
    use super::*;
    use khora_core::lane::Lane;
    use khora_core::renderer::TileSize;

    /// A lane built with `new()` exposes the default tile configuration and
    /// shader complexity.
    #[test]
    fn test_forward_plus_lane_creation() {
        let default_lane = ForwardPlusLane::new();
        let tile_cfg = &default_lane.tile_config;

        assert_eq!(tile_cfg.tile_size, TileSize::X16);
        assert_eq!(tile_cfg.max_lights_per_tile, 128);
        assert_eq!(default_lane.shader_complexity, ShaderComplexity::SimpleLit);
    }

    /// A lane built with `with_config()` keeps every field of the supplied
    /// configuration.
    #[test]
    fn test_forward_plus_lane_with_config() {
        let custom = ForwardPlusTileConfig {
            tile_size: TileSize::X32,
            max_lights_per_tile: 256,
            use_depth_prepass: true,
        };
        let configured_lane = ForwardPlusLane::with_config(custom);
        let stored = &configured_lane.tile_config;

        assert_eq!(stored.tile_size, TileSize::X32);
        assert_eq!(stored.max_lights_per_tile, 256);
        assert!(stored.use_depth_prepass);
    }

    /// The tile grid for a 1920x1080 screen with the default lane settings.
    #[test]
    fn test_tile_count_calculation() {
        let mut lane = ForwardPlusLane::new();
        lane.set_screen_size(1920, 1080);

        // Width: 1920 / 16 = 120 tiles. Height: 1080 / 16 = 67.5, rounded up to 68.
        assert_eq!(lane.tile_count(), (120, 68));
    }

    /// The lane reports its strategy name through the `Lane` trait.
    #[test]
    fn test_strategy_name() {
        assert_eq!(ForwardPlusLane::new().strategy_name(), "ForwardPlus");
    }

    /// Without GPU initialisation no pipeline exists yet, so the lookup falls
    /// back to `RenderPipelineId(0)`.
    #[test]
    fn test_pipeline_id() {
        let uninitialised = ForwardPlusLane::new();
        assert_eq!(
            uninitialised.get_pipeline_for_material(None),
            RenderPipelineId(0)
        );
    }
}