From 11a421ecfe73621edfaf4a3a4085327f91e3a4f1 Mon Sep 17 00:00:00 2001 From: Ralph Kuepper Date: Mon, 15 Jun 2026 23:12:32 +0200 Subject: [PATCH 1/4] feat(render): clouds, foliage wind, additive fog, shadows for immediate-mode, Windows fixes Rendering features behind the shooter's visual pass: - Sky: analytic drifting cloud layer in the procedural-sky shader (env.rs); time fed via the unused SkyUniforms.intensity.y. - Fog: manual height-fog march now runs additively with procedural sky (post.rs) instead of only as the aerial-LUT else-branch, so haze works at sub-km scene scale. - Foliage: wind sway + backlit translucency for alpha-cutout materials in the scene shader (core.rs); wind appended to LightingUniforms (types.rs) with widened UBO visibility (lighting.rs, mod.rs). - Shadows: shadow pass renders immediate-mode + cached-model draws, not just scene nodes (shadow_pass.rs), so immediate-mode games cast shadows. - Device: raise max_sampled_textures/samplers per stage to the adapter max so refractive materials (19 sampled textures) compile (windows). - FFI: bloom_set_bloom_intensity/tonemap/auto_exposure_*, mesh scratch-buffer upload (createMesh i64 ABI fix), drawModelRotated re-export (visual.rs, models.rs, src/*). - Windows build: jolt default features, panic=abort, Jolt libDirs, LTCG off for lld-link (build.rs, Cargo.toml, package.json). 
--- native/shared/build.rs | 19 +++++-- native/shared/src/ffi_core/models.rs | 56 +++++++++++++++++++ native/shared/src/ffi_core/visual.rs | 41 ++++++++++++++ native/shared/src/models.rs | 31 +++++++++++ native/shared/src/renderer/lighting.rs | 5 +- native/shared/src/renderer/mod.rs | 16 +++++- native/shared/src/renderer/shaders/core.rs | 26 ++++++++- native/shared/src/renderer/shaders/env.rs | 53 ++++++++++++++++++ native/shared/src/renderer/shaders/post.rs | 11 +++- native/shared/src/renderer/shadow_pass.rs | 48 ++++++++++++++++- native/shared/src/renderer/types.rs | 6 +++ native/windows/Cargo.toml | 29 ++++++---- native/windows/src/lib.rs | 38 ++++++++++++- package.json | 62 +++++++++++++++++++++- src/core/index.ts | 39 ++++++++++++++ src/index.ts | 3 +- src/models/index.ts | 45 ++++++++++------ 17 files changed, 488 insertions(+), 40 deletions(-) diff --git a/native/shared/build.rs b/native/shared/build.rs index 843644a..eae9138 100644 --- a/native/shared/build.rs +++ b/native/shared/build.rs @@ -96,11 +96,22 @@ fn build_jolt() { .join(format!("{}Jolt.{}", lib_prefix, lib_ext)); if !(bloom_jolt_lib.exists() && jolt_lib.exists()) { - let _ = cmake::Config::new(&shim_dir) - .out_dir(&dst) + let mut cfg = cmake::Config::new(&shim_dir); + cfg.out_dir(&dst) .profile("Release") - .define("CMAKE_BUILD_TYPE", "Release") - .build(); + .define("CMAKE_BUILD_TYPE", "Release"); + if target_os == "windows" { + // perry links the prebuilt Jolt archive with lld-link, which + // cannot read MSVC `/GL` (whole-program-optimization) object + // files — they're LTCG intermediates, not native COFF, and the + // link fails with "is not a native COFF file. Recompile without + // /GL?". Turn off Jolt's interprocedural optimization so the + // archive holds ordinary COFF objects perry can consume. (Costs + // a little Jolt codegen perf; physics is not the bottleneck.) 
+ cfg.define("INTERPROCEDURAL_OPTIMIZATION", "OFF") + .define("CMAKE_INTERPROCEDURAL_OPTIMIZATION", "OFF"); + } + let _ = cfg.build(); } println!("cargo:rustc-link-search=native={}", dst.join("lib").display()); diff --git a/native/shared/src/ffi_core/models.rs b/native/shared/src/ffi_core/models.rs index 306008e..3562577 100644 --- a/native/shared/src/ffi_core/models.rs +++ b/native/shared/src/ffi_core/models.rs @@ -374,6 +374,62 @@ macro_rules! __bloom_ffi_models { 0.0 } + // Array-free mesh upload (Perry 0.5.1171 rejects number[] -> i64 pointer + // params; see ModelManager::scratch_f32). Push the 12-float vertex + // records + u32 indices one scalar at a time (all-f64 ABI), then build. + #[cfg(feature = "models3d")] + #[no_mangle] + pub extern "C" fn bloom_mesh_scratch_reset() { + $crate::ffi::guard("bloom_mesh_scratch_reset", move || { + engine().models.mesh_scratch_reset(); + }) + } + #[cfg(not(feature = "models3d"))] + #[no_mangle] + pub extern "C" fn bloom_mesh_scratch_reset() { + $crate::ffi::feature_off_warn_once("bloom_mesh_scratch_reset", "models3d"); + } + + #[cfg(feature = "models3d")] + #[no_mangle] + pub extern "C" fn bloom_mesh_scratch_push_f32(v: f64) { + $crate::ffi::guard("bloom_mesh_scratch_push_f32", move || { + engine().models.mesh_scratch_push_f32(v as f32); + }) + } + #[cfg(not(feature = "models3d"))] + #[no_mangle] + pub extern "C" fn bloom_mesh_scratch_push_f32(_v: f64) { + $crate::ffi::feature_off_warn_once("bloom_mesh_scratch_push_f32", "models3d"); + } + + #[cfg(feature = "models3d")] + #[no_mangle] + pub extern "C" fn bloom_mesh_scratch_push_u32(v: f64) { + $crate::ffi::guard("bloom_mesh_scratch_push_u32", move || { + engine().models.mesh_scratch_push_u32(v as u32); + }) + } + #[cfg(not(feature = "models3d"))] + #[no_mangle] + pub extern "C" fn bloom_mesh_scratch_push_u32(_v: f64) { + $crate::ffi::feature_off_warn_once("bloom_mesh_scratch_push_u32", "models3d"); + } + + #[cfg(feature = "models3d")] + #[no_mangle] + pub extern "C" fn 
bloom_create_mesh_scratch(vertex_count: f64, index_count: f64) -> f64 { + $crate::ffi::guard("bloom_create_mesh_scratch", move || { + engine().models.create_mesh_from_scratch(vertex_count as u32, index_count as u32) + }) + } + #[cfg(not(feature = "models3d"))] + #[no_mangle] + pub extern "C" fn bloom_create_mesh_scratch(_vertex_count: f64, _index_count: f64) -> f64 { + $crate::ffi::feature_off_warn_once("bloom_create_mesh_scratch", "models3d"); + 0.0 + } + // bloom_get_model_mesh_count [source: linux; gated: models3d] #[cfg(feature = "models3d")] #[no_mangle] diff --git a/native/shared/src/ffi_core/visual.rs b/native/shared/src/ffi_core/visual.rs index 44ec090..a2218b8 100644 --- a/native/shared/src/ffi_core/visual.rs +++ b/native/shared/src/ffi_core/visual.rs @@ -373,6 +373,47 @@ macro_rules! __bloom_ffi_visual { }) } + // bloom_set_bloom_intensity [source: art-direction] + // Scales the bloom contribution added to the HDR scene before tonemap + // (0 = none, ~0.04 subtle default, higher = stronger glow). + #[no_mangle] + pub extern "C" fn bloom_set_bloom_intensity(value: f64) { + $crate::ffi::guard("bloom_set_bloom_intensity", move || { + engine().renderer.set_bloom_intensity(value as f32); + }) + } + + // bloom_set_tonemap [source: art-direction] + // Selects the tonemap operator: 0 = ACES (default), 1 = AgX (more + // filmic, better highlight desaturation + a punchier look). + #[no_mangle] + pub extern "C" fn bloom_set_tonemap(kind: f64) { + $crate::ffi::guard("bloom_set_tonemap", move || { + engine().renderer.set_tonemap_kind(kind as u32); + }) + } + + // bloom_set_auto_exposure_key [source: art-direction] + // Target scene-average luma for auto-exposure. Lower = the auto- + // exposure aims for a darker, more saturated midpoint (less wash-out); + // higher = brighter. 
+ #[no_mangle] + pub extern "C" fn bloom_set_auto_exposure_key(key: f64) { + $crate::ffi::guard("bloom_set_auto_exposure_key", move || { + engine().renderer.set_auto_exposure_key(key as f32); + }) + } + + // bloom_set_auto_exposure_rate [source: art-direction] + // Per-frame adaptation rate for auto-exposure (0 = frozen, ~0.05 = a + // smooth eye-adaptation feel, 1 = instant). + #[no_mangle] + pub extern "C" fn bloom_set_auto_exposure_rate(rate: f64) { + $crate::ffi::guard("bloom_set_auto_exposure_rate", move || { + engine().renderer.set_auto_exposure_rate(rate as f32); + }) + } + // bloom_set_ssao_enabled [source: macos] #[no_mangle] pub extern "C" fn bloom_set_ssao_enabled(on: f64) { diff --git a/native/shared/src/models.rs b/native/shared/src/models.rs index 58c81e0..7aede39 100644 --- a/native/shared/src/models.rs +++ b/native/shared/src/models.rs @@ -68,6 +68,14 @@ pub struct ModelAnimation { pub struct ModelManager { pub models: HandleRegistry, pub animations: HandleRegistry, + /// Scratch buffers for the array-free mesh-upload path. Perry 0.5.1171 + /// rejects passing a JS `number[]` to a native `i64` pointer param + /// (strict safe-integer check), so `createMesh` instead pushes vertex + /// floats / indices one scalar at a time through `mesh_scratch_push_*` + /// (all `f64` ABI) and then builds the mesh from these. Mirrors the + /// physics subsystem's `scratch_*` shape-upload path. 
+ pub scratch_f32: Vec<f32>, + pub scratch_u32: Vec<u32>, } impl ModelManager { @@ -75,9 +83,32 @@ impl ModelManager { Self { models: HandleRegistry::new(), animations: HandleRegistry::new(), + scratch_f32: Vec::new(), + scratch_u32: Vec::new(), } } + pub fn mesh_scratch_reset(&mut self) { + self.scratch_f32.clear(); + self.scratch_u32.clear(); + } + pub fn mesh_scratch_push_f32(&mut self, v: f32) { self.scratch_f32.push(v); } + pub fn mesh_scratch_push_u32(&mut self, v: u32) { self.scratch_u32.push(v); } + + /// Build a mesh from the scratch buffers: `vertex_count` vertices of 12 + /// floats each in `scratch_f32`, `index_count` indices in `scratch_u32`. + pub fn create_mesh_from_scratch(&mut self, vertex_count: u32, index_count: u32) -> f64 { + let need_f = vertex_count as usize * 12; + let need_u = index_count as usize; + if vertex_count == 0 || self.scratch_f32.len() < need_f || self.scratch_u32.len() < need_u { + return 0.0; + } + // Clone out so create_mesh's &self borrow doesn't alias scratch. + let verts: Vec<f32> = self.scratch_f32[..need_f].to_vec(); + let inds: Vec<u32> = self.scratch_u32[..need_u].to_vec(); + self.create_mesh(&verts, &inds) + } + pub fn load_model(&mut self, file_data: &[u8]) -> f64 { match load_gltf(file_data) { Some(model) => self.models.alloc(model), diff --git a/native/shared/src/renderer/lighting.rs b/native/shared/src/renderer/lighting.rs index 453887b..324e511 100644 --- a/native/shared/src/renderer/lighting.rs +++ b/native/shared/src/renderer/lighting.rs @@ -31,10 +31,11 @@ pub(super) fn create_lighting_layout( }; let frag = wgpu::ShaderStages::FRAGMENT; let mut entries = vec![ - // 0: Lighting UBO + // 0: Lighting UBO. VERTEX_FRAGMENT so the scene vertex shader can read + // `wind` (foliage sway); the fragment stage uses the full struct. 
wgpu::BindGroupLayoutEntry { binding: 0, - visibility: frag, + visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs index 7bc3ed1..b75448a 100644 --- a/native/shared/src/renderer/mod.rs +++ b/native/shared/src/renderer/mod.rs @@ -641,6 +641,9 @@ struct CachedModelDraw { uniform_slot: usize, cache_handle: u64, mesh_idx: usize, + /// Object→world model matrix for this draw, kept CPU-side so the + /// shadow pass can render the model depth-only from the light. + model: [[f32; 4]; 4], } // ============================================================ @@ -2829,7 +2832,10 @@ impl Renderer { tex_entry(6), samp_entry(7), wgpu::BindGroupLayoutEntry { binding: 8, - visibility: wgpu::ShaderStages::FRAGMENT, + // VERTEX_FRAGMENT: the vertex stage reads metal_rough.w + // (alpha-cutoff) to gate foliage wind sway; fragment reads + // the full MaterialFactors. + visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, @@ -8662,7 +8668,8 @@ impl Renderer { ], up: [up_world[0] * tan_half, up_world[1] * tan_half, up_world[2] * tan_half, 0.0], forward: [forward_world[0], forward_world[1], forward_world[2], 0.0], - intensity: [intensity, 0.0, 0.0, 0.0], + // .y carries current time (seconds) so the cloud layer can drift. + intensity: [intensity, self.lighting_uniforms.wind[3], 0.0, 0.0], }; self.queue.write_buffer(&self.sky_uniform_buffer, 0, bytemuck::bytes_of(&uniforms)); @@ -10215,6 +10222,7 @@ impl Renderer { uniform_slot: slot, cache_handle: handle_bits, mesh_idx, + model: model_matrix, }); } } @@ -11724,6 +11732,10 @@ impl Renderer { /// FFI callers drive this from their frame boundary so `PerFrame.time` /// reflects the real process-uptime clock. 
pub fn material_system_begin_frame(&mut self, time_seconds: f32, delta_time: f32) { + // Feed wind + time to the built-in scene shader (foliage sway). Set + // here each frame so it's current before the per-frame lighting upload. + self.lighting_uniforms.wind = + [self.wind[0], self.wind[1], self.wind[2], time_seconds]; let screen_w = self.surface_config.width as f32; let screen_h = self.surface_config.height as f32; let (rw, rh) = self.render_extent(); diff --git a/native/shared/src/renderer/shaders/core.rs b/native/shared/src/renderer/shaders/core.rs index e3b4aaf..d628f9c 100644 --- a/native/shared/src/renderer/shaders/core.rs +++ b/native/shared/src/renderer/shaders/core.rs @@ -229,6 +229,7 @@ struct Lighting { shadow_cascade_vps: array<mat4x4<f32>, 3>, shadow_cascade_splits: vec4<f32>, shadow_view_matrix: mat4x4<f32>, + wind: vec4<f32>, // xy=dir, z=amplitude, w=time (foliage sway) }; struct MaterialFactors { @@ -318,7 +319,21 @@ fn env_sample(dir: vec3<f32>) -> vec3<f32> { @vertex fn vs_main_scene(in: VertexInputScene) -> VertexOutputScene { var out: VertexOutputScene; - let pos4 = vec4(in.position, 1.0); + var local = in.position; + // Foliage wind sway — only for alpha-cut materials (leaf cards), so the + // opaque trunk and the rest of the world stay rigid. Sway grows with the + // vertex's height up the plant and its phase varies by world position so + // cards don't move in lockstep. lighting.wind = (dir.x, dir.z, amp, time). 
+ if (material.metal_rough.w > 0.0 && lighting.wind.z > 0.0) { + let wp0 = (u.model * vec4(in.position, 1.0)).xyz; + let t = lighting.wind.w; + let sway = lighting.wind.z * (0.25 + max(in.position.y, 0.0) * 0.16); + let phase = t * 1.6 + wp0.x * 0.5 + wp0.z * 0.5; + local.x = local.x + lighting.wind.x * sway * sin(phase); + local.z = local.z + lighting.wind.y * sway * sin(phase * 1.3 + 1.1); + local.y = local.y + sway * 0.12 * sin(phase * 0.9 + 2.0); + } + let pos4 = vec4(local, 1.0); let curr = u.mvp * pos4; out.clip_position = curr; out.curr_clip = curr; @@ -758,6 +773,15 @@ fn fs_main_scene(in: VertexOutputScene) -> SceneOut { lighting.light_dir.w, base_color, metallic, roughness) * direct_shadow; + // Foliage backlit transmission — sun bleeding THROUGH alpha-cut leaf cards + // (the bright rim glow when the sun is behind a tree). Gated on the + // alpha-cutoff so only cut-out foliage materials get it; opaque surfaces + // (cutoff == 0) are unaffected. Matches shade_foliage's transmission term. 
+ if (alpha_cutoff > 0.0) { + let trans = pow(max(dot(v, -legacy_dir), 0.0), 3.0) * 0.85; + lit += base_color * lighting.light_color.rgb * lighting.light_dir.w * trans; + } + let dir_count = u32(lighting.dir_light_count.x); for (var i = 0u; i < dir_count; i++) { let dl = lighting.dir_lights[i]; diff --git a/native/shared/src/renderer/shaders/env.rs b/native/shared/src/renderer/shaders/env.rs index 37d53ed..d57208c 100644 --- a/native/shared/src/renderer/shaders/env.rs +++ b/native/shared/src/renderer/shaders/env.rs @@ -706,6 +706,49 @@ fn dir_to_sky_uv(dir: vec3<f32>) -> vec2<f32> { return vec2(u_norm, v_norm); } +// --- Procedural cloud layer (value-noise fBm) --------------------------- +fn cloud_hash(p: vec2<f32>) -> f32 { + return fract(sin(dot(p, vec2(127.1, 311.7))) * 43758.5453); +} +fn cloud_noise(p: vec2<f32>) -> f32 { + let i = floor(p); + let f = fract(p); + let uu = f * f * (3.0 - 2.0 * f); + let a = cloud_hash(i); + let b = cloud_hash(i + vec2(1.0, 0.0)); + let c = cloud_hash(i + vec2(0.0, 1.0)); + let d = cloud_hash(i + vec2(1.0, 1.0)); + return mix(mix(a, b, uu.x), mix(c, d, uu.x), uu.y); +} +fn cloud_fbm(p0: vec2<f32>) -> f32 { + var s = 0.0; + var amp = 0.5; + var q = p0; + for (var i = 0; i < 5; i = i + 1) { + s = s + amp * cloud_noise(q); + q = q * 2.03; + amp = amp * 0.5; + } + return s; +} +// Analytic cloud cover for a view ray. Projects the ray onto a virtual cloud +// plane (perspective convergence toward the horizon), samples fBm for puffy +// coverage, fades near the horizon, and thins around the sun so the disk shows +// through. Returns (coverage, sunlit-amount). +fn cloud_cover(dir: vec3<f32>, sun_dir: vec3<f32>, time: f32) -> vec2<f32> { + if (dir.y <= 0.02) { return vec2(0.0, 0.0); } + let p = (dir.xz / dir.y) * 2.0; + // Slow wind drift: a uniform offset of the sample point, slower along the second axis. 
+ let drift = vec2(time * 0.006, time * 0.0025); + var cov = cloud_fbm(p * 0.55 + vec2(23.0, 11.0) + drift); + cov = smoothstep(0.56, 1.04, cov); + let horizon_fade = smoothstep(0.03, 0.24, dir.y); + let near_sun = smoothstep(0.90, 0.999, dot(dir, sun_dir)); + cov = cov * horizon_fade * (1.0 - near_sun * 0.8) * 0.9; + let sun_amt = clamp(dot(dir, sun_dir) * 0.5 + 0.5, 0.0, 1.0); + return vec2(cov, sun_amt); +} + fn sample_transmittance(r: f32, mu: f32) -> vec3 { let v = clamp((r - GROUND_R) / (ATMOS_TOP - GROUND_R), 0.0, 1.0); let uu = clamp((mu + 1.0) * 0.5, 0.0, 1.0); @@ -745,6 +788,16 @@ fn sky_fs(in: VsOut) -> SkyOut { radiance = radiance * u.intensity.x; + // Procedural cloud layer, composited over the scaled sky radiance. Cloud + // colour is absolute HDR (puffy white in sun, cool grey in shadow) so the + // clouds read brighter than the sky behind them regardless of env intensity. + let cc = cloud_cover(dir, sun_dir, u.intensity.y); + if (cc.x > 0.0) { + let lit = cc.y * cc.y; + let cloud_col = mix(vec3(0.62, 0.66, 0.76), vec3(2.6, 2.5, 2.35), lit); + radiance = mix(radiance, cloud_col, cc.x); + } + // EN-005 Phase 4 — sub-LSB dither to break up the banding that // Rgba16Float storage produces in low-frequency regions like the // zenith (where sky color changes < 1 lsb across many pixels). diff --git a/native/shared/src/renderer/shaders/post.rs b/native/shared/src/renderer/shaders/post.rs index a8c5733..b119989 100644 --- a/native/shared/src/renderer/shaders/post.rs +++ b/native/shared/src/renderer/shaders/post.rs @@ -604,9 +604,16 @@ fn fs_main(in: VsOut) -> @location(0) vec4 { let mean_t = aerial.a; color = color * mean_t + in_scatter; } - } else { + } + // Manual height fog. Runs additively *after* the procedural-sky aerial + // perspective (not just as its `else`), because that LUT is km-scaled and + // contributes ~nothing over a small (tens-of-metres) arena. 
This march is + // world-scale, so a low density gives controllable near-ground haze that + // adds aerial depth and softens the distant terrain edge. Skips sky pixels + // (depth == 1.0) so the procedural sky isn't double-tinted/washed. + { let fog_density = u.fog_color_density.w; - if (fog_density > 0.0) { + if (fog_density > 0.0 && depth < 1.0) { let height_ref = u.fog_params.x; let height_falloff = u.fog_params.y; let cam_pos = vec3( diff --git a/native/shared/src/renderer/shadow_pass.rs b/native/shared/src/renderer/shadow_pass.rs index f104b73..fbf7d49 100644 --- a/native/shared/src/renderer/shadow_pass.rs +++ b/native/shared/src/renderer/shadow_pass.rs @@ -86,7 +86,12 @@ impl Renderer { || self.shadow_map.dirty || vps_changed || light_changed - || self.shadow_map.rendered_scene_version != scene_ver; + || self.shadow_map.rendered_scene_version != scene_ver + // Immediate-mode + cached-model draws aren't tracked by the scene + // version, and they're re-submitted (and usually move) every frame, + // so re-render the shadow map whenever any are present. + || !self.indices_3d.is_empty() + || !self.model_draw_commands.is_empty(); if should_render { // Build a shared caster list + buffer-ref vectors, then @@ -124,6 +129,47 @@ impl Renderer { }); } + // Immediate-mode 3D batch (drawCube/drawSphere/non-cached models). + // These verts are already in WORLD space, so the model matrix is + // identity. wmin > wmax marks "no bounds" → included in every cascade. + // Games that draw in immediate mode create no scene nodes, so without + // this nothing they draw would cast a shadow. 
+ if !self.indices_3d.is_empty() { + let vb_idx = shadow_vbs.len(); + shadow_vbs.push(&self.persistent_vb_3d); + shadow_ibs.push(&self.persistent_ib_3d); + shadow_nodes.push(ShadowDrawEntry { + vb_idx, + ib_idx: vb_idx, + index_count: self.indices_3d.len() as u32, + transform: IDENTITY_MAT4, + wmin: [1.0, 1.0, 1.0], + wmax: [-1.0, -1.0, -1.0], + }); + } + + // Cached models (drawModel: trees, characters, etc.) — each is a + // GpuMesh plus its object→world matrix. Skinned models cast their + // rest-pose shadow (vs_shadow doesn't skin) — acceptable. + for cmd in self.model_draw_commands.iter() { + if let Some(Some(meshes)) = self.model_gpu_cache.get(&cmd.cache_handle) { + if cmd.mesh_idx < meshes.len() { + let mesh = &meshes[cmd.mesh_idx]; + let vb_idx = shadow_vbs.len(); + shadow_vbs.push(&mesh.vb); + shadow_ibs.push(&mesh.ib); + shadow_nodes.push(ShadowDrawEntry { + vb_idx, + ib_idx: vb_idx, + index_count: mesh.index_count, + transform: cmd.model, + wmin: [1.0, 1.0, 1.0], + wmax: [-1.0, -1.0, -1.0], + }); + } + } + } + let cascade_planes: [[[f32; 4]; 6]; crate::shadows::NUM_CASCADES] = std::array::from_fn(|c| { crate::scene::extract_frustum_planes(&self.shadow_map.light_vps[c]) diff --git a/native/shared/src/renderer/types.rs b/native/shared/src/renderer/types.rs index 5d9087f..48a9e2a 100644 --- a/native/shared/src/renderer/types.rs +++ b/native/shared/src/renderer/types.rs @@ -128,6 +128,11 @@ pub(super) struct LightingUniforms { /// can compute view-space Z for cascade selection without an extra /// buffer binding. pub(super) shadow_view_matrix: [[f32; 4]; 4], + /// Wind for foliage sway in the built-in scene vertex shader: + /// xy = wind direction in the XZ plane (magnitude scales sway), + /// z = amplitude, w = elapsed time (seconds) for the sway phase. + /// Appended last so existing field offsets stay stable. 
+ pub(super) wind: [f32; 4], } impl LightingUniforms { @@ -147,6 +152,7 @@ impl LightingUniforms { shadow_cascade_vps: [IDENTITY_MAT4; 3], shadow_cascade_splits: [8.0, 25.0, 80.0, 0.0], shadow_view_matrix: IDENTITY_MAT4, + wind: [0.0, 0.0, 0.0, 0.0], } } } diff --git a/native/windows/Cargo.toml b/native/windows/Cargo.toml index 4599fad..a76ecf8 100644 --- a/native/windows/Cargo.toml +++ b/native/windows/Cargo.toml @@ -8,21 +8,32 @@ name = "bloom_windows" crate-type = ["staticlib"] [features] -# Jolt is opt-in on Windows because Perry's native-library build pipeline -# does not forward cargo `--features` to nativeLibrary crates, and bundling -# Jolt's static archives into bloom_windows.lib leaves the final lld-link -# step unable to resolve `JPH::*` symbols (the staticlib has no embedded -# search path and the C++ library lives off cargo's normal lib search). -# Games that need bloom/physics on Windows must opt in explicitly until the -# perry side gains a way to thread `cargo:rustc-link-search` through to the -# final link line. -default = ["models3d", "image-extras"] +# Jolt defaults ON, matching the macOS/iOS targets, so `bloom/physics` +# works out of the box on Windows. The historical blocker — the +# `bloom_windows.lib` staticlib can't carry the cmake-built Jolt archives' +# search path into perry's final lld-link step — is solved at the manifest +# level: `package.json`'s `perry.nativeLibrary.targets.windows` declares a +# `libDirs` entry pointing at the stable Jolt build output +# (`native/third_party/bloom_jolt/build/windows-x86_64/lib`) plus `Jolt` / +# `bloom_jolt` in `libs`, which perry turns into `/LIBPATH:` + `*.lib` on the +# link line (perry >= 0.5.1126 manifest libDirs support). Pure-2D Windows +# games opt out via Perry feature forwarding in perry.toml: +# [native-library."@bloomengine/engine"] +# default-features = false +default = ["jolt", "models3d", "image-extras"] jolt = ["bloom-shared/jolt"] # EN-014 — see native/shared/Cargo.toml. 
Default-on so existing games are # unaffected; pure-2D games opt out via Perry feature forwarding. models3d = ["bloom-shared/models3d"] image-extras = ["bloom-shared/image-extras"] +# Match perry-runtime's panic strategy so perry's final lld-link doesn't see +# two copies of `rust_eh_personality` (and friends) from two independent Rust +# staticlibs (perry_runtime.lib + bloom_windows.lib). Mirrors the same setting +# in native/macos/Cargo.toml. +[profile.release] +panic = "abort" + [dependencies] bloom-shared = { path = "../shared", default-features = false, features = ["mp3"] } image = { version = "0.25", default-features = false, features = ["hdr"] } diff --git a/native/windows/src/lib.rs b/native/windows/src/lib.rs index 6e7d8b9..4155025 100644 --- a/native/windows/src/lib.rs +++ b/native/windows/src/lib.rs @@ -146,7 +146,28 @@ mod win32 { let x = (lparam.0 & 0xFFFF) as i16 as f64; let y = ((lparam.0 >> 16) & 0xFFFF) as i16 as f64; if let Some(eng) = ENGINE.get_mut() { - eng.input.set_mouse_position(x, y); + if eng.input.cursor_disabled { + // FPS-style capture: accumulate the movement away from + // the window centre as a raw delta, then snap the OS + // cursor back to centre so look never hits the screen + // edge. (begin_frame consumes raw_delta when the cursor + // is disabled.) The recenter generates another + // WM_MOUSEMOVE at the centre with zero delta — no loop. 
+ let mut rect = RECT::default(); + let _ = GetClientRect(hwnd, &mut rect); + let cx = ((rect.right - rect.left) / 2) as f64; + let cy = ((rect.bottom - rect.top) / 2) as f64; + let dx = x - cx; + let dy = y - cy; + if dx != 0.0 || dy != 0.0 { + eng.input.accumulate_mouse_delta(dx, dy); + let mut pt = POINT { x: cx as i32, y: cy as i32 }; + let _ = ClientToScreen(hwnd, &mut pt); + let _ = SetCursorPos(pt.x, pt.y); + } + } else { + eng.input.set_mouse_position(x, y); + } } DefWindowProcW(hwnd, msg, wparam, lparam) } @@ -364,6 +385,18 @@ pub extern "C" fn bloom_init_window(width: f64, height: f64, title_ptr: *const u // limit is 4. Metal / Vulkan / D3D12 support at least 7, so 5 is // safely within every real backend's capabilities. required_limits.max_bind_groups = 5; + // The refractive/translucent material profile binds up to 19 + // sampled textures in the fragment stage (5 material maps + env/ + // BRDF/3 shadow cascades/env-diffuse + planar reflection + 3 texture + // arrays + the group-4 scene_color/scene_depth/impulse/motion inputs). + // wgpu's default is 16. Raise to whatever the adapter actually + // supports — every real D3D12/Vulkan/Metal GPU exposes ≥128 — so + // opaque/transparent materials are unaffected and refractive ones link. + let adapter_limits = adapter.limits(); + required_limits.max_sampled_textures_per_shader_stage = + adapter_limits.max_sampled_textures_per_shader_stage; + required_limits.max_samplers_per_shader_stage = + adapter_limits.max_samplers_per_shader_stage; if required_features.intersects(rt_mask) { required_limits = required_limits .using_minimum_supported_acceleration_structure_values(); @@ -671,11 +704,14 @@ pub extern "C" fn bloom_set_window_icon(path_ptr: *const u8) { let _ = str_from_ #[no_mangle] pub extern "C" fn bloom_disable_cursor() { engine().input.cursor_disabled = true; + // Hide the OS cursor (ShowCursor keeps a counter; loop until it's hidden). 
+ unsafe { while windows::Win32::UI::WindowsAndMessaging::ShowCursor(false) >= 0 {} } } #[no_mangle] pub extern "C" fn bloom_enable_cursor() { engine().input.cursor_disabled = false; + unsafe { while windows::Win32::UI::WindowsAndMessaging::ShowCursor(true) < 0 {} } } // E4: Clipboard (stub on this platform) diff --git a/package.json b/package.json index 17dbcbd..5d8532a 100644 --- a/package.json +++ b/package.json @@ -1046,6 +1046,33 @@ ], "returns": "f64" }, + { + "name": "bloom_mesh_scratch_reset", + "params": [], + "returns": "void" + }, + { + "name": "bloom_mesh_scratch_push_f32", + "params": [ + "f64" + ], + "returns": "void" + }, + { + "name": "bloom_mesh_scratch_push_u32", + "params": [ + "f64" + ], + "returns": "void" + }, + { + "name": "bloom_create_mesh_scratch", + "params": [ + "f64", + "f64" + ], + "returns": "f64" + }, { "name": "bloom_set_joint_test", "params": [ @@ -1279,6 +1306,34 @@ ], "returns": "void" }, + { + "name": "bloom_set_bloom_intensity", + "params": [ + "f64" + ], + "returns": "void" + }, + { + "name": "bloom_set_tonemap", + "params": [ + "f64" + ], + "returns": "void" + }, + { + "name": "bloom_set_auto_exposure_key", + "params": [ + "f64" + ], + "returns": "void" + }, + { + "name": "bloom_set_auto_exposure_rate", + "params": [ + "f64" + ], + "returns": "void" + }, { "name": "bloom_set_ssao_enabled", "params": [ @@ -3490,7 +3545,12 @@ "dxguid", "xinput", "opengl32", - "d3dcompiler" + "d3dcompiler", + "Jolt", + "bloom_jolt" + ], + "libDirs": [ + "native/third_party/bloom_jolt/build/windows-x86_64/lib" ] }, "linux": { diff --git a/src/core/index.ts b/src/core/index.ts index ad2880e..9e2ae87 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -38,6 +38,10 @@ declare function bloom_set_quality_preset(preset: number): void; declare function bloom_set_shadows_enabled(on: number): void; declare function bloom_set_shadows_always_fresh(on: number): void; declare function bloom_set_bloom_enabled(on: number): void; +declare function 
bloom_set_bloom_intensity(value: number): void; +declare function bloom_set_tonemap(kind: number): void; +declare function bloom_set_auto_exposure_key(key: number): void; +declare function bloom_set_auto_exposure_rate(rate: number): void; declare function bloom_set_ssao_enabled(on: number): void; declare function bloom_set_post_pass(source: number): number; declare function bloom_clear_post_pass(): void; @@ -347,6 +351,41 @@ export function setBloomEnabled(on: boolean): void { bloom_set_bloom_enabled(on ? 1 : 0); } +/** + * Bloom contribution strength added to the HDR scene before tonemap. + * 0 = none, ~0.04 subtle default, higher = stronger glow around bright pixels. + */ +export function setBloomIntensity(intensity: number): void { + bloom_set_bloom_intensity(intensity); +} + +/** Tonemap operator selection. */ +export enum Tonemap { + /** Filmic ACES (default). */ + ACES = 0, + /** AgX — more filmic highlight roll-off + a punchier, better-saturated look. */ + AgX = 1, +} + +/** Choose the tonemap operator applied in the composite pass. */ +export function setTonemap(kind: Tonemap): void { + bloom_set_tonemap(kind); +} + +/** + * Auto-exposure target (scene-average luma key). Lower aims for a darker, + * more saturated midpoint (counteracts wash-out from very bright skies); + * higher aims brighter. Only affects frames where auto-exposure is on. + */ +export function setAutoExposureKey(key: number): void { + bloom_set_auto_exposure_key(key); +} + +/** Auto-exposure adaptation rate per frame (0 = frozen, ~0.05 smooth, 1 = instant). */ +export function setAutoExposureRate(rate: number): void { + bloom_set_auto_exposure_rate(rate); +} + /** Toggle screen-space ambient occlusion + its bilateral blur. Default on. */ export function setSsaoEnabled(on: boolean): void { bloom_set_ssao_enabled(on ? 
1 : 0); diff --git a/src/index.ts b/src/index.ts index 60e443d..113f7c7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -66,7 +66,7 @@ export { } from './textures/index'; export { - loadModel, drawModel, unloadModel, getModelBounds, genMeshSplineRibbon, + loadModel, drawModel, drawModelRotated, unloadModel, getModelBounds, genMeshSplineRibbon, drawCube, drawCubeWires, drawSphere, drawSphereWires, drawCylinder, drawPlane, drawGrid, drawRay, genMeshCube, genMeshHeightmap, loadShader, compileMaterial, drawMeshWithMaterial, @@ -74,6 +74,7 @@ export { compileMaterialCutout, compileMaterialInstanced, createInstanceBuffer, drawMeshWithMaterialInstanced, destroyInstanceBuffer, + createPlanarReflection, setMaterialReflectionProbe, compileMaterialFromFile, loadMaterial, createMeshExplicit, loadModelAnimation, updateModelAnimation, createMesh, diff --git a/src/models/index.ts b/src/models/index.ts index f9a6ff1..3e1ef3a 100644 --- a/src/models/index.ts +++ b/src/models/index.ts @@ -40,6 +40,10 @@ declare function bloom_draw_material(material: number, meshHandle: number, meshI declare function bloom_load_model_animation(path: number): number; declare function bloom_update_model_animation(handle: number, animIndex: number, time: number, scale: number, px: number, py: number, pz: number, rotY: number): void; declare function bloom_create_mesh(vertexPtr: number, vertexCount: number, indexPtr: number, indexCount: number): number; +declare function bloom_mesh_scratch_reset(): void; +declare function bloom_mesh_scratch_push_f32(v: number): void; +declare function bloom_mesh_scratch_push_u32(v: number): void; +declare function bloom_create_mesh_scratch(vertexCount: number, indexCount: number): number; declare function bloom_set_ambient_light(r: number, g: number, b: number, intensity: number): void; declare function bloom_set_directional_light(dx: number, dy: number, dz: number, r: number, g: number, b: number, intensity: number): void; declare function 
bloom_set_procedural_sky(enabled: number, rayleighDensity: number, mieDensity: number, groundAlbedo: number): void; @@ -135,13 +139,10 @@ export function loadModel(path: string): Model { if (text) { const parsed = parseOBJ(text); if (parsed) { - const handle = bloom_create_mesh( - parsed.vertices as any, - parsed.vertices.length / 12, - parsed.indices as any, - parsed.indices.length, + return uploadMeshScratch( + parsed.vertices, parsed.vertices.length / 12, + parsed.indices, parsed.indices.length, ); - return makeModel(handle, 1, 1); } } return makeModel(0); @@ -650,16 +651,29 @@ export function updateModelAnimation(handle: number, animIndex: number, time: nu bloom_update_model_animation(handle, animIndex, time, scale, px, py, pz, rotY); } +// Upload a mesh via the scratch buffer (array-free). Perry 0.5.1171 rejects +// passing a `number[]` to a native `i64` pointer param (strict safe-integer +// check), so we push each vertex float + index scalar through the all-f64 +// scratch FFI and then build. One-time init cost; fine for static meshes. +function uploadMeshScratch( + vertices: number[], vertexCount: number, + indices: number[], indexCount: number, +): Model { + bloom_mesh_scratch_reset(); + const vfloats = vertexCount * 12; + for (let i = 0; i < vfloats; i++) bloom_mesh_scratch_push_f32(vertices[i]); + for (let i = 0; i < indexCount; i++) bloom_mesh_scratch_push_u32(indices[i]); + const handle = bloom_create_mesh_scratch(vertexCount, indexCount); + return makeModel(handle, 1, 1); +} + export function createMesh(vertices: number[], indices: number[]): Model { // vertices: flat array of [x,y,z, nx,ny,nz, r,g,b,a, u,v] per vertex (12 floats each) - // NOTE: `vertices.length` and `indices.length` here only return the - // correct value for arrays built via literals or `new Array(N)` + - // index assignment. Arrays built via `.push()` reflect the LITERAL - // initialization size as `.length`, not the post-push size (a - // Perry codegen bug). 
For `.push`-built data, use createMeshExplicit - // and pass the counts manually. - const handle = bloom_create_mesh(vertices as any, vertices.length / 12, indices as any, indices.length); - return makeModel(handle, 1, 1); + // NOTE: `vertices.length` / `indices.length` are correct only for arrays + // built via literals or `new Array(N)` + index assignment. Arrays built via + // `.push()` report the literal-init size (a Perry codegen bug) — use + // createMeshExplicit and pass the counts manually for those. + return uploadMeshScratch(vertices, vertices.length / 12, indices, indices.length); } /// Explicit-count variant of createMesh — pass `vertexCount` (number @@ -670,8 +684,7 @@ export function createMeshExplicit( vertices: number[], vertexCount: number, indices: number[], indexCount: number, ): Model { - const handle = bloom_create_mesh(vertices as any, vertexCount, indices as any, indexCount); - return makeModel(handle, 1, 1); + return uploadMeshScratch(vertices, vertexCount, indices, indexCount); } export function setAmbientLight(color: Color, intensity: number): void { From 1e88a42841fd5a1ce859ba2a1f33abe888107021 Mon Sep 17 00:00:00 2001 From: Ralph Kuepper Date: Tue, 16 Jun 2026 20:53:09 +0200 Subject: [PATCH 2/4] feat(render): alpha-tested shadows for cutout foliage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trees/grass/leaves cast opaque rest-pose blobs because the shadow pass is depth-only. Added a SECOND shadow pipeline (pipeline_cutout, SHADOW_SHADER_CUTOUT) that samples the caster's base-colour alpha and discards below the material cutoff, so cutout cards drop their real (dappled) shape. The opaque shadow pipeline is byte-identical/untouched — only cutout cached models use the new path. GpuMesh gains a per-mesh cutout shadow bind group (base colour + sampler + cutoff), built in cache_model_if_static for alpha_cutoff>0; shadow_pass.rs selects the pipeline per caster and binds group 1 for cutout entries. 
--- native/shared/src/renderer/mod.rs | 37 +++++++ native/shared/src/renderer/shadow_pass.rs | 28 +++++ native/shared/src/shadows.rs | 129 +++++++++++++++++++++- 3 files changed, 193 insertions(+), 1 deletion(-) diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs index b75448a..1ba91e8 100644 --- a/native/shared/src/renderer/mod.rs +++ b/native/shared/src/renderer/mod.rs @@ -635,6 +635,12 @@ struct GpuMesh { /// its lifetime matches (bind groups reference buffers internally /// via Arc, but we also want the strong ref for future updates). _material_uniform: wgpu::Buffer, + /// Alpha-tested shadow bind group (base-colour tex + sampler + cutoff), + /// built only for cutout materials (alpha_cutoff > 0). When present, the + /// shadow pass renders this mesh with the cutout pipeline so foliage casts + /// its real shape. `None` → opaque caster, uses the plain shadow pipeline. + shadow_cutout_bg: Option, + _shadow_cutoff_buf: Option, } struct CachedModelDraw { @@ -10177,12 +10183,43 @@ impl Renderer { let material_bg = self.create_scene_material_bg( base_color_idx, normal_idx, mr_idx, em_idx, occ_idx, &material_uniform, ); + // Cutout casters get an alpha-test shadow bind group (base colour + + // sampler + cutoff). wgpu keeps the buffer alive via the bind group, + // but we hold the strong ref too (matches _material_uniform). 
+ let (shadow_cutout_bg, shadow_cutoff_buf) = if mesh.alpha_cutoff > 0.0 { + let cutoff_buf = self.device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("shadow_cutout_cutoff"), + contents: bytemuck::cast_slice(&[mesh.alpha_cutoff, 0.0f32, 0.0, 0.0]), + usage: wgpu::BufferUsages::UNIFORM, + }); + let bi = base_color_idx as usize; + let base_tex = if base_color_idx == 0 || bi >= self.textures.len() { + &self.textures[0] + } else { + &self.textures[bi] + }; + let base_view = base_tex.create_view(&wgpu::TextureViewDescriptor::default()); + let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("shadow_cutout_bg"), + layout: &self.shadow_map.cutout_tex_layout, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: wgpu::BindingResource::TextureView(&base_view) }, + wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::Sampler(&self.sampler) }, + wgpu::BindGroupEntry { binding: 2, resource: cutoff_buf.as_entire_binding() }, + ], + }); + (Some(bg), Some(cutoff_buf)) + } else { + (None, None) + }; GpuMesh { vb, ib, index_count: mesh.indices.len() as u32, material_bg, _material_uniform: material_uniform, + shadow_cutout_bg, + _shadow_cutoff_buf: shadow_cutoff_buf, } }).collect(); diff --git a/native/shared/src/renderer/shadow_pass.rs b/native/shared/src/renderer/shadow_pass.rs index fbf7d49..37bd573 100644 --- a/native/shared/src/renderer/shadow_pass.rs +++ b/native/shared/src/renderer/shadow_pass.rs @@ -106,10 +106,14 @@ impl Renderer { transform: [[f32; 4]; 4], wmin: [f32; 3], wmax: [f32; 3], + // Index into `cutout_bgs` for an alpha-tested caster (cutout + // foliage), or -1 for an opaque caster (plain depth pipeline). 
+ cutout_idx: i32, } let mut shadow_nodes: Vec = Vec::new(); let mut shadow_vbs: Vec<&wgpu::Buffer> = Vec::new(); let mut shadow_ibs: Vec<&wgpu::Buffer> = Vec::new(); + let mut cutout_bgs: Vec<&wgpu::BindGroup> = Vec::new(); for (_handle, node) in scene.nodes.iter() { if !node.visible || !node.cast_shadow || node.indices.is_empty() { continue; @@ -126,6 +130,7 @@ impl Renderer { transform: node.transform, wmin: node.world_bounds_min, wmax: node.world_bounds_max, + cutout_idx: -1, }); } @@ -145,6 +150,7 @@ impl Renderer { transform: IDENTITY_MAT4, wmin: [1.0, 1.0, 1.0], wmax: [-1.0, -1.0, -1.0], + cutout_idx: -1, }); } @@ -158,6 +164,11 @@ impl Renderer { let vb_idx = shadow_vbs.len(); shadow_vbs.push(&mesh.vb); shadow_ibs.push(&mesh.ib); + // Cutout foliage → alpha-tested shadow pipeline. + let cutout_idx = match &mesh.shadow_cutout_bg { + Some(bg) => { let i = cutout_bgs.len(); cutout_bgs.push(bg); i as i32 } + None => -1, + }; shadow_nodes.push(ShadowDrawEntry { vb_idx, ib_idx: vb_idx, @@ -165,6 +176,7 @@ impl Renderer { transform: cmd.model, wmin: [1.0, 1.0, 1.0], wmax: [-1.0, -1.0, -1.0], + cutout_idx, }); } } @@ -234,12 +246,28 @@ impl Renderer { multiview_mask: None, }); + // Track the bound pipeline so we only switch when a caster's + // opaque/cutout kind changes (cutout casters are typically + // grouped at the tail, so this is usually one switch). 
+ let mut cur_cutout = false; shadow_pass.set_pipeline(&self.shadow_map.pipeline); for (slot, &ei) in entries.iter().take(count).enumerate() { let entry = &shadow_nodes[ei]; let offset = (slot * stride) as u32; + let is_cutout = entry.cutout_idx >= 0; + if is_cutout != cur_cutout { + shadow_pass.set_pipeline(if is_cutout { + &self.shadow_map.pipeline_cutout + } else { + &self.shadow_map.pipeline + }); + cur_cutout = is_cutout; + } shadow_pass.set_bind_group(0, &self.shadow_map.uniform_bind_group, &[offset]); + if is_cutout { + shadow_pass.set_bind_group(1, cutout_bgs[entry.cutout_idx as usize], &[]); + } shadow_pass.set_vertex_buffer(0, shadow_vbs[entry.vb_idx].slice(..)); shadow_pass.set_index_buffer(shadow_ibs[entry.ib_idx].slice(..), wgpu::IndexFormat::Uint32); shadow_pass.draw_indexed(0..entry.index_count, 0, 0..1); diff --git a/native/shared/src/shadows.rs b/native/shared/src/shadows.rs index e90e71d..07d3b90 100644 --- a/native/shared/src/shadows.rs +++ b/native/shared/src/shadows.rs @@ -50,6 +50,52 @@ fn vs_shadow(in: ShadowVertexInput) -> @builtin(position) vec4 { } "; +/// Alpha-tested shadow shader for cutout foliage (trees, grass, leaves). Same +/// depth-only output as SHADOW_SHADER but samples the caster's base-colour +/// alpha and discards below the material cutoff, so cutout cards cast their +/// real shape (dappled light) instead of an opaque billboard blob. Used by a +/// dedicated pipeline; the opaque shadow path stays untouched. 
+pub const SHADOW_SHADER_CUTOUT: &str = " +struct ShadowUniforms { + light_vp: mat4x4, + model: mat4x4, +}; +@group(0) @binding(0) var shadow_u: ShadowUniforms; + +struct CutoutUniforms { cutoff: vec4 }; // x = alpha cutoff +@group(1) @binding(0) var base_tex: texture_2d; +@group(1) @binding(1) var base_samp: sampler; +@group(1) @binding(2) var cut: CutoutUniforms; + +struct ShadowVertexInput { + @location(0) position: vec3, + @location(1) normal: vec3, + @location(2) color: vec4, + @location(3) uv: vec2, + @location(4) joints: vec4, + @location(5) weights: vec4, +}; +struct VsOut { + @builtin(position) pos: vec4, + @location(0) uv: vec2, +}; + +@vertex +fn vs_shadow_cutout(in: ShadowVertexInput) -> VsOut { + var o: VsOut; + let world_pos = shadow_u.model * vec4(in.position, 1.0); + o.pos = shadow_u.light_vp * world_pos; + o.uv = in.uv; + return o; +} + +@fragment +fn fs_shadow_cutout(in: VsOut) { + let a = textureSample(base_tex, base_samp, in.uv).a; + if (a < cut.cutoff.x) { discard; } +} +"; + /// Uniform data for the shadow pass. #[repr(C)] #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] @@ -66,6 +112,13 @@ pub struct ShadowMap { pub bind_group_layout: wgpu::BindGroupLayout, pub bind_group: wgpu::BindGroup, pub pipeline: wgpu::RenderPipeline, + /// Alpha-tested variant for cutout foliage casters. Opaque casters keep + /// using `pipeline` (byte-identical to before this was added). + pub pipeline_cutout: wgpu::RenderPipeline, + /// Group-1 layout for `pipeline_cutout`: base-colour tex + sampler + a + /// cutoff uniform. Per-mesh bind groups are built against this in + /// `cache_model_if_static`. 
+ pub cutout_tex_layout: wgpu::BindGroupLayout, pub uniform_buffer: wgpu::Buffer, pub uniform_bind_group: wgpu::BindGroup, pub uniform_layout: wgpu::BindGroupLayout, @@ -269,7 +322,7 @@ impl ShadowMap { vertex: wgpu::VertexState { module: &shader, entry_point: Some("vs_shadow"), - buffers: &[vertex_layout], + buffers: &[vertex_layout.clone()], compilation_options: Default::default(), }, fragment: None, // depth only @@ -295,6 +348,78 @@ impl ShadowMap { cache: None, }); + // Cutout (alpha-tested) shadow pipeline. Separate so the opaque path + // above is untouched. Adds a group-1 texture/sampler/cutoff layout and + // a fragment stage that discards below the alpha cutoff. Still depth- + // only (no colour targets). + let cutout_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("shadow_shader_cutout"), + source: wgpu::ShaderSource::Wgsl(SHADOW_SHADER_CUTOUT.into()), + }); + let cutout_tex_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("shadow_cutout_tex_layout"), + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, min_binding_size: None, + }, + count: None, + }, + ], + }); + let cutout_pl_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("shadow_cutout_pipeline_layout"), + bind_group_layouts: &[Some(&uniform_layout), Some(&cutout_tex_layout)], + 
immediate_size: 0, + }); + let pipeline_cutout = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("shadow_pipeline_cutout"), + layout: Some(&cutout_pl_layout), + vertex: wgpu::VertexState { + module: &cutout_shader, + entry_point: Some("vs_shadow_cutout"), + buffers: &[vertex_layout], + compilation_options: Default::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &cutout_shader, + entry_point: Some("fs_shadow_cutout"), + targets: &[], // depth only + compilation_options: Default::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + front_face: wgpu::FrontFace::Ccw, + cull_mode: None, + ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: wgpu::TextureFormat::Depth32Float, + depth_write_enabled: Some(true), + depth_compare: Some(wgpu::CompareFunction::Less), + stencil: Default::default(), + bias: wgpu::DepthBiasState { constant: 1, slope_scale: 1.0, clamp: 0.0 }, + }), + multisample: Default::default(), + multiview_mask: None, + cache: None, + }); + Self { depth_textures, depth_views, @@ -302,6 +427,8 @@ impl ShadowMap { bind_group_layout, bind_group, pipeline, + pipeline_cutout, + cutout_tex_layout, uniform_buffer, uniform_bind_group, uniform_layout, From 22b32d649306ad1c103c066a5ddf77e406449f9b Mon Sep 17 00:00:00 2001 From: Ralph Kuepper Date: Wed, 17 Jun 2026 12:23:53 +0200 Subject: [PATCH 3/4] feat(render): planar water reflections render the real scene The planar reflection probe only rendered material-system draws, so immediate-mode games (which draw the world via drawModel) got a black/empty probe. Added a single-target reflection pipeline (REFLECT_SCENE_WGSL: base-colour x sun N.L + ambient, alpha-cutout discard) that renders every cached model (trees, house, foliage) into the probe with a mirrored, oblique-clipped VP. 
Owned bind-group layouts (dynamic per-draw model uniform + sun/ambient); reuses scene_material_bg for base colour. Probe clears to transparent so geometry writes alpha 1 and the water shader blends the probe over its analytic sky dome by alpha. Lazily built on first dispatch; no change to the opaque/main pass. --- native/shared/src/renderer/mod.rs | 185 ++++++++++++++++++++-- native/shared/src/renderer/shaders/env.rs | 64 ++++++++ native/shared/src/renderer/shaders/mod.rs | 2 +- 3 files changed, 238 insertions(+), 13 deletions(-) diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs index 1ba91e8..5a574f8 100644 --- a/native/shared/src/renderer/mod.rs +++ b/native/shared/src/renderer/mod.rs @@ -1484,6 +1484,15 @@ pub struct Renderer { /// alongside the probe in `create_planar_reflection`. pub planar_probe_view_buffers: Vec>, pub planar_probe_view_bgs: Vec>, + /// EN-011 — lazily-built resources for rendering cached models (trees, + /// house) into the planar probe with a mirrored VP. Single-target HDR + /// pipeline + a dynamic per-draw model uniform + a sun/ambient uniform. + /// `None` until the first `dispatch_planar_reflections` with a probe. + pub reflect_scene_pipeline: Option, + pub reflect_model_buf: Option, + pub reflect_model_bg: Option, + pub reflect_light_buf: Option, + pub reflect_light_bg: Option, /// Phase 6 — hot-reload registry for file-backed materials. 
Each /// frame we drain pending file-change events and recompile any @@ -6239,6 +6248,11 @@ impl Renderer { transient_pool, impulse_field, planar_probes: Vec::new(), + reflect_scene_pipeline: None, + reflect_model_buf: None, + reflect_model_bg: None, + reflect_light_buf: None, + reflect_light_bg: None, planar_probe_view_buffers: Vec::new(), planar_probe_view_bgs: Vec::new(), material_hot_reload, @@ -11542,7 +11556,116 @@ impl Renderer { encoder: &mut wgpu::CommandEncoder, ) { if self.planar_probes.iter().all(|p| p.is_none()) { return; } - if self.material_system.commands.is_empty() { return; } + if self.material_system.commands.is_empty() && self.model_draw_commands.is_empty() { return; } + + // EN-011 — lazily build the single-target reflection pipeline + buffers + // used to render cached models (trees/house) into the probe with a + // mirrored VP. Owned layouts: g0 dynamic per-draw model uniform, g1 + // sun/ambient; g2 reuses the scene material layout for base colour. + const REFLECT_STRIDE: u64 = 256; + const REFLECT_MAX_DRAWS: usize = 600; + if self.reflect_scene_pipeline.is_none() { + let model_dyn_layout = self.device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("reflect_model_dyn_layout"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, visibility: wgpu::ShaderStages::VERTEX, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: std::num::NonZeroU64::new(128), + }, + count: None, + }], + }); + let light_layout = self.device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("reflect_light_layout"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, min_binding_size: None, + }, + count: None, + }], + }); + let shader = self.device.create_shader_module(wgpu::ShaderModuleDescriptor { + 
label: Some("reflect_scene_shader"), + source: wgpu::ShaderSource::Wgsl(REFLECT_SCENE_WGSL.into()), + }); + let pl = self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("reflect_scene_pl"), + bind_group_layouts: &[Some(&model_dyn_layout), Some(&light_layout), Some(&self.scene_material_layout)], + immediate_size: 0, + }); + let pipeline = self.device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("reflect_scene_pipeline"), + layout: Some(&pl), + vertex: wgpu::VertexState { + module: &shader, entry_point: Some("vs_reflect"), + buffers: &[Vertex3D::desc()], compilation_options: Default::default(), + }, + fragment: Some(wgpu::FragmentState { + module: &shader, entry_point: Some("fs_reflect"), + targets: &[Some(wgpu::ColorTargetState { + format: HDR_FORMAT, blend: None, write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: Default::default(), + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + cull_mode: None, ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: DEPTH_FORMAT, depth_write_enabled: Some(true), + depth_compare: Some(wgpu::CompareFunction::Less), + stencil: Default::default(), bias: Default::default(), + }), + multisample: Default::default(), multiview_mask: None, cache: None, + }); + let model_buf = self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("reflect_model_buf"), + size: REFLECT_STRIDE * REFLECT_MAX_DRAWS as u64, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let model_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("reflect_model_bg"), layout: &model_dyn_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: &model_buf, offset: 0, size: std::num::NonZeroU64::new(128), + }), + }], + }); + let light_buf = 
self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("reflect_light_buf"), size: 48, + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let light_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("reflect_light_bg"), layout: &light_layout, + entries: &[wgpu::BindGroupEntry { binding: 0, resource: light_buf.as_entire_binding() }], + }); + self.reflect_scene_pipeline = Some(pipeline); + self.reflect_model_buf = Some(model_buf); + self.reflect_model_bg = Some(model_bg); + self.reflect_light_buf = Some(light_buf); + self.reflect_light_bg = Some(light_bg); + } + // Sun/ambient for the reflection shading (same as the main directional). + { + let ld = self.lighting_uniforms.light_dir; + let lc = self.lighting_uniforms.light_color; + let amb = self.lighting_uniforms.ambient; + let light_data: [f32; 12] = [ + ld[0], ld[1], ld[2], ld[3], + lc[0], lc[1], lc[2], 0.0, + amb[0], amb[1], amb[2], amb[3], + ]; + if let Some(buf) = &self.reflect_light_buf { + self.queue.write_buffer(buf, 0, bytemuck::cast_slice(&light_data)); + } + } // Build the V1 exclude set: every material linked to any // probe. The water material itself shouldn't appear in its @@ -11699,21 +11822,34 @@ impl Renderer { ], }); - // The fog colour is a sensible "sky colour" approximation - // for the cleared reflection RT. Materials sampling the - // probe RT outside the rendered frustum (top of the - // texture for a horizontal water plane) get fog tinting - // instead of pure black. - let clear_color = wgpu::Color { - r: self.fog_color[0] as f64, - g: self.fog_color[1] as f64, - b: self.fog_color[2] as f64, - a: 1.0, - }; + // Write each cached-model draw's [mirror_mvp, model] into the + // dynamic reflection uniform buffer up front (queue writes + // happen-before the encoded pass), and record the draw list. 
+ let mut reflect_draws: Vec<(u64, usize, u32)> = Vec::new(); + if let Some(model_buf) = &self.reflect_model_buf { + for cmd in self.model_draw_commands.iter() { + let slot = reflect_draws.len(); + if slot >= REFLECT_MAX_DRAWS { break; } + let mirror_mvp = mat4_multiply(mirror_vp, cmd.model); + let mut data = [0u8; 128]; + data[0..64].copy_from_slice(bytemuck::bytes_of(&mirror_mvp)); + data[64..128].copy_from_slice(bytemuck::bytes_of(&cmd.model)); + self.queue.write_buffer(model_buf, slot as u64 * REFLECT_STRIDE, &data); + reflect_draws.push((cmd.cache_handle, cmd.mesh_idx, slot as u32)); + } + } + + // Clear the probe to transparent black. Geometry fragments write + // alpha 1, so the water shader can blend the probe over its analytic + // sky by alpha (a=0 → no reflected geometry → show the sky dome). + let clear_color = wgpu::Color { r: 0.0, g: 0.0, b: 0.0, a: 0.0 }; let view_bg = &probe_view_bg; let cache = &self.model_gpu_cache; let mat_sys = &self.material_system; + let refl_pipeline = self.reflect_scene_pipeline.as_ref(); + let refl_model_bg = self.reflect_model_bg.as_ref(); + let refl_light_bg = self.reflect_light_bg.as_ref(); { let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { label: Some("bloom_planar_reflection_pass"), @@ -11761,6 +11897,31 @@ impl Renderer { None }, ); + + // Render cached models (trees/house/foliage) mirrored into the + // probe so the water reflects the actual world, not just an + // analytic sky. Single-target lit pipeline; cutout alpha is + // discarded so foliage reflects its real shape. 
+ if let (Some(rp), Some(rmbg), Some(rlbg)) = + (refl_pipeline, refl_model_bg, refl_light_bg) + { + if !reflect_draws.is_empty() { + pass.set_pipeline(rp); + pass.set_bind_group(1, rlbg, &[]); + for (handle, midx, slot) in &reflect_draws { + if let Some(Some(meshes)) = cache.get(handle) { + if *midx < meshes.len() { + let mesh = &meshes[*midx]; + pass.set_bind_group(0, rmbg, &[*slot * REFLECT_STRIDE as u32]); + pass.set_bind_group(2, &mesh.material_bg, &[]); + pass.set_vertex_buffer(0, mesh.vb.slice(..)); + pass.set_index_buffer(mesh.ib.slice(..), wgpu::IndexFormat::Uint32); + pass.draw_indexed(0..mesh.index_count, 0, 0..1); + } + } + } + } + } } } } diff --git a/native/shared/src/renderer/shaders/env.rs b/native/shared/src/renderer/shaders/env.rs index d57208c..e0434cc 100644 --- a/native/shared/src/renderer/shaders/env.rs +++ b/native/shared/src/renderer/shaders/env.rs @@ -818,3 +818,67 @@ fn sky_fs(in: VsOut) -> SkyOut { } "; +/// EN-011 — single-target reflection shader for rendering cached models +/// (trees, house, etc.) into a planar-reflection probe with a mirrored +/// view-projection. Deliberately lightweight (base colour × sun N·L + ambient, +/// alpha-cutout discard) — a water reflection doesn't need the full deferred +/// PBR/SSAO stack, and a single HDR colour target (vs the main 4-target MRT) +/// lets it draw straight into the probe RT. Group layouts are owned by the +/// renderer: g0 = dynamic per-draw model uniform, g1 = sun/ambient, g2 = the +/// scene material bind group (we only sample base colour + alpha). 
+pub(in crate::renderer) const REFLECT_SCENE_WGSL: &str = " +struct ReflectModelU { mvp: mat4x4, model: mat4x4 }; +@group(0) @binding(0) var u: ReflectModelU; + +struct ReflectLight { + sun_dir: vec4, // xyz dir (travel), w = intensity + sun_color: vec4, // rgb, w unused + ambient: vec4, // rgb, w = intensity +}; +@group(1) @binding(0) var light: ReflectLight; + +@group(2) @binding(0) var base_tex: texture_2d; +@group(2) @binding(1) var base_samp: sampler; + +struct VsIn { + @location(0) position: vec3, + @location(1) normal: vec3, + @location(2) color: vec4, + @location(3) uv: vec2, +}; +struct VsOut { + @builtin(position) pos: vec4, + @location(0) n: vec3, + @location(1) uv: vec2, + @location(2) col: vec4, +}; + +@vertex +fn vs_reflect(in: VsIn) -> VsOut { + var o: VsOut; + o.pos = u.mvp * vec4(in.position, 1.0); + o.n = normalize((u.model * vec4(in.normal, 0.0)).xyz); + o.uv = in.uv; + o.col = in.color; + return o; +} + +fn srgb_lin(c: vec3) -> vec3 { + let lo = c / 12.92; + let hi = pow(max((c + vec3(0.055)) / 1.055, vec3(0.0)), vec3(2.4)); + return select(hi, lo, c <= vec3(0.04045)); +} + +@fragment +fn fs_reflect(in: VsOut) -> @location(0) vec4 { + let tex = textureSample(base_tex, base_samp, in.uv); + if (tex.a < 0.5) { discard; } // alpha-cutout foliage reflects its shape + let base = srgb_lin(tex.rgb) * in.col.rgb; + let n = normalize(in.n); + let ndl = max(dot(n, -normalize(light.sun_dir.xyz)), 0.0); + let lit = base * (light.ambient.rgb * light.ambient.w + + light.sun_color.rgb * light.sun_dir.w * ndl); + return vec4(lit, 1.0); // alpha 1 = 'real reflection here' for the water blend +} +"; + diff --git a/native/shared/src/renderer/shaders/mod.rs b/native/shared/src/renderer/shaders/mod.rs index dcc8034..23b3694 100644 --- a/native/shared/src/renderer/shaders/mod.rs +++ b/native/shared/src/renderer/shaders/mod.rs @@ -6,7 +6,7 @@ mod core; pub(super) use core::{SHADER_2D, SHADER_3D, SCENE_SHADER}; mod env; -pub(super) use env::{PREFILTER_SHADER_WGSL, 
SKY_SHADER_WGSL, AERIAL_PERSPECTIVE_SHADER_WGSL, SKY_VIEW_LUT_SHADER_WGSL, EQUIRECT_FROM_SKY_VIEW_SHADER_WGSL, PROCEDURAL_SKY_SHADER_WGSL}; +pub(super) use env::{PREFILTER_SHADER_WGSL, SKY_SHADER_WGSL, AERIAL_PERSPECTIVE_SHADER_WGSL, SKY_VIEW_LUT_SHADER_WGSL, EQUIRECT_FROM_SKY_VIEW_SHADER_WGSL, PROCEDURAL_SKY_SHADER_WGSL, REFLECT_SCENE_WGSL}; mod ao; pub(super) use ao::{HIZ_LINEARIZE_SHADER_WGSL, HIZ_DOWNSAMPLE_SHADER_WGSL, SSAO_SHADER_WGSL, SSAO_BLUR_SHADER_WGSL}; mod gi; From 67b654948f536cd7852c4ca89bb224fbd711626c Mon Sep 17 00:00:00 2001 From: Ralph Kuepper Date: Fri, 19 Jun 2026 11:43:42 +0200 Subject: [PATCH 4/4] fix(render): reflection shader N.L uses direction-to-sun The planar-reflection scene shader negated the sun dir for N.L, which only matched when the caller passed a below-horizon (travel-direction) sun. Engine convention is direction-TO-sun everywhere (scene diffuse, shadow fit, sky LUT mu_s=sun.y), so use +sun_dir. --- native/shared/src/renderer/shaders/env.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/native/shared/src/renderer/shaders/env.rs b/native/shared/src/renderer/shaders/env.rs index e0434cc..0b64467 100644 --- a/native/shared/src/renderer/shaders/env.rs +++ b/native/shared/src/renderer/shaders/env.rs @@ -875,7 +875,8 @@ fn fs_reflect(in: VsOut) -> @location(0) vec4 { if (tex.a < 0.5) { discard; } // alpha-cutout foliage reflects its shape let base = srgb_lin(tex.rgb) * in.col.rgb; let n = normalize(in.n); - let ndl = max(dot(n, -normalize(light.sun_dir.xyz)), 0.0); + // sun_dir is direction-TO-sun (engine convention), so N.L uses +sun_dir. + let ndl = max(dot(n, normalize(light.sun_dir.xyz)), 0.0); let lit = base * (light.ambient.rgb * light.ambient.w + light.sun_color.rgb * light.sun_dir.w * ndl); return vec4(lit, 1.0); // alpha 1 = 'real reflection here' for the water blend