From b99ae6e6ceb43f620b4e8c6300f495e67530615f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Nov 2025 15:50:19 +0700 Subject: [PATCH 01/29] Fix small bug in GenericDataAccessor definition --- .../builtin/hlsl/concepts/accessors/generic_shared_data.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl index cc22595444..ab7a87c7dd 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl @@ -69,7 +69,7 @@ NBL_CONCEPT_END( #include template -NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor && GenericWriteAccessor; +NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor && GenericWriteAccessor; } } From b9537ea7f623ca275236079acf9a8cd43e910909 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 11 Nov 2025 15:51:30 +0700 Subject: [PATCH 02/29] First draft of Warpmap Generation workgroup implementation --- .../hlsl/concepts/accessors/envmap.hlsl | 47 ++++++++ .../nbl/builtin/hlsl/workgroup/envmap.hlsl | 108 ++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl create mode 100644 include/nbl/builtin/hlsl/workgroup/envmap.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl new file mode 100644 index 0000000000..1d1ad2a344 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl @@ -0,0 +1,47 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace envmap +{ +// declare concept +#define NBL_CONCEPT_NAME LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +#define NBL_CONCEPT_PARAM_3 (offset,uint32_t2) +// start concept +NBL_CONCEPT_BEGIN(4) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +#define offset NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level,offset)) , ::nbl::hlsl::is_same_v, float32_t4>)) +); +#undef offset +#undef level +#undef uv +#undef a +#include + +template +NBL_BOOL_CONCEPT WarpmapWriteAccessor = concepts::accessors::GenericWriteAccessor; + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl new file mode 100644 index 0000000000..df452fb0e8 --- /dev/null +++ b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl @@ -0,0 +1,108 @@ + +#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ +#define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace envmap +{ +namespace impl +{ + bool choseSecond(float first, float second, NBL_REF_ARG(float) xi) + { + // numerical resilience against IEEE754 + float firstProb = 1.0f / (1.0f + second / first); + float dummy = 0.0f; + return math::partitionRandVariable(firstProb, xi, dummy); + } + +} + +} +} +} +} + +#ifdef __HLSL_VERSION +namespace nbl +{ +namespace hlsl +{ +namespace workgroup +{ +namespace envmap +{ + +struct WarpmapGeneration +{ + + template && envmap::WarpmapWriteAccessor) + // TODO(kevinyu): Should lumapMapSize and warpMapSize provided by Accessor? + static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAcessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize) + { + const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); + const uint32_t lastWarpMapPixel = warpMapSize - uint32_t2(1, 1); + + if (all(threadID < warpMapSize)) + { + float32_t2 xi = float32_t2(threadID) / float32_t2(lastWarpMapPixel); + + uint32_t2 p; + p.y = 0; + + // TODO(kevinyu): Implement findMSB + const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0), xi.x) ? 1 : 0; + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const float32_t4 values = float32_t4( + luminanceAccessor.get(p, i, uint32_t2(0, 1)), + luminanceAccessor.get(p, i, uint32_t2(1, 1)), + luminanceAccessor.get(p, i, uint32_t2(1, 0)), + luminanceAccessor.get(p, i, uint32_t2(0, 0)) + ); + + float32_t wx_0, wx_1; + { + const float32_t wy_0 = values[3] + values[2]; + const float32_t wy_1 = values[1] + values[0]; + if (impl::choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (impl::choseSecond(wx_0, wx_1, xi.x)) + { + p.x |= 1; + } + } + + const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); + outputAccessor.set(threadID, directionUV); + } + } + +}; + +} +} +} +} +#endif + +#endif \ No newline at end of file From a7371738facb18bf22e89335a377492e8e3e58f5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:48:13 +0700 Subject: [PATCH 03/29] Add warp concept --- include/nbl/builtin/hlsl/concepts/warp.hlsl | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/nbl/builtin/hlsl/concepts/warp.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/warp.hlsl b/include/nbl/builtin/hlsl/concepts/warp.hlsl new file mode 100644 index 0000000000..e9e981a243 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/warp.hlsl @@ -0,0 +1,41 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" +#include "nbl/builtin/hlsl/fft/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace concepts +{ + +// declare concept +#define NBL_CONCEPT_NAME WARP +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (warp,U) +#define NBL_CONCEPT_PARAM_1 (uv,float32_t2) +#define NBL_CONCEPT_PARAM_2 (out,C) +// start concept +NBL_CONCEPT_BEGIN(3) +#define warp NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define out NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template warp(uv)) , ::nbl::hlsl::is_same_v, C)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template forwardDensity(uv)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template backwardDensity(out)) , ::nbl::hlsl::is_same_v, float32_t)) +); +#undef out +#undef warp +#undef uv +#include + +} +} +} + +#endif \ No newline at end of file From 64349db3ac20d982fcffd871cc43308c0b0c3cc7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:48:37 +0700 Subject: [PATCH 04/29] Add spherical warp --- include/nbl/builtin/hlsl/warp/spherical.hlsl | 53 ++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/nbl/builtin/hlsl/warp/spherical.hlsl diff --git a/include/nbl/builtin/hlsl/warp/spherical.hlsl b/include/nbl/builtin/hlsl/warp/spherical.hlsl new file mode 100644 index 0000000000..10c341f06b --- /dev/null +++ b/include/nbl/builtin/hlsl/warp/spherical.hlsl @@ -0,0 +1,53 @@ +#ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace warp +{ + + class Spherical + { + public: + using codomain_type = float32_t3; + + template ) + static codomain_type warp(const UV uv) + { + const float32_t phi = 2 * uv.x * numbers::pi; + const float32_t theta = uv.y * numbers::pi; + float32_t3 dir; + dir.x = cos(uv.x * 2.f * numbers::pi); + dir.y = sqrt(1.f - dir.x * dir.x); + if (uv.x > 0.5f) dir.y = -dir.y; + const float32_t cosTheta = cos(theta); + float32_t sinTheta = (1.0 - cosTheta * cosTheta); + dir.xy *= sinTheta; + dir.z = cosTheta; + return dir; + } + + template ) + static float32_t forwardDensity(const UV uv) + { + const float32_t theta = uv.y * numbers::pi; + return 1.0f / (sin(theta) * 2 * PI * PI); + + } + + template ) + static float32_t backwardDensity(const C out) + { + //TODO(kevinyu): Derive this density + } + }; + +} +} +} + +#endif \ No newline at end of file From e44fcf44b71164cc938d49c740c0be634c8e5d8b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:49:02 +0700 Subject: [PATCH 05/29] Remove envmap accessors.hlsl --- .../hlsl/concepts/accessors/envmap.hlsl | 47 ------------------- 1 file changed, 47 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl deleted file mode 100644 index 1d1ad2a344..0000000000 --- a/include/nbl/builtin/hlsl/concepts/accessors/envmap.hlsl +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_ENVMAP_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace workgroup -{ -namespace envmap -{ -// declare concept -#define NBL_CONCEPT_NAME LuminanceReadAccessor -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U) -// not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (a,U) -#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) -#define NBL_CONCEPT_PARAM_2 (level,uint32_t) -#define NBL_CONCEPT_PARAM_3 (offset,uint32_t2) -// start concept -NBL_CONCEPT_BEGIN(4) -// need to be defined AFTER the concept begins -#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 -#define offset NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level,offset)) , ::nbl::hlsl::is_same_v, float32_t4>)) -); -#undef offset -#undef level -#undef uv -#undef a -#include - -template -NBL_BOOL_CONCEPT WarpmapWriteAccessor = concepts::accessors::GenericWriteAccessor; - -} -} -} -} - -#endif \ No newline at end of file From 9b29dfd0a08acd2414cb67a6ac2ea72ed2386e4c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 18 Nov 2025 16:49:19 +0700 Subject: [PATCH 06/29] Hierarchical image sampling implementation --- .../hlsl/sampling/hierarchical_image.hlsl | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl new file mode 100644 index 0000000000..bfcd9ffec7 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -0,0 +1,136 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +class HierarchicalImage +{ + private: + + static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) + { + const float32_t2 interpolant = frac(unnormCoord); + const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); + + const float32_t2 xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const float32_t2 yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const float32_t2 yDiff = yVals[1] - yVals[0]; + const float32_t2 uv = yDiff * interpolant.y + yVals[0]; + + // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity + const float32_t3 L = PostWarp::warp(uv); + + const float detInterpolJacobian = determinant(float32_t2x2( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )); + + pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y)); + + return L; + } + + public: + template ) + static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) + { + + uint32_t2 p = uint32_t2(0, 0); + + if (aspect2x1) { + // TODO(kevinyu): Implement findMSB + const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; + + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), mip2x1), luminanceAccessor.fetch(uint32_t2(0, 1), mip2x1), xi.x) ? 1 : 0; + } + + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const float32_t4 values = luminanceAccessor.gather(p, i); + float32_t wx_0, wx_1; + { + const float32_t wy_0 = values[3] + values[2]; + const float32_t wy_1 = values[1] + values[0]; + if (impl::choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (impl::choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; + } + + // TODO(kevinyu): Add some comment why we add xi. + const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); + return directionUV; + } + + + template && Warp) + static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { + + // TODO(kevinyu): Add some comment why we substract by 1 + const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); + + const float32_t2 unnormCoord = xi * lastWarpmapPixel; + const float32_t2 interpolant = frac(unnormCoord); + const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); + const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord); + const float32_t4 dirsY = warpmap.gatherV(warpSampleCoord); + + return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); + + } + + template && Warp) + static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceReadAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { + + const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); + const float32_t2 unnormCoord = xi * lastWarpmapPixel; + const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); + const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(0, 1), lumaAspect2x1); + const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 1), lumaAspect2x1); + const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 0), lumaAspect2x1); + const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord, lumaAspect2x1); + + const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); + const float32_t4 dirsY = float32_t4(dir1.y, dir1.y, dir2.y, dir3.y); + + return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); + + } +}; + +} +} + +#endif From 8d682b948a2ca616c6c5681c4be7bcd12b586246 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 20 Dec 2025 07:38:52 +0700 Subject: [PATCH 07/29] Remove envmap.hlsl --- .../nbl/builtin/hlsl/workgroup/envmap.hlsl | 108 ------------------ 1 file changed, 108 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/workgroup/envmap.hlsl diff --git a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl deleted file mode 100644 index df452fb0e8..0000000000 --- a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl +++ /dev/null @@ -1,108 +0,0 @@ - -#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ -#define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ - -namespace nbl -{ -namespace hlsl -{ -namespace workgroup -{ -namespace envmap -{ -namespace impl -{ - bool choseSecond(float first, float second, NBL_REF_ARG(float) xi) - { - // numerical resilience against IEEE754 - float firstProb = 1.0f / (1.0f + second / first); - float dummy = 0.0f; - return math::partitionRandVariable(firstProb, xi, dummy); - } - -} - -} -} -} -} - -#ifdef __HLSL_VERSION -namespace nbl -{ -namespace hlsl -{ -namespace workgroup -{ -namespace envmap -{ - -struct WarpmapGeneration -{ - - template && envmap::WarpmapWriteAccessor) - // TODO(kevinyu): Should lumapMapSize and warpMapSize provided by Accessor? - static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAcessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize) - { - const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); - const uint32_t lastWarpMapPixel = warpMapSize - uint32_t2(1, 1); - - if (all(threadID < warpMapSize)) - { - float32_t2 xi = float32_t2(threadID) / float32_t2(lastWarpMapPixel); - - uint32_t2 p; - p.y = 0; - - // TODO(kevinyu): Implement findMSB - const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; - // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0), xi.x) ? 1 : 0; - for (uint32_t i = mip2x1; i != 0;) - { - --i; - p <<= 1; - const float32_t4 values = float32_t4( - luminanceAccessor.get(p, i, uint32_t2(0, 1)), - luminanceAccessor.get(p, i, uint32_t2(1, 1)), - luminanceAccessor.get(p, i, uint32_t2(1, 0)), - luminanceAccessor.get(p, i, uint32_t2(0, 0)) - ); - - float32_t wx_0, wx_1; - { - const float32_t wy_0 = values[3] + values[2]; - const float32_t wy_1 = values[1] + values[0]; - if (impl::choseSecond(wy_0, wy_1, xi.y)) - { - p.y |= 1; - wx_0 = values[0]; - wx_1 = values[1]; - } - else - { - wx_0 = values[3]; - wx_1 = values[2]; - } - } - - if (impl::choseSecond(wx_0, wx_1, xi.x)) - { - p.x |= 1; - } - } - - const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); - outputAccessor.set(threadID, directionUV); - } - } - -}; - -} -} -} -} -#endif - -#endif \ No newline at end of file From 890f7c6080a4d234d231b24ce9e1073f9e106f34 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 22 Dec 2025 23:59:45 +0700 Subject: [PATCH 08/29] Move to sampling namespace and implement backward density --- include/nbl/builtin/hlsl/concepts/warp.hlsl | 41 -------------- .../hlsl/sampling/hierarchical_image.hlsl | 2 +- include/nbl/builtin/hlsl/sampling/warp.hlsl | 54 +++++++++++++++++++ .../{warp => sampling/warps}/spherical.hlsl | 29 ++++++---- 4 files changed, 73 insertions(+), 53 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/concepts/warp.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/warp.hlsl rename include/nbl/builtin/hlsl/{warp => sampling/warps}/spherical.hlsl (51%) diff --git a/include/nbl/builtin/hlsl/concepts/warp.hlsl b/include/nbl/builtin/hlsl/concepts/warp.hlsl deleted file mode 100644 index e9e981a243..0000000000 --- a/include/nbl/builtin/hlsl/concepts/warp.hlsl +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" -#include "nbl/builtin/hlsl/fft/common.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace concepts -{ - -// declare concept -#define NBL_CONCEPT_NAME WARP -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C) -// not the greatest syntax but works -#define NBL_CONCEPT_PARAM_0 (warp,U) -#define NBL_CONCEPT_PARAM_1 (uv,float32_t2) -#define NBL_CONCEPT_PARAM_2 (out,C) -// start concept -NBL_CONCEPT_BEGIN(3) -#define warp NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define out NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template warp(uv)) , ::nbl::hlsl::is_same_v, C)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template forwardDensity(uv)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template backwardDensity(out)) , ::nbl::hlsl::is_same_v, float32_t)) -); -#undef out -#undef warp -#undef uv -#include - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index bfcd9ffec7..f2b2750703 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -5,7 +5,7 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ -#include +#include #include namespace nbl diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl new file mode 100644 index 0000000000..b8936c09f3 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -0,0 +1,54 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" +#include "nbl/builtin/hlsl/fft/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct WarpResult +{ + C dst; + float32_t density; +}; + +namespace concepts +{ + +// declare concept +#define NBL_CONCEPT_NAME WARP +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (warper,U) +#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type) +#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type) +// start concept +NBL_CONCEPT_BEGIN(3) +#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(U::domain_type)) + ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, WarpResult)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, float32_t)) +); +#undef dst +#undef xi +#undef warper +#include + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/warp/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl similarity index 51% rename from include/nbl/builtin/hlsl/warp/spherical.hlsl rename to include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 10c341f06b..095e138d60 100644 --- a/include/nbl/builtin/hlsl/warp/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -2,21 +2,24 @@ #define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ #include +#include +#include namespace nbl { namespace hlsl { +namespace sampling +{ namespace warp { - - class Spherical + struct Spherical { - public: + using domain_type = float32_t2; using codomain_type = float32_t3; - template ) - static codomain_type warp(const UV uv) + template ) + static WarpResult warp(const D uv) { const float32_t phi = 2 * uv.x * numbers::pi; const float32_t theta = uv.y * numbers::pi; @@ -28,26 +31,30 @@ namespace warp float32_t sinTheta = (1.0 - cosTheta * cosTheta); dir.xy *= sinTheta; dir.z = cosTheta; - return dir; + WarpResult warpResult; + warpResult.dst = dir; + warpResult.density = 1 / (sinTheta * numbers::pi * numbers::pi); + return warpResult; } - template ) - static float32_t forwardDensity(const UV uv) + template ) + static float32_t forwardDensity(const D uv) { const float32_t theta = uv.y * numbers::pi; - return 1.0f / (sin(theta) * 2 * PI * PI); + return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); } template ) - static float32_t backwardDensity(const C out) + static float32_t backwardDensity(const C dst) { - //TODO(kevinyu): Derive this density + return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); } }; } } } +} #endif \ No newline at end of file From f99c63ba8b8c4ce97dd73d18c1f3b9c51b321eac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 23 Dec 2025 00:02:15 +0700 Subject: [PATCH 09/29] Remove private, public from hierarchical_image --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index f2b2750703..de50a6b0d3 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -15,9 +15,8 @@ namespace hlsl namespace sampling { -class HierarchicalImage +struct HierarchicalImage { - private: static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) { @@ -48,7 +47,6 @@ class HierarchicalImage return L; } - public: template ) static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) { From 3ff2791f1bb47feb3a232fcfecc1dca0005dc461 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 23 Dec 2025 01:06:03 +0700 Subject: [PATCH 10/29] Refactor hierarchical image to keep accessor and common data as member --- .../hlsl/sampling/hierarchical_image.hlsl | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index de50a6b0d3..5509ce65c3 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -14,12 +14,21 @@ namespace hlsl { namespace sampling { - +template && hierarchical_image::LuminanceReadAccessor && Warp) struct HierarchicalImage { - - static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + LuminanceAccessor accessor; + uint32_t2 lumaMapSize; + bool lumaAspect2x1; + uint32_t2 lastWarpPixel; + + static vector2_type calculateSampleAndPdf(NBL_REF_ARG(scalar_type) rcpPdf, vector4_type dirsX, vector4_type dirsY, vector2_type unnormCoord, uint32_t2 lastWarpPixel) { + // TODO(kevinyu): Convert float32_t to scalar_type const float32_t2 interpolant = frac(unnormCoord); const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); @@ -42,15 +51,23 @@ struct HierarchicalImage yDiff // second column dFdy )); - pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y)); + rcpPdf = abs((detInterpolJacobian * scalar_t(lastWarpPixel.x * lastWarpPixel.y) / PostWarp::forwardDensity(uv)); return L; } - template ) - static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) + static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessor) accessor, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpSize) { + HierarchicalImage result; + result.accessor = accessor; + result.lumaMapSize = lumaMapSize; + result.lumaAspect2x1 = lumaAspect2x1; + result.lastWarpPixel = warpSize - uint32_t2(1, 1); + return result; + } + static vector binarySearch(const vector xi) + { uint32_t2 p = uint32_t2(0, 0); if (aspect2x1) { @@ -92,28 +109,9 @@ struct HierarchicalImage return directionUV; } - - template && Warp) - static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { - - // TODO(kevinyu): Add some comment why we substract by 1 - const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); - - const float32_t2 unnormCoord = xi * lastWarpmapPixel; - const float32_t2 interpolant = frac(unnormCoord); - const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); - const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord); - const float32_t4 dirsY = warpmap.gatherV(warpSampleCoord); - - return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); - - } - - template && Warp) - static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceReadAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) { - - const uint32_t3 lastWarpmapPixel = warpmapSize - uint32_t3(1, 1, 1); - const float32_t2 unnormCoord = xi * lastWarpmapPixel; + uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector xi) + { + const float32_t2 unnormCoord = xi * lastWarpPixel; const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(0, 1), lumaAspect2x1); const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 1), lumaAspect2x1); @@ -123,11 +121,12 @@ struct HierarchicalImage const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); const float32_t4 dirsY = float32_t4(dir1.y, dir1.y, dir2.y, dir3.y); - return calculateSampleAndPdf(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); - - } + return calculateSampleAndPdf(rcpPdf, dirsX, dirsY, unnormCoord, lastWarpPixel); + } }; +//TODO(kevinyu): Impelemnt cached warp map sampler + } } From 76ef53697fe9dffae24cf209f11b2894af63f526 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 26 Dec 2025 16:02:12 +0700 Subject: [PATCH 11/29] Refactor hierarchical image to separate binarySearch from HierarchicalImage class --- .../accessors/hierarchical_image.hlsl | 61 +++++ .../hlsl/sampling/hierarchical_image.hlsl | 240 ++++++++++-------- include/nbl/builtin/hlsl/sampling/warp.hlsl | 19 +- 3 files changed, 203 insertions(+), 117 deletions(-) create mode 100644 include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl new file mode 100644 index 0000000000..a7326ee3da --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -0,0 +1,61 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace hierarchical_image +{ +// declare concept +#define NBL_CONCEPT_NAME LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +// start concept +NBL_CONCEPT_BEGIN(3) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(uv,level)) , ::nbl::hlsl::is_same_v, float32_t4)) +); +#undef level +#undef uv +#undef a +#include + +// declare concept +#define NBL_CONCEPT_NAME HierarchicalSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) +#define NBL_CONCEPT_PARAM_1 (coord,vector) +// start concept +NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.template sampleUvs(coord)) , ::nbl::hlsl::is_same_v, matrix)) +); +#undef sampler +#undef coord +#include + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 5509ce65c3..5adbb5fb82 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -5,8 +5,10 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ +#include #include #include +#include namespace nbl { @@ -14,119 +16,147 @@ namespace hlsl { namespace sampling { -template && hierarchical_image::LuminanceReadAccessor && Warp) -struct HierarchicalImage + +template && hierarchical_image::LuminanceReadAccessor) +struct LuminanceMapSampler { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - LuminanceAccessor accessor; - uint32_t2 lumaMapSize; - bool lumaAspect2x1; - uint32_t2 lastWarpPixel; - - static vector2_type calculateSampleAndPdf(NBL_REF_ARG(scalar_type) rcpPdf, vector4_type dirsX, vector4_type dirsY, vector2_type unnormCoord, uint32_t2 lastWarpPixel) - { - // TODO(kevinyu): Convert float32_t to scalar_type - const float32_t2 interpolant = frac(unnormCoord); - const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); - - const float32_t2 xDiffs[] = { - uvs[2] - uvs[3], - uvs[1] - uvs[0] - }; - const float32_t2 yVals[] = { - xDiffs[0] * interpolant.x + uvs[3], - xDiffs[1] * interpolant.x + uvs[0] - }; - const float32_t2 yDiff = yVals[1] - yVals[0]; - const float32_t2 uv = yDiff * interpolant.y + yVals[0]; - - // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity - const float32_t3 L = PostWarp::warp(uv); - - const float detInterpolJacobian = determinant(float32_t2x2( - lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx - yDiff // second column dFdy - )); - - rcpPdf = abs((detInterpolJacobian * scalar_t(lastWarpPixel.x * lastWarpPixel.y) / PostWarp::forwardDensity(uv)); - - return L; - } - - static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessor) accessor, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpSize) - { - HierarchicalImage result; - result.accessor = accessor; - result.lumaMapSize = lumaMapSize; - result.lumaAspect2x1 = lumaAspect2x1; - result.lastWarpPixel = warpSize - uint32_t2(1, 1); - return result; - } - - static vector binarySearch(const vector xi) - { - uint32_t2 p = uint32_t2(0, 0); - - if (aspect2x1) { - // TODO(kevinyu): Implement findMSB - const uint32_t2 mip2x1 = findMSB(lumaMapSize.x) - 1; - - // do one split in the X axis first cause penultimate full mip would have been 2x1 - p.x = impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), mip2x1), luminanceAccessor.fetch(uint32_t2(0, 1), mip2x1), xi.x) ? 1 : 0; - } - - for (uint32_t i = mip2x1; i != 0;) - { - --i; - p <<= 1; - const float32_t4 values = luminanceAccessor.gather(p, i); - float32_t wx_0, wx_1; - { - const float32_t wy_0 = values[3] + values[2]; - const float32_t wy_1 = values[1] + values[0]; - if (impl::choseSecond(wy_0, wy_1, xi.y)) - { - p.y |= 1; - wx_0 = values[0]; - wx_1 = values[1]; - } - else - { - wx_0 = values[3]; - wx_1 = values[2]; - } - } - - if (impl::choseSecond(wx_0, wx_1, xi.x)) - p.x |= 1; - } - - // TODO(kevinyu): Add some comment why we add xi. - const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); - return directionUV; - } - - uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector xi) + using scalar_type = T; + using vector2_type = vector; + using vector4_type = vector; + + LuminanceAccessor _map; + uint32_t _mapSize; + bool _aspect2x1; + + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessor) lumaMap, vector2_type mapSize, bool aspect2x1) + { + LuminanceAccessor result; + result._map = lumaMap; + result._mapSize = mapSize; + result._aspect2x1 = aspect2x1; + return result; + } + + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) { - const float32_t2 unnormCoord = xi * lastWarpPixel; - const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); - const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(0, 1), lumaAspect2x1); - const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 1), lumaAspect2x1); - const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord + float32_t2(1, 0), lumaAspect2x1); - const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, warpSampleCoord, lumaAspect2x1); + // numerical resilience against IEEE754 + scalar_type dummy = 0.0f; + PartitionRandVariable partition; + partition.leftProb = 1.0f / (1.0f + second/ first); + return partition(xi, dummy); + } - const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); - const float32_t4 dirsY = float32_t4(dir1.y, dir1.y, dir2.y, dir3.y); + vector2_type binarySearch(const vector2_type xi) + { + uint32_t2 p = uint32_t2(0, 0); + const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; + + if (_aspect2x1) { + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(0, 1), mip2x1), xi.x) ? 1 : 0; + } + + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const vector4_type values = _map.gather(p, i); + scalar_type wx_0, wx_1; + { + const scalar_type wy_0 = values[3] + values[2]; + const scalar_type wy_1 = values[1] + values[0]; + if (choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; + } + + // TODO(kevinyu): Add some comment why we add xi. + const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); + return directionUV; + } - return calculateSampleAndPdf(rcpPdf, dirsX, dirsY, unnormCoord, lastWarpPixel); + matrix sampleUvs(vector2_type sampleCoord) NBL_CONST_MEMBER_FUNC + { + const vector2_type dir0 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(0, 1), _aspect2x1); + const vector2_type dir1 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 1), _aspect2x1); + const vector2_type dir2 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 0), _aspect2x1); + const vector2_type dir3 = binarySearch(_map, _mapSize, sampleCoord, _aspect2x1); + return { + dir0, + dir1, + dir2, + dir3 + }; } }; -//TODO(kevinyu): Impelemnt cached warp map sampler +template && hierarchical_image::HierarchicalSampler && concepts::Warp) +struct HierarchicalImage +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + HierarchicalSamplerT sampler; + uint32_t warpSize; + uint32_t2 lastWarpPixel; + + static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize) + { + HierarchicalImage result; + result.sampler = sampler; + result.warpSize = warpSize; + result.lastWarpPixel = warpSize - uint32_t2(1, 1); + return result; + } + + + uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC + { + const vector2_type texelCoord = xi * lastWarpPixel; + const vector2_type sampleCoord = (texelCoord + vector2_type(0.5f, 0.5f)) / vector2_type(warpSize.x, warpSize.y); + + matrix uvs = sampler.sampleUvs(sampleCoord); + + const vector2_type interpolant = frac(texelCoord); + + const vector2_type xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const vector2_type yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const vector2_type yDiff = yVals[1] - yVals[0]; + const vector2_type uv = yDiff * interpolant.y + yVals[0]; + const WarpResult warpResult = PostWarpT::warp(uv); + + const scalar_type detInterpolJacobian = determinant(matrix( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )); + + rcpPdf = abs((detInterpolJacobian * scalar_type(lastWarpPixel.x * lastWarpPixel.y)) / warpResult.density); + + return warpResult.dst; + } +}; + +} } } diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl index b8936c09f3..b1c1fcb5b2 100644 --- a/include/nbl/builtin/hlsl/sampling/warp.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -1,8 +1,6 @@ -#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ -#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" -#include "nbl/builtin/hlsl/fft/common.hlsl" namespace nbl { @@ -11,18 +9,19 @@ namespace hlsl namespace sampling { -template +template struct WarpResult { - C dst; + CodomainT dst; float32_t density; }; +} namespace concepts { // declare concept -#define NBL_CONCEPT_NAME WARP +#define NBL_CONCEPT_NAME Warp #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (U) // not the greatest syntax but works @@ -36,10 +35,6 @@ NBL_CONCEPT_BEGIN(3) #define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(U::domain_type)) - ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, WarpResult)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, float32_t)) ); #undef dst #undef xi @@ -47,7 +42,7 @@ NBL_CONCEPT_END( #include } -} + } } From ef773fdf1cfe50182eb6b2868965a0712b0af987 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 26 Dec 2025 16:02:25 +0700 Subject: [PATCH 12/29] Fix Spherical warp indentation --- .../hlsl/sampling/warps/spherical.hlsl | 81 ++++++++++--------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 095e138d60..9443151c6f 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -1,5 +1,5 @@ -#ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_ #include #include @@ -13,44 +13,45 @@ namespace sampling { namespace warp { - struct Spherical - { - using domain_type = float32_t2; - using codomain_type = float32_t3; - - template ) - static WarpResult warp(const D uv) - { - const float32_t phi = 2 * uv.x * numbers::pi; - const float32_t theta = uv.y * numbers::pi; - float32_t3 dir; - dir.x = cos(uv.x * 2.f * numbers::pi); - dir.y = sqrt(1.f - dir.x * dir.x); - if (uv.x > 0.5f) dir.y = -dir.y; - const float32_t cosTheta = cos(theta); - float32_t sinTheta = (1.0 - cosTheta * cosTheta); - dir.xy *= sinTheta; - dir.z = cosTheta; - WarpResult warpResult; - warpResult.dst = dir; - warpResult.density = 1 / (sinTheta * numbers::pi * numbers::pi); - return warpResult; - } - - template ) - static float32_t forwardDensity(const D uv) - { - const float32_t theta = uv.y * numbers::pi; - return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); - - } - - template ) - static float32_t backwardDensity(const C dst) - { - return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); - } - }; + +struct Spherical +{ + using domain_type = float32_t2; + using codomain_type = float32_t3; + + template ) + static WarpResult warp(const D uv) + { + const float32_t phi = 2 * uv.x * numbers::pi; + const float32_t theta = uv.y * numbers::pi; + float32_t3 dir; + dir.x = cos(uv.x * 2.f * numbers::pi); + dir.y = sqrt(1.f - dir.x * dir.x); + if (uv.x > 0.5f) dir.y = -dir.y; + const float32_t cosTheta = cos(theta); + float32_t sinTheta = (1.0 - cosTheta * cosTheta); + dir.xy *= sinTheta; + dir.z = cosTheta; + WarpResult warpResult; + warpResult.dst = dir; + warpResult.density = 1 / (sinTheta * numbers::pi * numbers::pi); + return warpResult; + } + + template ) + static float32_t forwardDensity(const D uv) + { + const float32_t theta = uv.y * numbers::pi; + return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); + + } + + template ) + static float32_t backwardDensity(const C dst) + { + return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); + } +}; } } From b9467fee56c7069a841137bc6438fdf753b585b1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 26 Dec 2025 16:56:47 +0700 Subject: [PATCH 13/29] Add some comment why we add xi to the sample uvs --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 5adbb5fb82..f57ce8f050 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -82,7 +82,7 @@ struct LuminanceMapSampler p.x |= 1; } - // TODO(kevinyu): Add some comment why we add xi. + // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); return directionUV; } From ac1e2f3fd9fb4850d0a52d96889e37d5bd2b7c14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 6 Jan 2026 21:54:47 +0700 Subject: [PATCH 14/29] WIP --- CMakeLists.txt | 1 + .../hlsl/sampling/warps/spherical.hlsl | 24 ++++++++++++++----- src/nbl/builtin/CMakeLists.txt | 3 +++ src/nbl/ext/CMakeLists.txt | 12 ++++++++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 84c9a99dc4..2ffac18cd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) +option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 9443151c6f..ecc9423916 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -19,8 +19,8 @@ struct Spherical using domain_type = float32_t2; using codomain_type = float32_t3; - template ) - static WarpResult warp(const D uv) + template ) + static WarpResult warp(const DomainT uv) { const float32_t phi = 2 * uv.x * numbers::pi; const float32_t theta = uv.y * numbers::pi; @@ -38,16 +38,28 @@ struct Spherical return warpResult; } - template ) - static float32_t forwardDensity(const D uv) + template ) + static domain_type inverseWarp(const CodomainT v) + { + float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); + uv.x *= (numbers::inv_pi * 0.5); + if (v.y < 0.0f) + uv.x += 1.0f; + uv.y *= numbers::inv_pi; + return uv; + } + + + template ) + static float32_t forwardDensity(const DomainT uv) { const float32_t theta = uv.y * numbers::pi; return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi); } - template ) - static float32_t backwardDensity(const C dst) + template ) + static float32_t backwardDensity(const CodomainT dst) { return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi); } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..050907b3a3 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -339,6 +339,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/structs.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/measure_luma.comp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index af46b29aab..221c1fe88e 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -66,6 +66,18 @@ if(NBL_BUILD_DEBUG_DRAW) ) endif() +if(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING) + add_subdirectory(EnvmapImportanceSampling) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS} + PARENT_SCOPE + ) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB} + PARENT_SCOPE + ) +endif() + propagate_changed_variables_to_parent_scope() NBL_ADJUST_FOLDERS(ext) \ No newline at end of file From baca1cf2044853df7b5f4d6d532d63960b81b482 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:48:16 +0700 Subject: [PATCH 15/29] Rename uv to coord for LuminanceAccessor concepts --- .../hlsl/concepts/accessors/hierarchical_image.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl index a7326ee3da..09abd08615 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -17,20 +17,20 @@ namespace hierarchical_image #define NBL_CONCEPT_TPLT_PRM_NAMES (U) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) -#define NBL_CONCEPT_PARAM_1 (uv,uint32_t2) +#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) #define NBL_CONCEPT_PARAM_2 (level,uint32_t) // start concept NBL_CONCEPT_BEGIN(3) // need to be defined AFTER the concept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,level)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(uv,level)) , ::nbl::hlsl::is_same_v, float32_t4)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, float32_t4)) ); #undef level -#undef uv +#undef coord #undef a #include From f12b7970b702f44030f31a425e8dce30bfe3dd27 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:48:39 +0700 Subject: [PATCH 16/29] Fix hierarchical_image.hlsl --- .../hlsl/sampling/hierarchical_image.hlsl | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index f57ce8f050..82637a42f8 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,27 +17,29 @@ namespace hlsl namespace sampling { -template && hierarchical_image::LuminanceReadAccessor) +template && hierarchical_image::LuminanceReadAccessor) struct LuminanceMapSampler { using scalar_type = T; using vector2_type = vector; using vector4_type = vector; - LuminanceAccessor _map; - uint32_t _mapSize; + LuminanceAccessorT _map; + uint32_t2 _mapSize; + uint32_t2 _lastWarpPixel; bool _aspect2x1; - static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessor) lumaMap, vector2_type mapSize, bool aspect2x1) + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) { - LuminanceAccessor result; + LuminanceMapSampler result; result._map = lumaMap; result._mapSize = mapSize; + result._lastWarpPixel = warpSize - uint32_t2(1, 1); result._aspect2x1 = aspect2x1; return result; } - static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(float32_t) xi) { // numerical resilience against IEEE754 scalar_type dummy = 0.0f; @@ -46,8 +48,9 @@ struct LuminanceMapSampler return partition(xi, dummy); } - vector2_type binarySearch(const vector2_type xi) + vector2_type binarySearch(const uint32_t2 coord) { + float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; uint32_t2 p = uint32_t2(0, 0); const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; @@ -87,18 +90,18 @@ struct LuminanceMapSampler return directionUV; } - matrix sampleUvs(vector2_type sampleCoord) NBL_CONST_MEMBER_FUNC + matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC { - const vector2_type dir0 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(0, 1), _aspect2x1); - const vector2_type dir1 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 1), _aspect2x1); - const vector2_type dir2 = binarySearch(_map, _mapSize, sampleCoord + vector2_type(1, 0), _aspect2x1); - const vector2_type dir3 = binarySearch(_map, _mapSize, sampleCoord, _aspect2x1); - return { + const vector2_type dir0 = binarySearch(sampleCoord + vector2_type(0, 1)); + const vector2_type dir1 = binarySearch(sampleCoord + vector2_type(1, 1)); + const vector2_type dir2 = binarySearch(sampleCoord + vector2_type(1, 0)); + const vector2_type dir3 = binarySearch(sampleCoord); + return matrix( dir0, dir1, dir2, dir3 - }; + ); } }; @@ -110,7 +113,7 @@ struct HierarchicalImage using vector3_type = vector; using vector4_type = vector; HierarchicalSamplerT sampler; - uint32_t warpSize; + uint32_t2 warpSize; uint32_t2 lastWarpPixel; static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize) @@ -143,7 +146,7 @@ struct HierarchicalImage const vector2_type yDiff = yVals[1] - yVals[0]; const vector2_type uv = yDiff * interpolant.y + yVals[0]; - const WarpResult warpResult = PostWarpT::warp(uv); + const WarpResult warpResult = PostWarpT::warp(uv); const scalar_type detInterpolJacobian = determinant(matrix( lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx From 0957aed7b7cc608b265a41739356325d0ab95640 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:49:06 +0700 Subject: [PATCH 17/29] Fix typo in spherical.hlsl --- include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index ecc9423916..48237c7e2a 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -38,7 +38,7 @@ struct Spherical return warpResult; } - template ) + template ) static domain_type inverseWarp(const CodomainT v) { float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); From 1b35d34e0bf4e63a59f0fc34922af4590a21e589 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:49:39 +0700 Subject: [PATCH 18/29] Implement gen_luma, gen_warpmap and measure_luma shaders --- .../builtin/hlsl/common.hlsl | 49 ++++++ .../builtin/hlsl/gen_luma.comp.hlsl | 30 ++++ .../builtin/hlsl/gen_warpmap.comp.hlsl | 51 +++++++ .../builtin/hlsl/measure_luma.comp.hlsl | 143 ++++++++++++++++++ 4 files changed, 273 insertions(+) create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl create mode 100644 include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl new file mode 100644 index 0000000000..e0240909f0 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ +#define _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace envmap_importance_sampling +{ + +struct SLumaGenPushConstants +{ + float32_t4 luminanceScales; + uint32_t2 lumaMapResolution; +}; + +struct SLumaMeasurePushConstants +{ + float32_t4 luminanceScales; + uint32_t2 lumaMapResolution; + uint64_t lumaMeasurementBuf; +}; + +struct SLumaMeasurement +{ + float32_t3 weightedDir; + float32_t luma; + float32_t maxLuma; +}; + +struct device_capabilities +{ +#ifdef TEST_NATIVE + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; +#else + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; +#endif +}; + +} +} +} +} + +#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl new file mode 100644 index 0000000000..e701f0b00d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl @@ -0,0 +1,30 @@ +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +[[vk::push_constant]] SLumaGenPushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D envMap; +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState envMapSampler; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +// TODO(kevinyu): Temporary to make nsc compiles +#define LUMA_MAP_GEN_WORKGROUP_DIM 16 + +[numthreads(LUMA_MAP_GEN_WORKGROUP_DIM, LUMA_MAP_GEN_WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + if (all(threadID < pc.lumaMapResolution)) + { + + const float32_t2 uv = (float32_t2(threadID.xy) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t3 envMapSample = envMap.Sample(envMapSampler, uv).rgb; + const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv.y); + + outImage[threadID.xy] = luma; + } +} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl new file mode 100644 index 0000000000..063dfaf9b9 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl @@ -0,0 +1,51 @@ +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" + + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +// TODO(kevinyu): Temporary to make nsc compiles +#define WARPMAP_GEN_WORKGROUP_DIM 16 + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::sampling; + +struct LuminanceAccessor +{ + float32_t get(uint32_t2 coord, uint32_t level) + { + return lumaMap.Load(uint32_t3(coord, level)); + } + + float32_t4 gather(uint32_t2 coord, uint32_t level) + { + return float32_t4( + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) + ); + + } +}; + +[numthreads(WARPMAP_GEN_WORKGROUP_DIM, WARPMAP_GEN_WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + LuminanceAccessor luminanceAccessor; + uint32_t lumaMapWidth, lumaMapHeight; + + using LuminanceSampler = LuminanceMapSampler; + + LuminanceSampler luminanceSampler = + LuminanceSampler::create(luminanceAccessor, lumaMapWidth, lumaMapHeight, lumaMapWidth != lumaMapHeight); + + uint32_t2 pixelCoord = threadID.xy; + + outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); + +} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl new file mode 100644 index 0000000000..845d12632d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl @@ -0,0 +1,143 @@ +#include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" + +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +// TODO(kevinyu): Temporary to make nsc works +using config_t = nbl::hlsl::workgroup2::ArithmeticConfiguration<4, 4, 2>; + +[[vk::push_constant]] SLumaMeasurePushConstants pc; + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +// final (level 1/2) scan needs to fit in one subgroup exactly +groupshared float32_t scratch[mpl::max_v]; + +struct PreloadedUnitData +{ + float32_t3 weightedDir; + float32_t luma; +}; + +struct ScratchProxy +{ + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = scratch[ix]; + } + + template + void set(const uint32_t ix, const AccessType value) + { + scratch[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct PreloadedData +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << config_t::WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = config_t::VirtualWorkgroupSize / WorkgroupSize; + + PreloadedUnitData getData(const uint32_t ix) + { + PreloadedUnitData value; + const int32_t2 pixelCoord = int32_t2(ix % pc.lumaMapResolution.x, ix / pc.lumaMapResolution.x); + const float32_t2 uv = (float32_t2(pixelCoord) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t luma = lumaMap.Load(int32_t3(pixelCoord, 0)); + value.weightedDir = sampling::warp::Spherical::warp(uv).dst * luma; + value.luma = luma; + return value; + } + + void preload() + { + const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data[idx] = getData(idx * WorkgroupSize + invocationIndex); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } + + PreloadedUnitData data[config_t::ItemsPerInvocation_0]; +}; + +static PreloadedData preloadData; + +struct DirXAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.x; + } +}; + +struct DirYAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.y; + } +}; + +struct DirZAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.z; + } +}; + +struct LumaAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].luma; + } +}; + +[numthreads(config_t::WorkgroupSize, 1, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + ScratchProxy scratchAccessor; + + preloadData.preload(); + preloadData.workgroupExecutionAndMemoryBarrier(); + + SLumaMeasurement measurement; + + DirXAccessor dirXAccessor; + measurement.weightedDir.x= workgroup2::reduction, device_capabilities>::template __call(dirXAccessor, scratchAccessor); + + DirYAccessor dirYAccessor; + measurement.weightedDir.y = workgroup2::reduction, device_capabilities>::template __call(dirYAccessor, scratchAccessor); + + DirZAccessor dirZAccessor; + measurement.weightedDir.z = workgroup2::reduction, device_capabilities>::template __call(dirZAccessor, scratchAccessor); + + LumaAccessor lumaAccessor; + measurement.luma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + measurement.maxLuma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + if (all(threadID == uint32_t3(0, 0, 0))) + vk::RawBufferStore(pc.lumaMeasurementBuf, measurement); +} From 665bb8ded899a7e597a06abcdf1996beff7f4ae8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:50:18 +0700 Subject: [PATCH 19/29] EnvmapImportanceSampling CMakeLists --- .../EnvmapImportanceSampling/CMakeLists.txt | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt new file mode 100644 index 0000000000..fabd4b8b50 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -0,0 +1,83 @@ +include(${NBL_ROOT_PATH}/cmake/common.cmake) + +set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +) + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" +) + +nbl_create_ext_library_project( + ENVMAP_IMPORTANCE_SAMPLING + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" + "" + "" +) + +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) + +set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl") +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl +) +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl", + "KEY": "gen_warpmap", + }, + { + "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl", + "KEY": "gen_luma", + }, + { + "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl", + "KEY": "measure_luma", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to envmap importance sampling ext common header which is not part of Nabla builtin archive + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${LIB_NAME}SPIRV + LINK_TO ${LIB_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp + NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build + TARGET ${LIB_NAME}_builtinsBuild + LINK_TO ${LIB_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + + +add_library(Nabla::ext::EnvmapImportanceSampling ALIAS ${LIB_NAME}) From b522b4f826a737b43ffb3a1d9dca681c0b9a5aef Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 10 Jan 2026 00:50:40 +0700 Subject: [PATCH 20/29] Initial implementation of CEnvmapImportanceSampling --- .../CEnvmapImportanceSampling.cpp | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp new file mode 100644 index 0000000000..87ede95598 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -0,0 +1,114 @@ +#include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/debug_draw/builtin/build/CArchive.h" +#endif + +#include "nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; + +namespace nbl::ext::envmap_importance_sampling +{ + +constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; + +const smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + // extension should mount everything for you, regardless if content goes from virtual filesystem + // or disk directly - and you should never rely on application framework to expose extension data + #ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + #else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); + #endif + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + auto getShader = [&](const core::string& key)->smart_refctd_ptr { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = params.utilities->getLogger(); + lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = params.assetManager->getAsset(key.c_str(), lp); + + const auto contents = bundle.getContents(); + + if (contents.empty()) + { + logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } + + if (bundle.getAssetType() != IAsset::ET_SHADER) + { + logger.log("Loaded asset has wrong type!", ILogger::ELL_ERROR); + return nullptr; + } + + return IAsset::castDown(contents[0]); + }; + + const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"measure_luma">(device); + smart_refctd_ptr genLumaShader = getShader(key); + if (!genLumaShader) + { + params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + return nullptr; + +} + +// +// core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createLumaGenPipelineLayout(video::ILogicalDevice* device) +// { +// asset::SPushConstantRange pcRange = { +// .stageFlags = hlsl::ESS_COMPUTE, +// .offset = 0, +// .size = sizeof(SLumaGenPushConstants) +// }; +// +// const IGPUDescriptorSetLayout::SBinding bindings[] = { +// { +// .binding = 0u, +// .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, +// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, +// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, +// .count = 1u, +// .immutableSamplers = &defaultSampler +// }, +// { +// .binding = 1u, +// .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, +// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, +// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, +// .count = 1u +// } +// }; +// +// } + +} From 3e51c69ce2072549cc130961250a6456450d58c6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 12 Jan 2026 18:12:53 +0700 Subject: [PATCH 21/29] Initial implementation of CEnvmapImportanceSampling --- .../CEnvmapImportanceSampling.h | 79 ++++++++++++ .../CEnvmapImportanceSampling.cpp | 120 +++++++++++++----- 2 files changed, 168 insertions(+), 31 deletions(-) create mode 100644 include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h new file mode 100644 index 0000000000..039874202d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -0,0 +1,79 @@ +#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ +#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ + +#include "nbl/asset/IPipelineLayout.h" +#include "nbl/video/declarations.h" + +namespace nbl::ext::envmap_importance_sampling +{ + +class EnvmapImportanceSampling +{ + public: + + struct SCachedCreationParameters + { + // using streaming_buffer_t = video::StreamingTransientDataBufferST>; + // + // static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + // static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + // + // DrawMode drawMode = ADM_DRAW_BOTH; + + core::smart_refctd_ptr utilities; + + //! optional, default MDI buffer allocated if not provided + // core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : public SCachedCreationParameters + { + video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create + core::smart_refctd_ptr assetManager = nullptr; + + core::smart_refctd_ptr genLumaPipelineLayout = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); + + return true; + } + + }; + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device, const core::smart_refctd_ptr* sampler); + + static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); + + //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + + static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + private: + core::smart_refctd_ptr m_lumaGenPipeline; + +}; + +} +#endif diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index 87ede95598..a4517123b9 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -70,45 +70,103 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre return IAsset::castDown(contents[0]); }; - const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"measure_luma">(device); + const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"gen_luma">(device); smart_refctd_ptr genLumaShader = getShader(key); if (!genLumaShader) { - params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); return nullptr; } - return nullptr; + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = genLumaShader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device, const smart_refctd_ptr* sampler) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = sampler + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); } -// -// core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createLumaGenPipelineLayout(video::ILogicalDevice* device) -// { -// asset::SPushConstantRange pcRange = { -// .stageFlags = hlsl::ESS_COMPUTE, -// .offset = 0, -// .size = sizeof(SLumaGenPushConstants) -// }; -// -// const IGPUDescriptorSetLayout::SBinding bindings[] = { -// { -// .binding = 0u, -// .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, -// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, -// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, -// .count = 1u, -// .immutableSamplers = &defaultSampler -// }, -// { -// .binding = 1u, -// .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, -// .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, -// .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, -// .count = 1u -// } -// }; -// -// } +core::smart_refctd_ptr EnvmapImportanceSampling::createMeasureLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaMeasurePushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); +} +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpMapPipelineLayout(video::ILogicalDevice* device) +{ + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); +} } From c72d305b9e38ddf736778a8fc23438c5a114f0f8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 12 Jan 2026 20:31:59 +0700 Subject: [PATCH 22/29] Small fixes --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 2 +- include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 82637a42f8..23011219f8 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -39,7 +39,7 @@ struct LuminanceMapSampler return result; } - static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(float32_t) xi) + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) { // numerical resilience against IEEE754 scalar_type dummy = 0.0f; diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl index 48237c7e2a..7df93ac651 100644 --- a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -38,14 +38,14 @@ struct Spherical return warpResult; } - template ) + template ) static domain_type inverseWarp(const CodomainT v) { - float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); + float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z)); uv.x *= (numbers::inv_pi * 0.5); if (v.y < 0.0f) - uv.x += 1.0f; - uv.y *= numbers::inv_pi; + uv.x += 1.0f; + uv.y *= numbers::inv_pi; return uv; } From 5ee2ce763057d4d5c322616fd5af600ac63ad742 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 20 Jan 2026 23:51:51 +0700 Subject: [PATCH 23/29] Initial implementation of computeWarpMap --- .../CEnvmapImportanceSampling.h | 87 +++++-- .../builtin/hlsl/gen_luma.comp.hlsl | 17 +- .../CEnvmapImportanceSampling.cpp | 240 +++++++++++++++--- .../EnvmapImportanceSampling/CMakeLists.txt | 66 ++--- 4 files changed, 300 insertions(+), 110 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h index 039874202d..e552635d3a 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -7,31 +7,25 @@ namespace nbl::ext::envmap_importance_sampling { -class EnvmapImportanceSampling +class EnvmapImportanceSampling final : public core::IReferenceCounted { public: + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + struct SCachedCreationParameters { - // using streaming_buffer_t = video::StreamingTransientDataBufferST>; - // - // static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - // static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - // - // DrawMode drawMode = ADM_DRAW_BOTH; - core::smart_refctd_ptr utilities; - - //! optional, default MDI buffer allocated if not provided - // core::smart_refctd_ptr streamingBuffer = nullptr; + uint32_t genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension; + uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension; }; struct SCreationParameters : public SCachedCreationParameters { - video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create core::smart_refctd_ptr assetManager = nullptr; - - core::smart_refctd_ptr genLumaPipelineLayout = nullptr; + core::smart_refctd_ptr envMap = nullptr; inline bool validate() const { @@ -39,8 +33,7 @@ class EnvmapImportanceSampling ({ std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), - std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), - std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is nullptr!"), }); system::logger_opt_ptr logger = utilities->getLogger(); @@ -58,20 +51,76 @@ class EnvmapImportanceSampling }; - static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device, const core::smart_refctd_ptr* sampler); + static core::smart_refctd_ptr create(SCreationParameters&& params); + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); + + static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); + + bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); + + // returns if RIS should be enabled based on variance calculations + inline bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor) + { + [[maybe_unused]] float dummy; + return computeWarpMap(cmdBuf, envMapRegularizationFactor, pdfNormalizationFactor, dummy); + } + + + inline core::smart_refctd_ptr getLumaMapView() + { + return m_lumaMap; + } + + protected: + struct ConstructorParams + { + SCachedCreationParameters creationParams; + hlsl::uint32_t2 lumaWorkgroupSize; + hlsl::uint32_t2 warpWorkgroupSize; + core::smart_refctd_ptr lumaMap; + core::smart_refctd_ptr warpMap; + core::smart_refctd_ptr genLumaPipeline; + core::smart_refctd_ptr genLumaDescriptorSet; + }; + + explicit EnvmapImportanceSampling(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_lumaWorkgroupSize(params.lumaWorkgroupSize), + m_warpWorkgroupSize(params.warpWorkgroupSize), + m_lumaMap(std::move(params.lumaMap)), + m_warpMap(std::move(params.warpMap)), + m_genLumaPipeline(std::move(params.genLumaPipeline)), + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)) + {} + + ~EnvmapImportanceSampling() override {} + private: - core::smart_refctd_ptr m_lumaGenPipeline; + + SCachedCreationParameters m_cachedCreationParams; + + hlsl::uint32_t2 m_lumaWorkgroupSize; + hlsl::uint32_t2 m_warpWorkgroupSize; + + core::smart_refctd_ptr m_lumaMap; + core::smart_refctd_ptr m_warpMap; + + core::smart_refctd_ptr m_genLumaPipeline; + core::smart_refctd_ptr m_genLumaDescriptorSet; }; diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl index e701f0b00d..3a039945b4 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl @@ -1,4 +1,4 @@ -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "common.hlsl" using namespace nbl; using namespace nbl::hlsl; @@ -6,24 +6,19 @@ using namespace nbl::hlsl::ext::envmap_importance_sampling; [[vk::push_constant]] SLumaGenPushConstants pc; -[[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D envMap; -[[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState envMapSampler; - +[[vk::binding(0, 0)]] Texture2D envMap; [[vk::binding(1, 0)]] RWTexture2D outImage; -// TODO(kevinyu): Temporary to make nsc compiles -#define LUMA_MAP_GEN_WORKGROUP_DIM 16 - -[numthreads(LUMA_MAP_GEN_WORKGROUP_DIM, LUMA_MAP_GEN_WORKGROUP_DIM, 1)] +[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { if (all(threadID < pc.lumaMapResolution)) { - const float32_t2 uv = (float32_t2(threadID.xy) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); - const float32_t3 envMapSample = envMap.Sample(envMapSampler, uv).rgb; - const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv.y); + const float uv_y = (float(threadID.y) + 0.5) / pc.lumaMapResolution.y; + const float32_t3 envMapSample = envMap.Load(float32_t3(threadID.xy, 0)); + const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv_y); outImage[threadID.xy] = luma; } diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index a4517123b9..f6aad7c25e 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -1,5 +1,6 @@ #include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" #include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "nlohmann/detail/input/parser.hpp" using namespace nbl::hlsl::ext::envmap_importance_sampling; @@ -7,8 +8,6 @@ using namespace nbl::hlsl::ext::envmap_importance_sampling; #include "nbl/ext/debug_draw/builtin/build/CArchive.h" #endif -#include "nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp" - using namespace nbl; using namespace core; using namespace video; @@ -19,9 +18,145 @@ using namespace hlsl; namespace nbl::ext::envmap_importance_sampling { -constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; +namespace +{ + constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; + + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, core::smart_refctd_ptr textureView) + { + + } + + core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) + { + const auto real_layers = layers ? layers:1u; + + IGPUImage::SCreationParams imgParams; + imgParams.extent = extent; + imgParams.arrayLayers = real_layers; + imgParams.flags = static_cast(0); + imgParams.format = format; + imgParams.mipLevels = mipLevels; + imgParams.samples = IImage::ESCF_1_BIT; + imgParams.type = IImage::ET_2D; + imgParams.usage = IImage::EUF_STORAGE_BIT; + const auto image = device->createImage(std::move(imgParams)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(imageMemReqs, image.get()); + + IGPUImageView::SCreationParams viewparams; + viewparams.subUsages = IImage::EUF_STORAGE_BIT; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = std::move(image); + viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return device->createImageView(std::move(viewparams)); + } + + core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = logger; + lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + const auto assetType = bundle.getAssetType(); + logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + } +} + + + +core::smart_refctd_ptr EnvmapImportanceSampling::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + if (!params.validate()) + { + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent; + // we don't need the 1x1 mip for anything + const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; + const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D + { + const uint32_t width = 0x1u<>1u,1u }; + }(); + auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2 + { + return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1); + }; + + const auto device = params.utilities->getLogicalDevice(); + + ConstructorParams constructorParams; + + constructorParams.lumaWorkgroupSize = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); + + constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); + + const auto upscale = 0; + const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); + const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; + envMapDescriptorInfo.desc = params.envMap; + envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo; + lumaMapDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo + }, + { + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapDescriptorInfo + } + }; + + device->updateDescriptorSets(writes, {}); + + constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + + constructorParams.creationParams = std::move(params); + + return core::smart_refctd_ptr(new EnvmapImportanceSampling(std::move(constructorParams))); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32G32_SFLOAT); +} -const smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) { assert(system); @@ -47,32 +182,33 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre auto* device = params.utilities->getLogicalDevice(); mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); - auto getShader = [&](const core::string& key)->smart_refctd_ptr { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = params.utilities->getLogger(); - lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; - auto bundle = params.assetManager->getAsset(key.c_str(), lp); + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; - const auto contents = bundle.getContents(); +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - if (contents.empty()) - { - logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); - return nullptr; - } + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", "16" }, + }; - if (bundle.getAssetType() != IAsset::ET_SHADER) - { - logger.log("Loaded asset has wrong type!", ILogger::ELL_ERROR); - return nullptr; - } + options.preprocessorOptions.extraDefines = defines; - return IAsset::castDown(contents[0]); - }; - - const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"gen_luma">(device); - smart_refctd_ptr genLumaShader = getShader(key); - if (!genLumaShader) + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) { logger.log("Could not compile shaders!", ILogger::ELL_ERROR); return nullptr; @@ -80,7 +216,7 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = pipelineLayout; - pipelineParams[0].shader = { .shader = genLumaShader.get(), .entryPoint = "main" }; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; smart_refctd_ptr pipeline; params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); @@ -94,7 +230,7 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre } -core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device, const smart_refctd_ptr* sampler) +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device) { asset::SPushConstantRange pcRange = { .stageFlags = hlsl::ESS_COMPUTE, @@ -105,11 +241,10 @@ core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::cr const IGPUDescriptorSetLayout::SBinding bindings[] = { { .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = sampler + .count = 1u }, { .binding = 1u, @@ -169,4 +304,47 @@ core::smart_refctd_ptr EnvmapImportanceSampling::crea const auto setLayout = device->createDescriptorSetLayout(bindings); return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); } + +bool EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf, const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma) +{ + bool enableRIS = false; + + SLumaGenPushConstants pcData = {}; + pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + { + const auto imageExtent = m_lumaMap->getCreationParameters().image->getCreationParameters().extent; + pcData.lumaMapResolution = {imageExtent.width, imageExtent.height}; + } + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier; + barrier.barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }; + barrier.image = m_lumaMap->getCreationParameters().image.get(); + barrier.subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = m_lumaMap->getCreationParameters().image->getCreationParameters().mipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + barrier.oldLayout = IImage::LAYOUT::UNDEFINED; + barrier.newLayout = IImage::LAYOUT::GENERAL; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupSize.x, m_lumaWorkgroupSize.y, 1); + + return enableRIS; + +} } diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt index fabd4b8b50..f7fbd6c55f 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -10,73 +10,41 @@ set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" ) -nbl_create_ext_library_project( - ENVMAP_IMPORTANCE_SAMPLING - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" - "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" - "" - "" -) - get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl") + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl ) -target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) - -set(SM 6_8) -set(JSON [=[ -[ - { - "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl", - "KEY": "gen_warpmap", - }, - { - "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl", - "KEY": "gen_luma", - }, - { - "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl", - "KEY": "measure_luma", - } - -] -]=]) -string(CONFIGURE "${JSON}" JSON) -set(COMPILE_OPTIONS - -I "${NBL_ROOT_PATH}/include" # a workaround due to envmap importance sampling ext common header which is not part of Nabla builtin archive - -I "${CMAKE_CURRENT_SOURCE_DIR}" - -T lib_${SM} +nbl_create_ext_library_project( + ENVMAP_IMPORTANCE_SAMPLING + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" + "" + NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT="${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}" ) -NBL_CREATE_NSC_COMPILE_RULES( - TARGET ${LIB_NAME}SPIRV - LINK_TO ${LIB_NAME} - DEPENDS ${DEPENDS} - BINARY_DIR ${OUTPUT_DIRECTORY} - MOUNT_POINT_DEFINE NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT - COMMON_OPTIONS ${COMPILE_OPTIONS} - OUTPUT_VAR KEYS - INCLUDE nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp - NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build - INPUTS ${JSON} -) +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + NBL_CREATE_RESOURCE_ARCHIVE( NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build TARGET ${LIB_NAME}_builtinsBuild LINK_TO ${LIB_NAME} BIND ${OUTPUT_DIRECTORY} - BUILTINS ${KEYS} + BUILTINS + common.hlsl + gen_luma.comp.hlsl + gen_warpmap.comp.hlsl + measure_luma.comp.hlsl + ) From 867868c2f0697b12d52bffb7c6ed8ee97c7ee27c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:15:40 +0700 Subject: [PATCH 24/29] Fix arithmetic config no const specifier for method --- include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 22c93ce193..aa395ad524 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -205,7 +205,7 @@ struct SArithmeticConfiguration #undef DEFINE_ASSIGN } - std::string getConfigTemplateStructString() + std::string getConfigTemplateStructString() NBL_CONST_MEMBER_FUNC { std::ostringstream os; os << "nbl::hlsl::workgroup2::ArithmeticConfiguration<" << WorkgroupSizeLog2 << "," << SubgroupSizeLog2 << "," << ItemsPerInvocation_0 << ">;"; From 1a66157391a2d4009e4b1b85e0080fc153ac4754 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:19:27 +0700 Subject: [PATCH 25/29] Define config_t from outside --- .../builtin/hlsl/measure_luma.comp.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl index 845d12632d..ffe6477f5a 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl @@ -1,14 +1,14 @@ #include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" #include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "common.hlsl" using namespace nbl; using namespace nbl::hlsl; using namespace nbl::hlsl::ext::envmap_importance_sampling; // TODO(kevinyu): Temporary to make nsc works -using config_t = nbl::hlsl::workgroup2::ArithmeticConfiguration<4, 4, 2>; +using config_t = WORKGROUP_CONFIG_T; [[vk::push_constant]] SLumaMeasurePushConstants pc; @@ -115,7 +115,7 @@ struct LumaAccessor [numthreads(config_t::WorkgroupSize, 1, 1)] [shader("compute")] -void main(uint32_t3 threadID : SV_DispatchThreadID) +void main(uint32_t localInvocationIndex : SV_GroupIndex, uint32_t3 groupID: SV_GroupID) { ScratchProxy scratchAccessor; @@ -138,6 +138,6 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) measurement.maxLuma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); - if (all(threadID == uint32_t3(0, 0, 0))) - vk::RawBufferStore(pc.lumaMeasurementBuf, measurement); + if (localInvocationIndex == 0) + vk::RawBufferStore(pc.lumaMeasurementBuf + (groupID.x * sizeof(SLumaMeasurement)), measurement); } From d4b81050ea895b130ea32f462a31951d100c22bd Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:19:49 +0700 Subject: [PATCH 26/29] More fixes on computeWarpMap implementation --- .../CEnvmapImportanceSampling.h | 47 ++- .../CEnvmapImportanceSampling.cpp | 398 ++++++++++++++---- 2 files changed, 350 insertions(+), 95 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h index e552635d3a..b493e88b4d 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -3,6 +3,7 @@ #include "nbl/asset/IPipelineLayout.h" #include "nbl/video/declarations.h" +#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" namespace nbl::ext::envmap_importance_sampling { @@ -55,56 +56,61 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); - static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); - - static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); + static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device); //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); - static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); - bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); - - // returns if RIS should be enabled based on variance calculations - inline bool computeWarpMap(video::IGPUCommandBuffer* cmdBuf, float envMapRegularizationFactor, float& pdfNormalizationFactor) - { - [[maybe_unused]] float dummy; - return computeWarpMap(cmdBuf, envMapRegularizationFactor, pdfNormalizationFactor, dummy); - } + void computeWarpMap(video::IGPUCommandBuffer* cmdBuf); + // use this to synchronize warp map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); inline core::smart_refctd_ptr getLumaMapView() { return m_lumaMap; } + inline core::smart_refctd_ptr getWarpMapView() + { + return m_warpMap; + } + protected: struct ConstructorParams { SCachedCreationParameters creationParams; - hlsl::uint32_t2 lumaWorkgroupSize; - hlsl::uint32_t2 warpWorkgroupSize; + hlsl::uint32_t2 lumaWorkgroupCount; + hlsl::uint32_t2 warpWorkgroupCount; core::smart_refctd_ptr lumaMap; core::smart_refctd_ptr warpMap; core::smart_refctd_ptr genLumaPipeline; core::smart_refctd_ptr genLumaDescriptorSet; + core::smart_refctd_ptr genWarpPipeline; + core::smart_refctd_ptr genWarpDescriptorSet; }; explicit EnvmapImportanceSampling(ConstructorParams&& params) : m_cachedCreationParams(std::move(params.creationParams)), - m_lumaWorkgroupSize(params.lumaWorkgroupSize), - m_warpWorkgroupSize(params.warpWorkgroupSize), + m_lumaWorkgroupCount(params.lumaWorkgroupCount), + m_warpWorkgroupCount(params.warpWorkgroupCount), m_lumaMap(std::move(params.lumaMap)), m_warpMap(std::move(params.warpMap)), m_genLumaPipeline(std::move(params.genLumaPipeline)), - m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)) + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)), + m_genWarpPipeline(std::move(params.genWarpPipeline)), + m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet)) {} ~EnvmapImportanceSampling() override {} @@ -113,14 +119,17 @@ class EnvmapImportanceSampling final : public core::IReferenceCounted SCachedCreationParameters m_cachedCreationParams; - hlsl::uint32_t2 m_lumaWorkgroupSize; - hlsl::uint32_t2 m_warpWorkgroupSize; + hlsl::uint32_t2 m_lumaWorkgroupCount; + hlsl::uint32_t2 m_warpWorkgroupCount; core::smart_refctd_ptr m_lumaMap; core::smart_refctd_ptr m_warpMap; core::smart_refctd_ptr m_genLumaPipeline; core::smart_refctd_ptr m_genLumaDescriptorSet; + + core::smart_refctd_ptr m_genWarpPipeline; + core::smart_refctd_ptr m_genWarpDescriptorSet; }; diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp index f6aad7c25e..bfe25b625e 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -22,9 +22,51 @@ namespace { constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; - void generateMipmap(video::IGPUCommandBuffer* cmdBuf, core::smart_refctd_ptr textureView) + // image must have the first mip layout set to transfer src, and the rest to dst + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) { - + const auto mipLevels = image->getCreationParameters().mipLevels; + const auto extent = image->getCreationParameters().extent; + for (uint32_t mip_i = 1; mip_i < mipLevels; mip_i++) + { + + const IGPUCommandBuffer::SImageBlit blit = { + .srcMinCoord = {0, 0, 0}, + .srcMaxCoord = {extent.width >> (mip_i - 1), extent.height >> (mip_i - 1), 1}, + .dstMinCoord = {0, 0, 0}, + .dstMaxCoord = {extent.width >> mip_i, extent.height >> mip_i, 1}, + .layerCount = 1, + .srcBaseLayer = 0, + .dstBaseLayer = 0, + .srcMipLevel = mip_i - 1, + .dstMipLevel = mip_i, + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + }; + cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = image, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = mip_i, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + } } core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) @@ -39,14 +81,14 @@ namespace imgParams.mipLevels = mipLevels; imgParams.samples = IImage::ESCF_1_BIT; imgParams.type = IImage::ET_2D; - imgParams.usage = IImage::EUF_STORAGE_BIT; + imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; const auto image = device->createImage(std::move(imgParams)); auto imageMemReqs = image->getMemoryReqs(); imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); device->allocate(imageMemReqs, image.get()); IGPUImageView::SCreationParams viewparams; - viewparams.subUsages = IImage::EUF_STORAGE_BIT; + viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; viewparams.flags = static_cast(0); viewparams.format = format; viewparams.image = std::move(image); @@ -77,8 +119,6 @@ namespace } } - - core::smart_refctd_ptr EnvmapImportanceSampling::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); @@ -106,40 +146,59 @@ core::smart_refctd_ptr EnvmapImportanceSampling::creat ConstructorParams constructorParams; - constructorParams.lumaWorkgroupSize = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); - + constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); const auto upscale = 0; const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + const auto genWarpPipelineLayout = createGenWarpPipelineLayout(device); + constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get()); + const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts()); + const auto genWarpDescriptorSet = genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0])); + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; envMapDescriptorInfo.desc = params.envMap; envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo; - lumaMapDescriptorInfo.desc = constructorParams.lumaMap; - lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo; + lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo; + lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo; + warpMapDescriptorInfo.desc = constructorParams.warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { { .dstSet = genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo }, { - .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapDescriptorInfo - } + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo + }, }; device->updateDescriptorSets(writes, {}); constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + constructorParams.genWarpDescriptorSet = genWarpDescriptorSet; constructorParams.creationParams = std::move(params); @@ -148,12 +207,12 @@ core::smart_refctd_ptr EnvmapImportanceSampling::creat core::smart_refctd_ptr EnvmapImportanceSampling::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) { - return createTexture(device, extent, EF_R32_SFLOAT, mipCount); + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); } core::smart_refctd_ptr EnvmapImportanceSampling::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) { - return createTexture(device, extent, EF_R32G32_SFLOAT); + return createTexture(device, extent, EF_R32G32_SFLOAT); } smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) @@ -200,8 +259,9 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre options.preprocessorOptions.logger = logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); const IShaderCompiler::SMacroDefinition defines[] = { - { "WORKGROUP_DIM", "16" }, + { "WORKGROUP_DIM", workgroupDimStr.data() }, }; options.preprocessorOptions.extraDefines = defines; @@ -229,6 +289,60 @@ core::smart_refctd_ptr EnvmapImportanceSampling::cre return pipeline; } +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device) { @@ -260,29 +374,7 @@ core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::cr } -core::smart_refctd_ptr EnvmapImportanceSampling::createMeasureLumaPipelineLayout(video::ILogicalDevice* device) -{ - asset::SPushConstantRange pcRange = { - .stageFlags = hlsl::ESS_COMPUTE, - .offset = 0, - .size = sizeof(SLumaMeasurePushConstants) - }; - - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - } - }; - - const auto setLayout = device->createDescriptorSetLayout(bindings); - return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); -} - -core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpMapPipelineLayout(video::ILogicalDevice* device) +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipelineLayout(video::ILogicalDevice* device) { const IGPUDescriptorSetLayout::SBinding bindings[] = { { @@ -305,46 +397,200 @@ core::smart_refctd_ptr EnvmapImportanceSampling::crea return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); } -bool EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf, const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma) +void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) { - bool enableRIS = false; - - SLumaGenPushConstants pcData = {}; - pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; + const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; + + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + { - const auto imageExtent = m_lumaMap->getCreationParameters().image->getCreationParameters().extent; - pcData.lumaMapResolution = {imageExtent.width, imageExtent.height}; + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); } - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier; - barrier.barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }; - barrier.image = m_lumaMap->getCreationParameters().image.get(); - barrier.subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = m_lumaMap->getCreationParameters().image->getCreationParameters().mipLevels, - .baseArrayLayer = 0u, - .layerCount = 1u + // Gen Luma Map + { + SLumaGenPushConstants pcData = {}; + pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; + pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupCount.x, m_lumaWorkgroupCount.y, 1); + } + + // Generate luminance mip map + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 1u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + generateMipmap(cmdBuf, lumaMapImage); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = lumaMapMipLevels - 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), + 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); + } + +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = newLayout, }; - barrier.oldLayout = IImage::LAYOUT::UNDEFINED; - barrier.newLayout = IImage::LAYOUT::GENERAL; - cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); - - cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); - cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, - 0, sizeof(SLumaGenPushConstants), &pcData); - cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), - 0, 1, &m_genLumaDescriptorSet.get()); - cmdBuf->dispatch(m_lumaWorkgroupSize.x, m_lumaWorkgroupSize.y, 1); - - return enableRIS; - } + } From 8853738d6a4b1d1d4a12908222ee3cf4344abf04 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:20:48 +0700 Subject: [PATCH 27/29] Fix chose second to be placed inside the loop --- include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 23011219f8..61c2ce7dde 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -79,11 +79,11 @@ struct LuminanceMapSampler wx_0 = values[3]; wx_1 = values[2]; } + } + if (choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; } - if (choseSecond(wx_0, wx_1, xi.x)) - p.x |= 1; - } // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); From 6bde48958c43d77ec04fb24a7f94f0678d8731cc Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:21:29 +0700 Subject: [PATCH 28/29] LuminanceReadAccessor take ScalarT as template parameter --- .../hlsl/concepts/accessors/hierarchical_image.hlsl | 8 ++++---- .../nbl/builtin/hlsl/sampling/hierarchical_image.hlsl | 10 ++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl index 09abd08615..51bcce8c92 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -13,8 +13,8 @@ namespace hierarchical_image { // declare concept #define NBL_CONCEPT_NAME LuminanceReadAccessor -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(ScalarT) // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,U) #define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) @@ -26,8 +26,8 @@ NBL_CONCEPT_BEGIN(3) #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, float32_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, float32_t4)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) ); #undef level #undef coord diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl index 61c2ce7dde..de03696e53 100644 --- a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -17,7 +17,7 @@ namespace hlsl namespace sampling { -template && hierarchical_image::LuminanceReadAccessor) +template && hierarchical_image::LuminanceReadAccessor) struct LuminanceMapSampler { using scalar_type = T; @@ -44,7 +44,7 @@ struct LuminanceMapSampler // numerical resilience against IEEE754 scalar_type dummy = 0.0f; PartitionRandVariable partition; - partition.leftProb = 1.0f / (1.0f + second/ first); + partition.leftProb = 1.0f / (1.0f + (second / first)); return partition(xi, dummy); } @@ -52,16 +52,15 @@ struct LuminanceMapSampler { float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; uint32_t2 p = uint32_t2(0, 0); - const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; + const uint32_t2 mip2x1 = findMSB(_mapSize.y); if (_aspect2x1) { // do one split in the X axis first cause penultimate full mip would have been 2x1 p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(0, 1), mip2x1), xi.x) ? 1 : 0; } - for (uint32_t i = mip2x1; i != 0;) + for (int i = mip2x1 - 1; i >= 0; i--) { - --i; p <<= 1; const vector4_type values = _map.gather(p, i); scalar_type wx_0, wx_1; @@ -125,7 +124,6 @@ struct HierarchicalImage return result; } - uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC { const vector2_type texelCoord = xi * lastWarpPixel; From 756fbb051092ec729b3cf69400e2d895af3a2700 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 30 Jan 2026 15:23:37 +0700 Subject: [PATCH 29/29] gen_warpmap to gen_warp --- .../{gen_warpmap.comp.hlsl => gen_warp.comp.hlsl} | 13 +++++-------- src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt | 5 +++-- 2 files changed, 8 insertions(+), 10 deletions(-) rename include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/{gen_warpmap.comp.hlsl => gen_warp.comp.hlsl} (74%) diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl similarity index 74% rename from include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl rename to include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl index 063dfaf9b9..c621efd4af 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl @@ -1,13 +1,8 @@ -#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" #include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" - [[vk::binding(0, 0)]] Texture2D lumaMap; -[[vk::binding(1, 0)]] RWTexture2D outImage; - -// TODO(kevinyu): Temporary to make nsc compiles -#define WARPMAP_GEN_WORKGROUP_DIM 16 +[[vk::binding(1, 0)]] RWTexture2D outImage; using namespace nbl; using namespace nbl::hlsl; @@ -32,17 +27,19 @@ struct LuminanceAccessor } }; -[numthreads(WARPMAP_GEN_WORKGROUP_DIM, WARPMAP_GEN_WORKGROUP_DIM, 1)] +[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { LuminanceAccessor luminanceAccessor; uint32_t lumaMapWidth, lumaMapHeight; + lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight); + using LuminanceSampler = LuminanceMapSampler; LuminanceSampler luminanceSampler = - LuminanceSampler::create(luminanceAccessor, lumaMapWidth, lumaMapHeight, lumaMapWidth != lumaMapHeight); + LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(lumaMapWidth, lumaMapHeight)); uint32_t2 pixelCoord = threadID.xy; diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt index f7fbd6c55f..7486ba8923 100644 --- a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -17,7 +17,8 @@ set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_P set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl - ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warp.comp.hlsl ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl ) @@ -42,7 +43,7 @@ NBL_CREATE_RESOURCE_ARCHIVE( BUILTINS common.hlsl gen_luma.comp.hlsl - gen_warpmap.comp.hlsl + gen_warp.comp.hlsl measure_luma.comp.hlsl )