diff --git a/CMakeLists.txt b/CMakeLists.txt index 84c9a99dc4..2ffac18cd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) +option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl index cc22595444..ab7a87c7dd 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl @@ -69,7 +69,7 @@ NBL_CONCEPT_END( #include template -NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor && GenericWriteAccessor; +NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor && GenericWriteAccessor; } } diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl new file mode 100644 index 0000000000..51bcce8c92 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -0,0 +1,61 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace hierarchical_image +{ +// declare concept +#define NBL_CONCEPT_NAME LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(ScalarT) +// not 
the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +// start concept +NBL_CONCEPT_BEGIN(3) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) +); +#undef level +#undef coord +#undef a +#include + +// declare concept +#define NBL_CONCEPT_NAME HierarchicalSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) +#define NBL_CONCEPT_PARAM_1 (coord,vector) +// start concept +NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.template sampleUvs(coord)) , ::nbl::hlsl::is_same_v, matrix)) +); +#undef sampler +#undef coord +#include + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl new file mode 100644 index 0000000000..de03696e53 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -0,0 +1,164 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+
+#include
+#include
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+// Turns a warp-map pixel into a UV on the luminance map by descending the luminance mip pyramid, coarsest level first.
+template && hierarchical_image::LuminanceReadAccessor)
+struct LuminanceMapSampler
+{
+    using scalar_type = T;
+    using vector2_type = vector;
+    using vector4_type = vector;
+
+    LuminanceAccessorT _map;
+    uint32_t2 _mapSize;
+    uint32_t2 _lastWarpPixel; // warpSize - 1, the divisor that turns a warp pixel coordinate into xi
+    bool _aspect2x1;
+
+    static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize)
+    {
+        LuminanceMapSampler result;
+        result._map = lumaMap;
+        result._mapSize = mapSize;
+        result._lastWarpPixel = warpSize - uint32_t2(1, 1);
+        result._aspect2x1 = aspect2x1;
+        return result;
+    }
+    // Returns what PartitionRandVariable decides for the weights `first`/`second`; `xi` is handed over by reference (presumably rescaled for reuse - confirm).
+    static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi)
+    {
+        // numerical resilience against IEEE754
+        scalar_type dummy = 0.0f;
+        PartitionRandVariable partition;
+        partition.leftProb = 1.0f / (1.0f + (second / first));
+        return partition(xi, dummy);
+    }
+    // Maps warp pixel `coord` to a UV on the luminance map, picking one texel per mip level from the 2x1 (or 2x2) level down to level 0.
+    vector2_type binarySearch(const uint32_t2 coord) NBL_CONST_MEMBER_FUNC
+    {
+        float32_t2 xi = float32_t2(coord)/ _lastWarpPixel;
+        uint32_t2 p = uint32_t2(0, 0);
+        const uint32_t mip2x1 = findMSB(_mapSize.y); // scalar: used as the `level` argument and as the loop bound
+
+        if (_aspect2x1) {
+            // do one split in the X axis first cause penultimate full mip would have been 2x1
+            p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(1, 0), mip2x1), xi.x) ? 1 : 0; // the two texels of a 2x1 mip differ in x
+        }
+
+        for (int i = mip2x1 - 1; i >= 0; i--)
+        {
+            p <<= 1;
+            const vector4_type values = _map.gather(p, i);
+            scalar_type wx_0, wx_1;
+            {
+                const scalar_type wy_0 = values[3] + values[2];
+                const scalar_type wy_1 = values[1] + values[0];
+                if (choseSecond(wy_0, wy_1, xi.y))
+                {
+                    p.y |= 1;
+                    wx_0 = values[0];
+                    wx_1 = values[1];
+                }
+                else
+                {
+                    wx_0 = values[3];
+                    wx_1 = values[2];
+                }
+            }
+            if (choseSecond(wx_0, wx_1, xi.x))
+                p.x |= 1;
+        }
+
+
+        // If we don't add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian.
+        const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize);
+        return directionUV;
+    }
+    // Runs binarySearch for `sampleCoord` and its +x/+y neighbours; rows are ordered (0,1), (1,1), (1,0), (0,0).
+    matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC
+    {
+        const vector2_type dir0 = binarySearch(sampleCoord + vector2_type(0, 1));
+        const vector2_type dir1 = binarySearch(sampleCoord + vector2_type(1, 1));
+        const vector2_type dir2 = binarySearch(sampleCoord + vector2_type(1, 0));
+        const vector2_type dir3 = binarySearch(sampleCoord);
+        return matrix(
+            dir0,
+            dir1,
+            dir2,
+            dir3
+        );
+    }
+};
+// Bilinearly interpolates four UVs obtained from `sampler` and pushes the result through PostWarpT::warp.
+template && hierarchical_image::HierarchicalSampler && concepts::Warp)
+struct HierarchicalImage
+{
+    using scalar_type = T;
+    using vector2_type = vector;
+    using vector3_type = vector;
+    using vector4_type = vector;
+    HierarchicalSamplerT sampler;
+    uint32_t2 warpSize;
+    uint32_t2 lastWarpPixel;
+
+    static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize)
+    {
+        HierarchicalImage result;
+        result.sampler = sampler;
+        result.warpSize = warpSize;
+        result.lastWarpPixel = warpSize - uint32_t2(1, 1);
+        return result;
+    }
+    // Returns PostWarpT::warp(uv).dst for the interpolated uv and writes |det(Jacobian) * lastWarpPixel.x * lastWarpPixel.y / density| to `rcpPdf`.
+    uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC
+    {
+        const vector2_type texelCoord = xi * lastWarpPixel;
+        const vector2_type sampleCoord = (texelCoord + vector2_type(0.5f, 0.5f)) / vector2_type(warpSize.x, warpSize.y);
+
+        matrix uvs = sampler.sampleUvs(sampleCoord); // NOTE(review): sampleCoord is normalized here, LuminanceMapSampler::sampleUvs takes integer pixel coordinates - confirm the intended sampler
+
+        const vector2_type interpolant = frac(texelCoord);
+
+        const vector2_type xDiffs[] = {
+            uvs[2] - uvs[3],
+            uvs[1] - uvs[0]
+        };
+        const vector2_type yVals[] = {
+            xDiffs[0] * interpolant.x + uvs[3],
+            xDiffs[1] * interpolant.x + uvs[0]
+        };
+        const vector2_type yDiff = yVals[1] - yVals[0];
+        const vector2_type uv = yDiff * interpolant.y + yVals[0];
+
+        const WarpResult warpResult = PostWarpT::warp(uv);
+
+        const scalar_type detInterpolJacobian = determinant(matrix(
+            lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx
+            yDiff // second column dFdy
+        ));
+
+        rcpPdf = abs((detInterpolJacobian * scalar_type(lastWarpPixel.x * lastWarpPixel.y)) / warpResult.density);
+
+        return warpResult.dst; // NOTE(review): declared return type is uint32_t2 while dst is the warp's codomain value - confirm
+    }
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl
new file mode 100644
index 0000000000..b1c1fcb5b2
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl
@@ -0,0 +1,49 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+// What a warp returns: the warped value and the density the warp reports for it.
+template
+struct WarpResult
+{
+    CodomainT dst;
+    float32_t density;
+};
+}
+
+namespace concepts
+{
+
+// declare concept
+#define NBL_CONCEPT_NAME Warp
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (U)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (warper,U)
+#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type)
+#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type)
+// start concept
+NBL_CONCEPT_BEGIN(3)
+#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_TYPE)(U::domain_type))
+);
+#undef dst
+#undef xi
+#undef warper
+#include
+
+}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
new file mode 100644
index 0000000000..7df93ac651
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
@@ -0,0 +1,79 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_
+
+#include
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace warp
+{
+
+// Warp between the unit square and directions: uv.x drives the azimuth (phi = 2*pi*uv.x), uv.y the polar angle from +Z (theta = pi*uv.y).
+struct Spherical
+{
+    using domain_type = float32_t2;
+    using codomain_type = float32_t3;
+
+    // Returns the unit direction (sin(theta)*cos(phi), sin(theta)*sin(phi), cos(theta)) for `uv`.
+    // `density` is the same value forwardDensity(uv) returns: 1 / (2 * pi^2 * sin(theta)).
+    template )
+    static WarpResult warp(const DomainT uv)
+    {
+        const float32_t phi = 2 * uv.x * numbers::pi;
+        const float32_t theta = uv.y * numbers::pi;
+        float32_t3 dir;
+        dir.x = cos(phi);
+        dir.y = sqrt(1.f - dir.x * dir.x);
+        if (uv.x > 0.5f) dir.y = -dir.y;
+        const float32_t cosTheta = cos(theta);
+        // sin(theta) is non-negative on [0, pi], so the positive root of 1 - cos^2 is sin(theta) itself
+        const float32_t sinTheta = sqrt(1.f - cosTheta * cosTheta);
+        dir.xy *= sinTheta;
+        dir.z = cosTheta;
+        WarpResult warpResult;
+        warpResult.dst = dir;
+        warpResult.density = 1.f / (sinTheta * 2 * numbers::pi * numbers::pi);
+        return warpResult;
+    }
+
+    // Recovers uv from direction `v`; azimuths with v.y < 0 come out negative and are wrapped by adding 1.
+    template )
+    static domain_type inverseWarp(const CodomainT v)
+    {
+        float32_t2 uv = float32_t2(atan2(v.y, v.x), acos(v.z));
+        uv.x *= (numbers::inv_pi * 0.5);
+        if (v.y < 0.0f)
+            uv.x += 1.0f;
+        uv.y *= numbers::inv_pi;
+        return uv;
+    }
+
+    // Density of warp() evaluated from the domain point: 1 / (2 * pi^2 * sin(pi * uv.y)).
+    template )
+    static float32_t forwardDensity(const DomainT uv)
+    {
+        const float32_t theta = uv.y * numbers::pi;
+        return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi);
+
+    }
+
+    // Same density evaluated from the direction: sqrt(1 - dst.z^2) stands in for sin(theta).
+    template )
+    static float32_t backwardDensity(const CodomainT dst)
+    {
+        return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi);
+    }
+};
+
+}
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git
a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
index 22c93ce193..aa395ad524 100644
--- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
+++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
@@ -205,7 +205,7 @@ struct SArithmeticConfiguration
 #undef DEFINE_ASSIGN
     }
 
-    std::string getConfigTemplateStructString()
+    std::string getConfigTemplateStructString() NBL_CONST_MEMBER_FUNC
     {
         std::ostringstream os;
         os << "nbl::hlsl::workgroup2::ArithmeticConfiguration<" << WorkgroupSizeLog2 << "," << SubgroupSizeLog2 << "," << ItemsPerInvocation_0 << ">;";
diff --git a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h
new file mode 100644
index 0000000000..b493e88b4d
--- /dev/null
+++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h
@@ -0,0 +1,143 @@
+#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
+#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
+
+#include "nbl/asset/IPipelineLayout.h"
+#include "nbl/video/declarations.h"
+#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl"
+
+namespace nbl::ext::envmap_importance_sampling
+{
+// Owns the luminance map, the warp map and the two compute pipelines (with their descriptor sets) that fill them.
+class EnvmapImportanceSampling final : public core::IReferenceCounted
+{
+    public:
+
+        static constexpr uint32_t MaxMipCountLuminance = 13u;
+        static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u;
+        static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u;
+
+        struct SCachedCreationParameters
+        {
+            core::smart_refctd_ptr utilities;
+            uint32_t genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension;
+            uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension;
+        };
+
+        struct SCreationParameters : public SCachedCreationParameters
+        {
+            core::smart_refctd_ptr assetManager = nullptr;
+            core::smart_refctd_ptr envMap = nullptr;
+
+            // Returns true when `assetManager`, `utilities` and `envMap` are all set.
+            // The first missing one is reported through the logger of `utilities`; a missing `utilities` fails silently.
+            inline bool validate() const
+            {
+                // the logger below lives behind `utilities`, so it has to be checked before it is dereferenced
+                if (!utilities)
+                    return false;
+
+                const auto validation = std::to_array
+                ({
+                    std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"),
+                    std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"),
+                    std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is nullptr!"),
+                });
+
+                system::logger_opt_ptr logger = utilities->getLogger();
+                for (const auto& [ok, error] : validation)
+                if (!ok)
+                {
+                    logger.log(error, system::ILogger::ELL_ERROR);
+                    return false;
+                }
+
+                assert(bool(assetManager->getSystem()));
+
+                return true;
+            }
+
+        };
+
+        static core::smart_refctd_ptr create(SCreationParameters&& params);
+
+        static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device);
+
+        static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device);
+
+        //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included
+        static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = "");
+
+        static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout);
+
+        static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout);
+
+        static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = "");
+
+        static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = "");
+
+        void computeWarpMap(video::IGPUCommandBuffer* cmdBuf);
+
+        // use this to synchronize warp map after computeWarpMap call
+        nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier(
+            core::bitflag dstStageMask,
+            core::bitflag dstAccessMask,
+            nbl::video::IGPUImage::LAYOUT oldLayout);
+
+        inline core::smart_refctd_ptr getLumaMapView()
+        {
+            return m_lumaMap;
+        }
+
+        inline core::smart_refctd_ptr getWarpMapView()
+        {
+            return m_warpMap;
+        }
+
+    protected:
+        struct ConstructorParams
+        {
+            SCachedCreationParameters creationParams;
+            hlsl::uint32_t2 lumaWorkgroupCount;
+            hlsl::uint32_t2 warpWorkgroupCount;
+            core::smart_refctd_ptr lumaMap;
+            core::smart_refctd_ptr warpMap;
+            core::smart_refctd_ptr genLumaPipeline;
+            core::smart_refctd_ptr genLumaDescriptorSet;
+            core::smart_refctd_ptr genWarpPipeline;
+            core::smart_refctd_ptr genWarpDescriptorSet;
+        };
+
+        explicit EnvmapImportanceSampling(ConstructorParams&& params) :
+            m_cachedCreationParams(std::move(params.creationParams)),
+            m_lumaWorkgroupCount(params.lumaWorkgroupCount),
+            m_warpWorkgroupCount(params.warpWorkgroupCount),
+            m_lumaMap(std::move(params.lumaMap)),
+            m_warpMap(std::move(params.warpMap)),
+            m_genLumaPipeline(std::move(params.genLumaPipeline)),
+            m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)),
+            m_genWarpPipeline(std::move(params.genWarpPipeline)),
+            m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet))
+        {}
+
+        ~EnvmapImportanceSampling() override {}
+
+    private:
+
+        SCachedCreationParameters m_cachedCreationParams;
+
+        hlsl::uint32_t2 m_lumaWorkgroupCount;
+        hlsl::uint32_t2 m_warpWorkgroupCount;
+
+        core::smart_refctd_ptr m_lumaMap;
+        core::smart_refctd_ptr m_warpMap;
+
+        core::smart_refctd_ptr m_genLumaPipeline;
+        core::smart_refctd_ptr m_genLumaDescriptorSet;
+
+        core::smart_refctd_ptr m_genWarpPipeline;
+        core::smart_refctd_ptr m_genWarpDescriptorSet;
+
+};
+
+}
+#endif
diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl
new file mode 100644
index 0000000000..e0240909f0
--- /dev/null
+++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl
@@ -0,0 +1,49 @@
+#ifndef
_NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_
+#define _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace ext
+{
+namespace envmap_importance_sampling
+{
+
+struct SLumaGenPushConstants
+{
+    float32_t4 luminanceScales; // dotted with float4(rgb, 1) in gen_luma, so .w acts as a constant offset
+    uint32_t2 lumaMapResolution;
+};
+
+struct SLumaMeasurePushConstants
+{
+    float32_t4 luminanceScales;
+    uint32_t2 lumaMapResolution;
+    uint64_t lumaMeasurementBuf;
+};
+
+struct SLumaMeasurement
+{
+    float32_t3 weightedDir;
+    float32_t luma;
+    float32_t maxLuma;
+};
+
+struct device_capabilities
+{
+#ifdef TEST_NATIVE
+    NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true;
+#else
+    NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false;
+#endif
+};
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl
new file mode 100644
index 0000000000..3a039945b4
--- /dev/null
+++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl
@@ -0,0 +1,27 @@
+#include "common.hlsl"
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::ext::envmap_importance_sampling;
+
+[[vk::push_constant]] SLumaGenPushConstants pc;
+
+[[vk::binding(0, 0)]] Texture2D envMap;
+[[vk::binding(1, 0)]] RWTexture2D outImage;
+
+// Writes one texel of `outImage` per invocation: dot(float4(rgb, 1), luminanceScales) * sin(pi * v),
+// where rgb is the `envMap` texel at the same coordinate and v is the texel-centre row coordinate in [0,1].
+[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+    if (all(threadID.xy < pc.lumaMapResolution))
+    {
+        // only the 2D part of the thread ID is compared and used for addressing, the resolution is a uint32_t2
+        const float uv_y = (float(threadID.y) + 0.5) / pc.lumaMapResolution.y;
+        const float32_t3 envMapSample = envMap.Load(int32_t3(threadID.xy, 0));
+        const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv_y);
+
+        outImage[threadID.xy] = luma;
+    }
+}
diff --git
a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl
new file mode 100644
index 0000000000..c621efd4af
--- /dev/null
+++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warp.comp.hlsl
@@ -0,0 +1,55 @@
+#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl"
+
+[[vk::binding(0, 0)]] Texture2D lumaMap;
+
+[[vk::binding(1, 0)]] RWTexture2D outImage;
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling;
+// Read access to the mip levels of `lumaMap` in the shape LuminanceMapSampler calls (`get` and `gather`).
+struct LuminanceAccessor
+{
+    float32_t get(uint32_t2 coord, uint32_t level)
+    {
+        return lumaMap.Load(uint32_t3(coord, level));
+    }
+    // Returns the 2x2 footprint at `coord` in the order (0,1), (1,1), (1,0), (0,0).
+    float32_t4 gather(uint32_t2 coord, uint32_t level)
+    {
+        return float32_t4(
+            lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)),
+            lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)),
+            lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)),
+            lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0))
+        );
+
+    }
+};
+
+[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+    uint32_t warpMapWidth, warpMapHeight;
+    outImage.GetDimensions(warpMapWidth, warpMapHeight);
+
+    // invocations outside the image being written have nothing to store (the dispatch may be rounded up to whole workgroups)
+    const uint32_t2 pixelCoord = threadID.xy;
+    if (any(pixelCoord >= uint32_t2(warpMapWidth, warpMapHeight)))
+        return;
+
+    LuminanceAccessor luminanceAccessor;
+    uint32_t lumaMapWidth, lumaMapHeight;
+
+    lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight);
+
+    using LuminanceSampler = LuminanceMapSampler;
+
+    // the sampler divides pixelCoord by (warpSize - 1), so warpSize has to be the size of the image being written
+    LuminanceSampler luminanceSampler =
+        LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(warpMapWidth, warpMapHeight));
+
+    outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord);
+
+}
diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl
new file mode 100644
index 0000000000..ffe6477f5a
--- /dev/null
+++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl
@@ -0,0 +1,143 @@
+#include
"nbl/builtin/hlsl/sampling/warps/spherical.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" + +#include "common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +// TODO(kevinyu): Temporary to make nsc works +using config_t = WORKGROUP_CONFIG_T; + +[[vk::push_constant]] SLumaMeasurePushConstants pc; + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +// final (level 1/2) scan needs to fit in one subgroup exactly +groupshared float32_t scratch[mpl::max_v]; + +struct PreloadedUnitData +{ + float32_t3 weightedDir; + float32_t luma; +}; + +struct ScratchProxy +{ + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = scratch[ix]; + } + + template + void set(const uint32_t ix, const AccessType value) + { + scratch[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct PreloadedData +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << config_t::WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = config_t::VirtualWorkgroupSize / WorkgroupSize; + + PreloadedUnitData getData(const uint32_t ix) + { + PreloadedUnitData value; + const int32_t2 pixelCoord = int32_t2(ix % pc.lumaMapResolution.x, ix / pc.lumaMapResolution.x); + const float32_t2 uv = (float32_t2(pixelCoord) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t luma = lumaMap.Load(int32_t3(pixelCoord, 0)); + value.weightedDir = sampling::warp::Spherical::warp(uv).dst * luma; + value.luma = luma; + return value; + } + + void preload() + { + const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data[idx] = getData(idx * WorkgroupSize + invocationIndex); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } + + PreloadedUnitData data[config_t::ItemsPerInvocation_0]; +}; + 
+static PreloadedData preloadData; + +struct DirXAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.x; + } +}; + +struct DirYAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.y; + } +}; + +struct DirZAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.z; + } +}; + +struct LumaAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].luma; + } +}; + +[numthreads(config_t::WorkgroupSize, 1, 1)] +[shader("compute")] +void main(uint32_t localInvocationIndex : SV_GroupIndex, uint32_t3 groupID: SV_GroupID) +{ + ScratchProxy scratchAccessor; + + preloadData.preload(); + preloadData.workgroupExecutionAndMemoryBarrier(); + + SLumaMeasurement measurement; + + DirXAccessor dirXAccessor; + measurement.weightedDir.x= workgroup2::reduction, device_capabilities>::template __call(dirXAccessor, scratchAccessor); + + DirYAccessor dirYAccessor; + measurement.weightedDir.y = workgroup2::reduction, device_capabilities>::template __call(dirYAccessor, scratchAccessor); + + DirZAccessor dirZAccessor; + measurement.weightedDir.z = workgroup2::reduction, device_capabilities>::template __call(dirZAccessor, scratchAccessor); + + LumaAccessor lumaAccessor; + measurement.luma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + measurement.maxLuma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + if (localInvocationIndex == 0) + vk::RawBufferStore(pc.lumaMeasurementBuf + (groupID.x * sizeof(SLumaMeasurement)), measurement); +} diff --git a/src/nbl/builtin/CMakeLists.txt 
b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..050907b3a3 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -339,6 +339,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/structs.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/measure_luma.comp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index af46b29aab..221c1fe88e 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -66,6 +66,18 @@ if(NBL_BUILD_DEBUG_DRAW) ) endif() +if(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING) + add_subdirectory(EnvmapImportanceSampling) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS} + PARENT_SCOPE + ) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB} + PARENT_SCOPE + ) +endif() + propagate_changed_variables_to_parent_scope() NBL_ADJUST_FOLDERS(ext) \ No newline at end of file diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp new file mode 100644 index 0000000000..bfe25b625e --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -0,0 +1,596 @@ +#include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" +#include 
"nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "nlohmann/detail/input/parser.hpp" + +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/debug_draw/builtin/build/CArchive.h" +#endif + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; + +namespace nbl::ext::envmap_importance_sampling +{ + +namespace +{ + constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; + + // image must have the first mip layout set to transfer src, and the rest to dst + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) + { + const auto mipLevels = image->getCreationParameters().mipLevels; + const auto extent = image->getCreationParameters().extent; + for (uint32_t mip_i = 1; mip_i < mipLevels; mip_i++) + { + + const IGPUCommandBuffer::SImageBlit blit = { + .srcMinCoord = {0, 0, 0}, + .srcMaxCoord = {extent.width >> (mip_i - 1), extent.height >> (mip_i - 1), 1}, + .dstMinCoord = {0, 0, 0}, + .dstMaxCoord = {extent.width >> mip_i, extent.height >> mip_i, 1}, + .layerCount = 1, + .srcBaseLayer = 0, + .dstBaseLayer = 0, + .srcMipLevel = mip_i - 1, + .dstMipLevel = mip_i, + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + }; + cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = image, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = mip_i, + .levelCount = 1, + .baseArrayLayer = 0u, + 
.layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} }); + + } + } + + core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) + { + const auto real_layers = layers ? layers:1u; + + IGPUImage::SCreationParams imgParams; + imgParams.extent = extent; + imgParams.arrayLayers = real_layers; + imgParams.flags = static_cast(0); + imgParams.format = format; + imgParams.mipLevels = mipLevels; + imgParams.samples = IImage::ESCF_1_BIT; + imgParams.type = IImage::ET_2D; + imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; + const auto image = device->createImage(std::move(imgParams)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + device->allocate(imageMemReqs, image.get()); + + IGPUImageView::SCreationParams viewparams; + viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = std::move(image); + viewparams.viewType = layers ? 
IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return device->createImageView(std::move(viewparams)); + } + + core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = logger; + lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + const auto assetType = bundle.getAssetType(); + logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + } +} + +core::smart_refctd_ptr EnvmapImportanceSampling::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + if (!params.validate()) + { + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent; + // we don't need the 1x1 mip for anything + const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; + const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D + { + const uint32_t width = 0x1u<>1u,1u }; + }(); + auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2 + { + return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1); + }; + + const auto device = params.utilities->getLogicalDevice(); + + ConstructorParams 
constructorParams; + + constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); + constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); + + const auto upscale = 0; + const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); + const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + + const auto genWarpPipelineLayout = createGenWarpPipelineLayout(device); + constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get()); + const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts()); + const auto genWarpDescriptorSet = genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0])); + + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; + envMapDescriptorInfo.desc = params.envMap; + envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo; + lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo; + lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo; + warpMapDescriptorInfo.desc = constructorParams.warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = 
genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo + }, + { + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo + }, + }; + + device->updateDescriptorSets(writes, {}); + + constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + constructorParams.genWarpDescriptorSet = genWarpDescriptorSet; + + constructorParams.creationParams = std::move(params); + + return core::smart_refctd_ptr(new EnvmapImportanceSampling(std::move(constructorParams))); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32G32_SFLOAT); +} + +smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + // extension should mount everything for you, regardless if content goes from virtual filesystem + // or disk directly - and you should never rely on application framework to expose extension data + #ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + #else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); + #endif + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return 
smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return 
nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); + const 
IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); + +} + +core::smart_refctd_ptr 
EnvmapImportanceSampling::createGenWarpPipelineLayout(video::ILogicalDevice* device) +{ + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); +} + +void EnvmapImportanceSampling::computeWarpMap(video::IGPUCommandBuffer* cmdBuf) +{ + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; + const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; + + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + // Gen Luma Map + { + SLumaGenPushConstants pcData = {}; + pcData.luminanceScales = { 0.2126729f, 0.7151522f, 0.0721750f, 0.0f }; 
+ pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupCount.x, m_lumaWorkgroupCount.y, 1); + } + + // Generate luminance mip map + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 1u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + generateMipmap(cmdBuf, lumaMapImage); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + 
.dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = lumaMapMipLevels - 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), + 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); + } + +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapImportanceSampling::getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag 
dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = newLayout, + }; +} + +} diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt new file mode 100644 index 0000000000..7486ba8923 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -0,0 +1,52 @@ +include(${NBL_ROOT_PATH}/cmake/common.cmake) + +set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +) + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" +) + +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) + +set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl") + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warp.comp.hlsl + ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl +) + +nbl_create_ext_library_project( + ENVMAP_IMPORTANCE_SAMPLING + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" + 
"${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" + "" + NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT="${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}" +) + +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build + TARGET ${LIB_NAME}_builtinsBuild + LINK_TO ${LIB_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS + common.hlsl + gen_luma.comp.hlsl + gen_warp.comp.hlsl + measure_luma.comp.hlsl + +) + + +add_library(Nabla::ext::EnvmapImportanceSampling ALIAS ${LIB_NAME})