diff --git a/examples_tests b/examples_tests index 587cbff28b..301fb402e6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 587cbff28b1d0b42f2f704c3ba9b247ad0276590 +Subproject commit 301fb402e6d0d3c204b1da67e920283d6f9abca5 diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index 7e92cbf282..235cdde8e4 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -22,16 +22,33 @@ namespace glsl #ifndef __HLSL_VERSION // GLM Aliases -template -genIUType bitfieldExtract(genIUType Value, int Offset, int Bits) +namespace impl { - return glm::bitfieldExtract(Value, Offset, Bits); -} +template +struct bitfieldInsert; -template -genIUType bitfieldInsert(genIUType const& Base, genIUType const& Insert, int Offset, int Bits) +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> >= 4) > +{ + static T __call( T base, T insert, uint32_t offset, uint32_t bits ) + { + return glm::bitfieldInsert(base, insert, offset, bits); + } +}; + +template +struct bitfieldExtract; + +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> >= 4) > { - return glm::bitfieldInsert(Base, Insert, Offset, Bits); + static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + { + return glm::bitfieldExtract(val, offsetBits, numBits); + } +}; } template @@ -184,21 +201,25 @@ void memoryBarrierShared() { namespace impl { -template -struct bitfieldExtract {}; +template +struct bitfieldInsert; -template -struct bitfieldExtract +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> >= 4) > { - static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + static T __call( T base, T insert, uint32_t offset, uint32_t bits ) { - static_assert( is_integral::value, "T is not an integral type!" ); - return val; + return spirv::bitFieldInsert(base, insert, offset, bits); } }; +template +struct bitfieldExtract; + template -struct bitfieldExtract +NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { @@ -207,7 +228,8 @@ struct bitfieldExtract }; template -struct bitfieldExtract +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { @@ -218,25 +240,63 @@ struct bitfieldExtract } //namespace impl template -T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) +T bitfieldReverse(T value) +{ + return spirv::bitReverse(value); +} + +#endif + +namespace impl +{ +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> == 2) +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> == 2) > +{ + static T __call( T base, T insert, uint32_t offset, uint32_t bits ) + { + const T mask = (T(1u) << bits) - T(1u); + const T shifted_mask = mask << offset; + return (base & ~shifted_mask) | ((insert & mask) << T(offset)); + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral::scalar_type> && size_of_v::scalar_type> == 2) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 2) > +{ + static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + { + const T ret = (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u)); + if (ret & (T(1u) << (numBits-1u))) + ret |= T(~0ull) << numBits; + return ret; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> == 2) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 2) > { - return impl::bitfieldExtract::value, is_integral::value>::__call(val,offsetBits,numBits); + static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + { + return (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u)); + } +}; } template T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits) { - return spirv::bitFieldInsert(base, insert, offset, bits); + return impl::bitfieldInsert::__call(base, insert, offset, bits); } template -T bitfieldReverse(T value) +T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) { - return spirv::bitReverse(value); + return impl::bitfieldExtract::__call(val, offsetBits, numBits); } -#endif - namespace impl { template diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index f838156c86..25fa61162d 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -110,16 +110,16 @@ struct quaternion uniformScaleSq = traits.uniformScaleSq; if (dontAssertValidMatrix) + { if (!valid) { this_t retval; retval.data = hlsl::promote(bit_cast(numeric_limits::quiet_NaN)); return retval; } + } else - { assert(valid); - } } if (uniformScaleSq < numeric_limits::min) { diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 1ad16dc28d..df56d46549 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #include +#include namespace nbl { @@ -125,30 +126,16 @@ inline matrix buildCameraLookAtMatrixRH( //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged template -inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(math::quaternion) quat) { static_assert(N == 3 || N == 4); + matrix mat = _static_cast>(quat); - outMat[0] = vector( - 1 - 2 * (quat.y * quat.y + quat.z * quat.z), - 2 * (quat.x * quat.y - quat.z * quat.w), - 2 * (quat.x * quat.z + quat.y * quat.w), - outMat[0][3] - ); - - outMat[1] = vector( - 2 * (quat.x * quat.y + quat.z * quat.w), - 1 - 2 * (quat.x * quat.x + quat.z * quat.z), - 2 * (quat.y * quat.z - quat.x * quat.w), - outMat[1][3] - ); - - outMat[2] = vector( - 2 * (quat.x * quat.z - quat.y * quat.w), - 2 * (quat.y * quat.z + quat.x * quat.w), - 1 - 2 * (quat.x * quat.x + quat.y * quat.y), - outMat[2][3] - ); + outMat[0] = mat[0]; + + outMat[1] = mat[1]; + + outMat[2] = mat[2]; } template diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8929609c34..64573ac85f 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -5,9 +5,11 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" -#include "nbl/builtin/hlsl/random/pcg.hlsl" namespace nbl { @@ -22,284 +24,389 @@ struct QuantizedSequence; namespace impl { -template +template struct unorm_constant; template<> -struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; template<> -struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; template<> -struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; template<> -struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; template<> -struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; template<> -struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; template<> -struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; -template -struct decode_helper; - -template -struct decode_helper +// FullWidth if intend to decode before scramble, not if decode after scramble +template +struct encode_helper { - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using sequence_type = QuantizedSequence; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; + using sequence_type = Q; + using input_type = vector; + using uniform_storage_scalar_type = unsigned_integer_of_size_t; + using uniform_storage_type = vector; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar) + NBL_CONSTEXPR_STATIC_INLINE uint16_t Bits = FullWidth ? (8u * size_of_v - 1u) : sequence_type::BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << Bits) - 1u; + + static sequence_type __call(const input_type unormvec) + { + uniform_storage_type asuint; + NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) + asuint[i] = uniform_storage_scalar_type(unormvec[i] * UNormMultiplier); + NBL_IF_CONSTEXPR(Dim==1) + return sequence_type::create(asuint[0]); + else + return sequence_type::create(asuint); + } +}; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) +template +struct decode_before_scramble_helper +{ + using storage_scalar_type = typename Q::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; + using uvec_type = vector; + using sequence_type = Q; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; + + return_type operator()(const uvec_type scrambleKey) { uvec_type seqVal; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ scrambleKey[i]; - return return_type(seqVal) * bit_cast(UNormConstant); + NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) + seqVal[i] = val.get(i); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast >(UNormConstant); } + + sequence_type val; }; -template -struct decode_helper +template +struct decode_after_scramble_helper { - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using sequence_type = QuantizedSequence; - using sequence_store_type = typename sequence_type::store_type; - using sequence_scalar_type = typename vector_traits::scalar_type; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; - - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + using storage_scalar_type = typename Q::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; + using uvec_type = vector; + using sequence_type = Q; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; + + return_type operator()(NBL_CONST_REF_ARG(sequence_type) scrambleKey) { sequence_type scramble; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, scrambleKey[i]); - scramble.data ^= val.data; + scramble.data = val.data ^ scrambleKey.data; uvec_type seqVal; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) + NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = scramble.get(i); - return return_type(seqVal) * bit_cast(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } + + sequence_type val; }; -} -template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) -{ - return impl::decode_helper::__call(val, scrambleKey); +template +NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } -#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 - // all Dim=1 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) -struct QuantizedSequence +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) +struct QuantizedSequence) > { + using this_t = QuantizedSequence; using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u); + + static this_t create(const store_type value) + { + this_t seq; + seq.data = value; + return seq; + } + + store_type get(const uint16_t idx) { assert(idx >= 0 && idx < 1); return data; } + void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; } + + template + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } - store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } - void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } + template + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) + { + impl::decode_after_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); + } store_type data; }; // uint16_t, uint32_t; Dim=2,3,4 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) > { + using this_t = QuantizedSequence; using store_type = T; + using scalar_type = store_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; + + static this_t create(const vector value) + { + this_t seq; + seq.data = store_type(0u); + NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) + seq.set(i, value[i]); + return seq; + } store_type get(const uint16_t idx) { - assert(idx > 0 && idx < Dim); - return (data >> (BitsPerComponent * idx)) & Mask; + assert(idx >= 0 && idx < Dim); + return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent); } void set(const uint16_t idx, const store_type value) { - assert(idx > 0 && idx < Dim); - const uint16_t bits = (BitsPerComponent * idx); - data &= ~(Mask << bits); - data |= ((value >> DiscardBits) & Mask) << bits; + assert(idx >= 0 && idx < Dim); + data = glsl::bitfieldInsert(data, scalar_type(value >> DiscardBits), BitsPerComponent * idx, BitsPerComponent); + } + + template + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } + + template + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) + { + impl::decode_after_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); } store_type data; }; // Dim 2,3,4 matches vector dim -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; - scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } - void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } + static this_t create(const store_type value) + { + this_t seq; + seq.data = value; + return seq; + } + + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < Dim); return data[idx]; } + void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; } + + template + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } + + template + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) + { + impl::decode_after_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); + } store_type data; }; -// uint16_t2, uint32_t2; Dim=3 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence::Dimension == 2 && Dim == 3) > +// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t +template NBL_PARTIAL_REQ_TOP(is_same_v && Dim == 3) +struct QuantizedSequence && Dim == 3) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; + + static this_t create(const vector value) + { + this_t seq; + seq.data = hlsl::promote(0u); + NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) + seq.set(i, value[i]); + return seq; + } scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + return glsl::bitfieldExtract(data[0], 0u, BitsPerComponent); + else if (idx == 1) // y { - return data[idx] & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - scalar_type z = (data[0] >> BitsPerComponent) & zmask; - z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits; - return z; + scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits); + y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits; + return y; } + else // z + return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent); } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 3); - if (idx < 2) + const scalar_type trunc_val = value >> DiscardBits; + if (idx == 0) // x + data[0] = glsl::bitfieldInsert(data[0], trunc_val, 0u, BitsPerComponent); + else if (idx == 1) // y { - const scalar_type trunc_val = value >> DiscardBits; - data[idx] &= ~Mask; - data[idx] |= trunc_val & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - const scalar_type trunc_val = value >> DiscardBits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; + data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits); + data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u); } + else // z + data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent); + } + + template + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } + + template + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) + { + impl::decode_after_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); } store_type data; }; -// uint16_t2, uint32_t2; Dim=4 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence::Dimension == 2 && Dim == 4) > +// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2 +template NBL_PARTIAL_REQ_TOP(is_same_v && Dim == 4) +struct QuantizedSequence && Dim == 4) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; - scalar_type get(const uint16_t idx) + static this_t create(const vector value) { - assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; + this_t seq; + seq.data = hlsl::promote(0u); + NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) + seq.set(i, value[i]); + return seq; } - void set(const uint16_t idx, const scalar_type value) + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - const uint16_t odd = idx & uint16_t(1u); - data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); + if (idx < 2) // x y + { + return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent); + } } - store_type data; -}; - -// uint16_t4, uint32_t4; Dim=2 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) -struct QuantizedSequence::Dimension == 4 && Dim == 2) > -{ - using store_type = T; - using scalar_type = typename vector_traits::scalar_type; - using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; - - base_type get(const uint16_t idx) + void set(const uint16_t idx, const scalar_type value) { - assert(idx >= 0 && idx < 2); - base_type a; - a[0] = data[uint16_t(2u) * idx]; - a[1] = data[uint16_t(2u) * idx + 1]; - return a; + assert(idx >= 0 && idx < 4); + const scalar_type trunc_val = value >> DiscardBits; + if (idx < 2) // x y + { + data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent); + } } - void set(const uint16_t idx, const base_type value) + template + static this_t encode(const vector value) { - assert(idx >= 0 && idx < 2); - base_type a; - data[uint16_t(2u) * idx] = value[0]; - data[uint16_t(2u) * idx + 1] = value[1]; + return impl::encode_helper::__call(value); } - store_type data; -}; - -// uint16_t4, uint32_t4; Dim=3 -// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y -// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence::Dimension == 4 && Dim == 3) > -{ - using store_type = T; - using scalar_type = typename vector_traits::scalar_type; - using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; - - base_type get(const uint16_t idx) + template + vector decode(const vector,Dimension> scrambleKey) { - assert(idx >= 0 && idx < 3); - base_type a; - a[0] = data[idx]; - a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; - return a; + impl::decode_before_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); } - - void set(const uint16_t idx, const base_type value) + template + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - assert(idx >= 0 && idx < 3); - data[idx] = value[0]; - data[3] &= ~Mask; - data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); + impl::decode_after_scramble_helper helper; + helper.val.data = data; + return helper(scrambleKey); } store_type data; }; -#undef SEQUENCE_SPECIALIZATION_CONCEPT +// no uint16_t4, uint32_t4; Dim=2 }