From df94ffe689ccdc6f273dddd0f6710e9c310f0733 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 9 Jan 2026 10:25:45 +0700
Subject: [PATCH 01/20] changed unorm_constant template params to include float type

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 83 +++++++++----------
 1 file changed, 40 insertions(+), 43 deletions(-)
diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index 8929609c34..b6f1e0d2e5 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -22,22 +22,22 @@ struct QuantizedSequence;
 
 namespace impl
 {
-template<uint16_t Bits>
+template<typename FloatScalar, uint16_t Bits>
 struct unorm_constant;
 template<>
-struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; };
+struct unorm_constant<float,4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; };
 template<>
-struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; };
+struct unorm_constant<float,5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; };
 template<>
-struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; };
+struct unorm_constant<float,8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; };
 template<>
-struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; };
+struct unorm_constant<float,10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; };
 template<>
-struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; };
+struct unorm_constant<float,16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; };
 template<>
-struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; };
+struct unorm_constant<float,21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; };
 template<>
-struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
+struct unorm_constant<float,32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
 
 template<typename T, uint16_t D, bool EncodeScramble>
 struct decode_helper;
@@ -46,30 +46,28 @@ template<typename T, uint16_t D>
 struct decode_helper<T, D, false>
 {
     using scalar_type = typename vector_traits<T>::scalar_type;
-    using fp_type = typename float_of_size<sizeof(scalar_type)>::type;
-    using uvec_type = vector<scalar_type, D>;
+    using uvec_type = vector<uint32_t, D>;
     using sequence_type = QuantizedSequence<T, D>;
-    using return_type = vector<fp_type, D>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value;
+    using return_type = vector<float32_t, D>;
+    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<float,8u*sizeof(scalar_type)>::value;
 
     static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
     {
         uvec_type seqVal;
         NBL_UNROLL for(uint16_t i = 0; i < D; i++)
             seqVal[i] = val.get(i) ^ scrambleKey[i];
-        return return_type(seqVal) * bit_cast<fp_type>(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
     }
 };
 template<typename T, uint16_t D>
 struct decode_helper<T, D, true>
 {
     using scalar_type = typename vector_traits<T>::scalar_type;
-    using fp_type = typename float_of_size<sizeof(scalar_type)>::type;
-    using uvec_type = vector<scalar_type, D>;
+    using uvec_type = vector<uint32_t, D>;
     using sequence_type = QuantizedSequence<T, D>;
     using sequence_store_type = typename sequence_type::store_type;
     using sequence_scalar_type = typename vector_traits<sequence_store_type>::scalar_type;
-    using return_type = vector<fp_type, D>;
+    using return_type = vector<float32_t, D>;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant;
 
     static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
@@ -82,25 +80,26 @@ struct decode_helper<T, D, true>
         uvec_type seqVal;
         NBL_UNROLL for(uint16_t i = 0; i < D; i++)
             seqVal[i] = scramble.get(i);
-        return return_type(seqVal) * bit_cast<fp_type>(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
     }
 };
+
+template<typename T>
+NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
 }
 
 template<typename T, uint16_t D, bool EncodeScramble=false>
-vector<typename float_of_size<sizeof(typename vector_traits<T>::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<typename vector_traits<T>::scalar_type, D> scrambleKey)
+vector<float_of_size_t<sizeof(typename vector_traits<T>::scalar_type)>, D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<typename vector_traits<T>::scalar_type, D> scrambleKey)
 {
     return impl::decode_helper<T,D,EncodeScramble>::__call(val, scrambleKey);
 }
 
-#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4
-
 // all Dim=1
-template<typename T> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT)
-struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT) >
+template<typename T> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T>)
+struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T>) >
 {
     using store_type = T;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(store_type)>::value;
 
     store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
     void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; }
@@ -109,15 +108,15 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEP
 };
 
 // uint16_t, uint32_t; Dim=2,3,4
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5) >
 {
     using store_type = T;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<BitsPerComponent>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,BitsPerComponent>::value;
 
     store_type get(const uint16_t idx)
     {
@@ -137,12 +136,12 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
 };
 
 // Dim 2,3,4 matches vector dim
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(scalar_type)>::value;
 
     scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; }
     void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; }
@@ -151,8 +150,8 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
 };
 
 // uint16_t2, uint32_t2; Dim=3
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 3) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 3)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 3) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
@@ -160,7 +159,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<BitsPerComponent>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,BitsPerComponent>::value;
 
     scalar_type get(const uint16_t idx)
     {
@@ -204,8 +203,8 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
 };
 
 // uint16_t2, uint32_t2; Dim=4
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 4)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 4) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 4)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 4) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
@@ -213,7 +212,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<BitsPerComponent>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,BitsPerComponent>::value;
 
     scalar_type get(const uint16_t idx)
     {
@@ -235,13 +234,13 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
 };
 
 // uint16_t4, uint32_t4; Dim=2
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 2)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 2) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 2)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 2) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     using base_type = vector<scalar_type, 2>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(scalar_type)>::value;
 
     base_type get(const uint16_t idx)
     {
@@ -266,8 +265,8 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
 // uint16_t4, uint32_t4; Dim=3
 // uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y
 // uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 3) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 3)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 3) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
@@ -277,7 +276,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
     NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v<scalar_type>;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<base_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(scalar_type)>::value;
 
     base_type get(const uint16_t idx)
     {
@@ -299,8 +298,6 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONC
     store_type data;
 };
 
-#undef SEQUENCE_SPECIALIZATION_CONCEPT
-
 }
 
 }

From 2c1a99d52cd87ac6228ca4447a8c94b2edd528ec Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 9 Jan 2026 12:20:35 +0700
Subject: [PATCH 02/20] separate pre- and post-decode scramble into different
 structs, remove UNormConstant from QuantizedSequence

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 46 +++++++++----------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index b6f1e0d2e5..f669a9fd3f 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -39,43 +39,37 @@ struct unorm_constant<float,21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0
 template<>
 struct unorm_constant<float,32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
 
-template<typename T, uint16_t D, bool EncodeScramble>
-struct decode_helper;
-
 template<typename T, uint16_t D>
-struct decode_helper<T, D, false>
+struct decode_before_scramble_helper
 {
     using scalar_type = typename vector_traits<T>::scalar_type;
     using uvec_type = vector<uint32_t, D>;
     using sequence_type = QuantizedSequence<T, D>;
     using return_type = vector<float32_t, D>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<float,8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float,8u*sizeof(scalar_type)>::value;
 
     static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
     {
         uvec_type seqVal;
         NBL_UNROLL for(uint16_t i = 0; i < D; i++)
-            seqVal[i] = val.get(i) ^ scrambleKey[i];
+            seqVal[i] = val.get(i);
+        seqVal ^= scrambleKey;
         return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
     }
 };
 template<typename T, uint16_t D>
-struct decode_helper<T, D, true>
+struct decode_after_scramble_helper
 {
     using scalar_type = typename vector_traits<T>::scalar_type;
     using uvec_type = vector<uint32_t, D>;
     using sequence_type = QuantizedSequence<T, D>;
-    using sequence_store_type = typename sequence_type::store_type;
-    using sequence_scalar_type = typename vector_traits<sequence_store_type>::scalar_type;
     using return_type = vector<float32_t, D>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float,sequence_type::BitsPerComponent>::value;
 
-    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
+    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, NBL_CONST_REF_ARG(sequence_type) scrambleKey)
     {
         sequence_type scramble;
-        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
-            scramble.set(i, scrambleKey[i]);
-        scramble.data ^= val.data;
+        scramble.data = val.data ^ scrambleKey.data;
 
         uvec_type seqVal;
         NBL_UNROLL for(uint16_t i = 0; i < D; i++)
@@ -88,10 +82,18 @@ template<typename T>
 NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
 }
 
-template<typename T, uint16_t D, bool EncodeScramble=false>
-vector<float_of_size_t<sizeof(typename vector_traits<T>::scalar_type)>, D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<typename vector_traits<T>::scalar_type, D> scrambleKey)
+// post-decode scramble
+template<typename R, typename T, uint16_t D>
+vector<R,D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<unsigned_integer_of_size_t<sizeof(R)>,D> scrambleKey)
+{
+    return impl::decode_before_scramble_helper<T,D>::__call(val, scrambleKey);
+}
+
+// pre-decode scramble
+template<typename R, typename T, uint16_t D>
+vector<R,D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, NBL_CONST_REF_ARG(QuantizedSequence<T, D>) scrambleKey)
 {
-    return impl::decode_helper<T,D,EncodeScramble>::__call(val, scrambleKey);
+    return impl::decode_after_scramble_helper<T,D>::__call(val, scrambleKey);
 }
 
 // all Dim=1
@@ -99,7 +101,7 @@ template<typename T> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T>)
 struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T>) >
 {
     using store_type = T;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(store_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;
 
     store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
     void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; }
@@ -116,7 +118,6 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,BitsPerComponent>::value;
 
     store_type get(const uint16_t idx)
     {
@@ -141,7 +142,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
 
     scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; }
     void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; }
@@ -159,7 +160,6 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,BitsPerComponent>::value;
 
     scalar_type get(const uint16_t idx)
     {
@@ -212,7 +212,6 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,BitsPerComponent>::value;
 
     scalar_type get(const uint16_t idx)
     {
@@ -240,7 +239,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     using base_type = vector<scalar_type, 2>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
 
     base_type get(const uint16_t idx)
     {
@@ -276,7 +275,6 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v<scalar_type>;
     NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<base_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<float,8u*sizeof(scalar_type)>::value;
 
     base_type get(const uint16_t idx)
     {

From 8894dd10863ead92a9f54271281a03bf7ef8d49a Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 9 Jan 2026 15:28:12 +0700
Subject: [PATCH 03/20] change quantized sequence to set/get with
 bitfieldInsert/Extract, also order stored values from LSB to MSB

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 176 +++++++++---------
 1 file changed, 88 insertions(+), 88 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index f669a9fd3f..025e506fb4 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -116,21 +116,17 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     using store_type = T;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent;
 
     store_type get(const uint16_t idx)
     {
         assert(idx > 0 && idx < Dim);
-        return (data >> (BitsPerComponent * idx)) & Mask;
+        return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent);
     }
 
     void set(const uint16_t idx, const store_type value)
     {
         assert(idx > 0 && idx < Dim);
-        const uint16_t bits = (BitsPerComponent * idx);
-        data &= ~(Mask << bits);
-        data |= ((value >> DiscardBits) & Mask) << bits;
+        glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent);
     }
 
     store_type data;
@@ -150,150 +146,154 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     store_type data;
 };
 
-// uint16_t2, uint32_t2; Dim=3
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 3) >
+// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
 
     scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 3);
-        if (idx < 2)
+        if (idx == 0)   // x
+            return glsl::bitfieldExtract(data[0], 0u, BitsPerComponent);
+        else if (idx == 1)  // y
         {
-            return data[idx] & Mask;
-        }
-        else
-        {
-            const scalar_type zbits = scalar_type(DiscardBits);
-            const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u);
-            scalar_type z = (data[0] >> BitsPerComponent) & zmask;
-            z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits;
-            return z;
+            scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits);
+            y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits;
+            return y;
         }
+        else    // z
+            return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent);
     }
 
     void set(const uint16_t idx, const scalar_type value)
     {
         assert(idx >= 0 && idx < 3);
-        if (idx < 2)
+        if (idx == 0)   // x
+            glsl::bitfieldInsert(data[0], value, 0u, BitsPerComponent);
+        else if (idx == 1)  // y
         {
-            const scalar_type trunc_val = value >> DiscardBits;
-            data[idx] &= ~Mask;
-            data[idx] |= trunc_val & Mask;
-        }
-        else
-        {
-            const scalar_type zbits = scalar_type(DiscardBits);
-            const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u);
-            const scalar_type trunc_val = value >> DiscardBits;
-            data[0] &= Mask;
-            data[1] &= Mask;
-            data[0] |= (trunc_val & zmask) << BitsPerComponent;
-            data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent;
+            glsl::bitfieldInsert(data[0], value, BitsPerComponent, DiscardBits);
+            glsl::bitfieldInsert(data[1], value >> DiscardBits, 0u, DiscardBits - 1u);
         }
+        else    // z
+            glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent);
     }
 
     store_type data;
 };
 
-// uint16_t2, uint32_t2; Dim=4
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 4)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 2 && Dim == 4) >
+// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
 
     scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 4);
-        const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
-        return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask;
+        if (idx >= 0 && idx < 2) // x y
+        {
+            return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent);
+        }
+        else    // z w
+        {
+            return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
+        }
     }
 
     void set(const uint16_t idx, const scalar_type value)
     {
         assert(idx >= 0 && idx < 4);
-        const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
-        const uint16_t odd = idx & uint16_t(1u);
-        data[i] &= hlsl::mix(~Mask, Mask, bool(odd));
-        data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd);
+        if (idx >= 0 && idx < 2) // x y
+        {
+            glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent);
+        }
+        else    // z w
+        {
+            glsl::bitfieldInsert(data[1], value, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
+        }
     }
 
     store_type data;
 };
 
-// uint16_t4, uint32_t4; Dim=2
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 2)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 2) >
-{
-    using store_type = T;
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using base_type = vector<scalar_type, 2>;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
+// no uint16_t4, uint32_t4; Dim=2
 
-    base_type get(const uint16_t idx)
-    {
-        assert(idx >= 0 && idx < 2);
-        base_type a;
-        a[0] = data[uint16_t(2u) * idx];
-        a[1] = data[uint16_t(2u) * idx + 1];
-        return a;
-    }
-
-    void set(const uint16_t idx, const base_type value)
-    {
-        assert(idx >= 0 && idx < 2);
-        base_type a;
-        data[uint16_t(2u) * idx] = value[0];
-        data[uint16_t(2u) * idx + 1] = value[1];
-    }
-
-    store_type data;
-};
-
-// uint16_t4, uint32_t4; Dim=3
-// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y
-// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 4 && Dim == 3) >
+// uint32_t4; Dim=3 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
+// use uint32_t2 instead of uint16_t4
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3) >
 {
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     using base_type = vector<scalar_type, 2>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v<scalar_type>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u);
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<base_type>) - BitsPerComponent;
 
     base_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 3);
-        base_type a;
-        a[0] = data[idx];
-        a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask;
-        return a;
+        if (idx == 0)   // x
+        {
+            base_type x;
+            x[0] = data[0];
+            x[1] = glsl::bitfieldExtract(data[1], 0u, 10u);
+            return x;
+        }
+        else if (idx == 1)  // y
+        {
+            base_type y;
+            y[0] = glsl::bitfieldExtract(data[1], 10u, 22u);
+            y[0] |= glsl::bitfieldExtract(data[2], 0u, 10u) << 22u;
+            y[1] = glsl::bitfieldExtract(data[2], 10u, 10u);
+            return y;
+        }
+        else    // z
+        {
+            base_type z;
+            z[0] = glsl::bitfieldExtract(data[2], 20u, 12u);    // reads must extract, mirroring set(): z[0] bits 0..11
+            z[0] |= glsl::bitfieldExtract(data[3], 0u, 20u) << 12u; // z[0] bits 12..31
+            z[1] = glsl::bitfieldExtract(data[3], 20u, 10u);    // remaining 10 bits of z
+            return z;
+        }
     }
 
     void set(const uint16_t idx, const base_type value)
     {
         assert(idx >= 0 && idx < 3);
-        data[idx] = value[0];
-        data[3] &= ~Mask;
-        data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx);
+        if (idx == 0)   // x
+        {
+            data[0] = value[0];
+            data[1] = glsl::bitfieldInsert(data[1], value[1], 0u, 10u); // insert result must be stored back
+        }
+        else if (idx == 1)  // y
+        {
+            data[1] = glsl::bitfieldInsert(data[1], value[0], 10u, 22u);
+            data[2] = glsl::bitfieldInsert(data[2], value[0] >> 22u, 0u, 10u);
+            data[2] = glsl::bitfieldInsert(data[2], value[1], 10u, 10u);
+        }
+        else    // z
+        {
+            data[2] = glsl::bitfieldInsert(data[2], value[0], 20u, 12u);
+            data[3] = glsl::bitfieldInsert(data[3], value[0] >> 12u, 0u, 20u);
+            data[3] = glsl::bitfieldInsert(data[3], value[1], 20u, 10u);
+        }
     }
 
     store_type data;
+    // data[0] = | -- x 32 bits -- |
+    // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB
+    // data[2] = MSB | -- z 12 bits -- | -- y 20 bits -- | LSB
+    // data[3] = | -- z 30 bits -- |
 };
 
 }

From 639f464a1788117f8c5bb32646f487380098c836 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 9 Jan 2026 15:42:44 +0700
Subject: [PATCH 04/20] added create functions that should not preserve
 existing bits

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index 025e506fb4..bd763ad6d7 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -117,6 +117,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
 
+    static QuantizedSequence<T, Dim> create(const vector<store_type, Dim> value)
+    {
+        QuantizedSequence<T, Dim> seq;
+        NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
+            seq.set(i, value[i]);
+        return seq;
+    }
+
     store_type get(const uint16_t idx)
     {
         assert(idx > 0 && idx < Dim);
@@ -156,6 +164,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
 
+    static QuantizedSequence<T, Dim> create(const vector<scalar_type, Dim> value)
+    {
+        QuantizedSequence<T, Dim> seq;
+        NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
+            seq.set(i, value[i]);
+        return seq;
+    }
+
     scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 3);
@@ -197,6 +213,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
 
+    static QuantizedSequence<T, Dim> create(const vector<scalar_type, Dim> value)
+    {
+        QuantizedSequence<T, Dim> seq;
+        NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
+            seq.set(i, value[i]);
+        return seq;
+    }
+
     scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 4);

From 85d169e19168d21807f9d7f74152a3c19c1ddafa Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 19 Jan 2026 15:25:48 +0700
Subject: [PATCH 05/20] make decode a quantized sequence member instead

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 156 ++++++++++++++----
 1 file changed, 123 insertions(+), 33 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index bd763ad6d7..2769d6e8dc 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -39,61 +39,53 @@ struct unorm_constant<float,21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0
 template<>
 struct unorm_constant<float,32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
 
-template<typename T, uint16_t D>
+template<typename Q, typename F>
 struct decode_before_scramble_helper
 {
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using uvec_type = vector<uint32_t, D>;
-    using sequence_type = QuantizedSequence<T, D>;
-    using return_type = vector<float32_t, D>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float,8u*sizeof(scalar_type)>::value;
-
-    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
+    using scalar_type = typename Q::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
+    using uvec_type = vector<uint32_t, Dim>;
+    using sequence_type = Q;
+    using return_type = vector<F, Dim>;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,8u*sizeof(scalar_type)>::value;
+
+    return_type operator()(const uvec_type scrambleKey)
     {
         uvec_type seqVal;
-        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
+        NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = val.get(i);
         seqVal ^= scrambleKey;
         return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
     }
+
+    sequence_type val;
 };
-template<typename T, uint16_t D>
+template<typename Q, typename F>
 struct decode_after_scramble_helper
 {
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using uvec_type = vector<uint32_t, D>;
-    using sequence_type = QuantizedSequence<T, D>;
-    using return_type = vector<float32_t, D>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float,sequence_type::BitsPerComponent>::value;
-
-    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, NBL_CONST_REF_ARG(sequence_type) scrambleKey)
+    using scalar_type = typename Q::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
+    using uvec_type = vector<uint32_t, Dim>;
+    using sequence_type = Q;
+    using return_type = vector<F, Dim>;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,sequence_type::BitsPerComponent>::value;
+
+    return_type operator()(NBL_CONST_REF_ARG(sequence_type) scrambleKey)
     {
         sequence_type scramble;
         scramble.data = val.data ^ scrambleKey.data;
 
         uvec_type seqVal;
-        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
+        NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = scramble.get(i);
         return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
     }
+
+    sequence_type val;
 };
 
 template<typename T>
-NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
-}
-
-// post-decode scramble
-template<typename R, typename T, uint16_t D>
-vector<R,D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<unsigned_integer_of_size_t<sizeof(R)>,D> scrambleKey)
-{
-    return impl::decode_before_scramble_helper<T,D>::__call(val, scrambleKey);
-}
-
-// pre-decode scramble
-template<typename R, typename T, uint16_t D>
-vector<R,D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, NBL_CONST_REF_ARG(QuantizedSequence<T, D>) scrambleKey)
-{
-    return impl::decode_after_scramble_helper<T,D>::__call(val, scrambleKey);
+NBL_BOOL_CONCEPT SequenceSpecialization = concepts::IntVector<T> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
 }
 
 // all Dim=1
@@ -101,11 +93,28 @@ template<typename T> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T>)
 struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T>) >
 {
     using store_type = T;
+    using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u);
 
     store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
     void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; }
 
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+
     store_type data;
 };
 
@@ -114,8 +123,10 @@ template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecializat
 struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5) >
 {
     using store_type = T;
+    using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     static QuantizedSequence<T, Dim> create(const vector<store_type, Dim> value)
     {
@@ -137,6 +148,21 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent);
     }
 
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+
     store_type data;
 };
 
@@ -147,10 +173,26 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; }
     void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; }
 
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+
     store_type data;
 };
 
@@ -163,6 +205,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     static QuantizedSequence<T, Dim> create(const vector<scalar_type, Dim> value)
     {
@@ -201,6 +244,21 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
             glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent);
     }
 
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+
     store_type data;
 };
 
@@ -212,6 +270,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     static QuantizedSequence<T, Dim> create(const vector<scalar_type, Dim> value)
     {
@@ -247,6 +306,21 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         }
     }
 
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+
     store_type data;
 };
 
@@ -262,6 +336,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     using base_type = vector<scalar_type, 2>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     base_type get(const uint16_t idx)
     {
@@ -313,6 +388,21 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         }
     }
 
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+
     store_type data;
     // data[0] = | -- x 32 bits -- |
     // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB

From 64fb4d6f457f68cf08eb8731d38c442a898f9ed7 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 19 Jan 2026 17:03:44 +0700
Subject: [PATCH 06/20] added create factory for all quantized sequences, added
 encode method for taking unorm input

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 140 +++++++++++++-----
 1 file changed, 106 insertions(+), 34 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index 2769d6e8dc..dfc22634f6 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -39,15 +39,34 @@ struct unorm_constant<float,21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0
 template<>
 struct unorm_constant<float,32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
 
+template<typename Q, typename F>
+struct encode_helper
+{
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
+    using sequence_type = Q;
+    using unorm_vec_type = vector<F, Dim>;
+    using unsigned_scalar_type = unsigned_integer_of_size_t<sizeof(F)>; 
+    using uvec_type = vector<unsigned_scalar_type, Dim>;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v<unsigned_scalar_type> - 1u)) - 1u;
+
+    static sequence_type __call(const unorm_vec_type unormvec)
+    {
+        uvec_type asuint;
+        NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
+            asuint[i] = unsigned_scalar_type(unormvec[i] * UNormMultiplier);
+        return sequence_type::create(asuint);
+    }
+};
+
 template<typename Q, typename F>
 struct decode_before_scramble_helper
 {
-    using scalar_type = typename Q::scalar_type;
+    using unsigned_scalar_type = typename Q::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
     using uvec_type = vector<uint32_t, Dim>;
     using sequence_type = Q;
     using return_type = vector<F, Dim>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,8u*sizeof(unsigned_scalar_type)>::value;
 
     return_type operator()(const uvec_type scrambleKey)
     {
@@ -55,7 +74,7 @@ struct decode_before_scramble_helper
         NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = val.get(i);
         seqVal ^= scrambleKey;
-        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(unsigned_scalar_type)> >(UNormConstant);
     }
 
     sequence_type val;
@@ -63,7 +82,7 @@ struct decode_before_scramble_helper
 template<typename Q, typename F>
 struct decode_after_scramble_helper
 {
-    using scalar_type = typename Q::scalar_type;
+    using unsigned_scalar_type = typename Q::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
     using uvec_type = vector<uint32_t, Dim>;
     using sequence_type = Q;
@@ -78,39 +97,53 @@ struct decode_after_scramble_helper
         uvec_type seqVal;
         NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = scramble.get(i);
-        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(unsigned_scalar_type)> >(UNormConstant);
     }
 
     sequence_type val;
 };
 
 template<typename T>
-NBL_BOOL_CONCEPT SequenceSpecialization = concepts::IntVector<T> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
+NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
 }
 
 // all Dim=1
 template<typename T> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T>)
 struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T>) >
 {
+    using this_t = QuantizedSequence<T, 1>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u);
 
+    static this_t create(const store_type value)
+    {
+        this_t seq;
+        seq.data = value;
+        return seq;
+    }
+
     store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
     void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; }
 
+    template<typename F>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F>::__call(value);
+    }
+
     template<typename F>
     vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_before_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
     template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_after_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
@@ -122,16 +155,18 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T
 template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5)
 struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
-    using scalar_type = typename vector_traits<T>::scalar_type;
+    using scalar_type = store_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
-    static QuantizedSequence<T, Dim> create(const vector<store_type, Dim> value)
+    static this_t create(const vector<store_type, Dimension> value)
     {
-        QuantizedSequence<T, Dim> seq;
-        NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
+        this_t seq;
+        seq.data = store_type(0u);
+        NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
             seq.set(i, value[i]);
         return seq;
     }
@@ -148,17 +183,23 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent);
     }
 
+    template<typename F>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F>::__call(value);
+    }
+
     template<typename F>
     vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_before_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
     template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_after_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
@@ -170,25 +211,39 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5)
 struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
+    static this_t create(const store_type value)
+    {
+        this_t seq;
+        seq.data = value;
+        return seq;
+    }
+
     scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; }
     void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; }
 
+    template<typename F>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F>::__call(value);
+    }
+
     template<typename F>
     vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_before_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
     template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_after_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
@@ -200,6 +255,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3)
 struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
@@ -207,10 +263,11 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
-    static QuantizedSequence<T, Dim> create(const vector<scalar_type, Dim> value)
+    static this_t create(const vector<scalar_type, Dimension> value)
     {
-        QuantizedSequence<T, Dim> seq;
-        NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
+        this_t seq;
+        seq.data = hlsl::promote<store_type>(0u);
+        NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
             seq.set(i, value[i]);
         return seq;
     }
@@ -244,17 +301,23 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
             glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent);
     }
 
+    template<typename F>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F>::__call(value);
+    }
+
     template<typename F>
     vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_before_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
     template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_after_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
@@ -266,16 +329,18 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4)
 struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
-    static QuantizedSequence<T, Dim> create(const vector<scalar_type, Dim> value)
+    static this_t create(const vector<scalar_type, Dimension> value)
     {
-        QuantizedSequence<T, Dim> seq;
-        NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
+        this_t seq;
+        seq.data = hlsl::promote<store_type>(0u);
+        NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
             seq.set(i, value[i]);
         return seq;
     }
@@ -306,17 +371,23 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         }
     }
 
+    template<typename F>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F>::__call(value);
+    }
+
     template<typename F>
     vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_before_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
     template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_after_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
@@ -331,6 +402,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3)
 struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     using base_type = vector<scalar_type, 2>;
@@ -391,14 +463,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     template<typename F>
     vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        impl::decode_before_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_before_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }
     template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, Dimension>) scrambleKey)
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        impl::decode_after_scramble_helper<QuantizedSequence<T, Dimension>,F> helper;
+        impl::decode_after_scramble_helper<this_t,F> helper;
         helper.val.data = data;
         return helper(scrambleKey);
     }

From 1223e88db2f17ac059e6d28001ac77ba3028c816 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 20 Jan 2026 11:06:54 +0700
Subject: [PATCH 07/20] fixes to removal of core::matrix

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    |  4 +--
 .../transformation_matrix_utils.hlsl          | 29 +++++--------------
 2 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index f838156c86..25fa61162d 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -110,16 +110,16 @@ struct quaternion
             uniformScaleSq = traits.uniformScaleSq;
 
             if (dontAssertValidMatrix)
+            {
                 if (!valid)
                 {
                     this_t retval;
                     retval.data = hlsl::promote<data_type>(bit_cast<scalar_type>(numeric_limits<scalar_type>::quiet_NaN));
                     return retval;
                 }
+            }
             else
-            {
                 assert(valid);
-            }
         }
         if (uniformScaleSq < numeric_limits<scalar_type>::min)
         {
diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl
index 1ad16dc28d..df56d46549 100644
--- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl
+++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl
@@ -2,6 +2,7 @@
 #define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_
 
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/math/quaternions.hlsl>
 
 namespace nbl
 {
@@ -125,30 +126,16 @@ inline matrix<T, 3, 4> buildCameraLookAtMatrixRH(
 
 //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged
 template<typename T, uint32_t N>
-inline void setRotation(matrix<T, N, 4>& outMat, NBL_CONST_REF_ARG(core::quaternion) quat)
+inline void setRotation(matrix<T, N, 4>& outMat, NBL_CONST_REF_ARG(math::quaternion<T>) quat)
 {
 	static_assert(N == 3 || N == 4);
+	matrix<T, 3, 3> mat = _static_cast<matrix<T, 3, 3>>(quat);
 
-	outMat[0] = vector<T, 4>(
-		1 - 2 * (quat.y * quat.y + quat.z * quat.z),
-		2 * (quat.x * quat.y - quat.z * quat.w),
-		2 * (quat.x * quat.z + quat.y * quat.w),
-		outMat[0][3]
-	);
-
-	outMat[1] = vector<T, 4>(
-		2 * (quat.x * quat.y + quat.z * quat.w),
-		1 - 2 * (quat.x * quat.x + quat.z * quat.z),
-		2 * (quat.y * quat.z - quat.x * quat.w),
-		outMat[1][3]
-	);
-
-	outMat[2] = vector<T, 4>(
-		2 * (quat.x * quat.z - quat.y * quat.w),
-		2 * (quat.y * quat.z + quat.x * quat.w),
-		1 - 2 * (quat.x * quat.x + quat.y * quat.y),
-		outMat[2][3]
-	);
+	// `mat` rows have 3 components while `outMat` rows have 4: rebuild each row and carry over its existing 4th column
+	outMat[0] = vector<T, 4>(mat[0], outMat[0][3]);
+	outMat[1] = vector<T, 4>(mat[1], outMat[1][3]);
+	outMat[2] = vector<T, 4>(mat[2], outMat[2][3]);
+	// when N == 4 the 4th row is intentionally not written
 }
 
 template<typename T, uint32_t N>

From cd67d94fed332d3c13a3bf95f66f9c334763f039 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 20 Jan 2026 11:08:35 +0700
Subject: [PATCH 08/20] removed sequence partial spec data too big, minor fixes

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 97 ++-----------------
 1 file changed, 6 insertions(+), 91 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index dfc22634f6..70ca28b70d 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -252,8 +252,8 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 };
 
 // uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(is_same_v<T,uint32_t2> && Dim == 3)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Dim == 3) >
 {
     using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
@@ -326,8 +326,8 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 };
 
 // uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(is_same_v<T,uint16_t2> && Dim == 4)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Dim == 4) >
 {
     using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
@@ -348,7 +348,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 4);
-        if (idx >= 0 && idx < 2) // x y
+        if (idx < 2) // x y
         {
             return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent);
         }
@@ -361,7 +361,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     void set(const uint16_t idx, const scalar_type value)
     {
         assert(idx >= 0 && idx < 4);
-        if (idx >= 0 && idx < 2) // x y
+        if (idx < 2) // x y
         {
             glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent);
         }
@@ -397,91 +397,6 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 
 // no uint16_t4, uint32_t4; Dim=2
 
-// uint32_t4; Dim=3 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
-// use uint32_t2 instead of uint16_t4
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3) >
-{
-    using this_t = QuantizedSequence<T, Dim>;
-    using store_type = T;
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using base_type = vector<scalar_type, 2>;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
-
-    base_type get(const uint16_t idx)
-    {
-        assert(idx >= 0 && idx < 3);
-        if (idx == 0)   // x
-        {
-            base_type x;
-            x[0] = data[0];
-            x[1] = glsl::bitfieldExtract(data[1], 0u, 10u);
-            return x;
-        }
-        else if (idx == 1)  // y
-        {
-            base_type y;
-            y[0] = glsl::bitfieldExtract(data[1], 10u, 22u);
-            y[0] |= glsl::bitfieldExtract(data[2], 0u, 10u) << 22u;
-            y[1] = glsl::bitfieldExtract(data[2], 10u, 10u);
-            return y;
-        }
-        else    // z
-        {
-            base_type z;
-            z[0] = glsl::bitfieldInsert(data[2], 20u, 12u);
-            z[0] |= glsl::bitfieldInsert(data[3], 0u, 20u) << 12u;
-            z[1] = glsl::bitfieldInsert(data[3], 20u, 10u);
-            return z;
-        }
-    }
-
-    void set(const uint16_t idx, const base_type value)
-    {
-        assert(idx >= 0 && idx < 3);
-        if (idx == 0)   // x
-        {
-            data[0] = value[0];
-            glsl::bitfieldInsert(data[1], value[1], 0u, 10u);
-        }
-        else if (idx == 1)  // y
-        {
-            glsl::bitfieldInsert(data[1], value[0], 10u, 22u);
-            glsl::bitfieldInsert(data[2], value[0] >> 22u, 0u, 10u);
-            glsl::bitfieldInsert(data[2], value[1], 10u, 10u);
-        }
-        else    // z
-        {
-            glsl::bitfieldInsert(data[2], value[0], 20u, 12u);
-            glsl::bitfieldInsert(data[3], value[0] >> 12u, 0u, 20u);
-            glsl::bitfieldInsert(data[3], value[1], 20u, 10u);
-        }
-    }
-
-    template<typename F>
-    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
-    {
-        impl::decode_before_scramble_helper<this_t,F> helper;
-        helper.val.data = data;
-        return helper(scrambleKey);
-    }
-    template<typename F>
-    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
-    {
-        impl::decode_after_scramble_helper<this_t,F> helper;
-        helper.val.data = data;
-        return helper(scrambleKey);
-    }
-
-    store_type data;
-    // data[0] = | -- x 32 bits -- |
-    // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB
-    // data[2] = MSB | -- z 12 bits -- | -- y 20 bits -- | LSB
-    // data[3] = | -- z 30 bits -- |
-};
-
 }
 
 }

From 5b833280d4754c1c28132fc797fe48c36694c66b Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 28 Jan 2026 10:42:32 +0700
Subject: [PATCH 09/20] fix bitfieldInsert usage, truncate data

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index 70ca28b70d..a681de07de 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -5,9 +5,11 @@
 #ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_
 #define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_
 
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
+#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
 #include "nbl/builtin/hlsl/concepts/vector.hlsl"
 #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
-#include "nbl/builtin/hlsl/random/pcg.hlsl"
 
 namespace nbl
 {
@@ -160,6 +162,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     using scalar_type = store_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     static this_t create(const vector<store_type, Dimension> value)
@@ -180,7 +183,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     void set(const uint16_t idx, const store_type value)
     {
         assert(idx > 0 && idx < Dim);
-        glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent);
+        data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent);
     }
 
     template<typename F>
@@ -290,15 +293,16 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
     void set(const uint16_t idx, const scalar_type value)
     {
         assert(idx >= 0 && idx < 3);
+        const scalar_type trunc_val = value >> DiscardBits;
         if (idx == 0)   // x
-            glsl::bitfieldInsert(data[0], value, 0u, BitsPerComponent);
+            data[0] = glsl::bitfieldInsert(data[0], trunc_val, 0u, BitsPerComponent);
         else if (idx == 1)  // y
         {
-            glsl::bitfieldInsert(data[0], value, BitsPerComponent, DiscardBits);
-            glsl::bitfieldInsert(data[1], value >> DiscardBits, 0u, DiscardBits - 1u);
+            data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits);
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u);
         }
         else    // z
-            glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent);
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent);
     }
 
     template<typename F>
@@ -334,6 +338,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
     static this_t create(const vector<scalar_type, Dimension> value)
@@ -361,13 +366,14 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
     void set(const uint16_t idx, const scalar_type value)
     {
         assert(idx >= 0 && idx < 4);
+        const scalar_type trunc_val = value >> DiscardBits;
         if (idx < 2) // x y
         {
-            glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent);
+            data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent * idx, BitsPerComponent);
         }
         else    // z w
         {
-            glsl::bitfieldInsert(data[1], value, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
         }
     }
 

From 57b1b3d80c3e8518262bd99e84fccfd3b8cb22a0 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 28 Jan 2026 10:56:30 +0700
Subject: [PATCH 10/20] change some type alias names to make more sense

---
 examples_tests                                |  2 +-
 .../hlsl/sampling/quantized_sequence.hlsl     | 36 +++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/examples_tests b/examples_tests
index 587cbff28b..6e96a2f68f 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 587cbff28b1d0b42f2f704c3ba9b247ad0276590
+Subproject commit 6e96a2f68fcc23eeb8524a35839e5c93454b98ec
diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index a681de07de..a3c6cb4685 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -46,16 +46,16 @@ struct encode_helper
 {
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
     using sequence_type = Q;
-    using unorm_vec_type = vector<F, Dim>;
-    using unsigned_scalar_type = unsigned_integer_of_size_t<sizeof(F)>; 
-    using uvec_type = vector<unsigned_scalar_type, Dim>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v<unsigned_scalar_type> - 1u)) - 1u;
+    using input_type = vector<F, Dim>;
+    using uniform_storage_scalar_type = unsigned_integer_of_size_t<sizeof(F)>; 
+    using uniform_storage_type = vector<uniform_storage_scalar_type, Dim>; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar)
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v<uniform_storage_scalar_type> - 1u)) - 1u;
 
-    static sequence_type __call(const unorm_vec_type unormvec)
+    static sequence_type __call(const input_type unormvec)
     {
-        uvec_type asuint;
+        uniform_storage_type asuint;
         NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
-            asuint[i] = unsigned_scalar_type(unormvec[i] * UNormMultiplier);
+            asuint[i] = uniform_storage_scalar_type(unormvec[i] * UNormMultiplier);
         return sequence_type::create(asuint);
     }
 };
@@ -63,12 +63,12 @@ struct encode_helper
 template<typename Q, typename F>
 struct decode_before_scramble_helper
 {
-    using unsigned_scalar_type = typename Q::scalar_type;
+    using storage_scalar_type = typename Q::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
     using uvec_type = vector<uint32_t, Dim>;
     using sequence_type = Q;
     using return_type = vector<F, Dim>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,8u*sizeof(unsigned_scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,8u*sizeof(storage_scalar_type)>::value;
 
     return_type operator()(const uvec_type scrambleKey)
     {
@@ -76,7 +76,7 @@ struct decode_before_scramble_helper
         NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = val.get(i);
         seqVal ^= scrambleKey;
-        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(unsigned_scalar_type)> >(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(storage_scalar_type)> >(UNormConstant);
     }
 
     sequence_type val;
@@ -84,7 +84,7 @@ struct decode_before_scramble_helper
 template<typename Q, typename F>
 struct decode_after_scramble_helper
 {
-    using unsigned_scalar_type = typename Q::scalar_type;
+    using storage_scalar_type = typename Q::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
     using uvec_type = vector<uint32_t, Dim>;
     using sequence_type = Q;
@@ -99,7 +99,7 @@ struct decode_after_scramble_helper
         uvec_type seqVal;
         NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = scramble.get(i);
-        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(unsigned_scalar_type)> >(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(storage_scalar_type)> >(UNormConstant);
     }
 
     sequence_type val;
@@ -126,8 +126,8 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T
         return seq;
     }
 
-    store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
-    void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; }
+    store_type get(const uint16_t idx) { assert(idx >= 0 && idx < 1); return data; }
+    void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; }
 
     template<typename F>
     static this_t encode(const vector<F, Dimension> value)
@@ -176,13 +176,13 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
 
     store_type get(const uint16_t idx)
     {
-        assert(idx > 0 && idx < Dim);
+        assert(idx >= 0 && idx < Dim);
         return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent);
     }
 
     void set(const uint16_t idx, const store_type value)
     {
-        assert(idx > 0 && idx < Dim);
+        assert(idx >= 0 && idx < Dim);
         data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent);
     }
 
@@ -227,8 +227,8 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         return seq;
     }
 
-    scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; }
-    void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; }
+    scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < Dim); return data[idx]; }
+    void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; }
 
     template<typename F>
     static this_t encode(const vector<F, Dimension> value)

From c0350634f1ea285c0f5f94b3ab1c2f70511ac655 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 28 Jan 2026 12:01:46 +0700
Subject: [PATCH 11/20] added option for encode fullwidth or not

---
 .../hlsl/sampling/quantized_sequence.hlsl     | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index a3c6cb4685..357ea7843b 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -41,7 +41,8 @@ struct unorm_constant<float,21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0
 template<>
 struct unorm_constant<float,32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
 
-template<typename Q, typename F>
+// FullWidth: true if you intend to decode before scrambling, false if you decode after scrambling
+template<typename Q, typename F, bool FullWidth=true>
 struct encode_helper
 {
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
@@ -49,7 +50,8 @@ struct encode_helper
     using input_type = vector<F, Dim>;
     using uniform_storage_scalar_type = unsigned_integer_of_size_t<sizeof(F)>; 
     using uniform_storage_type = vector<uniform_storage_scalar_type, Dim>; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar)
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v<uniform_storage_scalar_type> - 1u)) - 1u;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Bits = FullWidth ? (8u * size_of_v<uniform_storage_scalar_type> - 1u) : sequence_type::BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << Bits) - 1u;
 
     static sequence_type __call(const input_type unormvec)
     {
@@ -129,10 +131,10 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T
     store_type get(const uint16_t idx) { assert(idx >= 0 && idx < 1); return data; }
     void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; }
 
-    template<typename F>
+    template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F>::__call(value);
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
     }
 
     template<typename F>
@@ -186,10 +188,10 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
         data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent);
     }
 
-    template<typename F>
+    template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F>::__call(value);
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
     }
 
     template<typename F>
@@ -230,10 +232,10 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < Dim); return data[idx]; }
     void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; }
 
-    template<typename F>
+    template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F>::__call(value);
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
     }
 
     template<typename F>
@@ -305,10 +307,10 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Di
             data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent);
     }
 
-    template<typename F>
+    template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F>::__call(value);
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
     }
 
     template<typename F>
@@ -377,10 +379,10 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
         }
     }
 
-    template<typename F>
+    template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F>::__call(value);
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
     }
 
     template<typename F>

From 09846179fc1a9c2d90ab611b9a8f91c186598ea4 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 28 Jan 2026 16:58:54 +0700
Subject: [PATCH 12/20] update examples to include tests

---
 examples_tests                                            | 2 +-
 include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples_tests b/examples_tests
index 6e96a2f68f..533a90e027 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 6e96a2f68fcc23eeb8524a35839e5c93454b98ec
+Subproject commit 533a90e027fc263b6091fbc00eecd29a37f1d254
diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index 357ea7843b..b1af365c86 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -48,7 +48,7 @@ struct encode_helper
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
     using sequence_type = Q;
     using input_type = vector<F, Dim>;
-    using uniform_storage_scalar_type = unsigned_integer_of_size_t<sizeof(F)>; 
+    using uniform_storage_scalar_type = unsigned_integer_of_size_t<sizeof(F)>;
     using uniform_storage_type = vector<uniform_storage_scalar_type, Dim>; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar)
     NBL_CONSTEXPR_STATIC_INLINE uint16_t Bits = FullWidth ? (8u * size_of_v<uniform_storage_scalar_type> - 1u) : sequence_type::BitsPerComponent;
     NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << Bits) - 1u;

From 5b73aae3d608a5d6d5b2c57ed2b372c1fba7d708 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 29 Jan 2026 12:21:44 +0700
Subject: [PATCH 13/20] partial specs for bitfield insert and extract for 16
 bit types

---
 .../nbl/builtin/hlsl/glsl_compat/core.hlsl    | 92 ++++++++++++++-----
 1 file changed, 69 insertions(+), 23 deletions(-)

diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
index 7e92cbf282..6d971abca3 100644
--- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
+++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
@@ -22,16 +22,33 @@ namespace glsl
 #ifndef __HLSL_VERSION
 
 // GLM Aliases
-template<typename genIUType>
-genIUType bitfieldExtract(genIUType Value, int Offset, int Bits)
+namespace impl
 {
-	return glm::bitfieldExtract<genIUType>(Value, Offset, Bits);
-}
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldInsert;
 
-template<typename genIUType>
-genIUType bitfieldInsert(genIUType const& Base, genIUType const& Insert, int Offset, int Bits)
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
-	return glm::bitfieldInsert<genIUType>(Base, Insert, Offset, Bits);
+    static T __call( T base, T insert, uint32_t offset, uint32_t bits )
+    {
+        return glm::bitfieldInsert<T>(base, insert, offset, bits);
+    }
+};
+
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldExtract;
+
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
+{
+    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    {
+        return glm::bitfieldExtract<T>(val, offsetBits, numBits);
+    }
+};
 }
 
 template<typename genIUType>
@@ -184,21 +201,25 @@ void memoryBarrierShared() {
 namespace impl 
 {
 
-template<typename T, bool isSigned, bool isIntegral>
-struct bitfieldExtract {};
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldInsert;
 
-template<typename T, bool isSigned>
-struct bitfieldExtract<T, isSigned, false>
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
 {
-    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    static T __call( T base, T insert, uint32_t offset, uint32_t bits )
     {
-        static_assert( is_integral<T>::value, "T is not an integral type!" );
-        return val;
+        return spirv::bitFieldInsert<T>(base, insert, offset, bits);
     }
 };
 
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldExtract;
+
 template<typename T>
-struct bitfieldExtract<T, true, true>
+NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
@@ -207,7 +228,8 @@ struct bitfieldExtract<T, true, true>
 };
 
 template<typename T>
-struct bitfieldExtract<T, false, true>
+NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
@@ -218,24 +240,48 @@ struct bitfieldExtract<T, false, true>
 } //namespace impl
 
 template<typename T>
-T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
+T bitfieldReverse(T value)
 {
-    return impl::bitfieldExtract<T, is_signed<T>::value, is_integral<T>::value>::__call(val,offsetBits,numBits);
+    return spirv::bitReverse<T>(value);
 }
 
+#endif
+
+namespace impl 
+{
 template<typename T>
-T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
+{
+    static T __call( T base, T insert, uint32_t offset, uint32_t bits )
+    {
+        const T mask = (T(1u) << T(bits) - T(1u)) << offset;
+        return (base & ~mask) | ((insert << T(offset)) & mask);
+    }
+};
+
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
 {
-    return spirv::bitFieldInsert<T>(base, insert, offset, bits);
+    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    {
+        return (val >> T(offsetBits)) & T(T(1u) << T(numBits) - T(1u));
+    }
+};
 }
 
 template<typename T>
-T bitfieldReverse(T value)
+T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
 {
-    return spirv::bitReverse<T>(value);
+    return impl::bitfieldExtract<T>::__call(val, offsetBits, numBits);
 }
 
-#endif
+template<typename T>
+T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
+{
+    return impl::bitfieldInsert<T>::__call(base, insert, offset, bits);
+}
 
 namespace impl
 {

From b63fa26dc46312f640c1fc19e4be28f87002d8cb Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 29 Jan 2026 14:37:16 +0700
Subject: [PATCH 14/20] fixes bitfieldInsert, account for sign in
 bitfieldExtract

---
 .../nbl/builtin/hlsl/glsl_compat/core.hlsl    | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
index 6d971abca3..b112762568 100644
--- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
+++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
@@ -255,26 +255,34 @@ struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_t
 {
     static T __call( T base, T insert, uint32_t offset, uint32_t bits )
     {
-        const T mask = (T(1u) << T(bits) - T(1u)) << offset;
-        return (base & ~mask) | ((insert << T(offset)) & mask);
+        const T mask = (T(1u) << bits) - T(1u);
+        const T shifted_mask = mask << offset;
+        return (base & ~shifted_mask) | ((insert & mask) << T(offset));
     }
 };
 
 template<typename T>
-NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
-struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
+NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
-        return (val >> T(offsetBits)) & T(T(1u) << T(numBits) - T(1u));
+        T ret = (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u)); // must be mutable: sign bits are OR-ed in below
+        if (ret & (T(1u) << (numBits-1u))) // top bit of the extracted field set => value is negative
+            ret |= T(~0ull) << numBits; // sign-extend by filling every bit above the field with ones
+        return ret;
     }
 };
-}
 
 template<typename T>
-T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
+NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
 {
-    return impl::bitfieldExtract<T>::__call(val, offsetBits, numBits);
+    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    {
+        return (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u));
+    }
+};
 }
 
 template<typename T>
@@ -283,6 +291,12 @@ T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
     return impl::bitfieldInsert<T>::__call(base, insert, offset, bits);
 }
 
+template<typename T>
+T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
+{
+    return impl::bitfieldExtract<T>::__call(val, offsetBits, numBits);
+}
+
 namespace impl
 {
 template<typename T NBL_STRUCT_CONSTRAINABLE>

From e1de22554fd884f4800d8766fb1759ac058aa2bf Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 29 Jan 2026 15:36:49 +0700
Subject: [PATCH 15/20] minor fixes to uint16_t2 dim 4

---
 include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index b1af365c86..d3313d2875 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -185,7 +185,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     void set(const uint16_t idx, const store_type value)
     {
         assert(idx >= 0 && idx < Dim);
-        data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent);
+        data = glsl::bitfieldInsert(data, scalar_type(value >> DiscardBits), BitsPerComponent * idx, BitsPerComponent);
     }
 
     template<typename F, bool FullWidth>
@@ -346,7 +346,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
     static this_t create(const vector<scalar_type, Dimension> value)
     {
         this_t seq;
-        seq.data = hlsl::promote<store_type>(0u);
+        seq.data = store_type(0u,0u);
         NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
             seq.set(i, value[i]);
         return seq;
@@ -361,7 +361,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
         }
         else    // z w
         {
-            return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
+            return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent);
         }
     }
 
@@ -375,7 +375,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
         }
         else    // z w
         {
-            data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent);
         }
     }
 

From 00c5379bc9a606fdf27776bf989f1fd47bfbe9b3 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 29 Jan 2026 16:55:34 +0700
Subject: [PATCH 16/20] enforce fullwidth if dim == storage dim

---
 examples_tests                                        |  2 +-
 .../nbl/builtin/hlsl/sampling/quantized_sequence.hlsl | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/examples_tests b/examples_tests
index 533a90e027..53667051b8 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 533a90e027fc263b6091fbc00eecd29a37f1d254
+Subproject commit 53667051b8dbc53ab8273df1b716b7faa9d97b54
diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index d3313d2875..64573ac85f 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -58,7 +58,10 @@ struct encode_helper
         uniform_storage_type asuint;
         NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             asuint[i] = uniform_storage_scalar_type(unormvec[i] * UNormMultiplier);
-        return sequence_type::create(asuint);
+        NBL_IF_CONSTEXPR(Dim==1)
+            return sequence_type::create(asuint[0]);
+        else
+            return sequence_type::create(asuint);
     }
 };
 
@@ -134,7 +137,7 @@ struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T
     template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
+        return impl::encode_helper<this_t,F,true>::__call(value);
     }
 
     template<typename F>
@@ -235,7 +238,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization
     template<typename F, bool FullWidth>
     static this_t encode(const vector<F, Dimension> value)
     {
-        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
+        return impl::encode_helper<this_t,F,true>::__call(value);
     }
 
     template<typename F>
@@ -346,7 +349,7 @@ struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Di
     static this_t create(const vector<scalar_type, Dimension> value)
     {
         this_t seq;
-        seq.data = store_type(0u,0u);
+        seq.data = hlsl::promote<store_type, scalar_type>(0u);
         NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
             seq.set(i, value[i]);
         return seq;

From ab5e771db460276b8f3accf7d869e91279a88f6d Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 29 Jan 2026 16:59:02 +0700
Subject: [PATCH 17/20] update examples_tests submodule to latest

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 53667051b8..8485356fc4 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 53667051b8dbc53ab8273df1b716b7faa9d97b54
+Subproject commit 8485356fc4263232746b517d4eca602d56a16816

From a5f431d39c90a50229529be3561052a0fa214485 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 29 Jan 2026 17:50:49 +0700
Subject: [PATCH 18/20] update examples_tests submodule to latest (2)

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 8485356fc4..40bc21ae4a 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 8485356fc4263232746b517d4eca602d56a16816
+Subproject commit 40bc21ae4aae26cf467910bf696d1e195ec8fc77

From 20aa5ab34a5203b532c4a12a6f43d631f265791d Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 30 Jan 2026 10:43:09 +0700
Subject: [PATCH 19/20] fixes to bitfield insert/extract requires

---
 examples_tests                                 |  2 +-
 include/nbl/builtin/hlsl/glsl_compat/core.hlsl | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples_tests b/examples_tests
index 40bc21ae4a..63f0079781 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 40bc21ae4aae26cf467910bf696d1e195ec8fc77
+Subproject commit 63f0079781fe189c672297343b30308333646294
diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
index b112762568..235cdde8e4 100644
--- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
+++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
@@ -29,7 +29,7 @@ struct bitfieldInsert;
 
 template<typename T>
 NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
-struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T base, T insert, uint32_t offset, uint32_t bits )
     {
@@ -42,7 +42,7 @@ struct bitfieldExtract;
 
 template<typename T>
 NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
-struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
@@ -206,7 +206,7 @@ struct bitfieldInsert;
 
 template<typename T>
 NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
-struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T base, T insert, uint32_t offset, uint32_t bits )
     {
@@ -219,7 +219,7 @@ struct bitfieldExtract;
 
 template<typename T>
 NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
-struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
@@ -228,8 +228,8 @@ struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename v
 };
 
 template<typename T>
-NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4)
-struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 4) >
+NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {

From 36965e1bec51690efd3484bd4ded523cbbffaef5 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 30 Jan 2026 13:55:50 +0700
Subject: [PATCH 20/20] update examples_tests submodule to latest

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 63f0079781..301fb402e6 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 63f0079781fe189c672297343b30308333646294
+Subproject commit 301fb402e6d0d3c204b1da67e920283d6f9abca5