diff --git a/examples_tests b/examples_tests
index 587cbff28b..301fb402e6 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 587cbff28b1d0b42f2f704c3ba9b247ad0276590
+Subproject commit 301fb402e6d0d3c204b1da67e920283d6f9abca5
diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
index 7e92cbf282..235cdde8e4 100644
--- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
+++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
@@ -22,16 +22,33 @@ namespace glsl
 #ifndef __HLSL_VERSION
 
 // GLM Aliases
-template<typename genIUType>
-genIUType bitfieldExtract(genIUType Value, int Offset, int Bits)
+namespace impl
 {
-	return glm::bitfieldExtract<genIUType>(Value, Offset, Bits);
-}
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldInsert;
 
-template<typename genIUType>
-genIUType bitfieldInsert(genIUType const& Base, genIUType const& Insert, int Offset, int Bits)
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
+{
+    static T __call( T base, T insert, uint32_t offset, uint32_t bits )
+    {
+        return glm::bitfieldInsert<T>(base, insert, offset, bits);
+    }
+};
+
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldExtract;
+
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
-	return glm::bitfieldInsert<genIUType>(Base, Insert, Offset, Bits);
+    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    {
+        return glm::bitfieldExtract<T>(val, offsetBits, numBits);
+    }
+};
 }
 
 template<typename genIUType>
@@ -184,21 +201,25 @@ void memoryBarrierShared() {
 namespace impl 
 {
 
-template<typename T, bool isSigned, bool isIntegral>
-struct bitfieldExtract {};
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldInsert;
 
-template<typename T, bool isSigned>
-struct bitfieldExtract<T, isSigned, false>
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
-    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    static T __call( T base, T insert, uint32_t offset, uint32_t bits )
     {
-        static_assert( is_integral<T>::value, "T is not an integral type!" );
-        return val;
+        return spirv::bitFieldInsert<T>(base, insert, offset, bits);
     }
 };
 
+template<typename T NBL_STRUCT_CONSTRAINABLE>
+struct bitfieldExtract;
+
 template<typename T>
-struct bitfieldExtract<T, true, true>
+NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
@@ -207,7 +228,8 @@ struct bitfieldExtract<T, true, true>
 };
 
 template<typename T>
-struct bitfieldExtract<T, false, true>
+NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> >= 4) >
 {
     static T __call( T val, uint32_t offsetBits, uint32_t numBits )
     {
@@ -218,25 +240,63 @@ struct bitfieldExtract<T, false, true>
 } //namespace impl
 
 template<typename T>
-T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
+T bitfieldReverse(T value)
+{
+    return spirv::bitReverse<T>(value);
+}
+
+#endif
+
+namespace impl 
+{
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldInsert<T NBL_PARTIAL_REQ_BOT(concepts::Integral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
+{
+    static T __call( T base, T insert, uint32_t offset, uint32_t bits )
+    {
+        const T mask = (T(1u) << bits) - T(1u);
+        const T shifted_mask = mask << offset;
+        return (base & ~shifted_mask) | ((insert & mask) << T(offset));
+    }
+};
+
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::SignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
+{
+    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    {
+        const T ret = (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u));
+        if (ret & (T(1u) << (numBits-1u)))
+            ret |= T(~0ull) << numBits;
+        return ret;
+    }
+};
+
+template<typename T>
+NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2)
+struct bitfieldExtract<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> == 2) >
 {
-    return impl::bitfieldExtract<T, is_signed<T>::value, is_integral<T>::value>::__call(val,offsetBits,numBits);
+    static T __call( T val, uint32_t offsetBits, uint32_t numBits )
+    {
+        return (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u));
+    }
+};
 }
 
 template<typename T>
 T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
 {
-    return spirv::bitFieldInsert<T>(base, insert, offset, bits);
+    return impl::bitfieldInsert<T>::__call(base, insert, offset, bits);
 }
 
 template<typename T>
-T bitfieldReverse(T value)
+T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
 {
-    return spirv::bitReverse<T>(value);
+    return impl::bitfieldExtract<T>::__call(val, offsetBits, numBits);
 }
 
-#endif
-
 namespace impl
 {
 template<typename T NBL_STRUCT_CONSTRAINABLE>
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index f838156c86..25fa61162d 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -110,16 +110,16 @@ struct quaternion
             uniformScaleSq = traits.uniformScaleSq;
 
             if (dontAssertValidMatrix)
+            {
                 if (!valid)
                 {
                     this_t retval;
                     retval.data = hlsl::promote<data_type>(bit_cast<scalar_type>(numeric_limits<scalar_type>::quiet_NaN));
                     return retval;
                 }
+            }
             else
-            {
                 assert(valid);
-            }
         }
         if (uniformScaleSq < numeric_limits<scalar_type>::min)
         {
diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl
index 1ad16dc28d..df56d46549 100644
--- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl
+++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl
@@ -2,6 +2,7 @@
 #define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_
 
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/math/quaternions.hlsl>
 
 namespace nbl
 {
@@ -125,30 +126,16 @@ inline matrix<T, 3, 4> buildCameraLookAtMatrixRH(
 
 //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged
 template<typename T, uint32_t N>
-inline void setRotation(matrix<T, N, 4>& outMat, NBL_CONST_REF_ARG(core::quaternion) quat)
+inline void setRotation(matrix<T, N, 4>& outMat, NBL_CONST_REF_ARG(math::quaternion<T>) quat)
 {
 	static_assert(N == 3 || N == 4);
+	matrix<T, 3, 3> mat = _static_cast<matrix<T, 3, 3>>(quat);
 
-	outMat[0] = vector<T, 4>(
-		1 - 2 * (quat.y * quat.y + quat.z * quat.z),
-		2 * (quat.x * quat.y - quat.z * quat.w),
-		2 * (quat.x * quat.z + quat.y * quat.w),
-		outMat[0][3]
-	);
-
-	outMat[1] = vector<T, 4>(
-		2 * (quat.x * quat.y + quat.z * quat.w),
-		1 - 2 * (quat.x * quat.x + quat.z * quat.z),
-		2 * (quat.y * quat.z - quat.x * quat.w),
-		outMat[1][3]
-	);
-
-	outMat[2] = vector<T, 4>(
-		2 * (quat.x * quat.z - quat.y * quat.w),
-		2 * (quat.y * quat.z + quat.x * quat.w),
-		1 - 2 * (quat.x * quat.x + quat.y * quat.y),
-		outMat[2][3]
-	);
+	outMat[0] = mat[0];
+
+	outMat[1] = mat[1];
+
+	outMat[2] = mat[2];
 }
 
 template<typename T, uint32_t N>
diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
index 8929609c34..64573ac85f 100644
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl
@@ -5,9 +5,11 @@
 #ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_
 #define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_
 
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
+#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
 #include "nbl/builtin/hlsl/concepts/vector.hlsl"
 #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
-#include "nbl/builtin/hlsl/random/pcg.hlsl"
 
 namespace nbl
 {
@@ -22,284 +24,389 @@ struct QuantizedSequence;
 
 namespace impl
 {
-template<uint16_t Bits>
+template<typename FloatScalar, uint16_t Bits>
 struct unorm_constant;
 template<>
-struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; };
+struct unorm_constant<float,4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; };
 template<>
-struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; };
+struct unorm_constant<float,5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; };
 template<>
-struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; };
+struct unorm_constant<float,8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; };
 template<>
-struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; };
+struct unorm_constant<float,10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; };
 template<>
-struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; };
+struct unorm_constant<float,16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; };
 template<>
-struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; };
+struct unorm_constant<float,21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; };
 template<>
-struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
+struct unorm_constant<float,32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; };
 
-template<typename T, uint16_t D, bool EncodeScramble>
-struct decode_helper;
-
-template<typename T, uint16_t D>
-struct decode_helper<T, D, false>
+// FullWidth if intend to decode before scramble, not if decode after scramble
+template<typename Q, typename F, bool FullWidth=true>
+struct encode_helper
 {
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using fp_type = typename float_of_size<sizeof(scalar_type)>::type;
-    using uvec_type = vector<scalar_type, D>;
-    using sequence_type = QuantizedSequence<T, D>;
-    using return_type = vector<fp_type, D>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
+    using sequence_type = Q;
+    using input_type = vector<F, Dim>;
+    using uniform_storage_scalar_type = unsigned_integer_of_size_t<sizeof(F)>;
+    using uniform_storage_type = vector<uniform_storage_scalar_type, Dim>; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar)
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Bits = FullWidth ? (8u * size_of_v<uniform_storage_scalar_type> - 1u) : sequence_type::BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << Bits) - 1u;
+
+    static sequence_type __call(const input_type unormvec)
+    {
+        uniform_storage_type asuint;
+        NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
+            asuint[i] = uniform_storage_scalar_type(unormvec[i] * UNormMultiplier);
+        NBL_IF_CONSTEXPR(Dim==1)
+            return sequence_type::create(asuint[0]);
+        else
+            return sequence_type::create(asuint);
+    }
+};
 
-    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
+template<typename Q, typename F>
+struct decode_before_scramble_helper
+{
+    using storage_scalar_type = typename Q::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
+    using uvec_type = vector<uint32_t, Dim>;
+    using sequence_type = Q;
+    using return_type = vector<F, Dim>;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,8u*sizeof(storage_scalar_type)>::value;
+
+    return_type operator()(const uvec_type scrambleKey)
     {
         uvec_type seqVal;
-        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
-            seqVal[i] = val.get(i) ^ scrambleKey[i];
-        return return_type(seqVal) * bit_cast<fp_type>(UNormConstant);
+        NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
+            seqVal[i] = val.get(i);
+        seqVal ^= scrambleKey;
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(storage_scalar_type)> >(UNormConstant);
     }
+
+    sequence_type val;
 };
-template<typename T, uint16_t D>
-struct decode_helper<T, D, true>
+template<typename Q, typename F>
+struct decode_after_scramble_helper
 {
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using fp_type = typename float_of_size<sizeof(scalar_type)>::type;
-    using uvec_type = vector<scalar_type, D>;
-    using sequence_type = QuantizedSequence<T, D>;
-    using sequence_store_type = typename sequence_type::store_type;
-    using sequence_scalar_type = typename vector_traits<sequence_store_type>::scalar_type;
-    using return_type = vector<fp_type, D>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant;
-
-    static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
+    using storage_scalar_type = typename Q::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension;
+    using uvec_type = vector<uint32_t, Dim>;
+    using sequence_type = Q;
+    using return_type = vector<F, Dim>;
+    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float32_t,sequence_type::BitsPerComponent>::value;
+
+    return_type operator()(NBL_CONST_REF_ARG(sequence_type) scrambleKey)
     {
         sequence_type scramble;
-        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
-            scramble.set(i, scrambleKey[i]);
-        scramble.data ^= val.data;
+        scramble.data = val.data ^ scrambleKey.data;
 
         uvec_type seqVal;
-        NBL_UNROLL for(uint16_t i = 0; i < D; i++)
+        NBL_UNROLL for(uint16_t i = 0; i < Dim; i++)
             seqVal[i] = scramble.get(i);
-        return return_type(seqVal) * bit_cast<fp_type>(UNormConstant);
+        return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(storage_scalar_type)> >(UNormConstant);
     }
+
+    sequence_type val;
 };
-}
 
-template<typename T, uint16_t D, bool EncodeScramble=false>
-vector<typename float_of_size<sizeof(typename vector_traits<T>::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<typename vector_traits<T>::scalar_type, D> scrambleKey)
-{
-    return impl::decode_helper<T,D,EncodeScramble>::__call(val, scrambleKey);
+template<typename T>
+NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;
 }
 
-#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4
-
 // all Dim=1
-template<typename T> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT)
-struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT) >
+template<typename T> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T>)
+struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T>) >
 {
+    using this_t = QuantizedSequence<T, 1>;
     using store_type = T;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value;
+    using scalar_type = typename vector_traits<T>::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u);
+
+    static this_t create(const store_type value)
+    {
+        this_t seq;
+        seq.data = value;
+        return seq;
+    }
+
+    store_type get(const uint16_t idx) { assert(idx >= 0 && idx < 1); return data; }
+    void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; }
+
+    template<typename F, bool FullWidth>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F,true>::__call(value);
+    }
 
-    store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
-    void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; }
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
 
     store_type data;
 };
 
 // uint16_t, uint32_t; Dim=2,3,4
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == 1 && Dim > 1 && Dim < 5) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
+    using scalar_type = store_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<BitsPerComponent>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
+
+    static this_t create(const vector<store_type, Dimension> value)
+    {
+        this_t seq;
+        seq.data = store_type(0u);
+        NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
+            seq.set(i, value[i]);
+        return seq;
+    }
 
     store_type get(const uint16_t idx)
     {
-        assert(idx > 0 && idx < Dim);
-        return (data >> (BitsPerComponent * idx)) & Mask;
+        assert(idx >= 0 && idx < Dim);
+        return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent);
     }
 
     void set(const uint16_t idx, const store_type value)
     {
-        assert(idx > 0 && idx < Dim);
-        const uint16_t bits = (BitsPerComponent * idx);
-        data &= ~(Mask << bits);
-        data |= ((value >> DiscardBits) & Mask) << bits;
+        assert(idx >= 0 && idx < Dim);
+        data = glsl::bitfieldInsert(data, scalar_type(value >> DiscardBits), BitsPerComponent * idx, BitsPerComponent);
+    }
+
+    template<typename F, bool FullWidth>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
+    }
+
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
     }
 
     store_type data;
 };
 
 // Dim 2,3,4 matches vector dim
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5) >
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && vector_traits<T>::Dimension == Dim && Dim > 1 && Dim < 5) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<scalar_type>;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
-    scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; }
-    void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; }
+    static this_t create(const store_type value)
+    {
+        this_t seq;
+        seq.data = value;
+        return seq;
+    }
+
+    scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < Dim); return data[idx]; }
+    void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; }
+
+    template<typename F, bool FullWidth>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F,true>::__call(value);
+    }
+
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
 
     store_type data;
 };
 
-// uint16_t2, uint32_t2; Dim=3
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 3) >
+// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(is_same_v<T,uint32_t2> && Dim == 3)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint32_t2> && Dim == 3) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<BitsPerComponent>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
+
+    static this_t create(const vector<scalar_type, Dimension> value)
+    {
+        this_t seq;
+        seq.data = hlsl::promote<store_type>(0u);
+        NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
+            seq.set(i, value[i]);
+        return seq;
+    }
 
     scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 3);
-        if (idx < 2)
+        if (idx == 0)   // x
+            return glsl::bitfieldExtract(data[0], 0u, BitsPerComponent);
+        else if (idx == 1)  // y
         {
-            return data[idx] & Mask;
-        }
-        else
-        {
-            const scalar_type zbits = scalar_type(DiscardBits);
-            const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u);
-            scalar_type z = (data[0] >> BitsPerComponent) & zmask;
-            z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits;
-            return z;
+            scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits);
+            y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits;
+            return y;
         }
+        else    // z
+            return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent);
     }
 
     void set(const uint16_t idx, const scalar_type value)
     {
         assert(idx >= 0 && idx < 3);
-        if (idx < 2)
+        const scalar_type trunc_val = value >> DiscardBits;
+        if (idx == 0)   // x
+            data[0] = glsl::bitfieldInsert(data[0], trunc_val, 0u, BitsPerComponent);
+        else if (idx == 1)  // y
         {
-            const scalar_type trunc_val = value >> DiscardBits;
-            data[idx] &= ~Mask;
-            data[idx] |= trunc_val & Mask;
-        }
-        else
-        {
-            const scalar_type zbits = scalar_type(DiscardBits);
-            const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u);
-            const scalar_type trunc_val = value >> DiscardBits;
-            data[0] &= Mask;
-            data[1] &= Mask;
-            data[0] |= (trunc_val & zmask) << BitsPerComponent;
-            data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent;
+            data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits);
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u);
         }
+        else    // z
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent);
+    }
+
+    template<typename F, bool FullWidth>
+    static this_t encode(const vector<F, Dimension> value)
+    {
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
+    }
+
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
+    {
+        impl::decode_before_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
+    }
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
+    {
+        impl::decode_after_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
     }
 
     store_type data;
 };
 
-// uint16_t2, uint32_t2; Dim=4
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 4)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 4) >
+// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2
+template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(is_same_v<T,uint16_t2> && Dim == 4)
+struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(is_same_v<T,uint16_t2> && Dim == 4) >
 {
+    using this_t = QuantizedSequence<T, Dim>;
     using store_type = T;
     using scalar_type = typename vector_traits<T>::scalar_type;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
     NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u);
     NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<scalar_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<BitsPerComponent>::value;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim;
 
-    scalar_type get(const uint16_t idx)
+    static this_t create(const vector<scalar_type, Dimension> value)
     {
-        assert(idx >= 0 && idx < 4);
-        const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
-        return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask;
+        this_t seq;
+        seq.data = hlsl::promote<store_type, scalar_type>(0u);
+        NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++)
+            seq.set(i, value[i]);
+        return seq;
     }
 
-    void set(const uint16_t idx, const scalar_type value)
+    scalar_type get(const uint16_t idx)
     {
         assert(idx >= 0 && idx < 4);
-        const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
-        const uint16_t odd = idx & uint16_t(1u);
-        data[i] &= hlsl::mix(~Mask, Mask, bool(odd));
-        data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd);
+        if (idx < 2) // x y
+        {
+            return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent);
+        }
+        else    // z w
+        {
+            return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent);
+        }
     }
 
-    store_type data;
-};
-
-// uint16_t4, uint32_t4; Dim=2
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 2)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 2) >
-{
-    using store_type = T;
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using base_type = vector<scalar_type, 2>;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
-
-    base_type get(const uint16_t idx)
+    void set(const uint16_t idx, const scalar_type value)
     {
-        assert(idx >= 0 && idx < 2);
-        base_type a;
-        a[0] = data[uint16_t(2u) * idx];
-        a[1] = data[uint16_t(2u) * idx + 1];
-        return a;
+        assert(idx >= 0 && idx < 4);
+        const scalar_type trunc_val = value >> DiscardBits;
+        if (idx < 2) // x y
+        {
+            data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent * idx, BitsPerComponent);
+        }
+        else    // z w
+        {
+            data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent);
+        }
     }
 
-    void set(const uint16_t idx, const base_type value)
+    template<typename F, bool FullWidth>
+    static this_t encode(const vector<F, Dimension> value)
     {
-        assert(idx >= 0 && idx < 2);
-        base_type a;
-        data[uint16_t(2u) * idx] = value[0];
-        data[uint16_t(2u) * idx + 1] = value[1];
+        return impl::encode_helper<this_t,F,FullWidth>::__call(value);
     }
 
-    store_type data;
-};
-
-// uint16_t4, uint32_t4; Dim=3
-// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y
-// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
-template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 3)
-struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 4 && Dim == 3) >
-{
-    using store_type = T;
-    using scalar_type = typename vector_traits<T>::scalar_type;
-    using base_type = vector<scalar_type, 2>;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v<store_type>;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim;
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v<scalar_type>;
-    NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u);
-    NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v<base_type>) - BitsPerComponent;
-    NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value;
-
-    base_type get(const uint16_t idx)
+    template<typename F>
+    vector<F,Dimension> decode(const vector<unsigned_integer_of_size_t<sizeof(F)>,Dimension> scrambleKey)
     {
-        assert(idx >= 0 && idx < 3);
-        base_type a;
-        a[0] = data[idx];
-        a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask;
-        return a;
+        impl::decode_before_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
     }
-
-    void set(const uint16_t idx, const base_type value)
+    template<typename F>
+    vector<F,Dimension> decode(NBL_CONST_REF_ARG(this_t) scrambleKey)
     {
-        assert(idx >= 0 && idx < 3);
-        data[idx] = value[0];
-        data[3] &= ~Mask;
-        data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx);
+        impl::decode_after_scramble_helper<this_t,F> helper;
+        helper.val.data = data;
+        return helper(scrambleKey);
     }
 
     store_type data;
 };
 
-#undef SEQUENCE_SPECIALIZATION_CONCEPT
+// no uint16_t4, uint32_t4; Dim=2
 
 }