diff --git a/encodings/sparse/Cargo.toml b/encodings/sparse/Cargo.toml index b24f64e37e6..acf99a74ccf 100644 --- a/encodings/sparse/Cargo.toml +++ b/encodings/sparse/Cargo.toml @@ -35,3 +35,7 @@ vortex-array = { workspace = true, features = ["_test-harness"] } [[bench]] name = "sparse_canonical" harness = false + +[[bench]] +name = "sparse_pushdown" +harness = false diff --git a/encodings/sparse/benches/sparse_pushdown.rs b/encodings/sparse/benches/sparse_pushdown.rs new file mode 100644 index 00000000000..5abf0905f6e --- /dev/null +++ b/encodings/sparse/benches/sparse_pushdown.rs @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Benchmarks for the Sparse pushdown kernels (`is_constant`, `sum`, `min_max`, +//! `null_count`, compare). +//! +//! Each benchmark exercises the registered kernel path on a single representative +//! sparse `i32` array. All are `O(num_patches)`; the patch counts below are sized so +//! each lands in the ~10-100µs range for a stable CodSpeed signal. `between`/`fill_null`/ +//! `nan_count` are omitted since they mirror the compare/null_count cost profiles. 
+ +#![expect(clippy::cast_possible_truncation)] + +use std::sync::LazyLock; + +use divan::Bencher; +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::VortexSessionExecute; +use vortex_array::aggregate_fn::fns::is_constant::is_constant; +use vortex_array::aggregate_fn::fns::min_max::min_max; +use vortex_array::aggregate_fn::fns::null_count::null_count; +use vortex_array::aggregate_fn::fns::sum::sum; +use vortex_array::arrays::ConstantArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::PType; +use vortex_array::scalar::Scalar; +use vortex_array::scalar_fn::fns::operators::Operator; +use vortex_array::session::ArraySession; +use vortex_buffer::Buffer; +use vortex_error::VortexExpect; +use vortex_session::VortexSession; +use vortex_sparse::Sparse; + +fn main() { + divan::main(); +} + +const LEN: usize = 1_000_000; + +/// Session with Sparse and its pushdown kernels registered. +static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + vortex_sparse::initialize(&session); + session +}); + +/// Build a sparse `i32` array of `LEN` with `num_patches` uniformly-spaced patches and +/// fill value 1. When `constant` is true every patch also equals 1, so the whole array +/// is constant (the worst case for `is_constant`: it must scan all patches to confirm). 
+fn make_sparse(num_patches: usize, constant: bool) -> ArrayRef { + let stride = LEN / num_patches; + let indices: Buffer = (0..num_patches).map(|i| (i * stride) as u32).collect(); + let values: Buffer = (0..num_patches) + .map(|i| if constant { 1 } else { 2 + i as i32 }) + .collect(); + Sparse::try_new( + indices.into_array(), + values.into_array(), + LEN, + Scalar::from(1i32), + ) + .vortex_expect("valid sparse") + .into_array() +} + +/// Build a sparse `i32` array of `LEN` with a null fill and `num_patches` nullable patches +/// (every third patch null), so `null_count` does real `O(P)` work over the patch validity. +fn make_sparse_nullable(num_patches: usize) -> ArrayRef { + let stride = LEN / num_patches; + let indices: Buffer = (0..num_patches).map(|i| (i * stride) as u32).collect(); + let values = PrimitiveArray::from_option_iter( + (0..num_patches).map(|i| if i % 3 == 0 { None } else { Some(i as i32) }), + ) + .into_array(); + let nullable = DType::Primitive(PType::I32, Nullability::Nullable); + Sparse::try_new(indices.into_array(), values, LEN, Scalar::null(nullable)) + .vortex_expect("valid sparse") + .into_array() +} + +#[divan::bench] +fn sparse_is_constant(bencher: Bencher) { + bencher + .with_inputs(|| (make_sparse(100_000, true), SESSION.create_execution_ctx())) + .bench_values(|(array, mut ctx)| { + divan::black_box(is_constant(&array, &mut ctx).vortex_expect("is_constant")) + }); +} + +#[divan::bench] +fn sparse_sum(bencher: Bencher) { + bencher + .with_inputs(|| (make_sparse(100_000, false), SESSION.create_execution_ctx())) + .bench_values(|(array, mut ctx)| { + divan::black_box(sum(&array, &mut ctx).vortex_expect("sum")) + }); +} + +#[divan::bench] +fn sparse_min_max(bencher: Bencher) { + bencher + .with_inputs(|| (make_sparse(40_000, false), SESSION.create_execution_ctx())) + .bench_values(|(array, mut ctx)| { + divan::black_box(min_max(&array, &mut ctx).vortex_expect("min_max")) + }); +} + +#[divan::bench] +fn sparse_null_count(bencher: 
Bencher) { + bencher + .with_inputs(|| { + ( + make_sparse_nullable(130_000), + SESSION.create_execution_ctx(), + ) + }) + .bench_values(|(array, mut ctx)| { + divan::black_box(null_count(&array, &mut ctx).vortex_expect("null_count")) + }); +} + +#[divan::bench] +fn sparse_compare(bencher: Bencher) { + bencher + .with_inputs(|| (make_sparse(10_000, false), SESSION.create_execution_ctx())) + .bench_values(|(array, mut ctx)| { + let rhs = ConstantArray::new(Scalar::from(1i32), array.len()).into_array(); + let result = array.binary(rhs, Operator::Eq).vortex_expect("binary"); + divan::black_box(materialize(result, &mut ctx)) + }); +} + +fn materialize(array: ArrayRef, ctx: &mut ExecutionCtx) -> ArrayRef { + array + .execute::(ctx) + .vortex_expect("execute") + .into_array() +} diff --git a/encodings/sparse/public-api.lock b/encodings/sparse/public-api.lock index 33ec9fc9361..4b1badddae4 100644 --- a/encodings/sparse/public-api.lock +++ b/encodings/sparse/public-api.lock @@ -68,10 +68,22 @@ impl vortex_array::arrays::slice::SliceKernel for vortex_sparse::Sparse pub fn vortex_sparse::Sparse::slice(vortex_array::array::view::ArrayView<'_, Self>, core::ops::range::Range, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::fns::between::kernel::BetweenKernel for vortex_sparse::Sparse + +pub fn vortex_sparse::Sparse::between(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::array::erased::ArrayRef, &vortex_array::array::erased::ArrayRef, &vortex_array::scalar_fn::fns::between::BetweenOptions, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::compare::CompareKernel for vortex_sparse::Sparse + +pub fn vortex_sparse::Sparse::compare(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::array::erased::ArrayRef, vortex_array::scalar_fn::fns::operators::CompareOperator, &mut vortex_array::executor::ExecutionCtx) -> 
vortex_error::VortexResult> + impl vortex_array::scalar_fn::fns::cast::kernel::CastReduce for vortex_sparse::Sparse pub fn vortex_sparse::Sparse::cast(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::fns::fill_null::kernel::FillNullKernel for vortex_sparse::Sparse + +pub fn vortex_sparse::Sparse::fill_null(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::scalar::Scalar, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::scalar_fn::fns::not::kernel::NotReduce for vortex_sparse::Sparse pub fn vortex_sparse::Sparse::invert(vortex_array::array::view::ArrayView<'_, Self>) -> vortex_error::VortexResult> @@ -222,4 +234,6 @@ pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::patches( pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::resolved_patches(&self) -> vortex_error::VortexResult +pub fn vortex_sparse::initialize(&vortex_session::VortexSession) + pub type vortex_sparse::SparseArray = vortex_array::array::typed::Array diff --git a/encodings/sparse/src/compute/between.rs b/encodings/sparse/src/compute/between.rs new file mode 100644 index 00000000000..053a6694131 --- /dev/null +++ b/encodings/sparse/src/compute/between.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ArrayView; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::scalar_fn::fns::between::BetweenKernel; +use vortex_array::scalar_fn::fns::between::BetweenOptions; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific between kernel. 
+/// +/// `lower <= x <= upper` (with per-bound strictness) over a Sparse column with constant +/// bounds is itself sparse: every unpatched position resolves to `between(F, lo, hi)` and +/// every patched position to `between(patch, lo, hi)`. We push the range check into the +/// patches and rebuild a `Sparse` with the new fill, preserving downstream sparsity. +/// +/// Declines (falls back to canonical) unless both bounds are constants. +impl BetweenKernel for Sparse { + fn between( + array: ArrayView<'_, Self>, + lower: &ArrayRef, + upper: &ArrayRef, + options: &BetweenOptions, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let (Some(lo), Some(hi)) = (lower.as_constant(), upper.as_constant()) else { + return Ok(None); + }; + + let patches = array.patches(); + + let fill_bool = ConstantArray::new(array.fill_scalar().clone(), 1) + .into_array() + .between( + ConstantArray::new(lo.clone(), 1).into_array(), + ConstantArray::new(hi.clone(), 1).into_array(), + options.clone(), + )? + .execute_scalar(0, ctx)?; + + let new_patches = patches.map_values(|values| { + let len = values.len(); + values.between( + ConstantArray::new(lo.clone(), len).into_array(), + ConstantArray::new(hi.clone(), len).into_array(), + options.clone(), + ) + })?; + + Ok(Some( + Sparse::try_new_from_patches(new_patches, fill_bool)?.into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use rstest::rstest; + use vortex_array::Canonical; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::ConstantArray; + use vortex_array::assert_arrays_eq; + use vortex_array::builtins::ArrayBuiltins; + use vortex_array::scalar::Scalar; + use vortex_array::scalar_fn::fns::between::BetweenOptions; + use vortex_array::scalar_fn::fns::between::StrictComparison; + use vortex_array::session::ArraySession; + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::Sparse; + use crate::initialize; + + static SESSION: 
LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + initialize(&session); + session + }); + + #[rstest] + #[case(0i32, 100i32, StrictComparison::NonStrict, StrictComparison::NonStrict)] + #[case(5i32, 25i32, StrictComparison::Strict, StrictComparison::Strict)] + #[case(1i32, 20i32, StrictComparison::NonStrict, StrictComparison::Strict)] + fn between_matches_canonical( + #[case] lo: i32, + #[case] hi: i32, + #[case] lower_strict: StrictComparison, + #[case] upper_strict: StrictComparison, + ) { + let array = Sparse::try_new( + buffer![1u64, 3, 5].into_array(), + buffer![10i32, 20, 30].into_array(), + 8, + Scalar::from(1i32), + ) + .unwrap() + .into_array(); + let len = array.len(); + let options = BetweenOptions { + lower_strict, + upper_strict, + }; + + let lower = ConstantArray::new(Scalar::from(lo), len).into_array(); + let upper = ConstantArray::new(Scalar::from(hi), len).into_array(); + + let mut ctx = SESSION.create_execution_ctx(); + + // Kernel path: between pushes through the Sparse encoding. + let kernel = array + .clone() + .between(lower.clone(), upper.clone(), options.clone()) + .unwrap() + .execute::(&mut ctx) + .unwrap(); + + // Baseline: canonicalize the input first so between runs on a PrimitiveArray. 
+ let canonical_input = array.execute::(&mut ctx).unwrap().into_array(); + let baseline = canonical_input + .between(lower, upper, options) + .unwrap() + .execute::(&mut ctx) + .unwrap(); + + assert_arrays_eq!(kernel, baseline); + } +} diff --git a/encodings/sparse/src/compute/compare.rs b/encodings/sparse/src/compute/compare.rs new file mode 100644 index 00000000000..c64e8142298 --- /dev/null +++ b/encodings/sparse/src/compute/compare.rs @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ArrayView; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::scalar_fn::fns::binary::CompareKernel; +use vortex_array::scalar_fn::fns::binary::scalar_cmp; +use vortex_array::scalar_fn::fns::operators::CompareOperator; +use vortex_array::scalar_fn::fns::operators::Operator; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific compare kernel. +/// +/// When the RHS is a constant scalar, the result of any comparison is itself sparse: +/// every unpatched position resolves to `compare(fill, rhs)`, and every patched position +/// to `compare(patch, rhs)`. We push the comparison into the patches and rebuild a +/// `Sparse` with the new fill, preserving downstream sparsity (filter masks, etc.). +/// +/// For non-constant RHS we decline and let the canonical fallback handle it. 
+impl CompareKernel for Sparse { + fn compare( + lhs: ArrayView<'_, Self>, + rhs: &ArrayRef, + operator: CompareOperator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(rhs_scalar) = rhs.as_constant() else { + return Ok(None); + }; + + let fill_bool = scalar_cmp(lhs.fill_scalar(), &rhs_scalar, operator)?; + let patches = lhs.patches(); + + let new_patches = patches.map_values(|values| { + let len = values.len(); + values.binary( + ConstantArray::new(rhs_scalar.clone(), len).into_array(), + Operator::from(operator), + ) + })?; + + Ok(Some( + Sparse::try_new_from_patches(new_patches, fill_bool)?.into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use rstest::rstest; + use vortex_array::Canonical; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::ConstantArray; + use vortex_array::assert_arrays_eq; + use vortex_array::builtins::ArrayBuiltins; + use vortex_array::scalar::Scalar; + use vortex_array::scalar_fn::fns::operators::Operator; + use vortex_array::session::ArraySession; + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::Sparse; + use crate::SparseArray; + use crate::initialize; + + static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + initialize(&session); + session + }); + + #[rstest] + #[case::eq_fill(Scalar::from(1i32), Operator::Eq)] + #[case::eq_patch(Scalar::from(10i32), Operator::Eq)] + #[case::gt(Scalar::from(5i32), Operator::Gt)] + #[case::lte(Scalar::from(10i32), Operator::Lte)] + #[case::neq(Scalar::from(1i32), Operator::NotEq)] + fn compare_matches_canonical(#[case] rhs: Scalar, #[case] op: Operator) { + let array: SparseArray = Sparse::try_new( + buffer![1u64, 3, 5].into_array(), + buffer![10i32, 20, 30].into_array(), + 8, + Scalar::from(1i32), + ) + .unwrap(); + let arr = array.into_array(); + let len = arr.len(); + let mut ctx = SESSION.create_execution_ctx(); + + // Kernel 
path: compare pushes through the Sparse encoding. + let kernel_bool = arr + .binary(ConstantArray::new(rhs.clone(), len).into_array(), op) + .unwrap() + .execute::(&mut ctx) + .unwrap(); + + // Baseline: canonicalize first, then compare on the PrimitiveArray. + let canonical_input = arr.execute::(&mut ctx).unwrap().into_array(); + let canonical_bool = canonical_input + .binary(ConstantArray::new(rhs, len).into_array(), op) + .unwrap() + .execute::(&mut ctx) + .unwrap(); + + assert_arrays_eq!(kernel_bool, canonical_bool); + } +} diff --git a/encodings/sparse/src/compute/fill_null.rs b/encodings/sparse/src/compute/fill_null.rs new file mode 100644 index 00000000000..fd62926b2f3 --- /dev/null +++ b/encodings/sparse/src/compute/fill_null.rs @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ArrayView; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::scalar::Scalar; +use vortex_array::scalar_fn::fns::fill_null::FillNullKernel; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific fill_null kernel. +/// +/// `fill_null(Sparse{ F, patches }, v)` replaces nulls in the fill and in each patch value +/// with the (non-null) `v`, staying sparse: the new fill is `v` if `F` was null, else `F` +/// cast to the non-nullable result dtype. The work is `O(P)`. +impl FillNullKernel for Sparse { + fn fill_null( + array: ArrayView<'_, Self>, + fill_value: &Scalar, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let new_fill = if array.fill_scalar().is_null() { + fill_value.clone() + } else { + array.fill_scalar().cast(fill_value.dtype())? 
+ }; + + let new_patches = array + .patches() + .map_values(|values| values.fill_null(fill_value.clone()))?; + + Ok(Some( + Sparse::try_new_from_patches(new_patches, new_fill)?.into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use rstest::rstest; + use vortex_array::Canonical; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::builtins::ArrayBuiltins; + use vortex_array::dtype::DType; + use vortex_array::dtype::Nullability; + use vortex_array::dtype::PType; + use vortex_array::scalar::Scalar; + use vortex_array::session::ArraySession; + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::Sparse; + use crate::initialize; + + static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + initialize(&session); + session + }); + + fn nullable_i32() -> DType { + DType::Primitive(PType::I32, Nullability::Nullable) + } + + #[rstest] + // null fill, some null patches + #[case(Sparse::try_new( + buffer![1u64, 3, 5].into_array(), + PrimitiveArray::from_option_iter([Some(10i32), None, Some(30)]).into_array().cast(nullable_i32()).unwrap(), + 8, + Scalar::null(nullable_i32()), + ).unwrap().into_array())] + // non-null fill, nullable patches with a null + #[case(Sparse::try_new( + buffer![0u64, 2].into_array(), + PrimitiveArray::from_option_iter([Some(7i32), None]).into_array().cast(nullable_i32()).unwrap(), + 4, + Scalar::from(1i32).cast(&nullable_i32()).unwrap(), + ).unwrap().into_array())] + fn fill_null_matches_canonical(#[case] array: vortex_array::ArrayRef) { + let mut ctx = SESSION.create_execution_ctx(); + let fill = Scalar::from(0i32); + + // Kernel path: fill_null pushes through the Sparse encoding. 
+ let kernel = array + .fill_null(fill.clone()) + .unwrap() + .execute::(&mut ctx) + .unwrap(); + + // Baseline: canonicalize first, then fill_null on the PrimitiveArray. + let canonical_input = array.execute::(&mut ctx).unwrap().into_array(); + let baseline = canonical_input + .fill_null(fill) + .unwrap() + .execute::(&mut ctx) + .unwrap(); + + assert_arrays_eq!(kernel, baseline); + } +} diff --git a/encodings/sparse/src/compute/is_constant.rs b/encodings/sparse/src/compute/is_constant.rs new file mode 100644 index 00000000000..cfd31c6698d --- /dev/null +++ b/encodings/sparse/src/compute/is_constant.rs @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::aggregate_fn::AggregateFnRef; +use vortex_array::aggregate_fn::fns::is_constant::IsConstant; +use vortex_array::aggregate_fn::fns::is_constant::is_constant; +use vortex_array::aggregate_fn::kernels::DynAggregateKernel; +use vortex_array::scalar::Scalar; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific `is_constant` kernel. +/// +/// A `SparseArray` of length `N` with `P` patches and a fill value `F` is constant iff: +/// - `P == 0`: all positions hold `F`. +/// - `0 < P < N`: every patch equals `F`, i.e. `is_constant(patch_values)` AND the common +/// patch value equals `F`. +/// - `P == N`: every position is patched, so the answer is `is_constant(patch_values)`. +/// +/// In all cases the work is `O(P)` instead of `O(N)`. 
+#[derive(Debug)] +pub(crate) struct SparseIsConstantKernel; + +impl DynAggregateKernel for SparseIsConstantKernel { + fn aggregate( + &self, + aggregate_fn: &AggregateFnRef, + batch: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if !aggregate_fn.is::() { + return Ok(None); + } + + let Some(sparse) = batch.as_opt::() else { + return Ok(None); + }; + + let patches = sparse.patches(); + let num_patches = patches.num_patches(); + let len = sparse.len(); + + let result = if num_patches == 0 { + // Whole array is the fill value. + true + } else if num_patches < len { + // Mixed: needs all patches equal AND equal to fill. + if !is_constant(patches.values(), ctx)? { + false + } else { + let first_patch = patches.values().execute_scalar(0, ctx)?; + &first_patch == sparse.fill_scalar() + } + } else { + // Every position is patched; answer depends purely on patch_values. + is_constant(patches.values(), ctx)? + }; + + Ok(Some(IsConstant::make_partial(batch, result, ctx)?)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use rstest::rstest; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::aggregate_fn::fns::is_constant::is_constant; + use vortex_array::scalar::Scalar; + use vortex_array::session::ArraySession; + use vortex_array::session::ArraySessionExt; + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use crate::Sparse; + use crate::SparseArray; + use crate::initialize; + + /// Session with Sparse + its pushdown kernels. + static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + initialize(&session); + session + }); + + /// Baseline session: Sparse registered but no pushdown kernels. 
+ static CANONICAL_SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + session.arrays().register(Sparse); + session + }); + + fn check(array: SparseArray) -> VortexResult { + let arr = array.into_array(); + let kernel_result = is_constant(&arr, &mut SESSION.create_execution_ctx())?; + let canonical_result = is_constant(&arr, &mut CANONICAL_SESSION.create_execution_ctx())?; + assert_eq!( + kernel_result, canonical_result, + "kernel and canonical paths disagree" + ); + Ok(kernel_result) + } + + #[rstest] + #[case::all_patches_equal_fill( + Sparse::try_new( + buffer![1u64, 3, 5].into_array(), + buffer![7i32, 7, 7].into_array(), + 10, + Scalar::from(7i32), + ).unwrap(), + true, + )] + #[case::mixed_patches_but_unequal_fill( + Sparse::try_new( + buffer![1u64, 3].into_array(), + buffer![9i32, 9].into_array(), + 5, + Scalar::from(7i32), + ).unwrap(), + false, + )] + #[case::single_patch_differs( + Sparse::try_new( + buffer![1u64].into_array(), + buffer![3i32].into_array(), + 5, + Scalar::from(7i32), + ).unwrap(), + false, + )] + #[case::all_patched_constant( + Sparse::try_new( + buffer![0u64, 1, 2, 3].into_array(), + buffer![5i32, 5, 5, 5].into_array(), + 4, + Scalar::from(99i32), // fill is unreachable + ).unwrap(), + true, + )] + #[case::all_patched_not_constant( + Sparse::try_new( + buffer![0u64, 1, 2].into_array(), + buffer![1i32, 2, 3].into_array(), + 3, + Scalar::from(99i32), + ).unwrap(), + false, + )] + fn is_constant_kernel(#[case] array: SparseArray, #[case] expected: bool) { + assert_eq!(check(array).unwrap(), expected); + } +} diff --git a/encodings/sparse/src/compute/min_max.rs b/encodings/sparse/src/compute/min_max.rs new file mode 100644 index 00000000000..22ff3e86c53 --- /dev/null +++ b/encodings/sparse/src/compute/min_max.rs @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use 
vortex_array::IntoArray; +use vortex_array::aggregate_fn::Accumulator; +use vortex_array::aggregate_fn::AggregateFnRef; +use vortex_array::aggregate_fn::DynAccumulator; +use vortex_array::aggregate_fn::EmptyOptions; +use vortex_array::aggregate_fn::fns::min_max::MinMax; +use vortex_array::aggregate_fn::kernels::DynAggregateKernel; +use vortex_array::arrays::ConstantArray; +use vortex_array::scalar::Scalar; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific min/max kernel. +/// +/// `min/max(Sparse{ F, patches })` folds the min/max of `patch_values` together with the +/// fill scalar `F` — but only when `F` is reachable (`P < N`) and valid. The work is +/// `O(P)` instead of `O(N)`. +#[derive(Debug)] +pub(crate) struct SparseMinMaxKernel; + +impl DynAggregateKernel for SparseMinMaxKernel { + fn aggregate( + &self, + aggregate_fn: &AggregateFnRef, + batch: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if !aggregate_fn.is::() { + return Ok(None); + } + + let Some(sparse) = batch.as_opt::() else { + return Ok(None); + }; + + let patches = sparse.patches(); + + let mut acc = Accumulator::try_new(MinMax, EmptyOptions, batch.dtype().clone())?; + + if !patches.values().is_empty() { + acc.accumulate(patches.values(), ctx)?; + } + + // Fold the fill value in only when at least one position is unpatched and the fill + // is non-null (null fill never participates in min/max). 
+ if patches.num_patches() < sparse.len() && sparse.fill_scalar().is_valid() { + let fill_array = ConstantArray::new(sparse.fill_scalar().clone(), 1).into_array(); + acc.accumulate(&fill_array, ctx)?; + } + + Ok(Some(acc.partial_scalar()?)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use rstest::rstest; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::aggregate_fn::fns::min_max::MinMaxResult; + use vortex_array::aggregate_fn::fns::min_max::min_max; + use vortex_array::scalar::Scalar; + use vortex_array::session::ArraySession; + use vortex_array::session::ArraySessionExt; + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::Sparse; + use crate::SparseArray; + use crate::initialize; + + static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + initialize(&session); + session + }); + + static CANONICAL_SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + session.arrays().register(Sparse); + session + }); + + #[rstest] + // fill below all patches + #[case(Sparse::try_new(buffer![1u64, 3, 5].into_array(), buffer![10i32, 20, 30].into_array(), 8, Scalar::from(1i32)).unwrap())] + // fill above all patches + #[case(Sparse::try_new(buffer![1u64, 3, 5].into_array(), buffer![10i32, 20, 30].into_array(), 8, Scalar::from(99i32)).unwrap())] + // fill in the middle + #[case(Sparse::try_new(buffer![1u64, 3, 5].into_array(), buffer![10i32, 20, 30].into_array(), 8, Scalar::from(15i32)).unwrap())] + // every position patched (fill unreachable) + #[case(Sparse::try_new(buffer![0u64, 1, 2].into_array(), buffer![7i32, 3, 9].into_array(), 3, Scalar::from(99i32)).unwrap())] + fn min_max_matches_canonical(#[case] array: SparseArray) { + let arr = array.into_array(); + let kernel: Option = + min_max(&arr, &mut SESSION.create_execution_ctx()).unwrap(); + let canonical: Option = + min_max(&arr, &mut 
CANONICAL_SESSION.create_execution_ctx()).unwrap(); + assert_eq!(kernel, canonical); + } +} diff --git a/encodings/sparse/src/compute/mod.rs b/encodings/sparse/src/compute/mod.rs index 28e8c366b51..fac33edf47e 100644 --- a/encodings/sparse/src/compute/mod.rs +++ b/encodings/sparse/src/compute/mod.rs @@ -1,8 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +mod between; mod cast; +mod compare; +mod fill_null; mod filter; +pub(crate) mod is_constant; +pub(crate) mod min_max; +pub(crate) mod nan_count; +pub(crate) mod null_count; +pub(crate) mod sum; mod take; #[cfg(test)] diff --git a/encodings/sparse/src/compute/nan_count.rs b/encodings/sparse/src/compute/nan_count.rs new file mode 100644 index 00000000000..7fe03e21103 --- /dev/null +++ b/encodings/sparse/src/compute/nan_count.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::aggregate_fn::Accumulator; +use vortex_array::aggregate_fn::AggregateFnRef; +use vortex_array::aggregate_fn::AggregateFnVTable as _; +use vortex_array::aggregate_fn::DynAccumulator; +use vortex_array::aggregate_fn::EmptyOptions; +use vortex_array::aggregate_fn::fns::nan_count::NanCount; +use vortex_array::aggregate_fn::kernels::DynAggregateKernel; +use vortex_array::arrays::ConstantArray; +use vortex_array::scalar::Scalar; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific NaN-count kernel. +/// +/// `nan_count(Sparse{ F, patches }) = nan_count(patch_values) + (F is NaN ? N - P : 0)`. +/// +/// Declines for non-float dtypes. The work is `O(P)` instead of `O(N)`. 
+#[derive(Debug)] +pub(crate) struct SparseNanCountKernel; + +impl DynAggregateKernel for SparseNanCountKernel { + fn aggregate( + &self, + aggregate_fn: &AggregateFnRef, + batch: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if !aggregate_fn.is::() { + return Ok(None); + } + + let Some(sparse) = batch.as_opt::() else { + return Ok(None); + }; + + // NaN count is only defined for floating-point dtypes. + if NanCount + .return_dtype(&EmptyOptions, batch.dtype()) + .is_none() + { + return Ok(None); + } + + let patches = sparse.patches(); + + let mut acc = Accumulator::try_new(NanCount, EmptyOptions, batch.dtype().clone())?; + + let n_fill = sparse.len() - patches.num_patches(); + if n_fill > 0 { + // The Constant accumulate path checks `is_nan` once and multiplies by length. + let fill_array = ConstantArray::new(sparse.fill_scalar().clone(), n_fill).into_array(); + acc.accumulate(&fill_array, ctx)?; + } + + if !patches.values().is_empty() { + acc.accumulate(patches.values(), ctx)?; + } + + Ok(Some(acc.partial_scalar()?)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use rstest::rstest; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::aggregate_fn::fns::nan_count::nan_count; + use vortex_array::scalar::Scalar; + use vortex_array::session::ArraySession; + use vortex_array::session::ArraySessionExt; + use vortex_buffer::buffer; + use vortex_session::VortexSession; + + use crate::Sparse; + use crate::SparseArray; + use crate::initialize; + + static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + initialize(&session); + session + }); + + static CANONICAL_SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + session.arrays().register(Sparse); + session + }); + + #[rstest] + // NaN fill value → all unpatched positions are NaN + #[case(Sparse::try_new(buffer![1u64, 3].into_array(), buffer![1.0f32, 2.0].into_array(), 6, 
Scalar::from(f32::NAN)).unwrap())] + // NaN patch values, finite fill + #[case(Sparse::try_new(buffer![1u64, 3].into_array(), buffer![f32::NAN, 2.0].into_array(), 6, Scalar::from(0.0f32)).unwrap())] + // no NaNs anywhere + #[case(Sparse::try_new(buffer![1u64, 3].into_array(), buffer![1.0f32, 2.0].into_array(), 6, Scalar::from(0.0f32)).unwrap())] + fn nan_count_matches_canonical(#[case] array: SparseArray) { + let arr = array.into_array(); + let kernel = nan_count(&arr, &mut SESSION.create_execution_ctx()).unwrap(); + let canonical = nan_count(&arr, &mut CANONICAL_SESSION.create_execution_ctx()).unwrap(); + assert_eq!(kernel, canonical); + } +} diff --git a/encodings/sparse/src/compute/null_count.rs b/encodings/sparse/src/compute/null_count.rs new file mode 100644 index 00000000000..ad5e875ea6c --- /dev/null +++ b/encodings/sparse/src/compute/null_count.rs @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::aggregate_fn::AggregateFnRef; +use vortex_array::aggregate_fn::fns::null_count::NullCount; +use vortex_array::aggregate_fn::fns::null_count::null_count; +use vortex_array::aggregate_fn::kernels::DynAggregateKernel; +use vortex_array::dtype::Nullability::NonNullable; +use vortex_array::scalar::Scalar; +use vortex_error::VortexResult; + +use crate::Sparse; +use crate::SparseExt as _; + +/// Sparse-specific null-count kernel. +/// +/// `null_count(Sparse{ F, patches }) = null_count(patch_values) + (F is null ? N - P : 0)`. +/// +/// When the fill is non-null this is just the patches' null count (often a cached `O(1)` +/// statistic); either way the work is `O(P)` instead of `O(N)`. 
+#[derive(Debug)]
+pub(crate) struct SparseNullCountKernel;
+
+impl DynAggregateKernel for SparseNullCountKernel {
+    fn aggregate(
+        &self,
+        aggregate_fn: &AggregateFnRef,
+        batch: &ArrayRef,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Option<Scalar>> {
+        if !aggregate_fn.is::<NullCount>() {
+            return Ok(None);
+        }
+
+        let Some(sparse) = batch.as_opt::<Sparse>() else {
+            return Ok(None);
+        };
+
+        let patches = sparse.patches();
+        let fill_nulls = if sparse.fill_scalar().is_null() {
+            (sparse.len() - patches.num_patches()) as u64
+        } else {
+            0
+        };
+        let patch_nulls = null_count(patches.values(), ctx)? as u64;
+
+        Ok(Some(Scalar::primitive(
+            fill_nulls + patch_nulls,
+            NonNullable,
+        )))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::LazyLock;
+
+    use rstest::rstest;
+    use vortex_array::IntoArray;
+    use vortex_array::VortexSessionExecute;
+    use vortex_array::aggregate_fn::fns::null_count::null_count;
+    use vortex_array::arrays::PrimitiveArray;
+    use vortex_array::builtins::ArrayBuiltins;
+    use vortex_array::dtype::DType;
+    use vortex_array::dtype::Nullability;
+    use vortex_array::dtype::PType;
+    use vortex_array::scalar::Scalar;
+    use vortex_array::session::ArraySession;
+    use vortex_array::session::ArraySessionExt;
+    use vortex_buffer::buffer;
+    use vortex_session::VortexSession;
+
+    use crate::Sparse;
+    use crate::SparseArray;
+    use crate::initialize;
+
+    static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
+        let session = VortexSession::empty().with::<ArraySession>();
+        initialize(&session);
+        session
+    });
+
+    static CANONICAL_SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
+        let session = VortexSession::empty().with::<ArraySession>();
+        session.arrays().register(Sparse);
+        session
+    });
+
+    fn nullable_i32() -> DType {
+        DType::Primitive(PType::I32, Nullability::Nullable)
+    }
+
+    #[rstest]
+    // non-null fill, no null patches → 0
+    #[case(Sparse::try_new(buffer![1u64, 3].into_array(), buffer![10i32, 20].into_array(), 5, Scalar::from(1i32)).unwrap())]
+    // null fill (8 - 2 = 6 fill nulls), patches non-null
+    #[case(Sparse::try_new(
+        buffer![1u64, 3].into_array(),
+        PrimitiveArray::from_option_iter([Some(10i32), Some(20)]).into_array().cast(nullable_i32()).unwrap(),
+        8,
+        Scalar::null(nullable_i32()),
+    ).unwrap())]
+    // null fill + some null patches
+    #[case(Sparse::try_new(
+        buffer![0u64, 2, 4].into_array(),
+        PrimitiveArray::from_option_iter([Some(10i32), None, Some(30)]).into_array().cast(nullable_i32()).unwrap(),
+        6,
+        Scalar::null(nullable_i32()),
+    ).unwrap())]
+    fn null_count_matches_canonical(#[case] array: SparseArray) {
+        let arr = array.into_array();
+        let kernel = null_count(&arr, &mut SESSION.create_execution_ctx()).unwrap();
+        let canonical = null_count(&arr, &mut CANONICAL_SESSION.create_execution_ctx()).unwrap();
+        assert_eq!(kernel, canonical);
+    }
+}
diff --git a/encodings/sparse/src/compute/sum.rs b/encodings/sparse/src/compute/sum.rs
new file mode 100644
index 00000000000..4b068aa57b4
--- /dev/null
+++ b/encodings/sparse/src/compute/sum.rs
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use vortex_array::ArrayRef;
+use vortex_array::ExecutionCtx;
+use vortex_array::IntoArray;
+use vortex_array::aggregate_fn::Accumulator;
+use vortex_array::aggregate_fn::AggregateFnRef;
+use vortex_array::aggregate_fn::DynAccumulator;
+use vortex_array::aggregate_fn::EmptyOptions;
+use vortex_array::aggregate_fn::fns::sum::Sum;
+use vortex_array::aggregate_fn::kernels::DynAggregateKernel;
+use vortex_array::arrays::ConstantArray;
+use vortex_array::scalar::Scalar;
+use vortex_error::VortexResult;
+
+use crate::Sparse;
+use crate::SparseExt as _;
+
+/// Sparse-specific `sum` kernel.
+///
+/// `sum(Sparse{ F, patches }) = sum(patches.values) + F * (N - patches.num_patches())`.
+///
+/// The constant contribution is computed via the existing `Sum` accumulator's constant
+/// short-circuit (`multiply_constant`), so overflow saturates to null exactly as in the
+/// baseline. The work is `O(P)` instead of `O(N)`.
+#[derive(Debug)]
+pub(crate) struct SparseSumKernel;
+
+impl DynAggregateKernel for SparseSumKernel {
+    fn aggregate(
+        &self,
+        aggregate_fn: &AggregateFnRef,
+        batch: &ArrayRef,
+        ctx: &mut ExecutionCtx,
+    ) -> VortexResult<Option<Scalar>> {
+        if !aggregate_fn.is::<Sum>() {
+            return Ok(None);
+        }
+
+        let Some(sparse) = batch.as_opt::<Sparse>() else {
+            return Ok(None);
+        };
+
+        let patches = sparse.patches();
+        let n_fill = sparse.len() - patches.num_patches();
+
+        // Build a fresh Sum accumulator over the array dtype and fold in the fill and patch
+        // contributions. The accumulator's existing semantics (checked overflow → null
+        // partial) are preserved.
+        let mut acc = Accumulator::try_new(Sum, EmptyOptions, batch.dtype().clone())?;
+
+        if n_fill > 0 {
+            let fill_array = ConstantArray::new(sparse.fill_scalar().clone(), n_fill).into_array();
+            acc.accumulate(&fill_array, ctx)?;
+        }
+
+        if !patches.values().is_empty() {
+            acc.accumulate(patches.values(), ctx)?;
+        }
+
+        Ok(Some(acc.partial_scalar()?))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::LazyLock;
+
+    use rstest::rstest;
+    use vortex_array::IntoArray;
+    use vortex_array::VortexSessionExecute;
+    use vortex_array::aggregate_fn::fns::sum::sum;
+    use vortex_array::scalar::Scalar;
+    use vortex_array::session::ArraySession;
+    use vortex_array::session::ArraySessionExt;
+    use vortex_buffer::buffer;
+    use vortex_error::VortexResult;
+    use vortex_session::VortexSession;
+
+    use crate::Sparse;
+    use crate::SparseArray;
+    use crate::initialize;
+
+    static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
+        let session = VortexSession::empty().with::<ArraySession>();
+        initialize(&session);
+        session
+    });
+
+    static CANONICAL_SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
+        let session = VortexSession::empty().with::<ArraySession>();
+        session.arrays().register(Sparse);
+        session
+    });
+
+    fn check(array: SparseArray) -> VortexResult<Scalar> {
+        let arr = array.into_array();
+        let kernel_result = sum(&arr, &mut SESSION.create_execution_ctx())?;
+        let canonical_result = sum(&arr, &mut CANONICAL_SESSION.create_execution_ctx())?;
+        assert_eq!(
+            kernel_result, canonical_result,
+            "kernel and canonical sum paths disagree"
+        );
+        Ok(kernel_result)
+    }
+
+    #[rstest]
+    #[case::positive_fill(
+        Sparse::try_new(
+            buffer![0u64, 2].into_array(),
+            buffer![10i32, 20].into_array(),
+            5,
+            Scalar::from(1i32),
+        ).unwrap(),
+        // 10 + 1 + 20 + 1 + 1 = 33
+        33i64,
+    )]
+    #[case::zero_fill(
+        Sparse::try_new(
+            buffer![1u64, 4].into_array(),
+            buffer![7i32, 8].into_array(),
+            10,
+            Scalar::from(0i32),
+        ).unwrap(),
+        15i64,
+    )]
+    fn sum_kernel_i32(#[case] array: SparseArray, #[case] expected: i64) {
+        let result = check(array).unwrap();
+        assert_eq!(result.as_primitive().typed_value::<i64>(), Some(expected));
+    }
+
+    #[rstest]
+    #[case::null_fill_no_overflow(
+        Sparse::try_new(
+            buffer![0u64, 3].into_array(),
+            vortex_array::arrays::PrimitiveArray::from_option_iter([Some(5i64), Some(11)])
+                .into_array(),
+            6,
+            Scalar::null(vortex_array::dtype::DType::Primitive(
+                vortex_array::dtype::PType::I64,
+                vortex_array::dtype::Nullability::Nullable,
+            )),
+        ).unwrap(),
+        16i64,
+    )]
+    fn sum_kernel_nullable(#[case] array: SparseArray, #[case] expected: i64) {
+        let result = check(array).unwrap();
+        assert_eq!(result.as_primitive().typed_value::<i64>(), Some(expected));
+    }
+}
diff --git a/encodings/sparse/src/kernel.rs b/encodings/sparse/src/kernel.rs
index 18928ea0142..0f5d9fd51c0 100644
--- a/encodings/sparse/src/kernel.rs
+++ b/encodings/sparse/src/kernel.rs
@@ -5,10 +5,16 @@ use vortex_array::arrays::dict::TakeExecuteAdaptor;
 use vortex_array::arrays::filter::FilterExecuteAdaptor;
 use vortex_array::arrays::slice::SliceExecuteAdaptor;
 use vortex_array::kernel::ParentKernelSet;
+use vortex_array::scalar_fn::fns::between::BetweenExecuteAdaptor;
+use vortex_array::scalar_fn::fns::binary::CompareExecuteAdaptor;
+use vortex_array::scalar_fn::fns::fill_null::FillNullExecuteAdaptor;
 
 use crate::Sparse;
 
 pub(crate) static PARENT_KERNELS: ParentKernelSet<Sparse> = ParentKernelSet::new(&[
+    ParentKernelSet::lift(&BetweenExecuteAdaptor(Sparse)),
+    ParentKernelSet::lift(&CompareExecuteAdaptor(Sparse)),
+    ParentKernelSet::lift(&FillNullExecuteAdaptor(Sparse)),
     ParentKernelSet::lift(&FilterExecuteAdaptor(Sparse)),
     ParentKernelSet::lift(&SliceExecuteAdaptor(Sparse)),
     ParentKernelSet::lift(&TakeExecuteAdaptor(Sparse)),
diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs
index 74b137fd1c7..42b5cd46724 100644
--- a/encodings/sparse/src/lib.rs
+++ b/encodings/sparse/src/lib.rs
@@ -68,6 +68,51 @@ mod ops;
 mod rules;
 mod slice;
 
+use vortex_array::aggregate_fn::AggregateFnVTable as _;
+use vortex_array::aggregate_fn::fns::is_constant::IsConstant;
+use vortex_array::aggregate_fn::fns::min_max::MinMax;
+use vortex_array::aggregate_fn::fns::nan_count::NanCount;
+use vortex_array::aggregate_fn::fns::null_count::NullCount;
+use vortex_array::aggregate_fn::fns::sum::Sum;
+use vortex_array::aggregate_fn::session::AggregateFnSessionExt;
+use vortex_array::session::ArraySessionExt;
+
+/// Initialize Sparse encoding in the given session.
+///
+/// Registers the Sparse array vtable and its aggregate kernels (`IsConstant`, `Sum`,
+/// `MinMax`, `NullCount`, `NanCount`). Compare/between/fill_null pushdown is wired
+/// through `PARENT_KERNELS` (see `kernel.rs`) and does not require registration here.
+pub fn initialize(session: &VortexSession) {
+    session.arrays().register(Sparse);
+
+    let aggregate_fns = session.aggregate_fns();
+    aggregate_fns.register_aggregate_kernel(
+        Sparse.id(),
+        Some(IsConstant.id()),
+        &compute::is_constant::SparseIsConstantKernel,
+    );
+    aggregate_fns.register_aggregate_kernel(
+        Sparse.id(),
+        Some(Sum.id()),
+        &compute::sum::SparseSumKernel,
+    );
+    aggregate_fns.register_aggregate_kernel(
+        Sparse.id(),
+        Some(MinMax.id()),
+        &compute::min_max::SparseMinMaxKernel,
+    );
+    aggregate_fns.register_aggregate_kernel(
+        Sparse.id(),
+        Some(NullCount.id()),
+        &compute::null_count::SparseNullCountKernel,
+    );
+    aggregate_fns.register_aggregate_kernel(
+        Sparse.id(),
+        Some(NanCount.id()),
+        &compute::nan_count::SparseNanCountKernel,
+    );
+}
+
 /// A [`Sparse`]-encoded Vortex array.
 pub type SparseArray = Array<Sparse>;
 
diff --git a/vortex-file/src/lib.rs b/vortex-file/src/lib.rs
index ce6598173a6..e69b5848de2 100644
--- a/vortex-file/src/lib.rs
+++ b/vortex-file/src/lib.rs
@@ -117,7 +117,6 @@ use vortex_bytebool::ByteBool;
 use vortex_fsst::FSST;
 use vortex_pco::Pco;
 use vortex_session::VortexSession;
-use vortex_sparse::Sparse;
 use vortex_zigzag::ZigZag;
 pub use writer::*;
 
@@ -164,7 +163,6 @@ pub fn register_default_encodings(session: &VortexSession) {
     arrays.register(Dict);
     arrays.register(FSST);
     arrays.register(Pco);
-    arrays.register(Sparse);
     arrays.register(ZigZag);
     #[cfg(feature = "zstd")]
     arrays.register(vortex_zstd::Zstd);
@@ -183,6 +181,7 @@ pub fn register_default_encodings(session: &VortexSession) {
     vortex_fastlanes::initialize(session);
     vortex_runend::initialize(session);
     vortex_sequence::initialize(session);
+    vortex_sparse::initialize(session);
     #[cfg(feature = "unstable_encodings")]
     vortex_tensor::initialize(session);