From d21056430fb1355d55ec7975ad2322d0a54e00c9 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 2 Mar 2026 16:57:02 +0000 Subject: [PATCH 01/12] fix Signed-off-by: Joe Isaacs --- vortex-array/src/array/mod.rs | 16 ++++++++++++++++ vortex-array/src/scalar_fn/fns/binary/compare.rs | 16 ++++++++++++++++ vortex-layout/src/layouts/row_idx/mod.rs | 1 + 3 files changed, 33 insertions(+) diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index fc47631f35a..7dca5d8aef1 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -16,6 +16,7 @@ pub use visitor::*; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; @@ -43,6 +44,7 @@ use crate::arrays::VarBinVTable; use crate::arrays::VarBinViewVTable; use crate::buffer::BufferHandle; use crate::builders::ArrayBuilder; +use crate::builtins::ArrayBuiltins; use crate::compute; use crate::dtype::DType; use crate::dtype::Nullability; @@ -353,6 +355,20 @@ impl dyn DynArray + '_ { pub fn is_canonical(&self) -> bool { self.is::() } + + /// Converts from a possible nullable boolean array. Null values are treated as false. + pub fn try_to_mask_fill_null_false(&self) -> VortexResult { + if !matches!(self.dtype(), DType::Bool(_)) { + vortex_bail!("mask must be bool array, has dtype {}", self.dtype()); + } + + // Convert nulls to false first in case this can be done cheaply by the encoding. + let array = self + .to_array() + .fill_null(Scalar::bool(false, self.dtype().nullability()))?; + + Ok(array.to_bool().to_mask_fill_null_false()) + } } /// Trait for converting a type into a Vortex [`ArrayRef`]. diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index 3730b13cfe8..8e6db72d43e 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -490,4 +490,20 @@ mod tests { assert!(result.scalar_at(1).unwrap().is_valid()); assert!(result.scalar_at(2).unwrap().is_valid()); } + + #[rstest] + #[case(CompareOperator::Eq, vec![false, false, false, true])] + #[case(CompareOperator::NotEq, vec![true, true, true, false])] + #[case(CompareOperator::Gt, vec![true, true, true, false])] + #[case(CompareOperator::Gte, vec![true, true, true, true])] + #[case(CompareOperator::Lt, vec![false, false, false, false])] + #[case(CompareOperator::Lte, vec![false, false, false, true])] + fn test_cmp_to_empty(#[case] op: CompareOperator, #[case] expected: Vec) { + use crate::compute::compare_lengths_to_empty; + + let lengths: Vec = vec![1, 5, 7, 0]; + + let output = compare_lengths_to_empty(lengths.iter().copied(), op); + assert_eq!(Vec::from_iter(output.iter()), expected); + } } diff --git a/vortex-layout/src/layouts/row_idx/mod.rs b/vortex-layout/src/layouts/row_idx/mod.rs index fd7513aad96..a632778e63a 100644 --- a/vortex-layout/src/layouts/row_idx/mod.rs +++ b/vortex-layout/src/layouts/row_idx/mod.rs @@ -18,6 +18,7 @@ use vortex_array::ArrayRef; use vortex_array::DynArray; use vortex_array::IntoArray; use vortex_array::MaskFuture; +use vortex_array::ToCanonical; use vortex_array::VortexSessionExecute; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; From b7bf576beb22b1f0925e92c82dbc15eb6d95db1e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 10:13:34 +0000 Subject: [PATCH 02/12] fix Signed-off-by: Joe Isaacs --- vortex-array/src/scalar_fn/fns/binary/compare.rs | 1 + vortex-layout/src/layouts/row_idx/mod.rs | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index 8e6db72d43e..4980bd5d590 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -255,6 +255,7 @@ mod tests { use crate::dtype::Nullability; use crate::dtype::PType; use crate::scalar::Scalar; + use crate::scalar_fn::fns::operators::CompareOperator; use crate::scalar_fn::fns::operators::Operator; use crate::test_harness::to_int_indices; use crate::validity::Validity; diff --git a/vortex-layout/src/layouts/row_idx/mod.rs b/vortex-layout/src/layouts/row_idx/mod.rs index a632778e63a..fd7513aad96 100644 --- a/vortex-layout/src/layouts/row_idx/mod.rs +++ b/vortex-layout/src/layouts/row_idx/mod.rs @@ -18,7 +18,6 @@ use vortex_array::ArrayRef; use vortex_array::DynArray; use vortex_array::IntoArray; use vortex_array::MaskFuture; -use vortex_array::ToCanonical; use vortex_array::VortexSessionExecute; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; From 0bafa49e25a950e8071bbbe74badab37cd4a45c8 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 10:25:55 +0000 Subject: [PATCH 03/12] fix Signed-off-by: Joe Isaacs --- .../src/scalar_fn/fns/binary/compare.rs | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index 4980bd5d590..3730b13cfe8 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -255,7 +255,6 @@ mod tests { use crate::dtype::Nullability; use crate::dtype::PType; use crate::scalar::Scalar; - use crate::scalar_fn::fns::operators::CompareOperator; use crate::scalar_fn::fns::operators::Operator; use crate::test_harness::to_int_indices; use crate::validity::Validity; @@ -491,20 +490,4 @@ mod tests { assert!(result.scalar_at(1).unwrap().is_valid()); assert!(result.scalar_at(2).unwrap().is_valid()); } - - #[rstest] - #[case(CompareOperator::Eq, vec![false, false, false, true])] - #[case(CompareOperator::NotEq, vec![true, true, true, false])] - #[case(CompareOperator::Gt, vec![true, true, true, false])] - #[case(CompareOperator::Gte, vec![true, true, true, true])] - #[case(CompareOperator::Lt, vec![false, false, false, false])] - #[case(CompareOperator::Lte, vec![false, false, false, true])] - fn test_cmp_to_empty(#[case] op: CompareOperator, #[case] expected: Vec) { - use crate::compute::compare_lengths_to_empty; - - let lengths: Vec = vec![1, 5, 7, 0]; - - let output = compare_lengths_to_empty(lengths.iter().copied(), op); - assert_eq!(Vec::from_iter(output.iter()), expected); - } } From 7ed00a8441d32107a58e88b35430a253739dd5b5 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 11:07:53 +0000 Subject: [PATCH 04/12] fix Signed-off-by: Joe Isaacs --- vortex-array/src/array/mod.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 7dca5d8aef1..fc47631f35a 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -16,7 +16,6 @@ pub use visitor::*; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; @@ -44,7 +43,6 @@ use crate::arrays::VarBinVTable; use crate::arrays::VarBinViewVTable; use crate::buffer::BufferHandle; use crate::builders::ArrayBuilder; -use crate::builtins::ArrayBuiltins; use crate::compute; use crate::dtype::DType; use crate::dtype::Nullability; @@ -355,20 +353,6 @@ impl dyn DynArray + '_ { pub fn is_canonical(&self) -> bool { self.is::() } - - /// Converts from a possible nullable boolean array. Null values are treated as false. - pub fn try_to_mask_fill_null_false(&self) -> VortexResult { - if !matches!(self.dtype(), DType::Bool(_)) { - vortex_bail!("mask must be bool array, has dtype {}", self.dtype()); - } - - // Convert nulls to false first in case this can be done cheaply by the encoding. - let array = self - .to_array() - .fill_null(Scalar::bool(false, self.dtype().nullability()))?; - - Ok(array.to_bool().to_mask_fill_null_false()) - } } /// Trait for converting a type into a Vortex [`ArrayRef`]. From aaece75ad3bec42974f22a06918ef3de6b95daf2 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 11:54:34 +0000 Subject: [PATCH 05/12] iwp Signed-off-by: Joe Isaacs --- vortex-array/benches/varbinview_zip.rs | 35 ++++--------------- .../src/arrays/chunked/compute/zip.rs | 8 ----- vortex-array/src/builtins.rs | 7 +++- vortex-array/src/scalar_fn/fns/zip/mod.rs | 32 ++++++++--------- 4 files changed, 26 insertions(+), 56 deletions(-) diff --git a/vortex-array/benches/varbinview_zip.rs b/vortex-array/benches/varbinview_zip.rs index b3ce220f558..fa70974f122 100644 --- a/vortex-array/benches/varbinview_zip.rs +++ b/vortex-array/benches/varbinview_zip.rs @@ -5,9 +5,6 @@ use divan::Bencher; use vortex_array::IntoArray; -use vortex_array::LEGACY_SESSION; -use vortex_array::RecursiveCanonical; -use vortex_array::VortexSessionExecute; use vortex_array::arrays::VarBinViewArray; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; @@ -27,19 +24,9 @@ fn varbinview_zip_fragmented_mask(bencher: Bencher) { let mask = alternating_mask(len); bencher - .with_inputs(|| { - ( - if_true.clone(), - if_false.clone(), - mask.clone().into_array(), - LEGACY_SESSION.create_execution_ctx(), - ) - }) - .bench_refs(|(t, f, m, ctx)| { - m.zip(t.clone(), f.clone()) - .unwrap() - .execute::(ctx) - .unwrap(); + .with_inputs(|| (&if_true, &if_false, &mask)) + .bench_refs(|(t, f, m)| { + m.clone().into_array().zip(t.clone(), f.clone()).unwrap(); }); } @@ -52,19 +39,9 @@ fn varbinview_zip_block_mask(bencher: Bencher) { let mask = block_mask(len, 128); bencher - .with_inputs(|| { - ( - if_true.clone(), - if_false.clone(), - mask.clone().into_array(), - LEGACY_SESSION.create_execution_ctx(), - ) - }) - .bench_refs(|(t, f, m, ctx)| { - m.zip(t.clone(), f.clone()) - .unwrap() - .execute::(ctx) - .unwrap(); + .with_inputs(|| (&if_true, &if_false, &mask)) + .bench_refs(|(t, f, m)| { + m.clone().into_array().zip(t.clone(), f.clone()).unwrap(); }); } diff --git a/vortex-array/src/arrays/chunked/compute/zip.rs b/vortex-array/src/arrays/chunked/compute/zip.rs index b76970852b7..bbeaf32abb2 100644 --- a/vortex-array/src/arrays/chunked/compute/zip.rs +++ b/vortex-array/src/arrays/chunked/compute/zip.rs @@ -74,11 +74,8 @@ mod tests { use vortex_buffer::buffer; use vortex_mask::Mask; - use crate::ArrayRef; use crate::IntoArray; - use crate::LEGACY_SESSION; use crate::ToCanonical; - use crate::VortexSessionExecute; use crate::arrays::ChunkedArray; use crate::arrays::ChunkedVTable; use crate::builtins::ArrayBuiltins; @@ -114,11 +111,6 @@ mod tests { .into_array() .zip(if_true.to_array(), if_false.to_array()) .unwrap(); - // One step of execution will push down the zip. - let zipped = zipped - .clone() - .execute::(&mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); let zipped = zipped .as_opt::() .expect("zip should keep chunked encoding"); diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index 5a00dfb784c..830a966c083 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -10,8 +10,10 @@ //! the equivalent Arrow compute function. use vortex_error::VortexResult; +use vortex_session::VortexSession; use crate::ArrayRef; +use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::ConstantArray; use crate::arrays::ScalarFnArrayExt; @@ -193,7 +195,10 @@ impl ArrayBuiltins for ArrayRef { } fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult { - Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()]) + let scalar_fn = + Zip.try_new_array(if_true.len(), EmptyOptions, [if_true, if_false, self.clone()])?; + let mut ctx = ExecutionCtx::new(VortexSession::empty()); + scalar_fn.execute::(&mut ctx) } fn list_contains(&self, value: ArrayRef) -> VortexResult { diff --git a/vortex-array/src/scalar_fn/fns/zip/mod.rs b/vortex-array/src/scalar_fn/fns/zip/mod.rs index 7a11dbd04dc..9832c5ce3f6 100644 --- a/vortex-array/src/scalar_fn/fns/zip/mod.rs +++ b/vortex-array/src/scalar_fn/fns/zip/mod.rs @@ -227,15 +227,11 @@ mod tests { use vortex_error::VortexResult; use vortex_mask::Mask; - use crate::ArrayRef; use crate::IntoArray; - use crate::LEGACY_SESSION; - use crate::VortexSessionExecute; use crate::arrays::ConstantArray; use crate::arrays::PrimitiveArray; use crate::arrays::StructArray; - use crate::arrays::StructVTable; - use crate::arrays::VarBinViewArray; + use crate::arrays::VarBinViewVTable; use crate::arrow::IntoArrowArray; use crate::assert_arrays_eq; use crate::builders::ArrayBuilder; @@ -326,10 +322,7 @@ mod tests { let mask = Mask::from_indices(len, indices); let mask_array = mask.into_array(); - let result = mask_array - .clone() - .zip(const1.clone(), const2.clone())? - .execute::(&mut LEGACY_SESSION.create_execution_ctx())?; + let result = mask_array.clone().zip(const1.clone(), const2.clone()).unwrap(); insta::assert_snapshot!(result.display_tree(), @r" root: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%) [all_valid] @@ -343,10 +336,16 @@ mod tests { let wrapped1 = StructArray::try_from_iter([("nested", const1)])?.into_array(); let wrapped2 = StructArray::try_from_iter([("nested", const2)])?.into_array(); - let wrapped_result = mask_array - .zip(wrapped1, wrapped2)? - .execute::(&mut LEGACY_SESSION.create_execution_ctx())?; - assert!(wrapped_result.is::()); + let wrapped_result = mask_array.zip(wrapped1, wrapped2).unwrap(); + insta::assert_snapshot!(wrapped_result.display_tree(), @r" + root: vortex.struct({nested=utf8?}, len=100) nbytes=1.66 kB (100.00%) + metadata: EmptyMetadata + nested: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%) [all_valid] + metadata: EmptyMetadata + buffer: buffer_0 host 29 B (align=1) (1.75%) + buffer: buffer_1 host 28 B (align=1) (1.69%) + buffer: views host 1.60 kB (align=16) (96.56%) + "); Ok(()) } @@ -387,11 +386,8 @@ mod tests { let mask = Mask::from_indices(200, (0..100).filter(|i| i % 3 != 0).collect()); let mask_array = mask.clone().into_array(); - let zipped = mask_array - .zip(if_true.clone(), if_false.clone()) - .unwrap() - .execute::(&mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); + let zipped = mask_array.zip(if_true.clone(), if_false.clone()).unwrap(); + let zipped = zipped.as_opt::().unwrap(); assert_eq!(zipped.nbuffers(), 2); // assert the result is the same as arrow From 00daab643e26b5592fbe834952611c8f79439f4d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 14:43:26 +0000 Subject: [PATCH 06/12] fix Signed-off-by: Joe Isaacs --- vortex-array/src/builtins.rs | 7 +------ vortex-array/src/scalar_fn/fns/zip/mod.rs | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index 830a966c083..5a00dfb784c 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -10,10 +10,8 @@ //! the equivalent Arrow compute function. use vortex_error::VortexResult; -use vortex_session::VortexSession; use crate::ArrayRef; -use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::ConstantArray; use crate::arrays::ScalarFnArrayExt; @@ -195,10 +193,7 @@ impl ArrayBuiltins for ArrayRef { } fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult { - let scalar_fn = - Zip.try_new_array(if_true.len(), EmptyOptions, [if_true, if_false, self.clone()])?; - let mut ctx = ExecutionCtx::new(VortexSession::empty()); - scalar_fn.execute::(&mut ctx) + Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()]) } fn list_contains(&self, value: ArrayRef) -> VortexResult { diff --git a/vortex-array/src/scalar_fn/fns/zip/mod.rs b/vortex-array/src/scalar_fn/fns/zip/mod.rs index 9832c5ce3f6..7f1156c9f0b 100644 --- a/vortex-array/src/scalar_fn/fns/zip/mod.rs +++ b/vortex-array/src/scalar_fn/fns/zip/mod.rs @@ -228,9 +228,12 @@ mod tests { use vortex_mask::Mask; use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::VortexSessionExecute; use crate::arrays::ConstantArray; use crate::arrays::PrimitiveArray; use crate::arrays::StructArray; + use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; use crate::arrow::IntoArrowArray; use crate::assert_arrays_eq; @@ -322,7 +325,10 @@ mod tests { let mask = Mask::from_indices(len, indices); let mask_array = mask.into_array(); - let result = mask_array.clone().zip(const1.clone(), const2.clone()).unwrap(); + let result = mask_array + .clone() + .zip(const1.clone(), const2.clone())? + .execute::(&mut LEGACY_SESSION.create_execution_ctx())?; insta::assert_snapshot!(result.display_tree(), @r" root: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%) [all_valid] @@ -336,7 +342,7 @@ mod tests { let wrapped1 = StructArray::try_from_iter([("nested", const1)])?.into_array(); let wrapped2 = StructArray::try_from_iter([("nested", const2)])?.into_array(); - let wrapped_result = mask_array.zip(wrapped1, wrapped2).unwrap(); + let wrapped_result = mask_array.zip(wrapped1, wrapped2)?; insta::assert_snapshot!(wrapped_result.display_tree(), @r" root: vortex.struct({nested=utf8?}, len=100) nbytes=1.66 kB (100.00%) metadata: EmptyMetadata @@ -386,8 +392,11 @@ mod tests { let mask = Mask::from_indices(200, (0..100).filter(|i| i % 3 != 0).collect()); let mask_array = mask.clone().into_array(); - let zipped = mask_array.zip(if_true.clone(), if_false.clone()).unwrap(); - let zipped = zipped.as_opt::().unwrap(); + let zipped = mask_array + .zip(if_true.clone(), if_false.clone()) + .unwrap() + .execute::(&mut LEGACY_SESSION.create_execution_ctx()) + .unwrap(); assert_eq!(zipped.nbuffers(), 2); // assert the result is the same as arrow From 6ef7d87d524fc386bd96eecf0b8c018e1e3cd05b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 14:53:10 +0000 Subject: [PATCH 07/12] fix Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/chunked/compute/zip.rs | 8 ++++++++ vortex-array/src/scalar_fn/fns/zip/mod.rs | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/vortex-array/src/arrays/chunked/compute/zip.rs b/vortex-array/src/arrays/chunked/compute/zip.rs index bbeaf32abb2..b76970852b7 100644 --- a/vortex-array/src/arrays/chunked/compute/zip.rs +++ b/vortex-array/src/arrays/chunked/compute/zip.rs @@ -74,8 +74,11 @@ mod tests { use vortex_buffer::buffer; use vortex_mask::Mask; + use crate::ArrayRef; use crate::IntoArray; + use crate::LEGACY_SESSION; use crate::ToCanonical; + use crate::VortexSessionExecute; use crate::arrays::ChunkedArray; use crate::arrays::ChunkedVTable; use crate::builtins::ArrayBuiltins; @@ -111,6 +114,11 @@ mod tests { .into_array() .zip(if_true.to_array(), if_false.to_array()) .unwrap(); + // One step of execution will push down the zip. + let zipped = zipped + .clone() + .execute::(&mut LEGACY_SESSION.create_execution_ctx()) + .unwrap(); let zipped = zipped .as_opt::() .expect("zip should keep chunked encoding"); diff --git a/vortex-array/src/scalar_fn/fns/zip/mod.rs b/vortex-array/src/scalar_fn/fns/zip/mod.rs index 7f1156c9f0b..9c8d9261be9 100644 --- a/vortex-array/src/scalar_fn/fns/zip/mod.rs +++ b/vortex-array/src/scalar_fn/fns/zip/mod.rs @@ -234,7 +234,6 @@ mod tests { use crate::arrays::PrimitiveArray; use crate::arrays::StructArray; use crate::arrays::VarBinViewArray; - use crate::arrays::VarBinViewVTable; use crate::arrow::IntoArrowArray; use crate::assert_arrays_eq; use crate::builders::ArrayBuilder; From 7dbd77ff0fa8e135481fbe07fc135b7c4d598689 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 15:54:09 +0000 Subject: [PATCH 08/12] fix Signed-off-by: Joe Isaacs --- vortex-array/src/scalar_fn/fns/zip/mod.rs | 81 ++++++++++++++++++----- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/vortex-array/src/scalar_fn/fns/zip/mod.rs b/vortex-array/src/scalar_fn/fns/zip/mod.rs index 9c8d9261be9..4b13bf53c89 100644 --- a/vortex-array/src/scalar_fn/fns/zip/mod.rs +++ b/vortex-array/src/scalar_fn/fns/zip/mod.rs @@ -115,7 +115,9 @@ impl ScalarFnVTable for Zip { let if_false = args.get(1)?; let mask_array = args.get(2)?; - let mask = mask_array.execute::(ctx)?.to_mask(); + let mask = mask_array + .execute::(ctx)? + .to_mask_fill_null_false(); let return_dtype = if_true .dtype() @@ -227,19 +229,24 @@ mod tests { use vortex_error::VortexResult; use vortex_mask::Mask; + use crate::ArrayRef; + use crate::DynArray; use crate::IntoArray; use crate::LEGACY_SESSION; use crate::VortexSessionExecute; + use crate::arrays::BoolArray; use crate::arrays::ConstantArray; use crate::arrays::PrimitiveArray; use crate::arrays::StructArray; - use crate::arrays::VarBinViewArray; + use crate::arrays::StructVTable; + use crate::arrays::VarBinViewVTable; use crate::arrow::IntoArrowArray; use crate::assert_arrays_eq; use crate::builders::ArrayBuilder; use crate::builders::BufferGrowthStrategy; use crate::builders::VarBinViewBuilder; use crate::builtins::ArrayBuiltins; + use crate::columnar::Columnar; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -324,10 +331,12 @@ mod tests { let mask = Mask::from_indices(len, indices); let mask_array = mask.into_array(); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let result = mask_array .clone() .zip(const1.clone(), const2.clone())? - .execute::(&mut LEGACY_SESSION.create_execution_ctx())?; + .execute::(&mut ctx)? + .into_array(); insta::assert_snapshot!(result.display_tree(), @r" root: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%) [all_valid] @@ -341,16 +350,10 @@ mod tests { let wrapped1 = StructArray::try_from_iter([("nested", const1)])?.into_array(); let wrapped2 = StructArray::try_from_iter([("nested", const2)])?.into_array(); - let wrapped_result = mask_array.zip(wrapped1, wrapped2)?; - insta::assert_snapshot!(wrapped_result.display_tree(), @r" - root: vortex.struct({nested=utf8?}, len=100) nbytes=1.66 kB (100.00%) - metadata: EmptyMetadata - nested: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%) [all_valid] - metadata: EmptyMetadata - buffer: buffer_0 host 29 B (align=1) (1.75%) - buffer: buffer_1 host 28 B (align=1) (1.69%) - buffer: views host 1.60 kB (align=16) (96.56%) - "); + let wrapped_result = mask_array + .zip(wrapped1, wrapped2)? + .execute::(&mut ctx)?; + assert!(wrapped_result.is::()); Ok(()) } @@ -391,11 +394,13 @@ mod tests { let mask = Mask::from_indices(200, (0..100).filter(|i| i % 3 != 0).collect()); let mask_array = mask.clone().into_array(); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let zipped = mask_array .zip(if_true.clone(), if_false.clone()) .unwrap() - .execute::(&mut LEGACY_SESSION.create_execution_ctx()) + .execute::(&mut ctx) .unwrap(); + let zipped = zipped.as_opt::().unwrap(); assert_eq!(zipped.nbuffers(), 2); // assert the result is the same as arrow @@ -409,7 +414,53 @@ mod tests { ) .unwrap(); - let actual = zipped.into_array().into_arrow_preferred().unwrap(); + let actual = zipped.clone().into_array().into_arrow_preferred().unwrap(); assert_eq!(actual.as_ref(), expected.as_ref()); } + + #[test] + fn test_zip_nullable_mask() { + // Null mask values are treated as false (selecting if_false). + let mask = BoolArray::from_iter([Some(true), None, Some(false), None, Some(true)]); + let if_true = buffer![10, 20, 30, 40, 50].into_array(); + let if_false = buffer![1, 2, 3, 4, 5].into_array(); + + let result = mask.into_array().zip(if_true, if_false).unwrap(); + let expected = buffer![10, 2, 3, 4, 50].into_array(); + + assert_arrays_eq!(result, expected); + } + + #[test] + fn test_zip_nullable_mask_all_null() { + // All-null mask should select entirely from if_false. + let mask = BoolArray::from_iter([None, None, None]); + let if_true = buffer![10, 20, 30].into_array(); + let if_false = buffer![1, 2, 3].into_array(); + + let result = mask.into_array().zip(if_true, if_false).unwrap(); + let expected = buffer![1, 2, 3].into_array(); + + assert_arrays_eq!(result, expected); + } + + #[test] + fn test_zip_nullable_mask_with_nullable_values() { + // Nullable mask combined with nullable if_true and if_false. + let mask = BoolArray::from_iter([Some(true), None, Some(false), Some(true)]); + let if_true = + PrimitiveArray::from_option_iter([Some(10), Some(20), Some(30), None]).into_array(); + let if_false = + PrimitiveArray::from_option_iter([Some(1), None, Some(3), Some(4)]).into_array(); + + let result = mask.into_array().zip(if_true, if_false).unwrap(); + // mask[0]=true → if_true[0]=10 + // mask[1]=null → if_false[1]=null + // mask[2]=false → if_false[2]=3 + // mask[3]=true → if_true[3]=null + let expected = + PrimitiveArray::from_option_iter([Some(10), None, Some(3), None]).into_array(); + + assert_arrays_eq!(result, expected); + } } From 601fa37267f9fc66aad767bc1452d8650ab8dde1 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 16:03:23 +0000 Subject: [PATCH 09/12] fix Signed-off-by: Joe Isaacs --- vortex-array/benches/varbinview_zip.rs | 35 +++++++++++++++++++++----- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/vortex-array/benches/varbinview_zip.rs b/vortex-array/benches/varbinview_zip.rs index fa70974f122..57326674d0b 100644 --- a/vortex-array/benches/varbinview_zip.rs +++ b/vortex-array/benches/varbinview_zip.rs @@ -5,6 +5,9 @@ use divan::Bencher; use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::RecursiveCanonical; +use vortex_array::VortexSessionExecute; use vortex_array::arrays::VarBinViewArray; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; @@ -24,9 +27,19 @@ fn varbinview_zip_fragmented_mask(bencher: Bencher) { let mask = alternating_mask(len); bencher - .with_inputs(|| (&if_true, &if_false, &mask)) - .bench_refs(|(t, f, m)| { - m.clone().into_array().zip(t.clone(), f.clone()).unwrap(); + .with_inputs(|| { + ( + &if_true, + &if_false, + mask.clone().into_array(), + LEGACY_SESSION.create_execution_ctx(), + ) + }) + .bench_refs(|(t, f, m, ctx)| { + t.zip(f.clone(), m.clone()) + .unwrap() + .execute::(ctx) + .unwrap(); }); } @@ -39,9 +52,19 @@ fn varbinview_zip_block_mask(bencher: Bencher) { let mask = block_mask(len, 128); bencher - .with_inputs(|| (&if_true, &if_false, &mask)) - .bench_refs(|(t, f, m)| { - m.clone().into_array().zip(t.clone(), f.clone()).unwrap(); + .with_inputs(|| { + ( + &if_true, + &if_false, + &mask, + LEGACY_SESSION.create_execution_ctx(), + ) + }) + .bench_refs(|(t, f, m, ctx)| { + t.zip(f.clone(), m.clone().into_array()) + .unwrap() + .execute::(ctx) + .unwrap(); }); } From 1d65baeb653b194a2cb6b84b3381f3877110cac3 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 3 Mar 2026 16:15:50 +0000 Subject: [PATCH 10/12] fix Signed-off-by: Joe Isaacs --- vortex-array/benches/varbinview_zip.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vortex-array/benches/varbinview_zip.rs b/vortex-array/benches/varbinview_zip.rs index 57326674d0b..b3ce220f558 100644 --- a/vortex-array/benches/varbinview_zip.rs +++ b/vortex-array/benches/varbinview_zip.rs @@ -29,14 +29,14 @@ fn varbinview_zip_fragmented_mask(bencher: Bencher) { bencher .with_inputs(|| { ( - &if_true, - &if_false, + if_true.clone(), + if_false.clone(), mask.clone().into_array(), LEGACY_SESSION.create_execution_ctx(), ) }) .bench_refs(|(t, f, m, ctx)| { - t.zip(f.clone(), m.clone()) + m.zip(t.clone(), f.clone()) .unwrap() .execute::(ctx) .unwrap(); @@ -54,14 +54,14 @@ fn varbinview_zip_block_mask(bencher: Bencher) { bencher .with_inputs(|| { ( - &if_true, - &if_false, - &mask, + if_true.clone(), + if_false.clone(), + mask.clone().into_array(), LEGACY_SESSION.create_execution_ctx(), ) }) .bench_refs(|(t, f, m, ctx)| { - t.zip(f.clone(), m.clone().into_array()) + m.zip(t.clone(), f.clone()) .unwrap() .execute::(ctx) .unwrap(); From 9f6bbb728bddb87e435340073c99fd53623a310a Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 4 Mar 2026 14:53:51 +0000 Subject: [PATCH 11/12] fix Signed-off-by: Joe Isaacs --- .../src/arrays/chunked/compute/zip.rs | 37 +-- vortex-array/src/arrays/chunked/mod.rs | 1 + .../src/arrays/chunked/paired_chunks.rs | 258 ++++++++++++++++++ 3 files changed, 263 insertions(+), 33 deletions(-) create mode 100644 vortex-array/src/arrays/chunked/paired_chunks.rs diff --git a/vortex-array/src/arrays/chunked/compute/zip.rs b/vortex-array/src/arrays/chunked/compute/zip.rs index b76970852b7..f7c969fdf6e 100644 --- a/vortex-array/src/arrays/chunked/compute/zip.rs +++ b/vortex-array/src/arrays/chunked/compute/zip.rs @@ -28,39 +28,10 @@ impl ZipKernel for ChunkedVTable { .union_nullability(if_false.dtype().nullability()); let mut out_chunks = Vec::with_capacity(if_true.nchunks() + if_false.nchunks()); - let mut lhs_idx = 0; - let mut rhs_idx = 0; - let mut lhs_offset = 0; - let mut rhs_offset = 0; - let mut pos = 0; - let total_len = if_true.len(); - - while pos < total_len { - let lhs_chunk = if_true.chunk(lhs_idx); - let rhs_chunk = if_false.chunk(rhs_idx); - - let lhs_rem = lhs_chunk.len() - lhs_offset; - let rhs_rem = rhs_chunk.len() - rhs_offset; - let take_until = lhs_rem.min(rhs_rem); - - let mask_slice = mask.slice(pos..pos + take_until)?; - let lhs_slice = lhs_chunk.slice(lhs_offset..lhs_offset + take_until)?; - let rhs_slice = rhs_chunk.slice(rhs_offset..rhs_offset + take_until)?; - - out_chunks.push(mask_slice.zip(lhs_slice, rhs_slice)?); - - pos += take_until; - lhs_offset += take_until; - rhs_offset += take_until; - - if lhs_offset == lhs_chunk.len() { - lhs_idx += 1; - lhs_offset = 0; - } - if rhs_offset == rhs_chunk.len() { - rhs_idx += 1; - rhs_offset = 0; - } + for pair in if_true.paired_chunks(if_false) { + let pair = pair?; + let mask_slice = mask.slice(pair.pos)?; + out_chunks.push(mask_slice.zip(pair.left, pair.right)?); } // SAFETY: chunks originate from zipping slices of inputs that share dtype/nullability. diff --git a/vortex-array/src/arrays/chunked/mod.rs b/vortex-array/src/arrays/chunked/mod.rs index 4ef5ed10cf2..07d0bbcc346 100644 --- a/vortex-array/src/arrays/chunked/mod.rs +++ b/vortex-array/src/arrays/chunked/mod.rs @@ -5,6 +5,7 @@ mod array; pub use array::ChunkedArray; mod compute; +mod paired_chunks; mod vtable; pub use vtable::ChunkedVTable; diff --git a/vortex-array/src/arrays/chunked/paired_chunks.rs b/vortex-array/src/arrays/chunked/paired_chunks.rs new file mode 100644 index 00000000000..bc91c53b1e3 --- /dev/null +++ b/vortex-array/src/arrays/chunked/paired_chunks.rs @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::arrays::ChunkedArray; + +/// A pre-sliced, aligned pair of array chunks from two `ChunkedArray`s. +pub(crate) struct AlignedPair { + pub left: ArrayRef, + pub right: ArrayRef, + pub pos: Range, +} + +/// An iterator that walks two equally-sized `ChunkedArray`s in lockstep, +/// yielding aligned `(left, right)` slices at every chunk boundary of either +/// input. Empty chunks are skipped automatically. +pub(crate) struct PairedChunks<'a> { + left: &'a ChunkedArray, + right: &'a ChunkedArray, + lhs_idx: usize, + rhs_idx: usize, + lhs_offset: usize, + rhs_offset: usize, + pos: usize, + total_len: usize, +} + +impl ChunkedArray { + /// Returns an iterator that walks `self` and `other` in lockstep, yielding + /// [`AlignedPair`]s sliced at every chunk boundary of either input. + /// + /// # Panics + /// + /// Panics if `self.len() != other.len()`. + pub(crate) fn paired_chunks<'a>(&'a self, other: &'a ChunkedArray) -> PairedChunks<'a> { + assert_eq!( + self.len(), + other.len(), + "paired_chunks requires arrays of equal length" + ); + PairedChunks { + left: self, + right: other, + lhs_idx: 0, + rhs_idx: 0, + lhs_offset: 0, + rhs_offset: 0, + pos: 0, + total_len: self.len(), + } + } +} + +impl Iterator for PairedChunks<'_> { + type Item = VortexResult; + + fn next(&mut self) -> Option { + // Skip empty chunks on either side. + while self.lhs_idx < self.left.nchunks() && self.left.chunk(self.lhs_idx).is_empty() { + self.lhs_idx += 1; + } + while self.rhs_idx < self.right.nchunks() && self.right.chunk(self.rhs_idx).is_empty() { + self.rhs_idx += 1; + } + + if self.pos >= self.total_len { + return None; + } + + let lhs_chunk = self.left.chunk(self.lhs_idx); + let rhs_chunk = self.right.chunk(self.rhs_idx); + + let lhs_rem = lhs_chunk.len() - self.lhs_offset; + let rhs_rem = rhs_chunk.len() - self.rhs_offset; + let take = lhs_rem.min(rhs_rem); + + let lhs_slice = match lhs_chunk.slice(self.lhs_offset..self.lhs_offset + take) { + Ok(s) => s, + Err(e) => return Some(Err(e)), + }; + let rhs_slice = match rhs_chunk.slice(self.rhs_offset..self.rhs_offset + take) { + Ok(s) => s, + Err(e) => return Some(Err(e)), + }; + + let start = self.pos; + self.pos += take; + self.lhs_offset += take; + self.rhs_offset += take; + + if self.lhs_offset == lhs_chunk.len() { + self.lhs_idx += 1; + self.lhs_offset = 0; + } + if self.rhs_offset == rhs_chunk.len() { + self.rhs_idx += 1; + self.rhs_offset = 0; + } + + Some(Ok(AlignedPair { + left: lhs_slice, + right: rhs_slice, + pos: start..self.pos, + })) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::IntoArray; + use crate::arrays::ChunkedArray; + use crate::dtype::DType; + use crate::dtype::Nullability; + use crate::dtype::PType; + + fn i32_dtype() -> DType { + DType::Primitive(PType::I32, Nullability::NonNullable) + } + + #[allow(clippy::type_complexity)] + fn collect_pairs( + left: &ChunkedArray, + right: &ChunkedArray, + ) -> VortexResult, Vec, std::ops::Range)>> { + use crate::ToCanonical; + let mut result = Vec::new(); + for pair in left.paired_chunks(right) { + let pair = pair?; + let l: Vec = pair.left.to_primitive().as_slice::().to_vec(); + let r: Vec = pair.right.to_primitive().as_slice::().to_vec(); + result.push((l, r, pair.pos)); + } + Ok(result) + } + + #[test] + fn test_aligned_chunks() -> VortexResult<()> { + let left = ChunkedArray::try_new( + vec![buffer![1i32, 2].into_array(), buffer![3i32, 4].into_array()], + i32_dtype(), + )?; + let right = ChunkedArray::try_new( + vec![ + buffer![10i32, 20].into_array(), + buffer![30i32, 40].into_array(), + ], + i32_dtype(), + )?; + + let pairs = collect_pairs(&left, &right)?; + assert_eq!(pairs.len(), 2); + assert_eq!(pairs[0], (vec![1, 2], vec![10, 20], 0..2)); + assert_eq!(pairs[1], (vec![3, 4], vec![30, 40], 2..4)); + Ok(()) + } + + #[test] + fn test_misaligned_chunks() -> VortexResult<()> { + let left = ChunkedArray::try_new( + vec![ + buffer![1i32, 2].into_array(), + buffer![3i32].into_array(), + buffer![4i32, 5].into_array(), + ], + i32_dtype(), + )?; + let right = ChunkedArray::try_new( + vec![ + buffer![10i32].into_array(), + buffer![20i32, 30].into_array(), + buffer![40i32, 50].into_array(), + ], + i32_dtype(), + )?; + + let pairs = collect_pairs(&left, &right)?; + // Left: [1,2] [3] [4,5] → boundaries at 0,2,3,5 + // Right: [10] [20,30] [40,50] → boundaries at 0,1,3,5 + // Aligned at: 0,1,2,3,5 + assert_eq!(pairs.len(), 4); + assert_eq!(pairs[0], (vec![1], vec![10], 0..1)); + assert_eq!(pairs[1], (vec![2], vec![20], 1..2)); + assert_eq!(pairs[2], (vec![3], vec![30], 2..3)); + assert_eq!(pairs[3], (vec![4, 5], vec![40, 50], 3..5)); + Ok(()) + } + + #[test] + fn test_empty_chunks() -> VortexResult<()> { + let left = ChunkedArray::try_new( + vec![ + buffer![0i32; 0].into_array(), + buffer![1i32, 2, 3].into_array(), + ], + i32_dtype(), + )?; + let right = ChunkedArray::try_new( + vec![ + buffer![10i32, 20, 30].into_array(), + buffer![0i32; 0].into_array(), + ], + i32_dtype(), + )?; + + let pairs = collect_pairs(&left, &right)?; + assert_eq!(pairs.len(), 1); + assert_eq!(pairs[0], (vec![1, 2, 3], vec![10, 20, 30], 0..3)); + Ok(()) + } + + #[test] + fn test_single_element_chunks() -> VortexResult<()> { + let left = ChunkedArray::try_new( + vec![ + buffer![1i32].into_array(), + buffer![2i32].into_array(), + buffer![3i32].into_array(), + ], + i32_dtype(), + )?; + let right = ChunkedArray::try_new(vec![buffer![10i32, 20, 30].into_array()], i32_dtype())?; + + let pairs = collect_pairs(&left, &right)?; + assert_eq!(pairs.len(), 3); + assert_eq!(pairs[0], (vec![1], vec![10], 0..1)); + assert_eq!(pairs[1], (vec![2], vec![20], 1..2)); + assert_eq!(pairs[2], (vec![3], vec![30], 2..3)); + Ok(()) + } + + #[test] + fn test_both_empty() -> VortexResult<()> { + let left = ChunkedArray::try_new(vec![], i32_dtype())?; + let right = ChunkedArray::try_new(vec![], i32_dtype())?; + + let pairs = collect_pairs(&left, &right)?; + assert!(pairs.is_empty()); + Ok(()) + } + + #[test] + #[should_panic(expected = "paired_chunks requires arrays of equal length")] + fn test_length_mismatch_panics() { + let left = ChunkedArray::try_new(vec![buffer![1i32, 2].into_array()], i32_dtype()).unwrap(); + let right = + ChunkedArray::try_new(vec![buffer![10i32, 20, 30].into_array()], i32_dtype()).unwrap(); + + // Should panic. + drop(left.paired_chunks(&right).collect::>()); + } +} From d8f8fdc725e31be5a567c88f12de74800c36b9d5 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 4 Mar 2026 15:25:53 +0000 Subject: [PATCH 12/12] fix Signed-off-by: Joe Isaacs --- vortex-cuda/gpu-scan-cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-cuda/gpu-scan-cli/Cargo.toml b/vortex-cuda/gpu-scan-cli/Cargo.toml index 9bdf7fc3478..2ec84c64004 100644 --- a/vortex-cuda/gpu-scan-cli/Cargo.toml +++ b/vortex-cuda/gpu-scan-cli/Cargo.toml @@ -21,6 +21,6 @@ tokio = { workspace = true, features = ["macros", "full"] } tracing = { workspace = true, features = ["std", "attributes"] } tracing-perfetto = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter", "json"] } -vortex = { workspace = true } +vortex = { workspace = true, features = ["tokio"] } vortex-cuda = { workspace = true, features = ["_test-harness"] } vortex-cuda-macros = { workspace = true }