From 757d355954baeebcbadbc475cd595b246df86398 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 10:43:38 +0000 Subject: [PATCH] fix: coerce rhs Arrow type to match lhs when comparing nested arrays When arrow_compare_arrays falls back to make_comparator for nested types (struct, list, etc.), both sides were independently converted to Arrow with target=None, letting each array choose its own nearest physical type. A struct child backed by VarBinArray would produce Binary while a VarBinViewArray child would produce BinaryView. Both represent the same logical DType::Binary, but the assert_eq! on the data types panicked. Fix: convert lhs first (target=None) then use its Arrow DataType as the target Field when converting rhs. This mirrors the non-nested fast path which uses Datum::try_new_with_target_datatype for the same reason, and removes the now-redundant assert. Add a regression test that directly reproduces the fuzz crash pattern: a struct with a VarBinArray child on the left and a VarBinViewArray child on the right. Fixes #7957 Signed-off-by: Claude Co-authored-by: Joe Isaacs --- .../src/scalar_fn/fns/binary/compare.rs | 44 +++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index d01d5000131..99e17cfc9ed 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -7,6 +7,7 @@ use arrow_array::BooleanArray; use arrow_buffer::NullBuffer; use arrow_ord::cmp; use arrow_ord::ord::make_comparator; +use arrow_schema::Field; use arrow_schema::SortOptions; use vortex_error::VortexResult; use vortex_error::vortex_err; @@ -155,15 +156,14 @@ fn arrow_compare_arrays( // For nested types, fall back to `make_comparator` which does element-wise comparison. let arrow_array: BooleanArray = if left.dtype().is_nested() || right.dtype().is_nested() { let session = ctx.session().clone(); - let rhs = session.arrow().execute_arrow(right.clone(), None, ctx)?; + // Convert lhs first to determine the target Arrow type, then convert rhs using that + // same type as the target. This mirrors the non-nested path and ensures both sides + // have identical Arrow types even when the two Vortex arrays use different underlying + // encodings for logically equivalent fields (e.g., VarBinArray → Binary vs + // VarBinViewArray → BinaryView for the same DType::Binary field inside a struct). let lhs = session.arrow().execute_arrow(left.clone(), None, ctx)?; - - assert!( - lhs.data_type().equals_datatype(rhs.data_type()), - "lhs data_type: {}, rhs data_type: {}", - lhs.data_type(), - rhs.data_type() - ); + let target_field = Field::new("", lhs.data_type().clone(), right.dtype().is_nullable()); + let rhs = session.arrow().execute_arrow(right.clone(), Some(&target_field), ctx)?; compare_nested_arrow_arrays(lhs.as_ref(), rhs.as_ref(), operator)? } else { @@ -509,6 +509,34 @@ mod tests { assert_arrays_eq!(result, expected); } + /// Regression test: comparing struct arrays where the same logical field is backed by + /// different Vortex encodings (VarBinArray vs VarBinViewArray) must not panic. + /// Previously `arrow_compare_arrays` would assert that both sides had identical Arrow + /// data types, which failed when independent conversions chose different physical types + /// (Binary vs BinaryView) for the same logical DType::Binary field. + #[test] + fn struct_compare_mixed_binary_encodings() { + // LHS: struct with a VarBinArray (offset-based) binary field + let bin_field1 = + VarBinArray::from(vec!["apple".as_bytes(), "banana".as_bytes(), "cherry".as_bytes()]); + let struct1 = StructArray::from_fields(&[("data", bin_field1.into_array())]).unwrap(); + + // RHS: struct with a VarBinViewArray (view-based) binary field — same logical DType + let bin_field2 = VarBinViewArray::from_iter_bin([ + "apple".as_bytes(), + "banana".as_bytes(), + "durian".as_bytes(), + ]); + let struct2 = StructArray::from_fields(&[("data", bin_field2.into_array())]).unwrap(); + + let result = struct1 + .into_array() + .binary(struct2.into_array(), Operator::Eq) + .unwrap(); + let expected = BoolArray::from_iter([true, true, false]); + assert_arrays_eq!(result, expected); + } + /// Regression test: `scalar_cmp` must error when comparing scalars with incompatible /// extension types (e.g., timestamps with different time units) rather than silently /// returning a wrong result.