diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 9609ce9d79d..99a2c715a58 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -14418,6 +14418,8 @@ pub vortex_array::scalar::DecimalValue::I8(i8) impl vortex_array::scalar::DecimalValue +pub fn vortex_array::scalar::DecimalValue::as_i256(&self) -> vortex_array::dtype::i256 + pub fn vortex_array::scalar::DecimalValue::cast(&self) -> core::option::Option pub fn vortex_array::scalar::DecimalValue::checked_add(&self, &Self) -> core::option::Option diff --git a/vortex-array/src/arrays/decimal/compute/between.rs b/vortex-array/src/arrays/decimal/compute/between.rs index 3594e5a268c..72b66697eb5 100644 --- a/vortex-array/src/arrays/decimal/compute/between.rs +++ b/vortex-array/src/arrays/decimal/compute/between.rs @@ -4,7 +4,6 @@ use vortex_buffer::BitBuffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use crate::ArrayRef; use crate::ExecutionCtx; @@ -14,6 +13,7 @@ use crate::arrays::BoolArray; use crate::arrays::Decimal; use crate::dtype::NativeDecimalType; use crate::dtype::Nullability; +use crate::dtype::i256; use crate::match_each_decimal_value_type; use crate::scalar::Scalar; use crate::scalar_fn::fns::between::BetweenKernel; @@ -52,25 +52,49 @@ fn between_unpack( nullability: Nullability, options: &BetweenOptions, ) -> VortexResult> { - let Some(lower_value) = lower - .as_decimal() - .decimal_value() - .and_then(|v| v.cast::()) - else { - vortex_bail!( - "invalid lower bound Scalar: {lower}, expected {:?}", - T::DECIMAL_TYPE - ) + let Some(lower_dv) = lower.as_decimal().decimal_value() else { + // Null lower bound — fall back to canonical path. + return Ok(None); }; - let Some(upper_value) = upper - .as_decimal() - .decimal_value() - .and_then(|v| v.cast::()) - else { - vortex_bail!( - "invalid upper bound Scalar: {upper}, expected {:?}", - T::DECIMAL_TYPE - ) + let Some(upper_dv) = upper.as_decimal().decimal_value() else { + // Null upper bound — fall back to canonical path. + return Ok(None); + }; + + // Try to cast the bound scalar to the array's storage type T. + // + // If the cast fails, the bound's value is outside [T::MIN, T::MAX]. For all signed + // NativeDecimalType implementations the minimum is negative and the maximum is positive, so + // we can determine the direction of overflow from the sign of the bound: + // • non-negative and doesn't fit in T ⟹ value > T::MAX + // • negative and doesn't fit in T ⟹ value < T::MIN + // + // From the direction we can answer the comparison immediately: + // lower > T::MAX: no array value (≤ T::MAX) satisfies lower ≤ value → all-false + // lower < T::MIN: every array value (≥ T::MIN) satisfies lower ≤ value → no lower constraint + // upper > T::MAX: every array value (≤ T::MAX) satisfies value ≤ upper → no upper constraint + // upper < T::MIN: no array value (≥ T::MIN) satisfies value ≤ upper → all-false + // + // Both the strict and non-strict forms lead to the same conclusion because the overflow is + // by at least one integer, so no boundary element can make the strict form differ. + let lower_value: Option = match lower_dv.cast::() { + Some(v) => Some(v), + None => { + if lower_dv.as_i256() >= i256::ZERO { + return Ok(Some(all_false(arr, nullability))); + } + None + } + }; + + let upper_value: Option = match upper_dv.cast::() { + Some(v) => Some(v), + None => { + if upper_dv.as_i256() < i256::ZERO { + return Ok(Some(all_false(arr, nullability))); + } + None + } }; let lower_op = match options.lower_strict { @@ -93,10 +117,21 @@ fn between_unpack( ))) } +/// Returns an all-false [`BoolArray`] with the correct length and nullability. +fn all_false(arr: ArrayView<'_, Decimal>, nullability: Nullability) -> ArrayRef { + BoolArray::new( + BitBuffer::new_unset(arr.len()), + arr.validity() + .vortex_expect("validity should be derivable") + .union_nullability(nullability), + ) + .into_array() +} + fn between_impl( arr: ArrayView<'_, Decimal>, - lower: T, - upper: T, + lower: Option, + upper: Option, nullability: Nullability, lower_op: impl Fn(T, T) -> bool, upper_op: impl Fn(T, T) -> bool, @@ -105,7 +140,8 @@ fn between_impl( BoolArray::new( BitBuffer::collect_bool(buffer.len(), |idx| { let value = buffer[idx]; - lower_op(lower, value) & upper_op(value, upper) + lower.map_or(true, |l| lower_op(l, value)) + & upper.map_or(true, |u| upper_op(value, u)) }), arr.validity() .vortex_expect("validity should be derivable") diff --git a/vortex-array/src/scalar/typed_view/decimal/dvalue.rs b/vortex-array/src/scalar/typed_view/decimal/dvalue.rs index 97eb38f07cd..a73cf953acc 100644 --- a/vortex-array/src/scalar/typed_view/decimal/dvalue.rs +++ b/vortex-array/src/scalar/typed_view/decimal/dvalue.rs @@ -89,6 +89,15 @@ impl DecimalValue { } } + /// Convert this `DecimalValue` to an [`i256`]. + /// + /// This conversion always succeeds since [`i256`] can represent every stored variant. + pub fn as_i256(&self) -> i256 { + match_each_decimal_value!(self, |v| { + v.to_i256().vortex_expect("upcast to i256 must always succeed") + }) + } + /// Returns the 0 value given the [`DecimalType`]. pub fn zero(decimal_type: &DecimalDType) -> Self { let smallest_type = DecimalType::smallest_decimal_value_type(decimal_type); diff --git a/vortex-array/src/scalar_fn/fns/between/mod.rs b/vortex-array/src/scalar_fn/fns/between/mod.rs index bb44d71c846..ec7998ce3a3 100644 --- a/vortex-array/src/scalar_fn/fns/between/mod.rs +++ b/vortex-array/src/scalar_fn/fns/between/mod.rs @@ -545,4 +545,58 @@ mod tests { BoolArray::from_iter([true, true, true, false]) ); } + + /// Regression test for a fuzzer crash where a bound scalar used a wider storage type (I32) + /// than the array's storage type (I16), causing the cast in `between_unpack` to fail. + /// + /// The fix casts the bound to the array's storage type and, when the cast fails, uses the + /// overflow direction to determine the result without falling back to Arrow. + #[rstest] + // Upper bound too large (I32 > i16::MAX): upper constraint always satisfied → result from lower only. + #[case(DecimalValue::I16(1), DecimalValue::I32(82246), vec![0, 1, 2, 3])] + // Lower bound too large (I32 > i16::MAX): lower constraint never satisfied → all false. + #[case(DecimalValue::I32(82246), DecimalValue::I16(4), vec![])] + // Upper bound too small (negative I32 < i16::MIN): upper constraint never satisfied → all false. + #[case(DecimalValue::I16(1), DecimalValue::I32(-82246), vec![])] + // Lower bound too small (negative I32 < i16::MIN): lower constraint always satisfied → result from upper only. + #[case(DecimalValue::I32(-82246), DecimalValue::I16(2), vec![0, 1])] + fn test_between_decimal_mismatched_storage_types( + #[case] lower_val: DecimalValue, + #[case] upper_val: DecimalValue, + #[case] expected_indices: Vec, + ) { + // Array uses I16 storage with precision=5 (values fit in i16 even though precision=5 + // nominally maps to I32 as the smallest storage type). + let decimal_type = DecimalDType::new(5, -67); + let array = + DecimalArray::new(buffer![1i16, 2i16, 3i16, 4i16], decimal_type, Validity::NonNullable) + .into_array(); + + let lower = ConstantArray::new( + Scalar::decimal(lower_val, decimal_type, Nullability::NonNullable), + array.len(), + ) + .into_array(); + let upper = ConstantArray::new( + Scalar::decimal(upper_val, decimal_type, Nullability::NonNullable), + array.len(), + ) + .into_array(); + + let result = between_canonical( + &array, + &lower, + &upper, + &BetweenOptions { + lower_strict: StrictComparison::NonStrict, + upper_strict: StrictComparison::NonStrict, + }, + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .execute::(&mut SESSION.create_execution_ctx()) + .unwrap(); + + assert_eq!(to_int_indices(result).unwrap(), expected_indices); + } }