From a21f5b1ff4ceaf5bddd8cf6336407809ba9aa72b Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 17:59:07 +0100 Subject: [PATCH 1/9] Explicit absent Precision variant instead of Option Signed-off-by: Adam Gutglick --- vortex-array/src/aggregate_fn/accumulator.rs | 2 +- .../src/aggregate_fn/fns/is_constant/mod.rs | 12 +- .../src/aggregate_fn/fns/is_sorted/mod.rs | 2 +- .../src/aggregate_fn/fns/min_max/mod.rs | 10 +- .../src/aggregate_fn/fns/nan_count/mod.rs | 2 +- .../src/aggregate_fn/fns/null_count/mod.rs | 4 +- vortex-array/src/aggregate_fn/fns/sum/mod.rs | 4 +- .../fns/uncompressed_size_in_bytes/mod.rs | 6 +- vortex-array/src/array/erased.rs | 3 +- vortex-array/src/arrays/dict/take.rs | 13 +- vortex-array/src/arrays/varbin/builder.rs | 4 +- vortex-array/src/display/extractors/stats.rs | 26 +-- vortex-array/src/expr/stats/bound.rs | 33 ++- vortex-array/src/expr/stats/precision.rs | 146 ++++++++++--- vortex-array/src/expr/stats/provider.rs | 24 +- vortex-array/src/expr/stats/stat_bound.rs | 58 ----- vortex-array/src/scalar_fn/fns/stat.rs | 2 +- vortex-array/src/stats/array.rs | 6 +- vortex-array/src/stats/flatbuffers.rs | 86 ++++---- vortex-array/src/stats/stats_set.rs | 205 +++++++----------- .../flatbuffers/vortex-array/array.fbs | 1 + vortex-flatbuffers/src/generated/array.rs | 10 +- 22 files changed, 326 insertions(+), 333 deletions(-) diff --git a/vortex-array/src/aggregate_fn/accumulator.rs b/vortex-array/src/aggregate_fn/accumulator.rs index ddc3bd9a86c..c25562d21cb 100644 --- a/vortex-array/src/aggregate_fn/accumulator.rs +++ b/vortex-array/src/aggregate_fn/accumulator.rs @@ -122,7 +122,7 @@ impl DynAccumulator for Accumulator { // 0. Legacy stats bridge: if this aggregate is still cached under a legacy Stat slot, // consume that exact stat before kernel dispatch or decode. 
if let Some(stat) = Stat::from_aggregate_fn(&self.aggregate_fn) - && let Some(Precision::Exact(partial)) = batch.statistics().get(stat) + && let Precision::Exact(partial) = batch.statistics().get(stat) { vortex_ensure!( partial.dtype() == &self.partial_dtype, diff --git a/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs b/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs index 9a455fbe723..b14b2a050f8 100644 --- a/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs @@ -89,7 +89,7 @@ fn arrays_value_equal(a: &ArrayRef, b: &ArrayRef, ctx: &mut ExecutionCtx) -> Vor /// 5. Is all valid AND has minimum and maximum statistics that are equal. pub fn is_constant(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { // Short-circuit using cached array statistics. - if let Some(Precision::Exact(value)) = array.statistics().get_as::(Stat::IsConstant) { + if let Precision::Exact(value) = array.statistics().get_as::(Stat::IsConstant) { return Ok(value); } @@ -133,14 +133,14 @@ pub fn is_constant(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult(Stat::NaNCount) == Some(Precision::exact(0u64))) + || array.statistics().get_as::(Stat::NaNCount) == Precision::exact(0u64)) { array .statistics() diff --git a/vortex-array/src/aggregate_fn/fns/is_sorted/mod.rs b/vortex-array/src/aggregate_fn/fns/is_sorted/mod.rs index d5726a465f1..a4a94fc5211 100644 --- a/vortex-array/src/aggregate_fn/fns/is_sorted/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/is_sorted/mod.rs @@ -77,7 +77,7 @@ fn is_sorted_impl(array: &ArrayRef, strict: bool, ctx: &mut ExecutionCtx) -> Vor }; // Short-circuit using cached array statistics. 
- if let Some(Precision::Exact(value)) = array.statistics().get_as::(stat) { + if let Precision::Exact(value) = array.statistics().get_as::(stat) { return Ok(value); } diff --git a/vortex-array/src/aggregate_fn/fns/min_max/mod.rs b/vortex-array/src/aggregate_fn/fns/min_max/mod.rs index 786a70e5ffa..9665d9550b8 100644 --- a/vortex-array/src/aggregate_fn/fns/min_max/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/min_max/mod.rs @@ -46,14 +46,8 @@ static NAMES: LazyLock = LazyLock::new(|| FieldNames::from(["min", " /// This will update the stats set of the array as a side effect. pub fn min_max(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult> { // Short-circuit using cached array statistics. - let cached_min = array - .statistics() - .get(Stat::Min) - .and_then(Precision::as_exact); - let cached_max = array - .statistics() - .get(Stat::Max) - .and_then(Precision::as_exact); + let cached_min = array.statistics().get(Stat::Min).as_exact(); + let cached_max = array.statistics().get(Stat::Max).as_exact(); if let Some((min, max)) = cached_min.zip(cached_max) { let non_nullable_dtype = array.dtype().as_nonnullable(); return Ok(Some(MinMaxResult { diff --git a/vortex-array/src/aggregate_fn/fns/nan_count/mod.rs b/vortex-array/src/aggregate_fn/fns/nan_count/mod.rs index 0e229ed7982..a8b644c1895 100644 --- a/vortex-array/src/aggregate_fn/fns/nan_count/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/nan_count/mod.rs @@ -32,7 +32,7 @@ use crate::scalar::ScalarValue; /// See [`NanCount`] for details. pub fn nan_count(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { // Short-circuit using cached array statistics. 
- if let Some(Precision::Exact(nan_count_scalar)) = array.statistics().get(Stat::NaNCount) { + if let Precision::Exact(nan_count_scalar) = array.statistics().get(Stat::NaNCount) { return usize::try_from(&nan_count_scalar) .map_err(|e| vortex_err!("Failed to convert NaN count stat to usize: {e}")); } diff --git a/vortex-array/src/aggregate_fn/fns/null_count/mod.rs b/vortex-array/src/aggregate_fn/fns/null_count/mod.rs index cf53c9ac854..47faabc82ef 100644 --- a/vortex-array/src/aggregate_fn/fns/null_count/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/null_count/mod.rs @@ -25,7 +25,7 @@ use crate::scalar::ScalarValue; /// Return the number of null values in an array. pub fn null_count(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - if let Some(Precision::Exact(null_count_scalar)) = array.statistics().get(Stat::NullCount) { + if let Precision::Exact(null_count_scalar) = array.statistics().get(Stat::NullCount) { return usize::try_from(&null_count_scalar) .map_err(|e| vortex_err!("Failed to convert null count stat to usize: {e}")); } @@ -170,7 +170,7 @@ mod tests { assert_eq!(null_count(&array, &mut ctx)?, 2); assert_eq!( array.statistics().get_as::(Stat::NullCount), - Some(Precision::exact(2u64)) + Precision::exact(2u64) ); Ok(()) } diff --git a/vortex-array/src/aggregate_fn/fns/sum/mod.rs b/vortex-array/src/aggregate_fn/fns/sum/mod.rs index cad8cedf626..fb5f671e65d 100644 --- a/vortex-array/src/aggregate_fn/fns/sum/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/sum/mod.rs @@ -41,7 +41,7 @@ use crate::scalar::Scalar; /// See [`Sum`] for details. pub fn sum(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { // Short-circuit using cached array statistics. - if let Some(Precision::Exact(sum_scalar)) = array.statistics().get(Stat::Sum) { + if let Precision::Exact(sum_scalar) = array.statistics().get(Stat::Sum) { return Ok(sum_scalar); } @@ -379,7 +379,7 @@ mod tests { // For non-float types, try statistics short-circuit with accumulator. 
if !matches!(&sum_dtype, DType::Primitive(p, _) if p.is_float()) - && let Some(Precision::Exact(sum_scalar)) = array.statistics().get(Stat::Sum) + && let Precision::Exact(sum_scalar) = array.statistics().get(Stat::Sum) { return add_scalars(&sum_dtype, &sum_scalar, accumulator); } diff --git a/vortex-array/src/aggregate_fn/fns/uncompressed_size_in_bytes/mod.rs b/vortex-array/src/aggregate_fn/fns/uncompressed_size_in_bytes/mod.rs index ef6844e1a6a..7a77ed9555c 100644 --- a/vortex-array/src/aggregate_fn/fns/uncompressed_size_in_bytes/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/uncompressed_size_in_bytes/mod.rs @@ -63,9 +63,7 @@ pub fn uncompressed_size_in_bytes(array: &ArrayRef, ctx: &mut ExecutionCtx) -> V } fn uncompressed_size_in_bytes_u64(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - if let Some(Precision::Exact(size_scalar)) = - array.statistics().get(Stat::UncompressedSizeInBytes) - { + if let Precision::Exact(size_scalar) = array.statistics().get(Stat::UncompressedSizeInBytes) { return u64::try_from(&size_scalar) .map_err(|e| vortex_err!("Failed to convert uncompressed size stat to u64: {e}")); } @@ -597,7 +595,7 @@ mod tests { assert_eq!( array.statistics().get(Stat::UncompressedSizeInBytes), - Some(Precision::exact(u64::try_from(size)?)) + Precision::exact(u64::try_from(size)?) ); Ok(()) } diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index e5f1f9050ab..77800377f1c 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -327,8 +327,7 @@ impl ArrayRef { /// Returns the number of valid elements in the array. 
pub fn valid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult { let len = self.len(); - if let Some(Precision::Exact(invalid_count)) = - self.statistics().get_as::(Stat::NullCount) + if let Precision::Exact(invalid_count) = self.statistics().get_as::(Stat::NullCount) { return Ok(len - invalid_count); } diff --git a/vortex-array/src/arrays/dict/take.rs b/vortex-array/src/arrays/dict/take.rs index 78a80ff8277..b77bef19b39 100644 --- a/vortex-array/src/arrays/dict/take.rs +++ b/vortex-array/src/arrays/dict/take.rs @@ -151,19 +151,18 @@ pub(crate) fn propagate_take_stats( target.statistics().with_mut_typed_stats_set(|mut st| { if indices_all_valid { let is_constant = source.statistics().get_as::(Stat::IsConstant); - if is_constant == Some(Precision::Exact(true)) { + if matches!(is_constant, Precision::Exact(true)) { // Any combination of elements from a constant array is still const st.set(Stat::IsConstant, Precision::exact(true)); } } let inexact_min_max = [Stat::Min, Stat::Max] .into_iter() - .filter_map(|stat| { - source - .statistics() - .get(stat) - .and_then(|v| v.map(|s| s.into_value()).into_inexact().transpose()) - .map(|sv| (stat, sv)) + .filter_map(|stat| match source.statistics().get(stat).into_inexact() { + Precision::Exact(scalar) | Precision::Inexact(scalar) => { + scalar.into_value().map(|sv| (stat, Precision::Inexact(sv))) + } + Precision::Absent => None, }) .collect::>(); st.combine_sets( diff --git a/vortex-array/src/arrays/varbin/builder.rs b/vortex-array/src/arrays/varbin/builder.rs index 2e329d5b9a6..ad76eaa981b 100644 --- a/vortex-array/src/arrays/varbin/builder.rs +++ b/vortex-array/src/arrays/varbin/builder.rs @@ -177,7 +177,7 @@ mod tests { .offsets() .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); - assert_eq!(is_sorted, Some(Precision::Exact(true))); + assert_eq!(is_sorted, Precision::Exact(true)); Ok(()) } @@ -190,7 +190,7 @@ mod tests { .offsets() .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); 
- assert_eq!(is_sorted, Some(Precision::Exact(true))); + assert_eq!(is_sorted, Precision::Exact(true)); Ok(()) } } diff --git a/vortex-array/src/display/extractors/stats.rs b/vortex-array/src/display/extractors/stats.rs index 7f8fce7feb1..e39dafa2340 100644 --- a/vortex-array/src/display/extractors/stats.rs +++ b/vortex-array/src/display/extractors/stats.rs @@ -31,8 +31,8 @@ impl fmt::Display for StatsDisplay<'_> { }; // Null count or validity fallback - if let Some(nc) = stats.get(Stat::NullCount) { - if let Ok(n) = usize::try_from(&nc.clone().into_inner()) { + if let Some(nc) = stats.get(Stat::NullCount).into_inner() { + if let Ok(n) = usize::try_from(&nc) { sep(f)?; write!(f, "nulls={}", n)?; } else { @@ -61,8 +61,8 @@ impl fmt::Display for StatsDisplay<'_> { } // NaN count (only if > 0) - if let Some(nan) = stats.get(Stat::NaNCount) - && let Ok(n) = usize::try_from(&nan.into_inner()) + if let Some(nan) = stats.get(Stat::NaNCount).into_inner() + && let Ok(n) = usize::try_from(&nan) && n > 0 { sep(f)?; @@ -70,35 +70,35 @@ impl fmt::Display for StatsDisplay<'_> { } // Min/Max - if let Some(min) = stats.get(Stat::Min) { + if let Some(min) = stats.get(Stat::Min).into_inner() { sep(f)?; write!(f, "min={}", min)?; } - if let Some(max) = stats.get(Stat::Max) { + if let Some(max) = stats.get(Stat::Max).into_inner() { sep(f)?; write!(f, "max={}", max)?; } // Sum - if let Some(sum) = stats.get(Stat::Sum) { + if let Some(sum) = stats.get(Stat::Sum).into_inner() { sep(f)?; write!(f, "sum={}", sum)?; } // Boolean flags (compact) - if let Some(c) = stats.get(Stat::IsConstant) - && bool::try_from(&c.into_inner()).unwrap_or(false) + if let Some(c) = stats.get(Stat::IsConstant).into_inner() + && bool::try_from(&c).unwrap_or(false) { sep(f)?; f.write_str("const")?; } - if let Some(s) = stats.get(Stat::IsStrictSorted) { - if bool::try_from(&s.into_inner()).unwrap_or(false) { + if let Some(s) = stats.get(Stat::IsStrictSorted).into_inner() { + if bool::try_from(&s).unwrap_or(false) 
{ sep(f)?; f.write_str("strict")?; } - } else if let Some(s) = stats.get(Stat::IsSorted) - && bool::try_from(&s.into_inner()).unwrap_or(false) + } else if let Some(s) = stats.get(Stat::IsSorted).into_inner() + && bool::try_from(&s).unwrap_or(false) { sep(f)?; f.write_str("sorted")?; diff --git a/vortex-array/src/expr/stats/bound.rs b/vortex-array/src/expr/stats/bound.rs index 98e3364ecf1..9f26c89e8f8 100644 --- a/vortex-array/src/expr/stats/bound.rs +++ b/vortex-array/src/expr/stats/bound.rs @@ -7,6 +7,7 @@ use vortex_error::VortexError; use vortex_error::VortexResult; use crate::expr::stats::Precision; +use crate::expr::stats::Precision::Absent; use crate::expr::stats::Precision::Exact; use crate::expr::stats::Precision::Inexact; use crate::expr::stats::StatBound; @@ -19,7 +20,7 @@ use crate::partial_ord::partial_min; pub struct LowerBound(pub(crate) Precision); impl LowerBound { - pub(crate) fn min_value(self) -> T { + pub(crate) fn min_value(self) -> Option { self.0.into_inner() } } @@ -37,7 +38,7 @@ pub enum IntersectionResult { /// An intersection result was found Value(T), /// Values has no intersection. 
- None, + Empty, } impl IntersectionResult { @@ -47,7 +48,7 @@ impl IntersectionResult { { match self { IntersectionResult::Value(v) => Ok(v), - IntersectionResult::None => Err(err()), + IntersectionResult::Empty => Err(err()), } } } @@ -58,6 +59,8 @@ impl StatBound for LowerBound { } fn union(&self, other: &Self) -> Option> { + use Precision::*; + Some(LowerBound(match (&self.0, &other.0) { (Exact(lhs), Exact(rhs)) => Exact(partial_min(lhs, rhs)?.clone()), (Inexact(lhs), Inexact(rhs)) => Inexact(partial_min(lhs, rhs)?.clone()), @@ -75,6 +78,7 @@ impl StatBound for LowerBound { Inexact(rhs.clone()) } } + (Absent, _) | (_, Absent) => return None, })) } @@ -86,7 +90,7 @@ impl StatBound for LowerBound { IntersectionResult::Value(LowerBound(Exact(lhs.clone()))) } else { // The two intervals do not overlap - IntersectionResult::None + IntersectionResult::Empty } } (Inexact(lhs), Inexact(rhs)) => { @@ -97,7 +101,7 @@ impl StatBound for LowerBound { IntersectionResult::Value(LowerBound(Exact(rhs.clone()))) } else { // The two intervals do not overlap - IntersectionResult::None + IntersectionResult::Empty } } (Exact(lhs), Inexact(rhs)) => { @@ -105,9 +109,10 @@ impl StatBound for LowerBound { IntersectionResult::Value(LowerBound(Exact(lhs.clone()))) } else { // The two intervals do not overlap - IntersectionResult::None + IntersectionResult::Empty } } + (Absent, _) | (_, Absent) => return None, }) } @@ -137,6 +142,7 @@ impl PartialOrd for LowerBound { Inexact(lhs) => lhs .partial_cmp(other) .and_then(|o| if o == Ordering::Less { None } else { Some(o) }), + Absent => None, } } } @@ -146,7 +152,7 @@ impl PartialOrd for LowerBound { pub struct UpperBound(pub(crate) Precision); impl UpperBound { - pub(crate) fn max_value(self) -> T { + pub(crate) fn max_value(self) -> Option { self.0.into_inner() } } @@ -175,6 +181,7 @@ impl StatBound for UpperBound { Inexact(rhs.clone()) } } + (Absent, _) | (_, Absent) => return None, })) } @@ -185,7 +192,7 @@ impl StatBound for UpperBound { 
IntersectionResult::Value(UpperBound(Exact(lhs.clone()))) } else { // The two intervals do not overlap - IntersectionResult::None + IntersectionResult::Empty } } (Inexact(lhs), Inexact(rhs)) => { @@ -196,7 +203,7 @@ impl StatBound for UpperBound { IntersectionResult::Value(UpperBound(Exact(rhs.clone()))) } else { // The two intervals do not overlap - IntersectionResult::None + IntersectionResult::Empty } } (Exact(lhs), Inexact(rhs)) => { @@ -204,9 +211,10 @@ impl StatBound for UpperBound { IntersectionResult::Value(UpperBound(Exact(lhs.clone()))) } else { // The two intervals do not overlap - IntersectionResult::None + IntersectionResult::Empty } } + (Absent, _) | (_, Absent) => return None, }) } @@ -240,6 +248,7 @@ impl PartialOrd for UpperBound { Some(o) } }), + Absent => return None, } } } @@ -305,7 +314,7 @@ mod tests { let ub1: UpperBound = UpperBound(Precision::exact(13i32)); let ub2 = UpperBound(Precision::inexact(12i32)); - assert_eq!(Some(IntersectionResult::None), ub1.intersection(&ub2)); + assert_eq!(Some(IntersectionResult::Empty), ub1.intersection(&ub2)); } #[test] @@ -321,6 +330,6 @@ mod tests { let lb1: LowerBound = LowerBound(Precision::exact(12i32)); let lb2 = LowerBound(Precision::inexact(13i32)); - assert_eq!(Some(IntersectionResult::None), lb1.intersection(&lb2)); + assert_eq!(Some(IntersectionResult::Empty), lb1.intersection(&lb2)); } } diff --git a/vortex-array/src/expr/stats/precision.rs b/vortex-array/src/expr/stats/precision.rs index 671401f1936..b745c6916e3 100644 --- a/vortex-array/src/expr/stats/precision.rs +++ b/vortex-array/src/expr/stats/precision.rs @@ -6,11 +6,12 @@ use std::fmt::Display; use std::fmt::Formatter; use vortex_error::VortexExpect; -use vortex_error::VortexResult; use crate::dtype::DType; -use crate::expr::stats::precision::Precision::Exact; -use crate::expr::stats::precision::Precision::Inexact; +use crate::expr::stats::IntersectionResult; +use crate::expr::stats::StatBound; +use crate::expr::stats::StatType; +use 
crate::partial_ord::partial_min; use crate::scalar::Scalar; use crate::scalar::ScalarValue; @@ -21,33 +22,49 @@ use crate::scalar::ScalarValue; /// This is statistic specific, for max this will be an upper bound. Meaning that the actual max /// in an array is guaranteed to be less than or equal to the inexact value, but equal to the exact /// value. -/// -// TODO(ngates): should we model Unknown as a variant of Precision? Or have Option>? #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum Precision { Exact(T), Inexact(T), + Absent, +} + +impl Default for Precision { + fn default() -> Self { + Self::Absent + } } impl Precision> { /// Transpose the `Precision>` into `Option>`. pub fn transpose(self) -> Option> { + use Precision::*; + match self { Exact(Some(x)) => Some(Exact(x)), Inexact(Some(x)) => Some(Inexact(x)), + Absent => Some(Absent), Exact(None) | Inexact(None) => None, } } } +impl Precision> { + pub fn transpose(self) -> Result, E> { + todo!() + } +} + impl Precision where T: Copy, { pub fn to_inexact(&self) -> Self { + use Precision::*; + match self { - Exact(v) => Exact(*v), - Inexact(v) => Inexact(*v), + Exact(v) | Inexact(v) => Inexact(*v), + Absent => Absent, } } } @@ -55,34 +72,39 @@ where impl Precision { /// Creates an exact value pub fn exact>(s: S) -> Precision { - Exact(s.into()) + Self::Exact(s.into()) } /// Creates an inexact value pub fn inexact>(s: S) -> Precision { - Inexact(s.into()) + Self::Inexact(s.into()) } /// Pushed the ref into the Precision enum pub fn as_ref(&self) -> Precision<&T> { + use Precision::*; + match self { Exact(val) => Exact(val), Inexact(val) => Inexact(val), + Absent => Absent, } } /// Converts `self` into an inexact bound pub fn into_inexact(self) -> Self { + use Precision::*; + match self { - Exact(val) => Inexact(val), - Inexact(_) => self, + Exact(v) | Inexact(v) => Inexact(v), + Absent => Absent, } } /// Returns the exact value from the bound, if that value is inexact, otherwise `None`. 
pub fn as_exact(self) -> Option { match self { - Exact(val) => Some(val), + Self::Exact(val) => Some(val), _ => None, } } @@ -90,53 +112,60 @@ impl Precision { /// Returns the exact value from the bound, if that value is inexact, otherwise `None`. pub fn as_inexact(self) -> Option { match self { - Inexact(val) => Some(val), + Self::Inexact(val) => Some(val), _ => None, } } - /// True iff self == Exact(_) + /// Returns true when representing an exact value. pub fn is_exact(&self) -> bool { - matches!(self, Exact(_)) + matches!(self, Self::Exact(_)) + } + + /// Returns true when representing an absent value + pub fn is_absent(&self) -> bool { + matches!(self, Self::Absent) } /// Map the value of either precision value pub fn map U>(self, f: F) -> Precision { + use Precision::*; + match self { Exact(value) => Exact(f(value)), Inexact(value) => Inexact(f(value)), + Absent => Absent, } } /// Zip two `Precision` values into a tuple, keeping the inexactness if any. pub fn zip(self, other: Precision) -> Precision<(T, U)> { + use Precision::*; + match (self, other) { (Exact(lhs), Exact(rhs)) => Exact((lhs, rhs)), (Inexact(lhs), Exact(rhs)) | (Exact(lhs), Inexact(rhs)) | (Inexact(lhs), Inexact(rhs)) => Inexact((lhs, rhs)), + (Absent, _) | (_, Absent) => Absent, } } - /// Similar to `map` but handles functions that can fail. 
- pub fn try_map VortexResult>(self, f: F) -> VortexResult> { - let precision = match self { - Exact(value) => Exact(f(value)?), - Inexact(value) => Inexact(f(value)?), - }; - Ok(precision) - } - /// Unwrap the underlying value - pub fn into_inner(self) -> T { + pub fn into_inner(self) -> Option { + use Precision::*; + match self { - Exact(val) | Inexact(val) => val, + Exact(val) | Inexact(val) => Some(val), + Absent => None, } } } impl Display for Precision { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + use Precision::*; + match self { Exact(v) => { write!(f, "{v}") @@ -144,6 +173,9 @@ impl Display for Precision { Inexact(v) => { write!(f, "~{v}") } + Absent => { + write!(f, "{{empty}}") + } } } } @@ -151,7 +183,7 @@ impl Display for Precision { impl PartialEq for Precision { fn eq(&self, other: &T) -> bool { match self { - Exact(v) => v == other, + Self::Exact(v) => v == other, _ => false, } } @@ -177,3 +209,63 @@ impl Precision<&ScalarValue> { }) } } + +/// This allows a stat with a `Precision` to be interpreted as a bound. 
+impl Precision { + /// Applied the stat associated bound to the precision value + pub fn bound>(self) -> Option { + if self.is_absent() { + None + } else { + Some(S::Bound::lift(self)) + } + } +} + +impl StatBound for Precision { + fn lift(value: Precision) -> Self { + value + } + + fn into_value(self) -> Precision { + self + } + + fn union(&self, other: &Self) -> Option { + self.clone() + .zip(other.clone()) + .map(|(lhs, rhs)| partial_min(&lhs, &rhs).cloned()) + .transpose() + } + + fn intersection(&self, other: &Self) -> Option> { + Some(match (self, other) { + (Precision::Exact(lhs), Precision::Exact(rhs)) => { + if lhs.partial_cmp(rhs)?.is_eq() { + IntersectionResult::Value(Precision::Exact(lhs.clone())) + } else { + IntersectionResult::Empty + } + } + (Precision::Exact(exact), Precision::Inexact(inexact)) + | (Precision::Inexact(inexact), Precision::Exact(exact)) => { + if exact.partial_cmp(inexact)?.is_lt() { + IntersectionResult::Value(Precision::Inexact(exact.clone())) + } else { + IntersectionResult::Value(Precision::Exact(exact.clone())) + } + } + (Precision::Inexact(lhs), Precision::Inexact(rhs)) => { + IntersectionResult::Value(Precision::Inexact(partial_min(lhs, rhs)?.clone())) + } + (_, Precision::Absent) | (Precision::Absent, _) => IntersectionResult::Empty, + }) + } + + fn to_exact(&self) -> Option<&T> { + match self { + Precision::Exact(val) => Some(val), + _ => None, + } + } +} diff --git a/vortex-array/src/expr/stats/provider.rs b/vortex-array/src/expr/stats/provider.rs index 3dd35960a42..c14a702e123 100644 --- a/vortex-array/src/expr/stats/provider.rs +++ b/vortex-array/src/expr/stats/provider.rs @@ -10,7 +10,7 @@ use crate::expr::stats::Stat; use crate::scalar::Scalar; pub trait StatsProvider { - fn get(&self, stat: Stat) -> Option>; + fn get(&self, stat: Stat) -> Precision; /// Count of stored stats with known values. 
fn len(&self) -> usize; @@ -25,23 +25,21 @@ impl StatsProviderExt for S where S: StatsProvider {} pub trait StatsProviderExt: StatsProvider { fn get_scalar_bound>(&self) -> Option { - self.get(S::STAT).map(|v| v.bound::()) + self.get(S::STAT).bound::() } fn get_as TryFrom<&'a Scalar, Error = VortexError>>( &self, stat: Stat, - ) -> Option> { + ) -> Precision { self.get(stat).map(|v| { - v.map(|v| { - T::try_from(&v).unwrap_or_else(|err| { - vortex_panic!( - err, - "Failed to get stat {} as {}", - stat, - std::any::type_name::() - ) - }) + T::try_from(&v).unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to get stat {} as {}", + stat, + std::any::type_name::() + ) }) }) } @@ -51,6 +49,6 @@ pub trait StatsProviderExt: StatsProvider { S: StatType, U: for<'a> TryFrom<&'a Scalar, Error = VortexError>, { - self.get_as::(S::STAT).map(|v| v.bound::()) + self.get_as::(S::STAT).bound::() } } diff --git a/vortex-array/src/expr/stats/stat_bound.rs b/vortex-array/src/expr/stats/stat_bound.rs index e7a8d3c549b..6b8903dcfd0 100644 --- a/vortex-array/src/expr/stats/stat_bound.rs +++ b/vortex-array/src/expr/stats/stat_bound.rs @@ -1,12 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::cmp::Ordering; - use crate::expr::stats::Precision; use crate::expr::stats::Stat; use crate::expr::stats::bound::IntersectionResult; -use crate::partial_ord::partial_min; /// `StatType` define the bound of a given statistic. (e.g. `Max` is an upper bound), /// this is used to extract the bound from a `Precision` value, (e.g. `p::bound()`). @@ -38,58 +35,3 @@ pub trait StatBound: Sized { /// Returns the exact value from the bound if that value is exact, otherwise `None`. fn to_exact(&self) -> Option<&T>; } - -/// This allows a stat with a `Precision` to be interpreted as a bound. 
-impl Precision { - /// Applied the stat associated bound to the precision value - pub fn bound>(self) -> S::Bound { - S::Bound::lift(self) - } -} - -impl StatBound for Precision { - fn lift(value: Precision) -> Self { - value - } - - fn into_value(self) -> Precision { - self - } - - fn union(&self, other: &Self) -> Option { - self.clone() - .zip(other.clone()) - .map(|(lhs, rhs)| partial_min(&lhs, &rhs).cloned()) - .transpose() - } - - fn intersection(&self, other: &Self) -> Option> { - Some(match (self, other) { - (Precision::Exact(lhs), Precision::Exact(rhs)) => { - if lhs.partial_cmp(rhs)? == Ordering::Equal { - IntersectionResult::Value(Precision::Exact(lhs.clone())) - } else { - IntersectionResult::None - } - } - (Precision::Exact(exact), Precision::Inexact(inexact)) - | (Precision::Inexact(inexact), Precision::Exact(exact)) => { - if exact.partial_cmp(inexact)? == Ordering::Less { - IntersectionResult::Value(Precision::Inexact(exact.clone())) - } else { - IntersectionResult::Value(Precision::Exact(exact.clone())) - } - } - (Precision::Inexact(lhs), Precision::Inexact(rhs)) => { - IntersectionResult::Value(Precision::Inexact(partial_min(lhs, rhs)?.clone())) - } - }) - } - - fn to_exact(&self) -> Option<&T> { - match self { - Precision::Exact(val) => Some(val), - _ => None, - } - } -} diff --git a/vortex-array/src/scalar_fn/fns/stat.rs b/vortex-array/src/scalar_fn/fns/stat.rs index 540d4bcaff6..575041ab9c6 100644 --- a/vortex-array/src/scalar_fn/fns/stat.rs +++ b/vortex-array/src/scalar_fn/fns/stat.rs @@ -138,7 +138,7 @@ fn stat_array( .statistics() .with_typed_stats_set(|stats| stats.get(stat)) // We don't mind whether the stat is approxed or not, since these are row-wise bounds. 
- .map(|stat| stat.into_inner()) + .into_inner() .and_then(Scalar::into_value) } else { tracing::trace!( diff --git a/vortex-array/src/stats/array.rs b/vortex-array/src/stats/array.rs index fd41090c528..c939b9fbe60 100644 --- a/vortex-array/src/stats/array.rs +++ b/vortex-array/src/stats/array.rs @@ -104,7 +104,7 @@ impl StatsSetRef<'_> { let mut guard = self.array_stats.inner.write(); for (stat, value) in iter { if !value.is_exact() { - if !guard.get(*stat).is_some_and(|v| v.is_exact()) { + if !guard.get(*stat).is_exact() { guard.set(*stat, value.clone()); } } else { @@ -155,7 +155,7 @@ impl StatsSetRef<'_> { pub fn compute_stat(&self, stat: Stat, ctx: &mut ExecutionCtx) -> VortexResult> { // If it's already computed and exact, we can return it. - if let Some(Precision::Exact(s)) = self.get(stat) { + if let Precision::Exact(s) = self.get(stat) { return Ok(Some(s)); } @@ -281,7 +281,7 @@ impl StatsSetRef<'_> { } impl StatsProvider for StatsSetRef<'_> { - fn get(&self, stat: Stat) -> Option> { + fn get(&self, stat: Stat) -> Precision { self.array_stats .inner .read() diff --git a/vortex-array/src/stats/flatbuffers.rs b/vortex-array/src/stats/flatbuffers.rs index 2fbe7774a40..ea6f2a5377b 100644 --- a/vortex-array/src/stats/flatbuffers.rs +++ b/vortex-array/src/stats/flatbuffers.rs @@ -38,45 +38,33 @@ impl WriteFlatBuffer for StatsSet { &self, fbb: &mut FlatBufferBuilder<'fb>, ) -> VortexResult>> { - let (min_precision, min) = self - .get(Stat::Min) - .map(|min| { - ( - if min.is_exact() { - fba::Precision::Exact - } else { - fba::Precision::Inexact - }, - Some( - fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some( - &min.into_inner(), - ))), - ), - ) - }) - .unwrap_or_else(|| (fba::Precision::Inexact, None)); - - let (max_precision, max) = self - .get(Stat::Max) - .map(|max| { - ( - if max.is_exact() { - fba::Precision::Exact - } else { - fba::Precision::Inexact - }, - Some( - fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some( - &max.into_inner(), - ))), - 
), - ) - }) - .unwrap_or_else(|| (fba::Precision::Inexact, None)); + let (min_precision, min) = match self.get(Stat::Min) { + Precision::Exact(min) => ( + fba::Precision::Exact, + Some(fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&min)))), + ), + Precision::Inexact(min) => ( + fba::Precision::Inexact, + Some(fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&min)))), + ), + Precision::Absent => (fba::Precision::Absent, None), + }; + + let (max_precision, max) = match self.get(Stat::Max) { + Precision::Exact(max) => ( + fba::Precision::Exact, + Some(fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&max)))), + ), + Precision::Inexact(max) => ( + fba::Precision::Inexact, + Some(fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&max)))), + ), + Precision::Absent => (fba::Precision::Absent, None), + }; let sum = self .get(Stat::Sum) - .and_then(Precision::as_exact) + .as_exact() .map(|sum| fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&sum)))); let stat_args = &fba::ArrayStatsArgs { @@ -87,22 +75,22 @@ impl WriteFlatBuffer for StatsSet { sum, is_sorted: self .get_as::(Stat::IsSorted, &DType::Bool(Nullability::NonNullable)) - .and_then(Precision::as_exact), + .as_exact(), is_strict_sorted: self .get_as::(Stat::IsStrictSorted, &DType::Bool(Nullability::NonNullable)) - .and_then(Precision::as_exact), + .as_exact(), is_constant: self .get_as::(Stat::IsConstant, &DType::Bool(Nullability::NonNullable)) - .and_then(Precision::as_exact), + .as_exact(), null_count: self .get_as::(Stat::NullCount, &PType::U64.into()) - .and_then(Precision::as_exact), + .as_exact(), uncompressed_size_in_bytes: self .get_as::(Stat::UncompressedSizeInBytes, &PType::U64.into()) - .and_then(Precision::as_exact), + .as_exact(), nan_count: self .get_as::(Stat::NaNCount, &PType::U64.into()) - .and_then(Precision::as_exact), + .as_exact(), }; Ok(fba::ArrayStats::create(fbb, stat_args)) @@ -144,6 +132,11 @@ impl StatsSet { if let Some(max) = fb.max() && let 
Some(stat_dtype) = stat_dtype { + let max_precision = fb.max_precision(); + if max_precision == fba::Precision::Absent { + vortex_bail!("Corrupted max stat: value present with absent precision"); + } + let value = ScalarValue::from_proto_bytes(max.bytes(), &stat_dtype, session)?; let Some(value) = value else { @@ -152,7 +145,7 @@ impl StatsSet { stats_set.set( Stat::Max, - match fb.max_precision() { + match max_precision { fba::Precision::Exact => Precision::Exact(value), fba::Precision::Inexact => Precision::Inexact(value), other => vortex_bail!("Corrupted max_precision field: {other:?}"), @@ -164,6 +157,11 @@ impl StatsSet { if let Some(min) = fb.min() && let Some(stat_dtype) = stat_dtype { + let min_precision = fb.min_precision(); + if min_precision == fba::Precision::Absent { + vortex_bail!("Corrupted min stat: value present with absent precision"); + } + let value = ScalarValue::from_proto_bytes(min.bytes(), &stat_dtype, session)?; let Some(value) = value else { @@ -172,7 +170,7 @@ impl StatsSet { stats_set.set( Stat::Min, - match fb.min_precision() { + match min_precision { fba::Precision::Exact => Precision::Exact(value), fba::Precision::Inexact => Precision::Inexact(value), other => vortex_bail!("Corrupted min_precision field: {other:?}"), diff --git a/vortex-array/src/stats/stats_set.rs b/vortex-array/src/stats/stats_set.rs index 0b9430ef155..7613b746a33 100644 --- a/vortex-array/src/stats/stats_set.rs +++ b/vortex-array/src/stats/stats_set.rs @@ -103,11 +103,12 @@ impl StatsSet { } /// Get value for a given stat - pub fn get(&self, stat: Stat) -> Option> { + pub fn get(&self, stat: Stat) -> Precision { self.values .iter() .find(|(s, _)| *s == stat) .map(|(_, v)| v.clone()) + .unwrap_or(Precision::Absent) } /// Length of the stats set @@ -125,21 +126,19 @@ impl StatsSet { &self, stat: Stat, dtype: &DType, - ) -> Option> { + ) -> Precision { self.get(stat).map(|v| { - v.map(|v| { - T::try_from( - &Scalar::try_new(dtype.clone(), Some(v)) - 
.vortex_expect("failed to construct a scalar statistic"), + T::try_from( + &Scalar::try_new(dtype.clone(), Some(v)) + .vortex_expect("failed to construct a scalar statistic"), + ) + .unwrap_or_else(|err| { + vortex_panic!( + err, + "Failed to get stat {} as {}", + stat, + std::any::type_name::() ) - .unwrap_or_else(|err| { - vortex_panic!( - err, - "Failed to get stat {} as {}", - stat, - std::any::type_name::() - ) - }) }) }) } @@ -220,16 +219,14 @@ pub struct TypedStatsSetRef<'a, 'b> { } impl StatsProvider for TypedStatsSetRef<'_, '_> { - fn get(&self, stat: Stat) -> Option> { - self.values.get(stat).map(|p| { - p.map(|sv| { - Scalar::try_new( - stat.dtype(self.dtype) - .vortex_expect("Must have valid dtype if value is present"), - Some(sv), - ) - .vortex_expect("failed to construct a scalar statistic") - }) + fn get(&self, stat: Stat) -> Precision { + self.values.get(stat).map(|sv| { + Scalar::try_new( + stat.dtype(self.dtype) + .vortex_expect("Must have valid dtype if value is present"), + Some(sv), + ) + .vortex_expect("failed to construct a scalar statistic") }) } @@ -256,16 +253,14 @@ impl MutTypedStatsSetRef<'_, '_> { } impl StatsProvider for MutTypedStatsSetRef<'_, '_> { - fn get(&self, stat: Stat) -> Option> { - self.values.get(stat).map(|p| { - p.map(|sv| { - Scalar::try_new( - stat.dtype(self.dtype) - .vortex_expect("Must have valid dtype if value is present"), - Some(sv), - ) - .vortex_expect("failed to construct a scalar statistic") - }) + fn get(&self, stat: Stat) -> Precision { + self.values.get(stat).map(|sv| { + Scalar::try_new( + stat.dtype(self.dtype) + .vortex_expect("Must have valid dtype if value is present"), + Some(sv), + ) + .vortex_expect("failed to construct a scalar statistic") }) } @@ -488,13 +483,12 @@ impl MutTypedStatsSetRef<'_, '_> { let self_min = self.get(Stat::Min); let other_min = other.get(Stat::Min); - if let ( - Some(Precision::Exact(self_const)), - Some(Precision::Exact(other_const)), - Some(Precision::Exact(self_min)), - 
Some(Precision::Exact(other_min)), - ) = (self_const, other_const, self_min, other_min) - { + if let (Some(self_const), Some(other_const), Some(self_min), Some(other_min)) = ( + self_const.as_exact(), + other_const.as_exact(), + self_min.as_exact(), + other_min.as_exact(), + ) { if self_const && other_const && self_min == other_min { self.set(Stat::IsConstant, Precision::exact(true)); } else { @@ -518,7 +512,7 @@ impl MutTypedStatsSetRef<'_, '_> { stat: Stat, cmp: F, ) { - if (Some(Precision::Exact(true)), Some(Precision::Exact(true))) + if (Precision::Exact(true), Precision::Exact(true)) == (self.get_as(stat), other.get_as(stat)) { // There might be no stat because it was dropped, or it doesn't exist @@ -526,10 +520,10 @@ impl MutTypedStatsSetRef<'_, '_> { // We assume that it was the dropped case since the doesn't exist might imply sorted, // but this in-precision is correct. if let (Some(self_max), Some(other_min)) = ( - self.get_scalar_bound::(), - other.get_scalar_bound::(), + self.get_scalar_bound::().and_then(|v| v.max_value()), + other.get_scalar_bound::().and_then(|v| v.min_value()), ) { - return if cmp(&self_max.max_value(), &other_min.min_value()) { + return if cmp(&self_max, &other_min) { // keep value } else { self.set(stat, Precision::inexact(false)); @@ -552,14 +546,15 @@ impl MutTypedStatsSetRef<'_, '_> { } fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) { - match (self.get_as::(stat), other.get_as::(stat)) { - (Some(nc1), Some(nc2)) => { - self.set( - stat, - nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)), - ); - } - _ => self.clear(stat), + let merged = self + .get_as::(stat) + .zip(other.get_as::(stat)) + .map(|(l, r)| ScalarValue::from(l + r)); + + if merged.is_absent() { + self.clear(stat); + } else { + self.set(stat, merged); } } } @@ -657,12 +652,9 @@ mod test { let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); assert_eq!( first_ref.get_as::(Stat::IsConstant), - 
Some(Precision::exact(false)) - ); - assert_eq!( - first_ref.get_as::(Stat::Min), - Some(Precision::exact(42)) + Precision::exact(false) ); + assert_eq!(first_ref.get_as::(Stat::Min), Precision::exact(42)); } #[test] @@ -673,7 +665,7 @@ mod test { ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert!(first_ref.get(Stat::Min).is_none()); + assert!(first_ref.get(Stat::Min).is_absent()); } #[test] @@ -684,7 +676,7 @@ mod test { ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert!(first_ref.get(Stat::Min).is_none()); + assert!(first_ref.get(Stat::Min).is_absent()); } #[test] @@ -695,10 +687,7 @@ mod test { ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert_eq!( - first_ref.get_as::(Stat::Min), - Some(Precision::exact(37)) - ); + assert_eq!(first_ref.get_as::(Stat::Min), Precision::exact(37)); } #[test] @@ -707,7 +696,7 @@ mod test { &StatsSet::default(), &DType::Primitive(PType::I32, Nullability::NonNullable), ); - assert!(first.get(Stat::Max).is_none()); + assert!(first.get(Stat::Max).is_absent()); } #[test] @@ -716,7 +705,7 @@ mod test { &StatsSet::of(Stat::Max, Precision::exact(42)), &DType::Primitive(PType::I32, Nullability::NonNullable), ); - assert!(first.get(Stat::Max).is_none()); + assert!(first.get(Stat::Max).is_absent()); } #[test] @@ -726,10 +715,7 @@ mod test { &DType::Primitive(PType::I32, Nullability::NonNullable), ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert_eq!( - first_ref.get_as::(Stat::Max), - Some(Precision::exact(42)) - ); + assert_eq!(first_ref.get_as::(Stat::Max), Precision::exact(42)); } #[test] @@ -738,10 +724,7 @@ mod test { let first = StatsSet::of(Stat::Max, Precision::exact(42i32)) .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype); let first_ref = first.as_typed_ref(&dtype); - assert_eq!( - 
first_ref.get_as::(Stat::Max), - Some(Precision::inexact(43)) - ); + assert_eq!(first_ref.get_as::(Stat::Max), Precision::inexact(43)); } #[test] @@ -753,7 +736,7 @@ mod test { &DType::Primitive(PType::I32, Nullability::NonNullable), ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert!(first_ref.get(Stat::Sum).is_none()); + assert!(first_ref.get(Stat::Sum).is_absent()); } #[test] @@ -765,7 +748,7 @@ mod test { &DType::Primitive(PType::I32, Nullability::NonNullable), ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert!(first_ref.get(Stat::Sum).is_none()); + assert!(first_ref.get(Stat::Sum).is_absent()); } #[test] @@ -777,10 +760,7 @@ mod test { &DType::Primitive(PType::I32, Nullability::NonNullable), ); let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert_eq!( - first_ref.get_as::(Stat::Sum), - Some(Precision::exact(79i64)) - ); + assert_eq!(first_ref.get_as::(Stat::Sum), Precision::exact(79i64)); } #[test] @@ -789,7 +769,7 @@ mod test { &StatsSet::default(), &DType::Primitive(PType::I32, Nullability::NonNullable), ); - assert!(first.get(Stat::IsStrictSorted).is_none()); + assert!(first.get(Stat::IsStrictSorted).is_absent()); } #[test] @@ -798,7 +778,7 @@ mod test { &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)), &DType::Primitive(PType::I32, Nullability::NonNullable), ); - assert!(first.get(Stat::IsStrictSorted).is_none()); + assert!(first.get(Stat::IsStrictSorted).is_absent()); } #[test] @@ -815,7 +795,7 @@ mod test { let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); assert_eq!( first_ref.get_as::(Stat::IsStrictSorted), - Some(Precision::exact(true)) + Precision::exact(true) ); } @@ -834,7 +814,7 @@ mod test { second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); assert_eq!( second_ref.get_as::(Stat::IsStrictSorted), - 
Some(Precision::inexact(false)) + Precision::inexact(false) ); } @@ -853,7 +833,7 @@ mod test { second.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); assert_eq!( second_ref.get_as::(Stat::IsStrictSorted), - Some(Precision::exact(false)) + Precision::exact(false) ); } @@ -866,7 +846,7 @@ mod test { &second, &DType::Primitive(PType::I32, Nullability::NonNullable), ); - assert!(first.get(Stat::IsStrictSorted).is_none()); + assert!(first.get(Stat::IsStrictSorted).is_absent()); } #[test] @@ -883,7 +863,7 @@ mod test { let first_ref = first.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); assert_eq!( first_ref.get_as::(Stat::IsStrictSorted), - Some(Precision::exact(true)) + Precision::exact(true) ); } @@ -902,7 +882,7 @@ mod test { let stats = array.statistics().to_owned(); for stat in &all_stats { - assert!(stats.get(*stat).is_some(), "Stat {stat} is missing"); + assert!(!stats.get(*stat).is_absent(), "Stat {stat} is missing"); } let merged = stats.clone().merge_unordered( @@ -911,7 +891,7 @@ mod test { ); for stat in &all_stats { assert_eq!( - merged.get(*stat).is_some(), + !merged.get(*stat).is_absent(), stat.is_commutative(), "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative" ) @@ -929,11 +909,8 @@ mod test { stats_ref.get_as::(Stat::Max) ); assert_eq!( - merged_ref.get_as::(Stat::NullCount).unwrap(), - stats_ref - .get_as::(Stat::NullCount) - .unwrap() - .map(|s| s * 2) + merged_ref.get_as::(Stat::NullCount), + stats_ref.get_as::(Stat::NullCount).map(|s| s * 2) ); } @@ -947,10 +924,7 @@ mod test { ); let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); - assert_eq!( - merged_ref.get_as::(Stat::Min), - Some(Precision::exact(5)) - ); + assert_eq!(merged_ref.get_as::(Stat::Min), Precision::exact(5)); } #[test] @@ -961,10 +935,7 @@ mod test { ); let merged_ref = merged.as_typed_ref(&DType::Primitive(PType::I32, 
Nullability::NonNullable)); - assert_eq!( - merged_ref.get_as::(Stat::Min), - Some(Precision::inexact(4)) - ); + assert_eq!(merged_ref.get_as::(Stat::Min), Precision::inexact(4)); } #[test] @@ -981,7 +952,7 @@ mod test { .unwrap(); assert_eq!( stats_ref.get_as::(Stat::IsConstant), - Some(Precision::exact(true)) + Precision::exact(true) ); } @@ -997,7 +968,7 @@ mod test { .unwrap(); assert_eq!( stats_ref.get_as::(Stat::IsConstant), - Some(Precision::exact(true)) + Precision::exact(true) ); } @@ -1013,7 +984,7 @@ mod test { .unwrap(); assert_eq!( stats_ref.get_as::(Stat::IsConstant), - Some(Precision::exact(false)) + Precision::exact(false) ); } } @@ -1060,19 +1031,13 @@ mod test { stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); // Min should remain unchanged - assert_eq!( - stats_ref.get_as::(Stat::Min), - Some(Precision::exact(42)) - ); + assert_eq!(stats_ref.get_as::(Stat::Min), Precision::exact(42)); // Max should be added - assert_eq!( - stats_ref.get_as::(Stat::Max), - Some(Precision::exact(100)) - ); + assert_eq!(stats_ref.get_as::(Stat::Max), Precision::exact(100)); // IsStrictSorted should be added assert_eq!( stats_ref.get_as::(Stat::IsStrictSorted), - Some(Precision::exact(true)) + Precision::exact(true) ); } @@ -1102,19 +1067,13 @@ mod test { stats1.as_typed_ref(&DType::Primitive(PType::I32, Nullability::NonNullable)); // Min should remain unchanged since it's more restrictive than the inexact value - assert_eq!( - stats_ref.get_as::(Stat::Min), - Some(Precision::exact(42)) - ); + assert_eq!(stats_ref.get_as::(Stat::Min), Precision::exact(42)); // Check that max was updated with the exact value - assert_eq!( - stats_ref.get_as::(Stat::Max), - Some(Precision::exact(90)) - ); + assert_eq!(stats_ref.get_as::(Stat::Max), Precision::exact(90)); // Check that IsSorted was added assert_eq!( stats_ref.get_as::(Stat::IsSorted), - Some(Precision::exact(true)) + Precision::exact(true) ); } } diff --git 
a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs index c3c3ddfd34a..0d394a3643c 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs @@ -40,6 +40,7 @@ table ArrayNode { enum Precision: uint8 { Inexact = 0, Exact = 1, + Absent = 2, } table ArrayStats { diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 6d903a56aa5..04a2f4ef94f 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -91,12 +91,13 @@ impl ::flatbuffers::SimpleToVerifyInSlice for Compression {} #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] pub const ENUM_MIN_PRECISION: u8 = 0; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] -pub const ENUM_MAX_PRECISION: u8 = 1; +pub const ENUM_MAX_PRECISION: u8 = 2; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] #[allow(non_camel_case_types)] -pub const ENUM_VALUES_PRECISION: [Precision; 2] = [ +pub const ENUM_VALUES_PRECISION: [Precision; 3] = [ Precision::Inexact, Precision::Exact, + Precision::Absent, ]; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] @@ -106,18 +107,21 @@ pub struct Precision(pub u8); impl Precision { pub const Inexact: Self = Self(0); pub const Exact: Self = Self(1); + pub const Absent: Self = Self(2); pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 1; + pub const ENUM_MAX: u8 = 2; pub const ENUM_VALUES: &'static [Self] = &[ Self::Inexact, Self::Exact, + Self::Absent, ]; /// Returns the variant's name or "" if unknown. 
pub fn variant_name(self) -> Option<&'static str> { match self { Self::Inexact => Some("Inexact"), Self::Exact => Some("Exact"), + Self::Absent => Some("Absent"), _ => None, } } From 5080813716b8ac25d3c609cc449c0fd8be1a04e2 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:04:17 +0100 Subject: [PATCH 2/9] fixups Signed-off-by: Adam Gutglick --- encodings/datetime-parts/src/compute/take.rs | 4 +- vortex-array/public-api.lock | 42 +++++---- vortex-array/src/expr/stats/precision.rs | 7 +- vortex-datafusion/src/convert/stats.rs | 91 ++++++++++---------- vortex-datafusion/src/lib.rs | 1 + vortex-datafusion/src/persistent/format.rs | 7 +- vortex-file/src/pruning.rs | 2 +- vortex-file/src/v2/file_stats_reader.rs | 2 +- vortex-flatbuffers/public-api.lock | 4 +- vortex-layout/src/layouts/flat/writer.rs | 4 +- vortex-layout/src/layouts/zoned/builder.rs | 2 +- 11 files changed, 91 insertions(+), 75 deletions(-) diff --git a/encodings/datetime-parts/src/compute/take.rs b/encodings/datetime-parts/src/compute/take.rs index fcdc00c1979..8b09e2d78ef 100644 --- a/encodings/datetime-parts/src/compute/take.rs +++ b/encodings/datetime-parts/src/compute/take.rs @@ -65,7 +65,7 @@ fn take_datetime_parts( .seconds() .statistics() .get(Stat::Min) - .map(|s| s.into_inner()) + .into_inner() .unwrap_or_else(|| Scalar::primitive(0i64, Nullability::NonNullable)) .cast(array.seconds().dtype())?; let taken_seconds = taken_seconds.fill_null(seconds_fill)?; @@ -74,7 +74,7 @@ fn take_datetime_parts( .subseconds() .statistics() .get(Stat::Min) - .map(|s| s.into_inner()) + .into_inner() .unwrap_or_else(|| Scalar::primitive(0i64, Nullability::NonNullable)) .cast(array.subseconds().dtype())?; let taken_subseconds = taken_subseconds.fill_null(subseconds_fill)?; diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 51733789a4b..db07d0daf46 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -12072,7 +12072,7 @@ pub mod 
vortex_array::expr::stats pub enum vortex_array::expr::stats::IntersectionResult -pub vortex_array::expr::stats::IntersectionResult::None +pub vortex_array::expr::stats::IntersectionResult::Empty pub vortex_array::expr::stats::IntersectionResult::Value(T) @@ -12098,6 +12098,8 @@ impl core::marker::StructuralPartialEq for vortex_array::expr::stats::Interse pub enum vortex_array::expr::stats::Precision +pub vortex_array::expr::stats::Precision::Absent + pub vortex_array::expr::stats::Precision::Exact(T) pub vortex_array::expr::stats::Precision::Inexact(T) @@ -12110,6 +12112,10 @@ impl vortex_array::expr::stats::Precision pub fn vortex_array::expr::stats::Precision::into_scalar(self, vortex_array::dtype::DType) -> vortex_array::expr::stats::Precision +impl vortex_array::expr::stats::Precision> + +pub fn vortex_array::expr::stats::Precision>::transpose(self) -> core::result::Result, E> + impl vortex_array::expr::stats::Precision where T: core::marker::Copy pub fn vortex_array::expr::stats::Precision::to_inexact(&self) -> Self @@ -12128,19 +12134,19 @@ pub fn vortex_array::expr::stats::Precision::inexact::into_inexact(self) -> Self -pub fn vortex_array::expr::stats::Precision::into_inner(self) -> T +pub fn vortex_array::expr::stats::Precision::into_inner(self) -> core::option::Option + +pub fn vortex_array::expr::stats::Precision::is_absent(&self) -> bool pub fn vortex_array::expr::stats::Precision::is_exact(&self) -> bool pub fn vortex_array::expr::stats::Precision::map U>(self, F) -> vortex_array::expr::stats::Precision -pub fn vortex_array::expr::stats::Precision::try_map vortex_error::VortexResult>(self, F) -> vortex_error::VortexResult> - pub fn vortex_array::expr::stats::Precision::zip(self, vortex_array::expr::stats::Precision) -> vortex_array::expr::stats::Precision<(T, U)> impl vortex_array::expr::stats::Precision -pub fn vortex_array::expr::stats::Precision::bound>(self) -> ::Bound +pub fn vortex_array::expr::stats::Precision::bound>(self) -> 
core::option::Option<::Bound> impl vortex_array::expr::stats::Precision> @@ -12182,6 +12188,10 @@ pub fn vortex_array::expr::stats::Precision::fmt(&self, &mut core::fmt::Forma impl core::marker::Copy for vortex_array::expr::stats::Precision +impl core::default::Default for vortex_array::expr::stats::Precision + +pub fn vortex_array::expr::stats::Precision::default() -> Self + impl core::marker::StructuralPartialEq for vortex_array::expr::stats::Precision #[repr(u8)] pub enum vortex_array::expr::stats::Stat @@ -12548,7 +12558,7 @@ pub const vortex_array::expr::stats::UncompressedSizeInBytes::STAT: vortex_array pub trait vortex_array::expr::stats::StatsProvider -pub fn vortex_array::expr::stats::StatsProvider::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::expr::stats::StatsProvider::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::expr::stats::StatsProvider::is_empty(&self) -> bool @@ -12556,7 +12566,7 @@ pub fn vortex_array::expr::stats::StatsProvider::len(&self) -> usize impl vortex_array::expr::stats::StatsProvider for vortex_array::stats::MutTypedStatsSetRef<'_, '_> -pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::is_empty(&self) -> bool @@ -12564,7 +12574,7 @@ pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::len(&self) -> usize impl vortex_array::expr::stats::StatsProvider for vortex_array::stats::StatsSetRef<'_> -pub fn vortex_array::stats::StatsSetRef<'_>::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::stats::StatsSetRef<'_>::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn 
vortex_array::stats::StatsSetRef<'_>::is_empty(&self) -> bool @@ -12572,7 +12582,7 @@ pub fn vortex_array::stats::StatsSetRef<'_>::len(&self) -> usize impl vortex_array::expr::stats::StatsProvider for vortex_array::stats::TypedStatsSetRef<'_, '_> -pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::is_empty(&self) -> bool @@ -12580,7 +12590,7 @@ pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::len(&self) -> usize pub trait vortex_array::expr::stats::StatsProviderExt: vortex_array::expr::stats::StatsProvider -pub fn vortex_array::expr::stats::StatsProviderExt::get_as core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError>>(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::expr::stats::StatsProviderExt::get_as core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError>>(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::expr::stats::StatsProviderExt::get_as_bound(&self) -> core::option::Option<::Bound> where S: vortex_array::expr::stats::StatType, U: for<'a> core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError> @@ -12588,7 +12598,7 @@ pub fn vortex_array::expr::stats::StatsProviderExt::get_scalar_bound vortex_array::expr::stats::StatsProviderExt for S where S: vortex_array::expr::stats::StatsProvider -pub fn S::get_as core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError>>(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn S::get_as core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError>>(&self, 
vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn S::get_as_bound(&self) -> core::option::Option<::Bound> where S: vortex_array::expr::stats::StatType, U: for<'a> core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError> @@ -20228,7 +20238,7 @@ pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::merge_unordered(self, & impl vortex_array::expr::stats::StatsProvider for vortex_array::stats::MutTypedStatsSetRef<'_, '_> -pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::is_empty(&self) -> bool @@ -20266,9 +20276,9 @@ impl vortex_array::stats::StatsSet pub fn vortex_array::stats::StatsSet::clear(&mut self, vortex_array::expr::stats::Stat) -pub fn vortex_array::stats::StatsSet::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::stats::StatsSet::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision -pub fn vortex_array::stats::StatsSet::get_as core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError>>(&self, vortex_array::expr::stats::Stat, &vortex_array::dtype::DType) -> core::option::Option> +pub fn vortex_array::stats::StatsSet::get_as core::convert::TryFrom<&'a vortex_array::scalar::Scalar, Error = vortex_error::VortexError>>(&self, vortex_array::expr::stats::Stat, &vortex_array::dtype::DType) -> vortex_array::expr::stats::Precision pub fn vortex_array::stats::StatsSet::is_empty(&self) -> bool @@ -20390,7 +20400,7 @@ pub fn vortex_array::stats::StatsSetRef<'_>::with_typed_stats_set -pub fn vortex_array::stats::StatsSetRef<'_>::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn 
vortex_array::stats::StatsSetRef<'_>::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::stats::StatsSetRef<'_>::is_empty(&self) -> bool @@ -20410,7 +20420,7 @@ pub vortex_array::stats::TypedStatsSetRef::values: &'a vortex_array::stats::Stat impl vortex_array::expr::stats::StatsProvider for vortex_array::stats::TypedStatsSetRef<'_, '_> -pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> core::option::Option> +pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::get(&self, vortex_array::expr::stats::Stat) -> vortex_array::expr::stats::Precision pub fn vortex_array::stats::TypedStatsSetRef<'_, '_>::is_empty(&self) -> bool diff --git a/vortex-array/src/expr/stats/precision.rs b/vortex-array/src/expr/stats/precision.rs index b745c6916e3..81d521f30a1 100644 --- a/vortex-array/src/expr/stats/precision.rs +++ b/vortex-array/src/expr/stats/precision.rs @@ -50,8 +50,13 @@ impl Precision> { } impl Precision> { + /// Transpose a `Precision>` into a `Result, E>`. pub fn transpose(self) -> Result, E> { - todo!() + match self { + Self::Exact(value) => value.map(Precision::Exact), + Self::Inexact(value) => value.map(Precision::Inexact), + Self::Absent => Ok(Precision::Absent), + } } } diff --git a/vortex-datafusion/src/convert/stats.rs b/vortex-datafusion/src/convert/stats.rs index 3c7b31de95d..d6c2d90d444 100644 --- a/vortex-datafusion/src/convert/stats.rs +++ b/vortex-datafusion/src/convert/stats.rs @@ -27,55 +27,52 @@ pub(crate) fn stats_set_to_df( // TODO(connor): There's a lot that can go wrong here, should probably handle this // more gracefully... // Find the min statistic. 
- let min = stats_set.get(Stat::Min).and_then(|pstat_val| { - pstat_val - .map(|stat_val| { - Scalar::try_new( - Stat::Min - .dtype(dtype) - .vortex_expect("must have a valid dtype"), - Some(stat_val), - ) - .vortex_expect("`Stat::Min` somehow had an incompatible `DType`") - .try_to_df() - .ok() - }) - .transpose() - }); + let min = stats_set + .get(Stat::Min) + .map(|stat_val| { + Scalar::try_new( + Stat::Min + .dtype(dtype) + .vortex_expect("must have a valid dtype"), + Some(stat_val), + ) + .vortex_expect("`Stat::Min` somehow had an incompatible `DType`") + .try_to_df() + .ok() + }) + .transpose(); // Find the max statistic. - let max = stats_set.get(Stat::Max).and_then(|pstat_val| { - pstat_val - .map(|stat_val| { - Scalar::try_new( - Stat::Max - .dtype(dtype) - .vortex_expect("must have a valid dtype"), - Some(stat_val), - ) - .vortex_expect("`Stat::Max` somehow had an incompatible `DType`") - .try_to_df() - .ok() - }) - .transpose() - }); + let max = stats_set + .get(Stat::Max) + .map(|stat_val| { + Scalar::try_new( + Stat::Max + .dtype(dtype) + .vortex_expect("must have a valid dtype"), + Some(stat_val), + ) + .vortex_expect("`Stat::Max` somehow had an incompatible `DType`") + .try_to_df() + .ok() + }) + .transpose(); // Find the sum statistic - let sum = stats_set.get(Stat::Sum).and_then(|pstat_val| { - pstat_val - .map(|stat_val| { - Scalar::try_new( - Stat::Sum - .dtype(dtype) - .vortex_expect("must have a valid dtype"), - Some(stat_val), - ) - .vortex_expect("`Stat::Sum` somehow had an incompatible `DType`") - .try_to_df() - .ok() - }) - .transpose() - }); + let sum = stats_set + .get(Stat::Sum) + .map(|stat_val| { + Scalar::try_new( + Stat::Sum + .dtype(dtype) + .vortex_expect("must have a valid dtype"), + Some(stat_val), + ) + .vortex_expect("`Stat::Sum` somehow had an incompatible `DType`") + .try_to_df() + .ok() + }) + .transpose(); let null_count = stats_set.get_as::(Stat::NullCount, &PType::U64.into()); @@ -92,9 +89,9 @@ pub(crate) fn 
stats_set_to_df( } pub(crate) fn is_constant_to_distinct_count( - is_constant: Option>, + is_constant: VortexPrecision, ) -> Precision { - match is_constant.and_then(VortexPrecision::as_exact) { + match is_constant.as_exact() { Some(true) => Precision::Exact(1), Some(false) | None => Precision::Absent, } diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs index e127e3e33ee..4c6a3ffee41 100644 --- a/vortex-datafusion/src/lib.rs +++ b/vortex-datafusion/src/lib.rs @@ -119,6 +119,7 @@ where match self { Precision::Exact(v) => DFPrecision::Exact(v), Precision::Inexact(v) => DFPrecision::Inexact(v), + Precision::Absent => DFPrecision::Absent, } } } diff --git a/vortex-datafusion/src/persistent/format.rs b/vortex-datafusion/src/persistent/format.rs index b00dfadaaf0..1dd6ba542c1 100644 --- a/vortex-datafusion/src/persistent/format.rs +++ b/vortex-datafusion/src/persistent/format.rs @@ -639,18 +639,19 @@ impl FileFormat for VortexFormat { fn scalar_stat_to_df( stat: Stat, - value: Option>, + value: stats::Precision, stats_dtype: &DType, target_dtype: &DType, ) -> Option> { let stat_dtype = stat.dtype(stats_dtype)?; - value? - .try_map(|stat_value| { + value + .map(|stat_value| { Scalar::try_new(stat_dtype, Some(stat_value))? .cast(target_dtype)? 
.try_to_df() }) + .transpose() .ok() } diff --git a/vortex-file/src/pruning.rs b/vortex-file/src/pruning.rs index d5ae649ae6d..74ef90a034e 100644 --- a/vortex-file/src/pruning.rs +++ b/vortex-file/src/pruning.rs @@ -60,7 +60,7 @@ pub fn extract_relevant_file_stats_as_struct_row( stat, Stat::Max | Stat::Min | Stat::NaNCount | Stat::NullCount ) { - let Some(stat_value) = typed_stats.get(*stat).and_then(|p| p.as_exact()) else { + let Some(stat_value) = typed_stats.get(*stat).as_exact() else { vortex_bail!("missing stat {}, {} from stats set", field, stat) }; columns.push(( diff --git a/vortex-file/src/v2/file_stats_reader.rs b/vortex-file/src/v2/file_stats_reader.rs index f65c758ee7d..d5a1dc3e5d2 100644 --- a/vortex-file/src/v2/file_stats_reader.rs +++ b/vortex-file/src/v2/file_stats_reader.rs @@ -132,7 +132,7 @@ impl StatsCatalog for FileStatsLayoutReader { let field_idx = self.struct_fields.find(field_name)?; let field_stats = self.file_stats.stats_sets().get(field_idx)?; - let stat_value = field_stats.get(stat)?.as_exact()?; + let stat_value = field_stats.get(stat).as_exact()?; let field_dtype = self.struct_fields.field_by_index(field_idx)?; let stat_dtype = stat.dtype(&field_dtype)?; let stat_scalar = Scalar::try_new(stat_dtype, Some(stat_value)).ok()?; diff --git a/vortex-flatbuffers/public-api.lock b/vortex-flatbuffers/public-api.lock index 3c37457734a..ff6bf886d6c 100644 --- a/vortex-flatbuffers/public-api.lock +++ b/vortex-flatbuffers/public-api.lock @@ -450,6 +450,8 @@ pub fn vortex_flatbuffers::array::Compression::run_verifier(&mut flatbuffers::ve impl vortex_flatbuffers::array::Precision +pub const vortex_flatbuffers::array::Precision::Absent: Self + pub const vortex_flatbuffers::array::Precision::ENUM_MAX: u8 pub const vortex_flatbuffers::array::Precision::ENUM_MIN: u8 @@ -532,7 +534,7 @@ pub const vortex_flatbuffers::array::ENUM_MIN_PRECISION: u8 pub const vortex_flatbuffers::array::ENUM_VALUES_COMPRESSION: [vortex_flatbuffers::array::Compression; 2] -pub 
const vortex_flatbuffers::array::ENUM_VALUES_PRECISION: [vortex_flatbuffers::array::Precision; 2] +pub const vortex_flatbuffers::array::ENUM_VALUES_PRECISION: [vortex_flatbuffers::array::Precision; 3] pub fn vortex_flatbuffers::array::finish_array_buffer<'a, 'b, A: flatbuffers::builder::Allocator + 'a>(&'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>, flatbuffers::primitives::WIPOffset>) diff --git a/vortex-layout/src/layouts/flat/writer.rs b/vortex-layout/src/layouts/flat/writer.rs index da250414951..deb2580014e 100644 --- a/vortex-layout/src/layouts/flat/writer.rs +++ b/vortex-layout/src/layouts/flat/writer.rs @@ -81,8 +81,8 @@ fn truncate_scalar_stat Option<(Scalar, bool)>>( stat: Stat, truncation: F, ) { - if let Some(sv) = statistics.get(stat) { - if let Some((truncated_value, truncated)) = truncation(sv.into_inner()) { + if let Some(sv) = statistics.get(stat).into_inner() { + if let Some((truncated_value, truncated)) = truncation(sv) { if truncated && let Some(v) = truncated_value.into_value() { statistics.set(stat, Precision::Inexact(v)); } diff --git a/vortex-layout/src/layouts/zoned/builder.rs b/vortex-layout/src/layouts/zoned/builder.rs index 25a6e89f9db..3cf80d42539 100644 --- a/vortex-layout/src/layouts/zoned/builder.rs +++ b/vortex-layout/src/layouts/zoned/builder.rs @@ -69,7 +69,7 @@ impl StatsAccumulator { pub fn push_chunk_without_compute(&mut self, array: &ArrayRef) -> VortexResult<()> { for builder in &mut self.builders { - if let Some(Precision::Exact(value)) = array.statistics().get(builder.stat()) { + if let Precision::Exact(value) = array.statistics().get(builder.stat()) { builder.append_scalar(value.cast(&value.dtype().as_nullable())?)?; } else { builder.append_null(); From 326c875409262a9ac14a7635ee69460843f6ee42 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:15:11 +0100 Subject: [PATCH 3/9] fix Signed-off-by: Adam Gutglick --- encodings/sequence/src/array.rs | 16 ++++++++-------- 
encodings/zstd/src/zstd_buffers.rs | 2 +- vortex-btrblocks/src/schemes/integer.rs | 6 +++--- vortex-duckdb/src/datasource.rs | 10 ++++------ vortex-layout/src/layouts/flat/writer.rs | 10 +++++----- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index 2ee0b22d89c..a572a5fc165 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -519,12 +519,12 @@ mod tests { let is_sorted = arr .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); - assert_eq!(is_sorted, Some(StatPrecision::Exact(true))); + assert_eq!(is_sorted, StatPrecision::Exact(true)); let is_strict_sorted = arr .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); - assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(true))); + assert_eq!(is_strict_sorted, StatPrecision::Exact(true)); Ok(()) } @@ -535,12 +535,12 @@ mod tests { let is_sorted = arr .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); - assert_eq!(is_sorted, Some(StatPrecision::Exact(true))); + assert_eq!(is_sorted, StatPrecision::Exact(true)); let is_strict_sorted = arr .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); - assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(false))); + assert_eq!(is_strict_sorted, StatPrecision::Exact(false)); Ok(()) } @@ -551,12 +551,12 @@ mod tests { let is_sorted = arr .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); - assert_eq!(is_sorted, Some(StatPrecision::Exact(false))); + assert_eq!(is_sorted, StatPrecision::Exact(false)); let is_strict_sorted = arr .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); - assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(false))); + assert_eq!(is_strict_sorted, StatPrecision::Exact(false)); Ok(()) } @@ -575,8 +575,8 @@ mod tests { .statistics() .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); - assert_eq!(is_sorted, 
Some(StatPrecision::Exact(true))); - assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(true))); + assert_eq!(is_sorted, StatPrecision::Exact(true)); + assert_eq!(is_strict_sorted, StatPrecision::Exact(true)); Ok(()) } diff --git a/encodings/zstd/src/zstd_buffers.rs b/encodings/zstd/src/zstd_buffers.rs index c05fe82da06..597817456f2 100644 --- a/encodings/zstd/src/zstd_buffers.rs +++ b/encodings/zstd/src/zstd_buffers.rs @@ -563,7 +563,7 @@ mod tests { let compressed = ZstdBuffers::compress(&input, 3, &LEGACY_SESSION)?; - assert!(compressed.statistics().get(Stat::Min).is_some()); + assert!(!compressed.statistics().get(Stat::Min).is_absent()); Ok(()) } diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs index 87b4ac85ad6..4005056b032 100644 --- a/vortex-btrblocks/src/schemes/integer.rs +++ b/vortex-btrblocks/src/schemes/integer.rs @@ -1193,15 +1193,15 @@ mod scheme_selection_tests { assert!(compressed.is::()); assert_eq!( compressed.statistics().get_as::(Stat::NullCount), - Some(Precision::exact(0u64)) + Precision::exact(0u64) ); assert_eq!( compressed.statistics().get_as::(Stat::Min), - Some(Precision::exact(0u32)) + Precision::exact(0u32) ); assert_eq!( compressed.statistics().get_as::(Stat::Max), - Some(Precision::exact(15u32)) + Precision::exact(15u32) ); Ok(()) } diff --git a/vortex-duckdb/src/datasource.rs b/vortex-duckdb/src/datasource.rs index a1fa15a1b40..92d36fdc16c 100644 --- a/vortex-duckdb/src/datasource.rs +++ b/vortex-duckdb/src/datasource.rs @@ -230,16 +230,16 @@ pub struct ColumnStatisticsAggregate { impl ColumnStatisticsAggregate { pub fn new(stats: &StatsSet) -> Self { let min = match stats.get(Stat::Min) { - Some(Precision::Exact(min)) => Some(min), + Precision::Exact(min) => Some(min), _ => None, }; let max = match stats.get(Stat::Max) { - Some(Precision::Exact(max)) => Some(max), + Precision::Exact(max) => Some(max), _ => None, }; let max_string_length = - if let Some(Precision::Exact(value)) = 
stats.get(Stat::UncompressedSizeInBytes) { + if let Precision::Exact(value) = stats.get(Stat::UncompressedSizeInBytes) { // DuckDB's string length is u32 #[allow(clippy::cast_possible_truncation)] Some(value.as_primitive().as_u64().vortex_expect("not a u64") as u32) @@ -248,9 +248,7 @@ impl ColumnStatisticsAggregate { }; let has_null = match stats.get(Stat::NullCount) { - Some(Precision::Exact(cnt)) => { - cnt.as_primitive().as_u64().vortex_expect("not a u64") > 0 - } + Precision::Exact(cnt) => cnt.as_primitive().as_u64().vortex_expect("not a u64") > 0, _ => true, }; diff --git a/vortex-layout/src/layouts/flat/writer.rs b/vortex-layout/src/layouts/flat/writer.rs index deb2580014e..a50d8a27416 100644 --- a/vortex-layout/src/layouts/flat/writer.rs +++ b/vortex-layout/src/layouts/flat/writer.rs @@ -273,7 +273,7 @@ mod tests { assert_eq!( result.statistics().get_as::(Stat::IsSorted), - Some(Precision::Exact(true)) + Precision::Exact(true) ); }) } @@ -325,16 +325,16 @@ mod tests { assert_eq!( result.statistics().get_as::(Stat::Min), // The typo is correct, we need this to be truncated. 
- Some(Precision::Inexact( + Precision::Inexact( // spellchecker:ignore-next-line "Another string that's meant to be smaller than the previous valu".to_string() - )) + ) ); assert_eq!( result.statistics().get_as::(Stat::Max), - Some(Precision::Inexact( + Precision::Inexact( "Long value to test that the statistics are actually truncated, j".to_string() - )) + ) ); }) } From e861c2671d6cb840e6c9cf40a1b8885131136ff7 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:17:05 +0100 Subject: [PATCH 4/9] more fixes Signed-off-by: Adam Gutglick --- vortex-datafusion/src/v2/source.rs | 2 +- vortex-duckdb/src/datasource.rs | 2 +- vortex-ffi/src/scan.rs | 2 +- vortex-jni/src/data_source.rs | 2 +- vortex-jni/src/scan.rs | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vortex-datafusion/src/v2/source.rs b/vortex-datafusion/src/v2/source.rs index c51c01bc946..291da6f6193 100644 --- a/vortex-datafusion/src/v2/source.rs +++ b/vortex-datafusion/src/v2/source.rs @@ -669,6 +669,6 @@ fn estimate_to_df_precision(est: Option<&Precision>) -> DFPrecision Some(Precision::Inexact(v)) => { DFPrecision::Inexact(usize::try_from(*v).unwrap_or(usize::MAX)) } - None => DFPrecision::Absent, + Some(Precision::Absent) | None => DFPrecision::Absent, } } diff --git a/vortex-duckdb/src/datasource.rs b/vortex-duckdb/src/datasource.rs index 92d36fdc16c..1b764c8d2e2 100644 --- a/vortex-duckdb/src/datasource.rs +++ b/vortex-duckdb/src/datasource.rs @@ -608,7 +608,7 @@ impl TableFunction for T { // Post-filter estimate is always a heuristic. 
Cardinality::Estimate(postfilter_cardinality(v, has_non_optional_filter)) } - None => Cardinality::Unknown, + Some(Precision::Absent) | None => Cardinality::Unknown, } } diff --git a/vortex-ffi/src/scan.rs b/vortex-ffi/src/scan.rs index 8f8f910d2f6..358d0057b32 100644 --- a/vortex-ffi/src/scan.rs +++ b/vortex-ffi/src/scan.rs @@ -204,7 +204,7 @@ fn write_estimate>(estimate: Option>, out: &mut vx_est out.r#type = vx_estimate_type::VX_ESTIMATE_INEXACT; out.estimate = value.into(); } - None => { + Some(Precision::Absent) | None => { out.r#type = vx_estimate_type::VX_ESTIMATE_UNKNOWN; } } diff --git a/vortex-jni/src/data_source.rs b/vortex-jni/src/data_source.rs index d62b583a4a9..697c0e8817b 100644 --- a/vortex-jni/src/data_source.rs +++ b/vortex-jni/src/data_source.rs @@ -204,7 +204,7 @@ pub extern "system" fn Java_dev_vortex_jni_NativeDataSource_rowCount( let (rows, cardinality) = match ds.inner.row_count() { Some(Precision::Exact(r)) => (r as jlong, 2), Some(Precision::Inexact(r)) => (r as jlong, 1), - None => (0, 0), + Some(Precision::Absent) | None => (0, 0), }; out.set_region(env, 0, &[rows, cardinality])?; Ok(()) diff --git a/vortex-jni/src/scan.rs b/vortex-jni/src/scan.rs index 08be9f221fc..edde36bf995 100644 --- a/vortex-jni/src/scan.rs +++ b/vortex-jni/src/scan.rs @@ -216,7 +216,7 @@ pub extern "system" fn Java_dev_vortex_jni_NativeScan_partitionCount( let (rows, cardinality) = match scan.partition_count() { Some(Precision::Exact(v)) => (v as jlong, 2), Some(Precision::Inexact(v)) => (v as jlong, 1), - None => (0, 0), + Some(Precision::Absent) | None => (0, 0), }; out.set_region(env, 0, &[rows, cardinality])?; Ok(()) @@ -286,7 +286,7 @@ pub extern "system" fn Java_dev_vortex_jni_NativePartition_rowCount( let (rows, cardinality) = match partition.row_count() { Some(Precision::Exact(v)) => (v as jlong, 2), Some(Precision::Inexact(v)) => (v as jlong, 1), - None => (0, 0), + Some(Precision::Absent) | None => (0, 0), }; out.set_region(env, 0, &[rows, 
cardinality])?; Ok(()) From e6d5eae6c16d121a7a35a267c07348b56087348d Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:17:22 +0100 Subject: [PATCH 5/9] I'm just impatient Signed-off-by: Adam Gutglick --- vortex-ffi/src/data_source.rs | 3 ++- vortex-ffi/src/scan.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/vortex-ffi/src/data_source.rs b/vortex-ffi/src/data_source.rs index 33ebe937b3a..95b05c40613 100644 --- a/vortex-ffi/src/data_source.rs +++ b/vortex-ffi/src/data_source.rs @@ -9,6 +9,7 @@ use std::sync::Arc; use vortex::error::VortexResult; use vortex::error::vortex_ensure; +use vortex::expr::stats::Precision::Absent; use vortex::expr::stats::Precision::Exact; use vortex::expr::stats::Precision::Inexact; use vortex::file::multi::MultiFileDataSource; @@ -124,7 +125,7 @@ pub unsafe extern "C-unwind" fn vx_data_source_get_row_count( rc.r#type = vx_estimate_type::VX_ESTIMATE_INEXACT; rc.estimate = rows; } - None => { + Some(Absent) | None => { rc.r#type = vx_estimate_type::VX_ESTIMATE_UNKNOWN; } } diff --git a/vortex-ffi/src/scan.rs b/vortex-ffi/src/scan.rs index 358d0057b32..f3c6cb8115e 100644 --- a/vortex-ffi/src/scan.rs +++ b/vortex-ffi/src/scan.rs @@ -238,6 +238,7 @@ pub unsafe extern "C-unwind" fn vx_data_source_scan( scan.partition_count().map(|x| match x { Precision::Exact(v) => Precision::Exact(v as u64), Precision::Inexact(v) => Precision::Inexact(v as u64), + Precision::Absent => Precision::Absent, }), unsafe { &mut *estimate }, ); From 024dacb6d6cb8cb5f0dff30c68eb35db008cb759 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:27:00 +0100 Subject: [PATCH 6/9] more fixes Signed-off-by: Adam Gutglick --- vortex-array/src/expr/stats/bound.rs | 2 +- vortex-array/src/expr/stats/precision.rs | 9 ++------- vortex-cuda/src/layout.rs | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/vortex-array/src/expr/stats/bound.rs b/vortex-array/src/expr/stats/bound.rs index 
9f26c89e8f8..c3b62033f4b 100644 --- a/vortex-array/src/expr/stats/bound.rs +++ b/vortex-array/src/expr/stats/bound.rs @@ -248,7 +248,7 @@ impl PartialOrd for UpperBound { Some(o) } }), - Absent => return None, + Absent => None, } } } diff --git a/vortex-array/src/expr/stats/precision.rs b/vortex-array/src/expr/stats/precision.rs index 81d521f30a1..5f9249c0cc6 100644 --- a/vortex-array/src/expr/stats/precision.rs +++ b/vortex-array/src/expr/stats/precision.rs @@ -22,19 +22,14 @@ use crate::scalar::ScalarValue; /// This is statistic specific, for max this will be an upper bound. Meaning that the actual max /// in an array is guaranteed to be less than or equal to the inexact value, but equal to the exact /// value. -#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Default, Debug, PartialEq, Eq, Clone, Copy)] pub enum Precision { Exact(T), Inexact(T), + #[default] Absent, } -impl Default for Precision { - fn default() -> Self { - Self::Absent - } -} - impl Precision> { /// Transpose the `Precision>` into `Option>`. 
pub fn transpose(self) -> Option> { diff --git a/vortex-cuda/src/layout.rs b/vortex-cuda/src/layout.rs index 858bb87e0cc..76e5a5ba5fc 100644 --- a/vortex-cuda/src/layout.rs +++ b/vortex-cuda/src/layout.rs @@ -434,8 +434,8 @@ fn truncate_scalar_stat Option<(Scalar, bool)>>( stat: Stat, truncation: F, ) { - if let Some(sv) = statistics.get(stat) { - if let Some((truncated_value, truncated)) = truncation(sv.into_inner()) { + if let Some(sv) = statistics.get(stat).into_inner() { + if let Some((truncated_value, truncated)) = truncation(sv) { if truncated && let Some(v) = truncated_value.into_value() { statistics.set(stat, Precision::Inexact(v)); } From d107e20b51e70e33edc584d889367a1eed3e520a Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:29:46 +0100 Subject: [PATCH 7/9] public apis Signed-off-by: Adam Gutglick --- vortex-array/public-api.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index db07d0daf46..fd3a792c145 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -12190,7 +12190,7 @@ impl core::marker::Copy for vortex_array::expr::stats::Pr impl core::default::Default for vortex_array::expr::stats::Precision -pub fn vortex_array::expr::stats::Precision::default() -> Self +pub fn vortex_array::expr::stats::Precision::default() -> vortex_array::expr::stats::Precision impl core::marker::StructuralPartialEq for vortex_array::expr::stats::Precision From b9b3c71ff118bb406d532ea9d0adb7d44e286156 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:43:19 +0100 Subject: [PATCH 8/9] fix thing Signed-off-by: Adam Gutglick --- vortex-layout/src/display.rs | 4 ++-- vortex-python/src/io.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vortex-layout/src/display.rs b/vortex-layout/src/display.rs index 5acf685e946..29fbc0ce520 100644 --- a/vortex-layout/src/display.rs +++ b/vortex-layout/src/display.rs @@ 
-334,8 +334,8 @@ mod tests { let expected = "\ vortex.struct, dtype: {numbers=i64?, strings=utf8}, children: 2, rows: 5 ├── numbers: vortex.chunked, dtype: i64?, children: 1, rows: 5 -│ └── [0]: vortex.flat, dtype: i64?, metadata: 171 bytes, rows: 5, segment 0, buffers=[40B, 1B], total=41B -└── strings: vortex.flat, dtype: utf8, metadata: 110 bytes, rows: 5, segment 1, buffers=[43B, 80B], total=123B +│ └── [0]: vortex.flat, dtype: i64?, metadata: 187 bytes, rows: 5, segment 0, buffers=[40B, 1B], total=41B +└── strings: vortex.flat, dtype: utf8, metadata: 122 bytes, rows: 5, segment 1, buffers=[43B, 80B], total=123B "; assert_eq!(output, expected); }) diff --git a/vortex-python/src/io.rs b/vortex-python/src/io.rs index 7da526390bb..3d749cb5097 100644 --- a/vortex-python/src/io.rs +++ b/vortex-python/src/io.rs @@ -258,7 +258,7 @@ impl PyVortexWriteOptions { /// >>> vx.io.VortexWriteOptions.default().write(sprl, "chonky.vortex") /// >>> import os /// >>> os.path.getsize('chonky.vortex') - /// 215972 + /// 215996 /// /// Wow, Vortex manages to use about two bytes per integer! So advanced. So tiny. /// @@ -268,7 +268,7 @@ impl PyVortexWriteOptions { /// /// >>> vx.io.VortexWriteOptions.compact().write(sprl, "tiny.vortex") /// >>> os.path.getsize('tiny.vortex') - /// 55088 + /// 55140 /// /// Random numbers are not (usually) composed of random bytes! 
#[staticmethod] From 5ebeb8b42cd6165a96ee9094bba84d5bfd68e425 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 20 May 2026 18:56:50 +0100 Subject: [PATCH 9/9] Revert on-disk change Signed-off-by: Adam Gutglick --- vortex-array/src/stats/flatbuffers.rs | 18 ++++-------------- .../flatbuffers/vortex-array/array.fbs | 1 - vortex-flatbuffers/public-api.lock | 4 +--- vortex-flatbuffers/src/generated/array.rs | 10 +++------- vortex-layout/src/display.rs | 4 ++-- vortex-python/src/io.rs | 4 ++-- 6 files changed, 12 insertions(+), 29 deletions(-) diff --git a/vortex-array/src/stats/flatbuffers.rs b/vortex-array/src/stats/flatbuffers.rs index ea6f2a5377b..c8f5ed1684a 100644 --- a/vortex-array/src/stats/flatbuffers.rs +++ b/vortex-array/src/stats/flatbuffers.rs @@ -47,7 +47,7 @@ impl WriteFlatBuffer for StatsSet { fba::Precision::Inexact, Some(fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&min)))), ), - Precision::Absent => (fba::Precision::Absent, None), + Precision::Absent => (fba::Precision::Inexact, None), }; let (max_precision, max) = match self.get(Stat::Max) { @@ -59,7 +59,7 @@ impl WriteFlatBuffer for StatsSet { fba::Precision::Inexact, Some(fbb.create_vector(&ScalarValue::to_proto_bytes::>(Some(&max)))), ), - Precision::Absent => (fba::Precision::Absent, None), + Precision::Absent => (fba::Precision::Inexact, None), }; let sum = self @@ -132,11 +132,6 @@ impl StatsSet { if let Some(max) = fb.max() && let Some(stat_dtype) = stat_dtype { - let max_precision = fb.max_precision(); - if max_precision == fba::Precision::Absent { - vortex_bail!("Corrupted max stat: value present with absent precision"); - } - let value = ScalarValue::from_proto_bytes(max.bytes(), &stat_dtype, session)?; let Some(value) = value else { @@ -145,7 +140,7 @@ impl StatsSet { stats_set.set( Stat::Max, - match max_precision { + match fb.max_precision() { fba::Precision::Exact => Precision::Exact(value), fba::Precision::Inexact => Precision::Inexact(value), other => 
vortex_bail!("Corrupted max_precision field: {other:?}"), @@ -157,11 +152,6 @@ impl StatsSet { if let Some(min) = fb.min() && let Some(stat_dtype) = stat_dtype { - let min_precision = fb.min_precision(); - if min_precision == fba::Precision::Absent { - vortex_bail!("Corrupted min stat: value present with absent precision"); - } - let value = ScalarValue::from_proto_bytes(min.bytes(), &stat_dtype, session)?; let Some(value) = value else { @@ -170,7 +160,7 @@ impl StatsSet { stats_set.set( Stat::Min, - match min_precision { + match fb.min_precision() { fba::Precision::Exact => Precision::Exact(value), fba::Precision::Inexact => Precision::Inexact(value), other => vortex_bail!("Corrupted min_precision field: {other:?}"), diff --git a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs index 0d394a3643c..c3c3ddfd34a 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs @@ -40,7 +40,6 @@ table ArrayNode { enum Precision: uint8 { Inexact = 0, Exact = 1, - Absent = 2, } table ArrayStats { diff --git a/vortex-flatbuffers/public-api.lock b/vortex-flatbuffers/public-api.lock index ff6bf886d6c..3c37457734a 100644 --- a/vortex-flatbuffers/public-api.lock +++ b/vortex-flatbuffers/public-api.lock @@ -450,8 +450,6 @@ pub fn vortex_flatbuffers::array::Compression::run_verifier(&mut flatbuffers::ve impl vortex_flatbuffers::array::Precision -pub const vortex_flatbuffers::array::Precision::Absent: Self - pub const vortex_flatbuffers::array::Precision::ENUM_MAX: u8 pub const vortex_flatbuffers::array::Precision::ENUM_MIN: u8 @@ -534,7 +532,7 @@ pub const vortex_flatbuffers::array::ENUM_MIN_PRECISION: u8 pub const vortex_flatbuffers::array::ENUM_VALUES_COMPRESSION: [vortex_flatbuffers::array::Compression; 2] -pub const vortex_flatbuffers::array::ENUM_VALUES_PRECISION: [vortex_flatbuffers::array::Precision; 3] +pub const 
vortex_flatbuffers::array::ENUM_VALUES_PRECISION: [vortex_flatbuffers::array::Precision; 2] pub fn vortex_flatbuffers::array::finish_array_buffer<'a, 'b, A: flatbuffers::builder::Allocator + 'a>(&'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>, flatbuffers::primitives::WIPOffset>) diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 04a2f4ef94f..6d903a56aa5 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -91,13 +91,12 @@ impl ::flatbuffers::SimpleToVerifyInSlice for Compression {} #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] pub const ENUM_MIN_PRECISION: u8 = 0; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] -pub const ENUM_MAX_PRECISION: u8 = 2; +pub const ENUM_MAX_PRECISION: u8 = 1; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] #[allow(non_camel_case_types)] -pub const ENUM_VALUES_PRECISION: [Precision; 3] = [ +pub const ENUM_VALUES_PRECISION: [Precision; 2] = [ Precision::Inexact, Precision::Exact, - Precision::Absent, ]; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] @@ -107,21 +106,18 @@ pub struct Precision(pub u8); impl Precision { pub const Inexact: Self = Self(0); pub const Exact: Self = Self(1); - pub const Absent: Self = Self(2); pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 2; + pub const ENUM_MAX: u8 = 1; pub const ENUM_VALUES: &'static [Self] = &[ Self::Inexact, Self::Exact, - Self::Absent, ]; /// Returns the variant's name or "" if unknown. 
pub fn variant_name(self) -> Option<&'static str> { match self { Self::Inexact => Some("Inexact"), Self::Exact => Some("Exact"), - Self::Absent => Some("Absent"), _ => None, } } diff --git a/vortex-layout/src/display.rs b/vortex-layout/src/display.rs index 29fbc0ce520..5acf685e946 100644 --- a/vortex-layout/src/display.rs +++ b/vortex-layout/src/display.rs @@ -334,8 +334,8 @@ mod tests { let expected = "\ vortex.struct, dtype: {numbers=i64?, strings=utf8}, children: 2, rows: 5 ├── numbers: vortex.chunked, dtype: i64?, children: 1, rows: 5 -│ └── [0]: vortex.flat, dtype: i64?, metadata: 187 bytes, rows: 5, segment 0, buffers=[40B, 1B], total=41B -└── strings: vortex.flat, dtype: utf8, metadata: 122 bytes, rows: 5, segment 1, buffers=[43B, 80B], total=123B +│ └── [0]: vortex.flat, dtype: i64?, metadata: 171 bytes, rows: 5, segment 0, buffers=[40B, 1B], total=41B +└── strings: vortex.flat, dtype: utf8, metadata: 110 bytes, rows: 5, segment 1, buffers=[43B, 80B], total=123B "; assert_eq!(output, expected); }) diff --git a/vortex-python/src/io.rs b/vortex-python/src/io.rs index 3d749cb5097..7da526390bb 100644 --- a/vortex-python/src/io.rs +++ b/vortex-python/src/io.rs @@ -258,7 +258,7 @@ impl PyVortexWriteOptions { /// >>> vx.io.VortexWriteOptions.default().write(sprl, "chonky.vortex") /// >>> import os /// >>> os.path.getsize('chonky.vortex') - /// 215996 + /// 215972 /// /// Wow, Vortex manages to use about two bytes per integer! So advanced. So tiny. /// @@ -268,7 +268,7 @@ impl PyVortexWriteOptions { /// /// >>> vx.io.VortexWriteOptions.compact().write(sprl, "tiny.vortex") /// >>> os.path.getsize('tiny.vortex') - /// 55140 + /// 55088 /// /// Random numbers are not (usually) composed of random bytes! #[staticmethod]