Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion bench-orchestrator/bench_orchestrator/runner/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def run(
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)

Expand Down
13 changes: 0 additions & 13 deletions encodings/runend/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,19 +423,6 @@ impl BaseArrayVTable<RunEndVTable> for RunEndVTable {
}

impl ValidityVTable<RunEndVTable> for RunEndVTable {
fn is_valid(array: &RunEndArray, index: usize) -> bool {
let physical_idx = array.find_physical_index(index);
array.values().is_valid(physical_idx)
}

fn all_valid(array: &RunEndArray) -> bool {
array.values().all_valid()
}

fn all_invalid(array: &RunEndArray) -> bool {
array.values().all_invalid()
}

fn validity(array: &RunEndArray) -> VortexResult<Validity> {
Ok(match array.values().validity()? {
Validity::NonNullable | Validity::AllValid => Validity::AllValid,
Expand Down
12 changes: 0 additions & 12 deletions encodings/sequence/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -412,18 +412,6 @@ impl OperationsVTable<SequenceVTable> for SequenceVTable {
}

impl ValidityVTable<SequenceVTable> for SequenceVTable {
fn is_valid(_array: &SequenceArray, _index: usize) -> bool {
true
}

fn all_valid(_array: &SequenceArray) -> bool {
true
}

fn all_invalid(_array: &SequenceArray) -> bool {
false
}

fn validity(_array: &SequenceArray) -> VortexResult<Validity> {
Ok(Validity::AllValid)
}
Expand Down
27 changes: 0 additions & 27 deletions encodings/sparse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,33 +409,6 @@ impl BaseArrayVTable<SparseVTable> for SparseVTable {
}

impl ValidityVTable<SparseVTable> for SparseVTable {
fn is_valid(array: &SparseArray, index: usize) -> bool {
match array.patches().get_patched(index) {
None => array.fill_scalar().is_valid(),
Some(patch_value) => patch_value.is_valid(),
}
}

fn all_valid(array: &SparseArray) -> bool {
if array.fill_scalar().is_null() {
// We need _all_ values to be patched, and all patches to be valid
return array.patches().values().len() == array.len()
&& array.patches().values().all_valid();
}

array.patches().values().all_valid()
}

fn all_invalid(array: &SparseArray) -> bool {
if !array.fill_scalar().is_null() {
// We need _all_ values to be patched, and all patches to be invalid
return array.patches().values().len() == array.len()
&& array.patches().values().all_invalid();
}

array.patches().values().all_invalid()
}

fn validity(array: &SparseArray) -> VortexResult<Validity> {
let patches = unsafe {
Patches::new_unchecked(
Expand Down
81 changes: 58 additions & 23 deletions vortex-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use crate::arrays::StructVTable;
use crate::arrays::VarBinVTable;
use crate::arrays::VarBinViewVTable;
use crate::builders::ArrayBuilder;
use crate::compute;
use crate::compute::ComputeFn;
use crate::compute::InvocationArgs;
use crate::compute::Output;
Expand Down Expand Up @@ -145,12 +146,14 @@ pub trait Array:

/// Returns whether all items in the array are valid.
///
/// This is usually cheaper than computing a precise `valid_count`.
/// This is usually cheaper than computing a precise `valid_count`, but may return false
/// negatives.
fn all_valid(&self) -> bool;

/// Returns whether the array is all invalid.
///
/// This is usually cheaper than computing a precise `invalid_count`.
/// This is usually cheaper than computing a precise `invalid_count`, but may return false
/// negatives.
fn all_invalid(&self) -> bool;

/// Returns the number of valid elements in the array.
Expand Down Expand Up @@ -545,19 +548,38 @@ impl<V: VTable> Array for ArrayAdapter<V> {
if index >= self.len() {
vortex_panic!(OutOfBounds: index, 0, self.len());
}
<V::ValidityVTable as ValidityVTable<V>>::is_valid(&self.0, index)
match self
.validity()
.vortex_expect("Failed to get validity for is_valid")
{
Validity::NonNullable | Validity::AllValid => true,
Validity::AllInvalid => false,
Validity::Array(a) => a
.scalar_at(index)
.as_bool()
.value()
.vortex_expect("validity must be non-nullable"),
}
}

fn is_invalid(&self, index: usize) -> bool {
!self.is_valid(index)
}

fn all_valid(&self) -> bool {
<V::ValidityVTable as ValidityVTable<V>>::all_valid(&self.0)
match self.validity().vortex_expect("Failed to get validity") {
Validity::NonNullable | Validity::AllValid => true,
Validity::AllInvalid => false,
Validity::Array(a) => a.statistics().compute_min::<bool>().unwrap_or(false),
}
}

fn all_invalid(&self) -> bool {
<V::ValidityVTable as ValidityVTable<V>>::all_invalid(&self.0)
match self.validity().vortex_expect("Failed to get validity") {
Validity::NonNullable | Validity::AllValid => false,
Validity::AllInvalid => true,
Validity::Array(a) => !a.statistics().compute_max::<bool>().unwrap_or(true),
}
}
Comment on lines 569 to 583
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it might be very expensive?


fn valid_count(&self) -> usize {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be null count soon, right?

Expand All @@ -567,7 +589,19 @@ impl<V: VTable> Array for ArrayAdapter<V> {
return self.len() - invalid_count;
}

let count = <V::ValidityVTable as ValidityVTable<V>>::valid_count(&self.0);
let count = match self
.validity()
.vortex_expect("Failed to get validity for valid_count")
{
Validity::NonNullable | Validity::AllValid => self.len(),
Validity::AllInvalid => 0,
Validity::Array(a) => {
let sum = compute::sum(&a).vortex_expect("Failed to compute sum for valid count");
sum.as_primitive()
.as_::<usize>()
.vortex_expect("Sum must be non-nullable")
}
};
assert!(count <= self.len(), "Valid count exceeds array length");

self.statistics()
Expand All @@ -577,33 +611,34 @@ impl<V: VTable> Array for ArrayAdapter<V> {
}

fn invalid_count(&self) -> usize {
if let Some(Precision::Exact(invalid_count)) =
self.statistics().get_as::<usize>(Stat::NullCount)
{
return invalid_count;
}

let count = <V::ValidityVTable as ValidityVTable<V>>::invalid_count(&self.0);
assert!(count <= self.len(), "Invalid count exceeds array length");

self.statistics()
.set(Stat::NullCount, Precision::exact(count));

count
self.len() - self.valid_count()
}

fn validity(&self) -> VortexResult<Validity> {
if self.dtype().is_nullable() {
<V::ValidityVTable as ValidityVTable<V>>::validity(&self.0)
let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(&self.0)?;
if let Validity::Array(array) = &validity {
vortex_ensure!(array.len() == self.len(), "Validity array length mismatch");
vortex_ensure!(
matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
"Validity array for must be non-nullable boolean: {}",
self.to_array().display_tree(),
);
}
Ok(validity)
} else {
Ok(Validity::NonNullable)
}
}

fn validity_mask(&self) -> Mask {
let mask = <V::ValidityVTable as ValidityVTable<V>>::validity_mask(&self.0);
assert_eq!(mask.len(), self.len(), "Validity mask length mismatch");
mask
match self.validity().vortex_expect("Failed to get validity") {
Validity::NonNullable | Validity::AllValid => Mask::new_true(self.len()),
Validity::AllInvalid => Mask::new_false(self.len()),
Validity::Array(a) => a
.try_to_mask_fill_null_false()
.vortex_expect("Failed to get validity mask"),
}
}

fn to_canonical(&self) -> VortexResult<Canonical> {
Expand Down
56 changes: 23 additions & 33 deletions vortex-array/src/arrays/chunked/vtable/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,37 @@ use crate::validity::Validity;
use crate::vtable::ValidityVTable;

impl ValidityVTable<ChunkedVTable> for ChunkedVTable {
fn is_valid(array: &ChunkedArray, index: usize) -> bool {
if !array.dtype.is_nullable() {
return true;
}
let (chunk, offset_in_chunk) = array.find_chunk_idx(index);
array.chunk(chunk).is_valid(offset_in_chunk)
}
fn validity(array: &ChunkedArray) -> VortexResult<Validity> {
let validities: Vec<Validity> =
array.chunks().iter().map(|c| c.validity()).try_collect()?;

fn all_valid(array: &ChunkedArray) -> bool {
if !array.dtype().is_nullable() {
return true;
}
for chunk in array.non_empty_chunks() {
if !chunk.all_valid() {
return false;
match validities.first() {
// If there are no chunks, return the array's dtype nullability
None => return Ok(array.dtype().nullability().into()),
// If all chunks have the same non-array validity, return that validity directly
// We skip Validity::Array since equality is very expensive.
Some(first) if !matches!(first, Validity::Array(_)) => {
let target = std::mem::discriminant(first);
if validities
.iter()
.all(|v| std::mem::discriminant(v) == target)
{
return Ok(first.clone());
}
}
}
true
}

fn all_invalid(array: &ChunkedArray) -> bool {
if !array.dtype().is_nullable() {
return false;
}
for chunk in array.non_empty_chunks() {
if !chunk.all_invalid() {
return false;
_ => {
// Array validity or mixed validities, proceed to build the validity array
}
}
true
}

fn validity(array: &ChunkedArray) -> VortexResult<Validity> {
Ok(Validity::Array(
unsafe {
ChunkedArray::new_unchecked(
array
.chunks()
.iter()
.map(|chunk| chunk.validity().map(|v| v.to_array(chunk.len())))
.try_collect()?,
validities
.into_iter()
.zip(array.chunks())
.map(|(v, chunk)| v.to_array(chunk.len()))
.collect(),
DType::Bool(Nullability::NonNullable),
)
}
Expand Down
12 changes: 0 additions & 12 deletions vortex-array/src/arrays/constant/vtable/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,6 @@ use crate::validity::Validity;
use crate::vtable::ValidityVTable;

impl ValidityVTable<ConstantVTable> for ConstantVTable {
fn is_valid(array: &ConstantArray, _index: usize) -> bool {
!array.scalar().is_null()
}

fn all_valid(array: &ConstantArray) -> bool {
!array.scalar().is_null()
}

fn all_invalid(array: &ConstantArray) -> bool {
array.scalar().is_null()
}

fn validity(array: &ConstantArray) -> VortexResult<Validity> {
Ok(if array.scalar().is_null() {
Validity::AllInvalid
Expand Down
41 changes: 14 additions & 27 deletions vortex-array/src/arrays/dict/vtable/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,23 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_buffer::BitBuffer;
use vortex_dtype::Nullability;
use vortex_dtype::match_each_integer_ptype;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_mask::AllOr;
use vortex_mask::Mask;
use vortex_scalar::Scalar;

use super::DictVTable;
use crate::Array;
use crate::IntoArray;
use crate::ToCanonical;
use crate::arrays::dict::DictArray;
use crate::compute::fill_null;
use crate::validity::Validity;
use crate::vtable::ValidityVTable;

impl ValidityVTable<DictVTable> for DictVTable {
fn is_valid(array: &DictArray, index: usize) -> bool {
let scalar = array.codes().scalar_at(index);

if scalar.is_null() {
return false;
};
let values_index: usize = scalar
.as_ref()
.try_into()
.vortex_expect("Failed to convert dictionary code to usize");
array.values().is_valid(values_index)
}

fn all_valid(array: &DictArray) -> bool {
array.codes().all_valid() && array.values().all_valid()
}

fn all_invalid(array: &DictArray) -> bool {
array.codes().all_invalid() || array.values().all_invalid()
}

fn validity(array: &DictArray) -> VortexResult<Validity> {
Ok(
match (array.codes().validity()?, array.values().validity()?) {
Expand All @@ -58,11 +39,17 @@ impl ValidityVTable<DictVTable> for DictVTable {
.into_array(),
)
}
(Validity::Array(_), Validity::Array(values_validity)) => {
// We essentially create is_not_null(Dict(codes, is_not_null(values)))
unsafe { DictArray::new_unchecked(array.codes().clone(), values_validity) }
.into_array()
.validity()?
(Validity::Array(_codes_validity), Validity::Array(values_validity)) => {
// Create a mask representing "is the value at codes[i] valid?"
let values_valid_mask =
unsafe { DictArray::new_unchecked(array.codes().clone(), values_validity) }
.into_array();
let values_valid_mask = fill_null(
&values_valid_mask,
&Scalar::bool(false, Nullability::NonNullable),
)?;

Validity::Array(values_valid_mask)
}
},
)
Expand Down
Loading
Loading