From d080c1d209e9e2c2400a69121bf46d412cc2c15a Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Wed, 8 Apr 2026 16:53:12 +0100 Subject: [PATCH 01/10] array tree layout --- vortex-array/public-api.lock | 2 + vortex-array/src/serde.rs | 132 ++++-- vortex-file/src/strategy.rs | 36 +- vortex-file/src/tests.rs | 49 ++- vortex-layout/public-api.lock | 270 ++++++++++++ vortex-layout/src/display.rs | 400 ++++++++---------- vortex-layout/src/layouts/array_tree/flat.rs | 197 +++++++++ vortex-layout/src/layouts/array_tree/mod.rs | 236 +++++++++++ .../src/layouts/array_tree/reader.rs | 296 +++++++++++++ .../src/layouts/array_tree/writer.rs | 229 ++++++++++ vortex-layout/src/layouts/flat/writer.rs | 6 +- vortex-layout/src/layouts/mod.rs | 1 + vortex-layout/src/session.rs | 10 + 13 files changed, 1592 insertions(+), 272 deletions(-) create mode 100644 vortex-layout/src/layouts/array_tree/flat.rs create mode 100644 vortex-layout/src/layouts/array_tree/mod.rs create mode 100644 vortex-layout/src/layouts/array_tree/reader.rs create mode 100644 vortex-layout/src/layouts/array_tree/writer.rs diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 9609ce9d79d..f7241dc3585 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -23596,6 +23596,8 @@ impl vortex_array::ArrayRef pub fn vortex_array::ArrayRef::serialize(&self, &vortex_array::ArrayContext, &vortex_session::VortexSession, &vortex_array::serde::SerializeOptions) -> vortex_error::VortexResult> +pub fn vortex_array::ArrayRef::serialize_array_tree(&self, &vortex_array::ArrayContext, &vortex_session::VortexSession, &vortex_array::serde::SerializeOptions) -> vortex_error::VortexResult + impl vortex_array::ArrayRef pub fn vortex_array::ArrayRef::to_array_iterator(&self) -> impl vortex_array::iter::ArrayIterator + 'static diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index 637b57324c0..8c1614610af 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs 
@@ -48,6 +48,68 @@ pub struct SerializeOptions { pub include_padding: bool, } +/// Collect flatbuffer buffer descriptors from array buffers, computing padding for each. +/// +/// This is the shared logic between [`ArrayRef::serialize`] and [`ArrayRef::serialize_array_tree`] +/// to ensure buffer descriptor tables are always consistent. +fn collect_buffer_descriptors( + array_buffers: &[ByteBuffer], + options: &SerializeOptions, +) -> VortexResult> { + let mut fb_buffers = Vec::with_capacity(array_buffers.len()); + let mut pos = options.offset; + + for buffer in array_buffers { + let padding = if options.include_padding { + let padding = pos.next_multiple_of(*buffer.alignment()) - pos; + pos += padding; + padding + } else { + 0 + }; + + fb_buffers.push(fba::Buffer::new( + u16::try_from(padding).vortex_expect("padding fits into u16"), + buffer.alignment().exponent(), + Compression::None, + u32::try_from(buffer.len()) + .map_err(|_| vortex_err!("All buffers must fit into u32 for serialization"))?, + )); + + pos += buffer.len(); + } + + Ok(fb_buffers) +} + +/// Build a complete `fba::Array` flatbuffer from an encoding tree and buffer descriptors. +fn build_array_flatbuffer( + ctx: &ArrayContext, + session: &VortexSession, + array: &ArrayRef, + fb_buffers: Vec, + skip_stats: bool, +) -> VortexResult { + let mut fbb = FlatBufferBuilder::new(); + + let mut root = ArrayNodeFlatBuffer::try_new(ctx, session, array)?; + root.skip_stats = skip_stats; + let fb_root = root.try_write_flatbuffer(&mut fbb)?; + + let fb_buffers = fbb.create_vector(&fb_buffers); + let fb_array = fba::Array::create( + &mut fbb, + &fba::ArrayArgs { + root: Some(fb_root), + buffers: Some(fb_buffers), + }, + ); + fbb.finish_minimal(fb_array); + let (fb_vec, fb_start) = fbb.collapse(); + let fb_end = fb_vec.len(); + Ok(ByteBuffer::from(fb_vec).slice(fb_start..fb_end)) +} + impl ArrayRef { /// Serialize the array into a sequence of byte buffers that should be written contiguously. 
/// This function returns a vec to avoid copying data buffers. @@ -71,9 +133,10 @@ impl ArrayRef { .flat_map(|f| f.buffers()) .collect::>(); + let fb_buffers = collect_buffer_descriptors(&array_buffers, options)?; + // Allocate result buffers, including a possible padding buffer for each. let mut buffers = vec![]; - let mut fb_buffers = Vec::with_capacity(buffers.capacity()); // If we're including padding, we need to find the maximum required buffer alignment. let max_alignment = array_buffers @@ -95,47 +158,19 @@ impl ArrayRef { // Push all the array buffers with padding as necessary. for buffer in array_buffers { - let padding = if options.include_padding { + if options.include_padding { let padding = pos.next_multiple_of(*buffer.alignment()) - pos; if padding > 0 { pos += padding; buffers.push(zeros.slice(0..padding)); } - padding - } else { - 0 - }; - - fb_buffers.push(fba::Buffer::new( - u16::try_from(padding).vortex_expect("padding fits into u16"), - buffer.alignment().exponent(), - Compression::None, - u32::try_from(buffer.len()) - .map_err(|_| vortex_err!("All buffers must fit into u32 for serialization"))?, - )); + } pos += buffer.len(); buffers.push(buffer.aligned(Alignment::none())); } - // Set up the flatbuffer builder - let mut fbb = FlatBufferBuilder::new(); - - let root = ArrayNodeFlatBuffer::try_new(ctx, session, self)?; - let fb_root = root.try_write_flatbuffer(&mut fbb)?; - - let fb_buffers = fbb.create_vector(&fb_buffers); - let fb_array = fba::Array::create( - &mut fbb, - &fba::ArrayArgs { - root: Some(fb_root), - buffers: Some(fb_buffers), - }, - ); - fbb.finish_minimal(fb_array); - let (fb_vec, fb_start) = fbb.collapse(); - let fb_end = fb_vec.len(); - let fb_buffer = ByteBuffer::from(fb_vec).slice(fb_start..fb_end); + let fb_buffer = build_array_flatbuffer(ctx, session, self, fb_buffers, false)?; let fb_length = fb_buffer.len(); if options.include_padding { @@ -156,6 +191,30 @@ impl ArrayRef { Ok(buffers) } + + /// Produce a compact 
[`fba::Array`] flatbuffer containing the encoding tree and buffer + /// descriptors, but with per-node statistics stripped (`stats = null` on all [`fba::ArrayNode`]s). + /// + /// This is used by the array tree layout to store encoding metadata separately from data + /// segments, enabling decode planning and sub-segment random access without fetching + /// the full data segment. + /// + /// The returned flatbuffer has the same `buffers` table as a full [`serialize`](Self::serialize) + /// call with the same options, so buffer offsets can be used for sub-segment reads. + pub fn serialize_array_tree( + &self, + ctx: &ArrayContext, + session: &VortexSession, + options: &SerializeOptions, + ) -> VortexResult { + let array_buffers = self + .depth_first_traversal() + .flat_map(|f| f.buffers()) + .collect::>(); + + let fb_buffers = collect_buffer_descriptors(&array_buffers, options)?; + build_array_flatbuffer(ctx, session, self, fb_buffers, true) + } } /// A utility struct for creating an [`fba::ArrayNode`] flatbuffer. @@ -164,6 +223,7 @@ pub struct ArrayNodeFlatBuffer<'a> { session: &'a VortexSession, array: &'a ArrayRef, buffer_idx: u16, + skip_stats: bool, } impl<'a> ArrayNodeFlatBuffer<'a> { @@ -184,6 +244,7 @@ impl<'a> ArrayNodeFlatBuffer<'a> { session, array, buffer_idx: 0, + skip_stats: false, }) } @@ -226,6 +287,7 @@ impl<'a> ArrayNodeFlatBuffer<'a> { session: self.session, array: child, buffer_idx: child_buffer_idx, + skip_stats: self.skip_stats, } .try_write_flatbuffer(fbb)?; @@ -240,7 +302,11 @@ impl<'a> ArrayNodeFlatBuffer<'a> { let children = Some(fbb.create_vector(&children)); let buffers = Some(fbb.create_vector_from_iter((0..nbuffers).map(|i| i + self.buffer_idx))); - let stats = Some(self.array.statistics().write_flatbuffer(fbb)?); + let stats = if self.skip_stats { + None + } else { + Some(self.array.statistics().write_flatbuffer(fbb)?) 
+ }; Ok(fba::ArrayNode::create( fbb, diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 71c72ffc904..4f895aaf7f1 100644 --- a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -40,6 +40,7 @@ use vortex_fastlanes::FoR; use vortex_fastlanes::RLE; use vortex_fsst::FSST; use vortex_layout::LayoutStrategy; +use vortex_layout::layouts::array_tree::writer; use vortex_layout::layouts::buffered::BufferedStrategy; use vortex_layout::layouts::chunked::writer::ChunkedLayoutStrategy; use vortex_layout::layouts::collect::CollectStrategy; @@ -212,16 +213,32 @@ impl WriteStrategyBuilder { /// Builds the canonical [`LayoutStrategy`] implementation, with the configured overrides /// applied. pub fn build(self) -> Arc { - let flat: Arc = if let Some(flat) = self.flat_strategy { - flat - } else if let Some(allow_encodings) = self.allow_encodings { - Arc::new(FlatLayoutStrategy::default().with_allow_encodings(allow_encodings)) + let flat: Arc = if let Some(flat) = &self.flat_strategy { + Arc::clone(flat) + } else if let Some(allow_encodings) = &self.allow_encodings { + Arc::new(FlatLayoutStrategy::default().with_allow_encodings(allow_encodings.clone())) } else { Arc::new(FlatLayoutStrategy::default()) }; + // Build the data pipeline leaf. When the user provides a custom flat strategy, use it + // directly — they own the leaf format and array tree wrapping does not apply. + // Otherwise, create a TX/RX pair for array tree collection. + let (data_leaf, array_tree_collector): (Arc, _) = + if self.flat_strategy.is_some() { + (Arc::clone(&flat), None) + } else { + let data_flat = if let Some(allow_encodings) = &self.allow_encodings { + FlatLayoutStrategy::default().with_allow_encodings(allow_encodings.clone()) + } else { + FlatLayoutStrategy::default() + }; + let (collector, leaf) = writer::writer(data_flat, Arc::clone(&flat)); + (Arc::new(leaf), Some(collector)) + }; + // 7. 
for each chunk create a flat layout - let chunked = ChunkedLayoutStrategy::new(Arc::clone(&flat)); + let chunked = ChunkedLayoutStrategy::new(data_leaf); // 6. buffer chunks so they end up with closer segment ids physically let buffered = BufferedStrategy::new(chunked, 2 * ONE_MEG); // 2MB @@ -272,9 +289,16 @@ impl WriteStrategyBuilder { Default::default(), ); + // 2.5 collect compact array trees from each chunk (skipped for custom flat strategies) + let data_pipeline: Arc = if let Some(collector) = array_tree_collector { + Arc::new(collector.wrap(dict)) + } else { + Arc::new(dict) + }; + // 2. calculate stats for each row group let stats = ZonedStrategy::new( - dict, + data_pipeline, compress_then_flat.clone(), ZonedLayoutOptions { block_size: self.row_block_size, diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 2ba10d96684..93efccb20e3 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -1911,22 +1911,36 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { check_zoned_ordering(root.as_ref(), segment_specs); // Additionally: all zone map segments across all columns should appear after - // all data segments across all columns. + // all data segments across all columns. Array tree segments (if present) appear + // between data and zones. let mut all_data_offsets = Vec::new(); + let mut all_array_tree_offsets = Vec::new(); let mut all_zones_offsets = Vec::new(); fn collect_all_zoned( layout: &dyn Layout, segment_specs: &[SegmentSpec], all_data: &mut Vec, + all_array_trees: &mut Vec, all_zones: &mut Vec, ) { if layout.encoding_id().as_ref() == "vortex.stats" { - // child 0 = data, child 1 = zones - all_data.extend(collect_segment_offsets( - layout.child(0).unwrap().as_ref(), - segment_specs, - )); + // child 0 = data (may contain array_tree layouts), child 1 = zones + let data_child = layout.child(0).unwrap(); + // If the data child is an array_tree layout, split its segments. 
+ if data_child.encoding_id().as_ref() == "vortex.array_tree" { + // child 0 = actual data, child 1 = array_trees auxiliary + all_data.extend(collect_segment_offsets( + data_child.child(0).unwrap().as_ref(), + segment_specs, + )); + all_array_trees.extend(collect_segment_offsets( + data_child.child(1).unwrap().as_ref(), + segment_specs, + )); + } else { + all_data.extend(collect_segment_offsets(data_child.as_ref(), segment_specs)); + } all_zones.extend(collect_segment_offsets( layout.child(1).unwrap().as_ref(), segment_specs, @@ -1934,7 +1948,13 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { return; } for child in layout.children().unwrap() { - collect_all_zoned(child.as_ref(), segment_specs, all_data, all_zones); + collect_all_zoned( + child.as_ref(), + segment_specs, + all_data, + all_array_trees, + all_zones, + ); } } @@ -1942,13 +1962,24 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { root.as_ref(), segment_specs, &mut all_data_offsets, + &mut all_array_tree_offsets, &mut all_zones_offsets, ); + // The root writer splits the sequence universe into two: data chunks use IDs from `ptr` + // and all metadata (array trees, zones) derive from `eof`. Since ptr < eof, all data + // segments are globally before all metadata segments. + // + // Within the eof universe, per-column ordering guarantees array_trees < zones within + // each column, but cross-column interleaving means we cannot assert + // all_array_trees < all_zones globally. 
+ let mut all_metadata_offsets = all_array_tree_offsets; + all_metadata_offsets.extend(&all_zones_offsets); + assert_offsets_ordered( &all_data_offsets, - &all_zones_offsets, - "global: all data segments should come before all zone map segments", + &all_metadata_offsets, + "global: all data segments should come before all metadata segments (array trees + zone maps)", ); Ok(()) diff --git a/vortex-layout/public-api.lock b/vortex-layout/public-api.lock index 1c6ae70dd6f..06bc6f53f26 100644 --- a/vortex-layout/public-api.lock +++ b/vortex-layout/public-api.lock @@ -20,6 +20,192 @@ pub fn vortex_layout::display::DisplayLayoutTree::fmt(&self, &mut core::fmt::For pub mod vortex_layout::layouts +pub mod vortex_layout::layouts::array_tree + +pub mod vortex_layout::layouts::array_tree::writer + +pub struct vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy + +impl vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy::wrap(self, impl vortex_layout::LayoutStrategy) -> Self + +impl vortex_layout::LayoutStrategy for vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy::buffered_bytes(&self) -> u64 + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy::write_stream<'life0, 'life1, 'async_trait>(&'life0 self, vortex_array::ArrayContext, vortex_layout::segments::SegmentSinkRef, vortex_layout::sequence::SendableSequentialStream, vortex_layout::sequence::SequencePointer, &'life1 vortex_session::VortexSession) -> core::pin::Pin> + core::marker::Send + 'async_trait)>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait + +pub struct vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy + +impl core::clone::Clone for vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy + +pub fn 
vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy::clone(&self) -> vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy + +impl vortex_layout::LayoutStrategy for vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy::buffered_bytes(&self) -> u64 + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy::write_stream<'life0, 'life1, 'async_trait>(&'life0 self, vortex_array::ArrayContext, vortex_layout::segments::SegmentSinkRef, vortex_layout::sequence::SendableSequentialStream, vortex_layout::sequence::SequencePointer, &'life1 vortex_session::VortexSession) -> core::pin::Pin> + core::marker::Send + 'async_trait)>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait + +pub fn vortex_layout::layouts::array_tree::writer::writer(vortex_layout::layouts::flat::writer::FlatLayoutStrategy, alloc::sync::Arc) -> (vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy, vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy) + +pub struct vortex_layout::layouts::array_tree::ArrayTree + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTree + +pub fn vortex_layout::layouts::array_tree::ArrayTree::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_layout::VTable for vortex_layout::layouts::array_tree::ArrayTree + +pub type vortex_layout::layouts::array_tree::ArrayTree::Encoding = vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTree::Layout = vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub type vortex_layout::layouts::array_tree::ArrayTree::Metadata = vortex_array::metadata::EmptyMetadata + +pub fn vortex_layout::layouts::array_tree::ArrayTree::build(&Self::Encoding, &vortex_array::dtype::DType, u64, &vortex_array::metadata::EmptyMetadata, alloc::vec::Vec, &dyn 
vortex_layout::LayoutChildren, &vortex_session::registry::ReadContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::child(&Self::Layout, usize) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::child_type(&Self::Layout, usize) -> vortex_layout::LayoutChildType + +pub fn vortex_layout::layouts::array_tree::ArrayTree::dtype(&Self::Layout) -> &vortex_array::dtype::DType + +pub fn vortex_layout::layouts::array_tree::ArrayTree::encoding(&Self::Layout) -> vortex_layout::LayoutEncodingRef + +pub fn vortex_layout::layouts::array_tree::ArrayTree::id(&Self::Encoding) -> vortex_layout::LayoutId + +pub fn vortex_layout::layouts::array_tree::ArrayTree::metadata(&Self::Layout) -> Self::Metadata + +pub fn vortex_layout::layouts::array_tree::ArrayTree::nchildren(&Self::Layout) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::row_count(&Self::Layout) -> u64 + +pub fn vortex_layout::layouts::array_tree::ArrayTree::segment_ids(&Self::Layout) -> alloc::vec::Vec + +pub fn vortex_layout::layouts::array_tree::ArrayTree::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> + +pub struct vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +impl vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::chunk_idx(&self) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::compact_tree(&self) -> core::option::Option<&vortex_buffer::ByteBuffer> + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::inner(&self) -> &vortex_layout::layouts::flat::FlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::new(vortex_layout::layouts::flat::FlatLayout, usize, 
vortex_buffer::ByteBuffer) -> Self + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::set_source(&self, alloc::sync::Arc) + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::source(&self) -> core::option::Option<&alloc::sync::Arc> + +impl core::clone::Clone for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::clone(&self) -> vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +impl core::convert::AsRef for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::as_ref(&self) -> &dyn vortex_layout::Layout + +impl core::convert::From for vortex_layout::LayoutRef + +pub fn vortex_layout::LayoutRef::from(vortex_layout::layouts::array_tree::ArrayTreeFlatLayout) -> vortex_layout::LayoutRef + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::Target = dyn vortex_layout::Layout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::deref(&self) -> &Self::Target + +impl vortex_layout::IntoLayout for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::into_layout(self) -> vortex_layout::LayoutRef + +pub struct vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +impl core::convert::AsRef for vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding::as_ref(&self) -> &dyn vortex_layout::LayoutEncoding + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +pub fn 
vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding::Target = dyn vortex_layout::LayoutEncoding + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding::deref(&self) -> &Self::Target + +pub struct vortex_layout::layouts::array_tree::ArrayTreeLayout + +impl vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::new(vortex_layout::LayoutRef, vortex_layout::LayoutRef) -> Self + +impl core::clone::Clone for vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::clone(&self) -> vortex_layout::layouts::array_tree::ArrayTreeLayout + +impl core::convert::AsRef for vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::as_ref(&self) -> &dyn vortex_layout::Layout + +impl core::convert::From for vortex_layout::LayoutRef + +pub fn vortex_layout::LayoutRef::from(vortex_layout::layouts::array_tree::ArrayTreeLayout) -> vortex_layout::LayoutRef + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub type vortex_layout::layouts::array_tree::ArrayTreeLayout::Target = dyn vortex_layout::Layout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::deref(&self) -> &Self::Target + +impl vortex_layout::IntoLayout for vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::into_layout(self) -> vortex_layout::LayoutRef + +pub struct 
vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +impl core::convert::AsRef for vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding::as_ref(&self) -> &dyn vortex_layout::LayoutEncoding + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding::Target = dyn vortex_layout::LayoutEncoding + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding::deref(&self) -> &Self::Target + +pub struct vortex_layout::layouts::array_tree::ArrayTreesSource + +impl vortex_layout::layouts::array_tree::ArrayTreesSource + +pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::array_future(&self) -> vortex_layout::layouts::SharedArrayFuture + +pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::new(vortex_layout::LayoutReaderRef) -> Self + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreesSource + +pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub mod vortex_layout::layouts::buffered pub struct vortex_layout::layouts::buffered::BufferedStrategy @@ -1464,6 +1650,38 @@ pub fn vortex_layout::vtable::VTable::segment_ids(&Self::Layout) -> alloc::vec:: pub fn vortex_layout::vtable::VTable::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_layout::VTable for vortex_layout::layouts::array_tree::ArrayTree + +pub type vortex_layout::layouts::array_tree::ArrayTree::Encoding = vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTree::Layout = 
vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub type vortex_layout::layouts::array_tree::ArrayTree::Metadata = vortex_array::metadata::EmptyMetadata + +pub fn vortex_layout::layouts::array_tree::ArrayTree::build(&Self::Encoding, &vortex_array::dtype::DType, u64, &vortex_array::metadata::EmptyMetadata, alloc::vec::Vec, &dyn vortex_layout::LayoutChildren, &vortex_session::registry::ReadContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::child(&Self::Layout, usize) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::child_type(&Self::Layout, usize) -> vortex_layout::LayoutChildType + +pub fn vortex_layout::layouts::array_tree::ArrayTree::dtype(&Self::Layout) -> &vortex_array::dtype::DType + +pub fn vortex_layout::layouts::array_tree::ArrayTree::encoding(&Self::Layout) -> vortex_layout::LayoutEncodingRef + +pub fn vortex_layout::layouts::array_tree::ArrayTree::id(&Self::Encoding) -> vortex_layout::LayoutId + +pub fn vortex_layout::layouts::array_tree::ArrayTree::metadata(&Self::Layout) -> Self::Metadata + +pub fn vortex_layout::layouts::array_tree::ArrayTree::nchildren(&Self::Layout) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::row_count(&Self::Layout) -> u64 + +pub fn vortex_layout::layouts::array_tree::ArrayTree::segment_ids(&Self::Layout) -> alloc::vec::Vec + +pub fn vortex_layout::layouts::array_tree::ArrayTree::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_layout::VTable for vortex_layout::layouts::chunked::Chunked pub type vortex_layout::layouts::chunked::Chunked::Encoding = vortex_layout::layouts::chunked::ChunkedLayoutEncoding @@ -1760,6 +1978,14 @@ pub trait vortex_layout::IntoLayout pub fn 
vortex_layout::IntoLayout::into_layout(self) -> vortex_layout::LayoutRef +impl vortex_layout::IntoLayout for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::into_layout(self) -> vortex_layout::LayoutRef + +impl vortex_layout::IntoLayout for vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::into_layout(self) -> vortex_layout::LayoutRef + impl vortex_layout::IntoLayout for vortex_layout::layouts::chunked::ChunkedLayout pub fn vortex_layout::layouts::chunked::ChunkedLayout::into_layout(self) -> vortex_layout::LayoutRef @@ -1916,6 +2142,18 @@ pub fn alloc::sync::Arc::buffered_bytes(&self pub fn alloc::sync::Arc::write_stream<'life0, 'life1, 'async_trait>(&'life0 self, vortex_array::ArrayContext, vortex_layout::segments::SegmentSinkRef, vortex_layout::sequence::SendableSequentialStream, vortex_layout::sequence::SequencePointer, &'life1 vortex_session::VortexSession) -> core::pin::Pin> + core::marker::Send + 'async_trait)>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait +impl vortex_layout::LayoutStrategy for vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy::buffered_bytes(&self) -> u64 + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeCollectorStrategy::write_stream<'life0, 'life1, 'async_trait>(&'life0 self, vortex_array::ArrayContext, vortex_layout::segments::SegmentSinkRef, vortex_layout::sequence::SendableSequentialStream, vortex_layout::sequence::SequencePointer, &'life1 vortex_session::VortexSession) -> core::pin::Pin> + core::marker::Send + 'async_trait)>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait + +impl vortex_layout::LayoutStrategy for vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy + +pub fn 
vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy::buffered_bytes(&self) -> u64 + +pub fn vortex_layout::layouts::array_tree::writer::ArrayTreeFlatStrategy::write_stream<'life0, 'life1, 'async_trait>(&'life0 self, vortex_array::ArrayContext, vortex_layout::segments::SegmentSinkRef, vortex_layout::sequence::SendableSequentialStream, vortex_layout::sequence::SequencePointer, &'life1 vortex_session::VortexSession) -> core::pin::Pin> + core::marker::Send + 'async_trait)>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait + impl vortex_layout::LayoutStrategy for vortex_layout::layouts::buffered::BufferedStrategy pub fn vortex_layout::layouts::buffered::BufferedStrategy::buffered_bytes(&self) -> u64 @@ -2002,6 +2240,38 @@ pub fn vortex_layout::VTable::segment_ids(&Self::Layout) -> alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_layout::VTable for vortex_layout::layouts::array_tree::ArrayTree + +pub type vortex_layout::layouts::array_tree::ArrayTree::Encoding = vortex_layout::layouts::array_tree::ArrayTreeLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTree::Layout = vortex_layout::layouts::array_tree::ArrayTreeLayout + +pub type vortex_layout::layouts::array_tree::ArrayTree::Metadata = vortex_array::metadata::EmptyMetadata + +pub fn vortex_layout::layouts::array_tree::ArrayTree::build(&Self::Encoding, &vortex_array::dtype::DType, u64, &vortex_array::metadata::EmptyMetadata, alloc::vec::Vec, &dyn vortex_layout::LayoutChildren, &vortex_session::registry::ReadContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::child(&Self::Layout, usize) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::child_type(&Self::Layout, usize) -> vortex_layout::LayoutChildType + +pub fn vortex_layout::layouts::array_tree::ArrayTree::dtype(&Self::Layout) -> &vortex_array::dtype::DType + +pub fn 
vortex_layout::layouts::array_tree::ArrayTree::encoding(&Self::Layout) -> vortex_layout::LayoutEncodingRef + +pub fn vortex_layout::layouts::array_tree::ArrayTree::id(&Self::Encoding) -> vortex_layout::LayoutId + +pub fn vortex_layout::layouts::array_tree::ArrayTree::metadata(&Self::Layout) -> Self::Metadata + +pub fn vortex_layout::layouts::array_tree::ArrayTree::nchildren(&Self::Layout) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTree::row_count(&Self::Layout) -> u64 + +pub fn vortex_layout::layouts::array_tree::ArrayTree::segment_ids(&Self::Layout) -> alloc::vec::Vec + +pub fn vortex_layout::layouts::array_tree::ArrayTree::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_layout::VTable for vortex_layout::layouts::chunked::Chunked pub type vortex_layout::layouts::chunked::Chunked::Encoding = vortex_layout::layouts::chunked::ChunkedLayoutEncoding diff --git a/vortex-layout/src/display.rs b/vortex-layout/src/display.rs index 5acf685e946..9e4574d5614 100644 --- a/vortex-layout/src/display.rs +++ b/vortex-layout/src/display.rs @@ -225,7 +225,6 @@ mod tests { use vortex_array::dtype::Nullability::NonNullable; use vortex_array::dtype::PType; use vortex_array::dtype::StructFields; - use vortex_array::serde::SerializedArray; use vortex_array::validity::Validity; use vortex_buffer::BitBufferMut; use vortex_buffer::buffer; @@ -235,7 +234,6 @@ mod tests { use crate::IntoLayout; use crate::OwnedLayoutChildren; use crate::layouts::chunked::ChunkedLayout; - use crate::layouts::flat::Flat; use crate::layouts::flat::writer::FlatLayoutStrategy; use crate::layouts::struct_::StructLayout; use crate::segments::TestSegments; @@ -244,247 +242,211 @@ mod tests { use crate::strategy::LayoutStrategy; use crate::test::SESSION; - /// Test 
display_tree with inline array_tree metadata (no segment source needed). + /// Test display_tree for a struct layout (fallback rendering, no inline array_tree). #[test] fn test_display_tree_inline_array_tree() { - // LazyLock caches the env var on first read, so only nextest (separate processes) can isolate it. - if std::env::var("NEXTEST_RUN_ID").is_ok() { - temp_env::with_var("FLAT_LAYOUT_INLINE_ARRAY_NODE", Some("1"), || { - block_on(|handle| async move { - let session = SESSION.clone().with_handle(handle); - let ctx = ArrayContext::empty(); - let segments = Arc::new(TestSegments::default()); - - // Create nullable i64 array (2 buffers: data + validity) - let (ptr1, eof1) = SequenceId::root().split(); - let mut validity_builder = BitBufferMut::with_capacity(5); - for b in [true, false, true, true, false] { - validity_builder.append(b); - } - let validity = Validity::Array( - BoolArray::new(validity_builder.freeze(), Validity::NonNullable) - .into_array(), - ); - let array1 = PrimitiveArray::new(buffer![1i64, 2, 3, 4, 5], validity); - let layout1 = FlatLayoutStrategy::default() - .write_stream( - ctx.clone(), - Arc::::clone(&segments), - array1.into_array().to_array_stream().sequenced(ptr1), - eof1, - &session, - ) - .await - .unwrap(); - - // Create utf8 array (2 buffers: views + data) - let (ptr2, eof2) = SequenceId::root().split(); - let mut builder = VarBinViewBuilder::with_capacity(DType::Utf8(NonNullable), 5); - for s in [ - "hello world this is long", - "another long string", - "short", - "medium str", - "x", - ] { - builder.append_value(s); - } - let layout2 = FlatLayoutStrategy::default() - .write_stream( - ctx.clone(), - Arc::::clone(&segments), - builder - .finish() - .into_array() - .to_array_stream() - .sequenced(ptr2), - eof2, - &session, - ) - .await - .unwrap(); - - // Create struct layout - let struct_layout = StructLayout::new( - 5, - DType::Struct( - StructFields::new( - vec![FieldName::from("numbers"), FieldName::from("strings")].into(), - 
vec![ - DType::Primitive(PType::I64, Nullability::Nullable), - DType::Utf8(NonNullable), - ], - ), - NonNullable, - ), + block_on(|handle| async move { + let session = SESSION.clone().with_handle(handle); + let ctx = ArrayContext::empty(); + let segments = Arc::new(TestSegments::default()); + + // Create nullable i64 array (2 buffers: data + validity) + let (ptr1, eof1) = SequenceId::root().split(); + let mut validity_builder = BitBufferMut::with_capacity(5); + for b in [true, false, true, true, false] { + validity_builder.append(b); + } + let validity = Validity::Array( + BoolArray::new(validity_builder.freeze(), Validity::NonNullable).into_array(), + ); + let array1 = PrimitiveArray::new(buffer![1i64, 2, 3, 4, 5], validity); + let layout1 = FlatLayoutStrategy::default() + .write_stream( + ctx.clone(), + Arc::::clone(&segments), + array1.into_array().to_array_stream().sequenced(ptr1), + eof1, + &session, + ) + .await + .unwrap(); + + // Create utf8 array (2 buffers: views + data) + let (ptr2, eof2) = SequenceId::root().split(); + let mut builder = VarBinViewBuilder::with_capacity(DType::Utf8(NonNullable), 5); + for s in [ + "hello world this is long", + "another long string", + "short", + "medium str", + "x", + ] { + builder.append_value(s); + } + let layout2 = FlatLayoutStrategy::default() + .write_stream( + ctx.clone(), + Arc::::clone(&segments), + builder + .finish() + .into_array() + .to_array_stream() + .sequenced(ptr2), + eof2, + &session, + ) + .await + .unwrap(); + + // Create struct layout + let struct_layout = StructLayout::new( + 5, + DType::Struct( + StructFields::new( + vec![FieldName::from("numbers"), FieldName::from("strings")].into(), vec![ - ChunkedLayout::new( - 5, - DType::Primitive(PType::I64, Nullability::Nullable), - OwnedLayoutChildren::layout_children(vec![layout1]), - ) - .into_layout(), - layout2, + DType::Primitive(PType::I64, Nullability::Nullable), + DType::Utf8(NonNullable), ], + ), + NonNullable, + ), + vec![ + ChunkedLayout::new( + 
5, + DType::Primitive(PType::I64, Nullability::Nullable), + OwnedLayoutChildren::layout_children(vec![layout1]), ) - .into_layout(); + .into_layout(), + layout2, + ], + ) + .into_layout(); - let output = format!("{}", struct_layout.display_tree_verbose(true)); + let output = format!("{}", struct_layout.display_tree_verbose(true)); - let expected = "\ + let expected = "\ vortex.struct, dtype: {numbers=i64?, strings=utf8}, children: 2, rows: 5 ├── numbers: vortex.chunked, dtype: i64?, children: 1, rows: 5 -│ └── [0]: vortex.flat, dtype: i64?, metadata: 171 bytes, rows: 5, segment 0, buffers=[40B, 1B], total=41B -└── strings: vortex.flat, dtype: utf8, metadata: 110 bytes, rows: 5, segment 1, buffers=[43B, 80B], total=123B +│ └── [0]: vortex.flat, dtype: i64?, rows: 5, segment: 0 +└── strings: vortex.flat, dtype: utf8, rows: 5, segment: 1 "; - assert_eq!(output, expected); - }) - }) - } + assert_eq!(output, expected); + }) } /// Test display_tree_with_segments using async segment source to fetch buffer sizes. 
#[test] fn test_display_tree_with_segment_source() { - if std::env::var("NEXTEST_RUN_ID").is_ok() { - temp_env::with_var("FLAT_LAYOUT_INLINE_ARRAY_NODE", None::<&str>, || { - block_on(|handle| async move { - let session = SESSION.clone().with_handle(handle); - let ctx = ArrayContext::empty(); - let segments = Arc::new(TestSegments::default()); - - // Create simple i32 array - let (ptr1, eof1) = SequenceId::root().split(); - let array1 = - PrimitiveArray::new(buffer![1i32, 2, 3, 4, 5], Validity::NonNullable); - let layout1 = FlatLayoutStrategy::default() - .write_stream( - ctx.clone(), - Arc::::clone(&segments), - array1.into_array().to_array_stream().sequenced(ptr1), - eof1, - &session, - ) - .await - .unwrap(); - - // Create another i32 array - let (ptr2, eof2) = SequenceId::root().split(); - let array2 = - PrimitiveArray::new(buffer![6i32, 7, 8, 9, 10], Validity::NonNullable); - let layout2 = FlatLayoutStrategy::default() - .write_stream( - ctx.clone(), - Arc::::clone(&segments), - array2.into_array().to_array_stream().sequenced(ptr2), - eof2, - &session, - ) - .await - .unwrap(); - - // Create chunked layout - let chunked_layout = ChunkedLayout::new( - 10, - DType::Primitive(PType::I32, NonNullable), - OwnedLayoutChildren::layout_children(vec![layout1, layout2]), - ) - .into_layout(); - - let output = chunked_layout - .display_tree_with_segments(segments) - .await - .unwrap(); - - let expected = "\ + block_on(|handle| async move { + let session = SESSION.clone().with_handle(handle); + let ctx = ArrayContext::empty(); + let segments = Arc::new(TestSegments::default()); + + // Create simple i32 array + let (ptr1, eof1) = SequenceId::root().split(); + let array1 = PrimitiveArray::new(buffer![1i32, 2, 3, 4, 5], Validity::NonNullable); + let layout1 = FlatLayoutStrategy::default() + .write_stream( + ctx.clone(), + Arc::::clone(&segments), + array1.into_array().to_array_stream().sequenced(ptr1), + eof1, + &session, + ) + .await + .unwrap(); + + // Create another i32 
array + let (ptr2, eof2) = SequenceId::root().split(); + let array2 = PrimitiveArray::new(buffer![6i32, 7, 8, 9, 10], Validity::NonNullable); + let layout2 = FlatLayoutStrategy::default() + .write_stream( + ctx.clone(), + Arc::::clone(&segments), + array2.into_array().to_array_stream().sequenced(ptr2), + eof2, + &session, + ) + .await + .unwrap(); + + // Create chunked layout + let chunked_layout = ChunkedLayout::new( + 10, + DType::Primitive(PType::I32, NonNullable), + OwnedLayoutChildren::layout_children(vec![layout1, layout2]), + ) + .into_layout(); + + let output = chunked_layout + .display_tree_with_segments(segments) + .await + .unwrap(); + + let expected = "\ vortex.chunked, dtype: i32, children: 2, rows: 10 ├── [0]: vortex.flat, dtype: i32, rows: 5, segment 0, buffers=[20B], total=20B └── [1]: vortex.flat, dtype: i32, rows: 5, segment 1, buffers=[20B], total=20B "; - assert_eq!(output.to_string(), expected); - }) - }) - } + assert_eq!(output.to_string(), expected); + }) } - /// Test display_array_tree with inline array node metadata. + /// Test display_tree for nullable flat layout (fallback rendering, no inline array_tree). 
#[test] - fn test_display_array_tree_with_inline_node() { - if std::env::var("NEXTEST_RUN_ID").is_ok() { - temp_env::with_var("FLAT_LAYOUT_INLINE_ARRAY_NODE", Some("1"), || { - let ctx = ArrayContext::empty(); - let segments = Arc::new(TestSegments::default()); - let (ptr, eof) = SequenceId::root().split(); - - // Create a simple primitive array - let array = PrimitiveArray::new(buffer![1i32, 2, 3, 4, 5], Validity::AllValid); - let layout = block_on(|handle| async { - let session = SESSION.clone().with_handle(handle); - FlatLayoutStrategy::default() - .write_stream( - ctx.clone(), - Arc::::clone(&segments), - array.into_array().to_array_stream().sequenced(ptr), - eof, - &session, - ) - .await - .unwrap() - }); - - let flat_layout = layout.as_::(); - - let array_tree = flat_layout.array_tree().expect( - "array_tree should be populated when FLAT_LAYOUT_INLINE_ARRAY_NODE is set", - ); - - let parts = SerializedArray::from_array_tree(array_tree.as_ref().to_vec()) - .expect("should parse array_tree"); - assert_eq!(parts.buffer_lengths(), vec![20]); // 5 i32 values = 20 bytes - - assert_eq!( - layout.display_tree().to_string(), - "\ -vortex.flat, dtype: i32?, segment 0, buffers=[20B], total=20B -" - ); - }) - } + fn test_display_flat_layout() { + let ctx = ArrayContext::empty(); + let segments = Arc::new(TestSegments::default()); + let (ptr, eof) = SequenceId::root().split(); + + // Create a simple primitive array + let array = PrimitiveArray::new(buffer![1i32, 2, 3, 4, 5], Validity::AllValid); + let layout = block_on(|handle| async { + let session = SESSION.clone().with_handle(handle); + FlatLayoutStrategy::default() + .write_stream( + ctx.clone(), + Arc::::clone(&segments), + array.into_array().to_array_stream().sequenced(ptr), + eof, + &session, + ) + .await + .unwrap() + }); + + assert_eq!( + layout.display_tree().to_string(), + "vortex.flat, dtype: i32?, segment: 0\n" + ); } - /// Test display_tree without inline array node (shows segment ID). 
+ /// Test display_tree for non-nullable flat layout (fallback rendering, no inline array_tree). #[test] - fn test_display_tree_without_inline_node() { - if std::env::var("NEXTEST_RUN_ID").is_ok() { - temp_env::with_var("FLAT_LAYOUT_INLINE_ARRAY_NODE", Some("1"), || { - let ctx = ArrayContext::empty(); - let segments = Arc::new(TestSegments::default()); - let (ptr, eof) = SequenceId::root().split(); - - // Create a simple primitive array - let array = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable); - let layout = block_on(|handle| async { - let session = SESSION.clone().with_handle(handle); - FlatLayoutStrategy::default() - .write_stream( - ctx, - Arc::::clone(&segments), - array.into_array().to_array_stream().sequenced(ptr), - eof, - &session, - ) - .await - .unwrap() - }); - - // Test display_tree exact output (with inline array_tree enabled by env var from other test) - assert_eq!( - layout.display_tree().to_string(), - "\ -vortex.flat, dtype: i64, segment 0, buffers=[24B], total=24B -" - ); - }) - } + fn test_display_flat_layout_non_nullable() { + let ctx = ArrayContext::empty(); + let segments = Arc::new(TestSegments::default()); + let (ptr, eof) = SequenceId::root().split(); + + let array = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable); + let layout = block_on(|handle| async { + let session = SESSION.clone().with_handle(handle); + FlatLayoutStrategy::default() + .write_stream( + ctx, + Arc::::clone(&segments), + array.into_array().to_array_stream().sequenced(ptr), + eof, + &session, + ) + .await + .unwrap() + }); + + assert_eq!( + layout.display_tree().to_string(), + "vortex.flat, dtype: i64, segment: 0\n" + ); } } diff --git a/vortex-layout/src/layouts/array_tree/flat.rs b/vortex-layout/src/layouts/array_tree/flat.rs new file mode 100644 index 00000000000..f12faebff6e --- /dev/null +++ b/vortex-layout/src/layouts/array_tree/flat.rs @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 
Copyright the Vortex contributors
+
+use std::sync::Arc;
+use std::sync::OnceLock;
+
+use vortex_array::DeserializeMetadata;
+use vortex_array::SerializeMetadata;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::TryFromBytes;
+use vortex_buffer::ByteBuffer;
+use vortex_error::VortexExpect;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+use vortex_error::vortex_panic;
+use vortex_session::VortexSession;
+use vortex_session::registry::ReadContext;
+
+use crate::LayoutChildType;
+use crate::LayoutEncodingRef;
+use crate::LayoutId;
+use crate::LayoutReaderRef;
+use crate::LayoutRef;
+use crate::VTable;
+use crate::children::LayoutChildren;
+use crate::layouts::array_tree::ArrayTreesSource;
+use crate::layouts::array_tree::reader::ArrayTreeFlatReader;
+use crate::layouts::flat::FlatLayout;
+use crate::segments::SegmentId;
+use crate::segments::SegmentSource;
+use crate::vtable;
+
+vtable!(ArrayTreeFlat);
+
+/// Encoding marker for [`ArrayTreeFlatLayout`].
+#[derive(Debug)]
+pub struct ArrayTreeFlatLayoutEncoding;
+
+/// A flat layout variant that stores its compact encoding tree separately from the data segment.
+///
+/// At write time, the compact flatbuffer (encoding tree + buffer descriptors, no stats) is
+/// stored in this layout and later collected by [`super::ArrayTreeLayout`] into a shared VarBin
+/// array.
+///
+/// At read time, the compact flatbuffer is retrieved from the shared [`ArrayTreesSource`]
+/// (injected by the parent [`super::ArrayTreeLayout`]'s reader-construction walk) rather than
+/// being parsed from the data segment. This avoids fetching the segment for decode planning
+/// and prevents device-to-host copies for device-resident buffers.
+#[derive(Clone, Debug)]
+pub struct ArrayTreeFlatLayout {
+    /// The wrapped flat layout; supplies row count, dtype and the data segment id.
+    inner: FlatLayout,
+    /// Position of this leaf's compact tree in the shared array-trees VarBin array.
+    chunk_idx: usize,
+    /// The compact flatbuffer produced at write time. Not persisted — only used to communicate
+    /// between the leaf strategy and the collector strategy via the layout tree.
+    compact_tree: Option<ByteBuffer>,
+    /// Shared source for compact flatbuffers, injected by the parent [`super::ArrayTreeLayout`]
+    /// during reader construction.
+    source: OnceLock<Arc<ArrayTreesSource>>,
+}
+
+impl ArrayTreeFlatLayout {
+    /// Creates a new layout at write time with a compact flatbuffer.
+    ///
+    /// The source slot starts empty; it is only filled on the read path via
+    /// [`Self::set_source`].
+    pub fn new(inner: FlatLayout, chunk_idx: usize, compact_tree: ByteBuffer) -> Self {
+        Self {
+            inner,
+            chunk_idx,
+            compact_tree: Some(compact_tree),
+            source: OnceLock::new(),
+        }
+    }
+
+    /// Returns the chunk index of this layout in the array trees VarBin.
+    pub fn chunk_idx(&self) -> usize {
+        self.chunk_idx
+    }
+
+    /// Returns the compact flatbuffer, if available (write-time only).
+    pub fn compact_tree(&self) -> Option<&ByteBuffer> {
+        self.compact_tree.as_ref()
+    }
+
+    /// Returns the inner flat layout.
+    pub fn inner(&self) -> &FlatLayout {
+        &self.inner
+    }
+
+    /// Sets the shared array trees source. Called by the parent [`super::ArrayTreeLayout`]
+    /// during the reader-construction injection walk.
+    ///
+    /// Only the first call has an effect: the slot is a [`OnceLock`], so later sources are
+    /// silently discarded.
+    pub fn set_source(&self, source: Arc<ArrayTreesSource>) {
+        // Ignore if already set (e.g., in tests or double-init scenarios).
+        // NOTE(review): if two readers are built from the same layout instance with different
+        // segment sources, the second one keeps reading trees through the first source.
+        // Confirm that this cannot happen, or key the source per reader instead.
+        drop(self.source.set(source));
+    }
+
+    /// Returns the shared array trees source, if set.
+    pub fn source(&self) -> Option<&Arc<ArrayTreesSource>> {
+        self.source.get()
+    }
+}
+
+/// Metadata for [`ArrayTreeFlatLayout`]: stores the chunk index.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ArrayTreeFlatMetadata {
+    /// Index of the owning leaf's compact tree in the shared array-trees VarBin array.
+    pub chunk_idx: u32,
+}
+
+impl SerializeMetadata for ArrayTreeFlatMetadata {
+    /// Encodes the chunk index as four little-endian bytes.
+    fn serialize(self) -> Vec<u8> {
+        self.chunk_idx.to_le_bytes().to_vec()
+    }
+}
+
+impl DeserializeMetadata for ArrayTreeFlatMetadata {
+    type Output = Self;
+
+    /// Decodes the chunk index from the first four bytes of `metadata`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when `metadata` holds fewer than four bytes. Bytes past the fourth
+    /// are ignored.
+    fn deserialize(metadata: &[u8]) -> VortexResult<Self::Output> {
+        // The bytes are externally supplied, so a short buffer must surface as an error
+        // rather than as an out-of-range slice panic.
+        let Some(bytes) = metadata.get(..4) else {
+            vortex_bail!(
+                "ArrayTreeFlatMetadata requires at least 4 bytes, got {}",
+                metadata.len()
+            );
+        };
+        let chunk_idx = u32::try_from_le_bytes(bytes)?;
+        Ok(Self { chunk_idx })
+    }
+}
+
+impl VTable for ArrayTreeFlat {
+    type Layout = ArrayTreeFlatLayout;
+    type Encoding = ArrayTreeFlatLayoutEncoding;
+    type Metadata = ArrayTreeFlatMetadata;
+
+    fn id(_encoding: &Self::Encoding) -> LayoutId {
+        LayoutId::new_static("vortex.array_tree_flat")
+    }
+
+    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
+        LayoutEncodingRef::new_ref(ArrayTreeFlatLayoutEncoding.as_ref())
+    }
+
+    fn row_count(layout: &Self::Layout) -> u64 {
+        layout.inner.row_count()
+    }
+
+    fn dtype(layout: &Self::Layout) -> &DType {
+        layout.inner.dtype()
+    }
+
+    /// Builds the persisted metadata; panics if the chunk index does not fit in a `u32`.
+    fn metadata(layout: &Self::Layout) -> Self::Metadata {
+        ArrayTreeFlatMetadata {
+            chunk_idx: u32::try_from(layout.chunk_idx).vortex_expect("chunk_idx must fit in u32"),
+        }
+    }
+
+    /// The only segment is the inner flat layout's data segment.
+    fn segment_ids(layout: &Self::Layout) -> Vec<SegmentId> {
+        vec![layout.inner.segment_id()]
+    }
+
+    fn nchildren(_layout: &Self::Layout) -> usize {
+        0
+    }
+
+    fn child(_layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
+        vortex_bail!("ArrayTreeFlatLayout has no children, got index {}", idx)
+    }
+
+    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
+        vortex_panic!("ArrayTreeFlatLayout has no children, got index {}", idx)
+    }
+
+    fn new_reader(
+        layout: &Self::Layout,
+        name: Arc<str>,
+        segment_source: Arc<dyn SegmentSource>,
+        session: &VortexSession,
+    ) -> VortexResult<LayoutReaderRef> {
+        // The reader owns a clone of the layout, including whatever source has been injected
+        // into it by the time this is called.
+        Ok(Arc::new(ArrayTreeFlatReader::new(
+            layout.clone(),
+            name,
+            segment_source,
+            session.clone(),
+        )))
+    }
+
+    /// Rebuilds the layout on the read path: no compact tree is attached and the source slot
+    /// starts empty.
+    fn build(
+        _encoding: &Self::Encoding,
+        dtype: &DType,
+        row_count: u64,
+        metadata: &ArrayTreeFlatMetadata,
+        segment_ids: Vec<SegmentId>,
+        _children: &dyn LayoutChildren,
+        ctx: &ReadContext,
+    ) -> VortexResult<Self::Layout> {
+        if segment_ids.len() != 1 {
+            vortex_bail!("ArrayTreeFlatLayout must have exactly one segment ID");
+        }
+        Ok(ArrayTreeFlatLayout {
+            inner: FlatLayout::new(row_count, dtype.clone(), segment_ids[0], ctx.clone()),
+            chunk_idx: metadata.chunk_idx as usize,
+            compact_tree: None,
+            source: OnceLock::new(),
+        })
+    }
+}
diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs
new file mode 100644
index 00000000000..6e96d762868
--- /dev/null
+++ b/vortex-layout/src/layouts/array_tree/mod.rs
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Array tree layout: stores compact encoding tree flatbuffers (without stats) as a VarBin
+//! vortex array alongside the data layout, enabling decode planning and sub-segment random
+//! access without fetching data segments.
+
+mod flat;
+mod reader;
+pub mod writer;
+
+use std::sync::Arc;
+use std::sync::OnceLock;
+
+use futures::FutureExt;
+use vortex_array::EmptyMetadata;
+use vortex_array::MaskFuture;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::Nullability;
+use vortex_array::expr::root;
+use vortex_error::VortexExpect;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+use vortex_error::vortex_panic;
+use vortex_session::VortexSession;
+use vortex_session::registry::ReadContext;
+
+pub use self::flat::ArrayTreeFlatLayout;
+pub use self::flat::ArrayTreeFlatLayoutEncoding;
+use crate::LayoutChildType;
+use crate::LayoutEncodingRef;
+use crate::LayoutId;
+use crate::LayoutReaderRef;
+use crate::LayoutRef;
+use crate::VTable;
+use crate::children::LayoutChildren;
+use crate::children::OwnedLayoutChildren;
+use crate::layouts::array_tree::flat::ArrayTreeFlat;
+use crate::layouts::array_tree::reader::ArrayTreeReader;
+use crate::segments::SegmentId;
+use crate::segments::SegmentSource;
+use
crate::vtable;
+
+vtable!(ArrayTree);
+
+/// Encoding marker for [`ArrayTreeLayout`].
+#[derive(Debug)]
+pub struct ArrayTreeLayoutEncoding;
+
+/// Collects compact encoding tree flatbuffers from [`ArrayTreeFlatLayout`] descendants and stores
+/// them as a VarBin array in an auxiliary child layout.
+///
+/// # Children
+///
+/// - Child 0 (`Transparent "data"`): The actual data layout tree (may contain any intermediate
+///   layouts like `ChunkedLayout`, `DictLayout`, etc., with [`ArrayTreeFlatLayout`] at the leaves).
+/// - Child 1 (`Auxiliary "array_trees"`): A VarBin array of compact `Array` flatbuffers, one per
+///   [`ArrayTreeFlatLayout`] leaf in depth-first order.
+#[derive(Clone, Debug)]
+pub struct ArrayTreeLayout {
+    /// Dtype of the data child; also reported as this layout's dtype.
+    dtype: DType,
+    /// Exactly two children: `[data, array_trees]`.
+    children: Arc<dyn LayoutChildren>,
+}
+
+impl ArrayTreeLayout {
+    /// Creates a new `ArrayTreeLayout` from the data and array_trees children.
+    pub fn new(data: LayoutRef, array_trees: LayoutRef) -> Self {
+        Self {
+            dtype: data.dtype().clone(),
+            children: OwnedLayoutChildren::layout_children(vec![data, array_trees]),
+        }
+    }
+}
+
+impl VTable for ArrayTree {
+    type Layout = ArrayTreeLayout;
+    type Encoding = ArrayTreeLayoutEncoding;
+    type Metadata = EmptyMetadata;
+
+    fn id(_encoding: &Self::Encoding) -> LayoutId {
+        LayoutId::new_static("vortex.array_tree")
+    }
+
+    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
+        LayoutEncodingRef::new_ref(ArrayTreeLayoutEncoding.as_ref())
+    }
+
+    /// The row count is that of the data child (child 0).
+    fn row_count(layout: &Self::Layout) -> u64 {
+        layout.children.child_row_count(0)
+    }
+
+    fn dtype(layout: &Self::Layout) -> &DType {
+        &layout.dtype
+    }
+
+    fn metadata(_layout: &Self::Layout) -> Self::Metadata {
+        EmptyMetadata
+    }
+
+    /// This layout owns no segments of its own; they all belong to its children.
+    fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
+        vec![]
+    }
+
+    fn nchildren(_layout: &Self::Layout) -> usize {
+        2
+    }
+
+    /// Child 0 carries the layout dtype; child 1 is always a non-nullable binary column.
+    fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
+        match idx {
+            0 => layout.children.child(0, &layout.dtype),
+            1 => layout
+                .children
+                .child(1, &DType::Binary(Nullability::NonNullable)),
+            _ => vortex_bail!("ArrayTreeLayout has 2 children, got index {}", idx),
+        }
+    }
+
+    fn child_type(_layout: &Self::Layout, idx: usize) -> LayoutChildType {
+        match idx {
+            0 => LayoutChildType::Transparent("data".into()),
+            1 => LayoutChildType::Auxiliary("array_trees".into()),
+            _ => vortex_panic!("ArrayTreeLayout has 2 children, got index {}", idx),
+        }
+    }
+
+    /// Builds a reader for the data child after handing every [`ArrayTreeFlatLayout`]
+    /// descendant a shared, lazily-read [`ArrayTreesSource`] over the array_trees child.
+    fn new_reader(
+        layout: &Self::Layout,
+        name: Arc<str>,
+        segment_source: Arc<dyn SegmentSource>,
+        session: &VortexSession,
+    ) -> VortexResult<LayoutReaderRef> {
+        // Walk the data child to find all ArrayTreeFlatLayouts and inject the array_trees source.
+        let data_child = Self::child(layout, 0)?;
+        let array_trees_child = Self::child(layout, 1)?;
+
+        // Create a reader for the array_trees VarBin child so the source can lazily read it.
+        let trees_reader = array_trees_child.new_reader(
+            Arc::from(format!("{name}/array_trees")),
+            Arc::clone(&segment_source),
+            session,
+        )?;
+        let source = Arc::new(ArrayTreesSource::new(trees_reader));
+
+        // Inject the shared source into all ArrayTreeFlatLayout descendants.
+        // NOTE(review): this only works if the layouts yielded by the traversal are the same
+        // instances that `data_child.new_reader` later builds leaf readers from — confirm
+        // against the `LayoutChildren` implementation used on the read path.
+        for layout_ref in data_child.depth_first_traversal() {
+            let layout_ref = layout_ref?;
+            if let Some(atf) = layout_ref.as_opt::<ArrayTreeFlat>() {
+                atf.set_source(Arc::clone(&source));
+            }
+        }
+
+        // Create a transparent reader that delegates to the data child.
+        let data_reader = data_child.new_reader(Arc::clone(&name), segment_source, session)?;
+        Ok(Arc::new(ArrayTreeReader::new(name, data_reader)))
+    }
+
+    fn build(
+        _encoding: &Self::Encoding,
+        dtype: &DType,
+        _row_count: u64,
+        _metadata: &EmptyMetadata,
+        _segment_ids: Vec<SegmentId>,
+        children: &dyn LayoutChildren,
+        _ctx: &ReadContext,
+    ) -> VortexResult<Self::Layout> {
+        Ok(ArrayTreeLayout {
+            dtype: dtype.clone(),
+            children: children.to_arc(),
+        })
+    }
+
+    /// Replaces both children; anything other than exactly two is rejected.
+    fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
+        if children.len() != 2 {
+            vortex_bail!(
+                "ArrayTreeLayout expects 2 children (data, array_trees), got {}",
+                children.len()
+            );
+        }
+        layout.children = OwnedLayoutChildren::layout_children(children);
+        Ok(())
+    }
+}
+
+/// Shared source for compact array tree flatbuffers.
+///
+/// Holds a reader for the array_trees child layout and provides lazy shared access
+/// to the decoded VarBin array. The first reader to need it triggers the read; all
+/// subsequent readers reuse the shared result.
+pub struct ArrayTreesSource {
+    /// Reader over the array_trees child layout.
+    reader: LayoutReaderRef,
+    /// Lazily initialized shared future for the full VarBin array.
+    array: OnceLock<SharedArrayFuture>,
+}
+
+impl std::fmt::Debug for ArrayTreesSource {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ArrayTreesSource").finish_non_exhaustive()
+    }
+}
+
+impl ArrayTreesSource {
+    /// Creates a new source backed by the given array_trees reader.
+    pub fn new(reader: LayoutReaderRef) -> Self {
+        Self {
+            reader,
+            array: OnceLock::new(),
+        }
+    }
+
+    /// Returns a shared future that resolves to the full VarBin array of compact trees.
+    ///
+    /// The future is created on the first call and cloned on every later call. It projects
+    /// the root expression over every row of the array_trees child with an all-true mask.
+    pub fn array_future(&self) -> SharedArrayFuture {
+        self.array
+            .get_or_init(|| {
+                let row_count = self.reader.row_count();
+                let reader = Arc::clone(&self.reader);
+                async move {
+                    reader
+                        .projection_evaluation(
+                            &(0..row_count),
+                            &root(),
+                            MaskFuture::new_true(
+                                usize::try_from(row_count)
+                                    .vortex_expect("row count must fit in usize"),
+                            ),
+                        )?
+                        .await
+                        .map_err(Arc::new)
+                }
+                .boxed()
+                .shared()
+            })
+            .clone()
+    }
+}
+
+use crate::layouts::SharedArrayFuture;
diff --git a/vortex-layout/src/layouts/array_tree/reader.rs b/vortex-layout/src/layouts/array_tree/reader.rs
new file mode 100644
index 00000000000..b697b51163d
--- /dev/null
+++ b/vortex-layout/src/layouts/array_tree/reader.rs
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::collections::BTreeSet;
+use std::ops::BitAnd;
+use std::ops::Range;
+use std::sync::Arc;
+
+use futures::FutureExt;
+use vortex_array::MaskFuture;
+use vortex_array::VortexSessionExecute;
+use vortex_array::arrays::VarBinView;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::FieldMask;
+use vortex_array::expr::Expression;
+use vortex_array::serde::SerializedArray;
+use vortex_error::VortexExpect;
+use vortex_error::VortexResult;
+use vortex_mask::Mask;
+use vortex_session::VortexSession;
+
+use crate::LayoutReader;
+use crate::LayoutReaderRef;
+use crate::layouts::SharedArrayFuture;
+use crate::layouts::array_tree::flat::ArrayTreeFlatLayout;
+use crate::reader::ArrayFuture;
+use crate::reader::SplitRange;
+use crate::segments::SegmentSource;
+
+/// Transparent reader for [`super::ArrayTreeLayout`].
+///
+/// Delegates all operations to the data child reader. The array_trees auxiliary child
+/// is wrapped at construction time (by [`super::ArrayTreeLayout::new_reader`]) in a shared,
+/// lazily-read source that is injected into [`ArrayTreeFlatLayout`] descendants.
+pub struct ArrayTreeReader {
+    /// Name reported through [`LayoutReader::name`].
+    name: Arc<str>,
+    /// Reader for the data child; every evaluation call is forwarded to it unchanged.
+    data_reader: LayoutReaderRef,
+}
+
+impl ArrayTreeReader {
+    /// Wraps `data_reader` under the given `name`.
+    pub fn new(name: Arc<str>, data_reader: LayoutReaderRef) -> Self {
+        Self { name, data_reader }
+    }
+}
+
+impl LayoutReader for ArrayTreeReader {
+    fn name(&self) -> &Arc<str> {
+        &self.name
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    /// Dtype of the data child.
+    fn dtype(&self) -> &DType {
+        self.data_reader.dtype()
+    }
+
+    /// Row count of the data child.
+    fn row_count(&self) -> u64 {
+        self.data_reader.row_count()
+    }
+
+    /// Forwards split registration to the data child.
+    fn register_splits(
+        &self,
+        field_mask: &[FieldMask],
+        split_range: &SplitRange,
+        splits: &mut BTreeSet<u64>,
+    ) -> VortexResult<()> {
+        self.data_reader
+            .register_splits(field_mask, split_range, splits)
+    }
+
+    /// Forwards pruning to the data child.
+    fn pruning_evaluation(
+        &self,
+        row_range: &Range<u64>,
+        expr: &Expression,
+        mask: Mask,
+    ) -> VortexResult<MaskFuture> {
+        self.data_reader.pruning_evaluation(row_range, expr, mask)
+    }
+
+    /// Forwards filter evaluation to the data child.
+    fn filter_evaluation(
+        &self,
+        row_range: &Range<u64>,
+        expr: &Expression,
+        mask: MaskFuture,
+    ) -> VortexResult<MaskFuture> {
+        self.data_reader.filter_evaluation(row_range, expr, mask)
+    }
+
+    /// Forwards projection to the data child.
+    fn projection_evaluation(
+        &self,
+        row_range: &Range<u64>,
+        expr: &Expression,
+        mask: MaskFuture,
+    ) -> VortexResult<ArrayFuture> {
+        self.data_reader
+            .projection_evaluation(row_range, expr, mask)
+    }
+}
+
+/// The threshold of mask density below which we will evaluate the expression only over the
+/// selected rows, and above which we evaluate the expression over all rows and then select after.
+const EXPR_EVAL_THRESHOLD: f64 = 0.2;
+
+/// Reader for [`ArrayTreeFlatLayout`].
+///
+/// Similar to [`super::super::flat::reader::FlatReader`] but obtains its compact encoding tree
+/// from the shared [`super::ArrayTreesSource`] rather than from inline layout metadata or
+/// the data segment.
+pub struct ArrayTreeFlatReader {
+    /// The leaf layout, including any injected [`super::ArrayTreesSource`].
+    layout: ArrayTreeFlatLayout,
+    /// Name reported through [`LayoutReader::name`] and used in trace output.
+    name: Arc<str>,
+    /// Source the data segment is requested from.
+    segment_source: Arc<dyn SegmentSource>,
+    /// Session used to decode the array and to create execution contexts.
+    session: VortexSession,
+}
+
+impl ArrayTreeFlatReader {
+    /// Bundles the leaf layout with the segment source and session it will read through.
+    pub(crate) fn new(
+        layout: ArrayTreeFlatLayout,
+        name: Arc<str>,
+        segment_source: Arc<dyn SegmentSource>,
+        session: VortexSession,
+    ) -> Self {
+        Self {
+            layout,
+            name,
+            segment_source,
+            session,
+        }
+    }
+
+    /// Register the segment request and return a future that resolves to the deserialized array.
+    ///
+    /// If a shared [`super::ArrayTreesSource`] has been injected, the compact flatbuffer is
+    /// obtained from there (concurrently with the data segment fetch). Otherwise falls back
+    /// to parsing from the segment (like FlatReader).
+    ///
+    /// The returned future fails when the trees array is not a VarBinView array or when this
+    /// layout's chunk index lies outside it.
+    fn array_future(&self) -> SharedArrayFuture {
+        let row_count = usize::try_from(self.layout.inner().row_count())
+            .vortex_expect("row count must fit in usize");
+
+        let segment_fut = self
+            .segment_source
+            .request(self.layout.inner().segment_id());
+        let ctx = self.layout.inner().array_ctx().clone();
+        let session = self.session.clone();
+        let dtype = self.layout.inner().dtype().clone();
+
+        // If a source has been injected, resolve the compact tree from it.
+        // Otherwise, fall back to parsing from the segment (like FlatReader).
+        let source_future = self.layout.source().map(|s| s.array_future());
+        let chunk_idx = self.layout.chunk_idx();
+
+        async move {
+            let parts = match source_future {
+                Some(source_future) => {
+                    // Drive both reads together: the shared trees future makes no progress
+                    // until it is polled, so awaiting the segment first would serialize them.
+                    let (segment, trees_array) =
+                        futures::future::join(segment_fut, source_future).await;
+                    let segment = segment?;
+                    // Resolve the VarBin array of compact trees (shared, read once).
+                    let trees_array = trees_array?;
+                    let trees = trees_array.try_downcast::<VarBinView>().map_err(|_| {
+                        Arc::new(vortex_error::vortex_err!(
+                            "array_trees child is not a VarBinView array"
+                        ))
+                    })?;
+                    // The index is read back from layout metadata; reject it here instead of
+                    // relying on whatever `bytes_at` does with an out-of-range position.
+                    if chunk_idx >= trees.len() {
+                        return Err(Arc::new(vortex_error::vortex_err!(
+                            "array tree index {} is out of bounds for {} stored trees",
+                            chunk_idx,
+                            trees.len()
+                        )));
+                    }
+                    let compact_tree = trees.bytes_at(chunk_idx);
+                    SerializedArray::from_flatbuffer_and_segment(compact_tree, segment)?
+                }
+                None => SerializedArray::try_from(segment_fut.await?)?,
+            };
+            parts
+                .decode(&dtype, row_count, &ctx, &session)
+                .map_err(Arc::new)
+        }
+        .boxed()
+        .shared()
+    }
+}
+
+impl LayoutReader for ArrayTreeFlatReader {
+    fn name(&self) -> &Arc<str> {
+        &self.name
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn dtype(&self) -> &DType {
+        self.layout.inner().dtype()
+    }
+
+    fn row_count(&self) -> u64 {
+        self.layout.inner().row_count()
+    }
+
+    /// A flat leaf is never subdivided: it contributes only the end of the root row range.
+    fn register_splits(
+        &self,
+        _field_mask: &[FieldMask],
+        split_range: &SplitRange,
+        splits: &mut BTreeSet<u64>,
+    ) -> VortexResult<()> {
+        split_range.check_bounds(self.layout.inner().row_count())?;
+        splits.insert(split_range.root_row_range().end);
+        Ok(())
+    }
+
+    /// No pruning happens at this level; the incoming mask is returned unchanged.
+    fn pruning_evaluation(
+        &self,
+        _row_range: &Range<u64>,
+        _expr: &Expression,
+        mask: Mask,
+    ) -> VortexResult<MaskFuture> {
+        Ok(MaskFuture::ready(mask))
+    }
+
+    /// Evaluates `expr` over the rows in `row_range` and intersects the result with `mask`.
+    fn filter_evaluation(
+        &self,
+        row_range: &Range<u64>,
+        expr: &Expression,
+        mask: MaskFuture,
+    ) -> VortexResult<MaskFuture> {
+        let row_range = usize::try_from(row_range.start)
+            .vortex_expect("Row range begin must fit within layout size")
+            ..usize::try_from(row_range.end)
+                .vortex_expect("Row range end must fit within layout size");
+        let name = Arc::clone(&self.name);
+        let array = self.array_future();
+        let expr = expr.clone();
+        let session = self.session.clone();
+
+        Ok(MaskFuture::new(mask.len(), async move {
+            let mut array = array.await?;
+            let mask = mask.await?;
+
+            // Only slice when the requested range is narrower than the decoded array.
+            if row_range.start > 0 || row_range.end < array.len() {
+                array = array.slice(row_range.clone())?;
+            }
+
+            // Both strategies apply the expression first; they differ only in whether the
+            // mask is applied before execution (sparse) or and-ed in afterwards (dense).
+            let array = array.apply(&expr)?;
+            let array_mask = if mask.density() < EXPR_EVAL_THRESHOLD {
+                let array = array.filter(mask.clone())?;
+                let mut ctx = session.create_execution_ctx();
+                let array_mask = array.execute::<Mask>(&mut ctx)?;
+                mask.intersect_by_rank(&array_mask)
+            } else {
+                let mut ctx = session.create_execution_ctx();
+                let array_mask = array.execute::<Mask>(&mut ctx)?;
+                mask.bitand(&array_mask)
+            };
+
+            tracing::debug!(
+                "ArrayTreeFlat mask evaluation {} - {} (mask = {}) => {}",
+                name,
+                expr,
+                mask.density(),
+                array_mask.density(),
+            );
+
+            Ok(array_mask)
+        }))
+    }
+
+    /// Slices to `row_range`, filters by `mask` unless it is all-true, then applies `expr`.
+    fn projection_evaluation(
+        &self,
+        row_range: &Range<u64>,
+        expr: &Expression,
+        mask: MaskFuture,
+    ) -> VortexResult<ArrayFuture> {
+        let row_range = usize::try_from(row_range.start)
+            .vortex_expect("Row range begin must fit within layout size")
+            ..usize::try_from(row_range.end)
+                .vortex_expect("Row range end must fit within layout size");
+        let name = Arc::clone(&self.name);
+        let array = self.array_future();
+        let expr = expr.clone();
+
+        Ok(async move {
+            tracing::debug!("ArrayTreeFlat array evaluation {} - {}", name, expr);
+
+            let mut array = array.await?;
+            let mask = mask.await?;
+
+            if row_range.start > 0 || row_range.end < array.len() {
+                array = array.slice(row_range.clone())?;
+            }
+
+            if !mask.all_true() {
+                array = array.filter(mask)?;
+            }
+
+            array = array.apply(&expr)?;
+            Ok(array)
+        }
+        .boxed())
+    }
+}
diff --git a/vortex-layout/src/layouts/array_tree/writer.rs b/vortex-layout/src/layouts/array_tree/writer.rs
new file mode 100644
index 00000000000..c521e02ecd2
--- /dev/null
+++ b/vortex-layout/src/layouts/array_tree/writer.rs
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::sync::Arc;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use async_trait::async_trait;
+use futures::StreamExt as _;
+use vortex_array::ArrayContext;
+use vortex_array::IntoArray;
+use vortex_array::builders::ArrayBuilder;
+use vortex_array::builders::VarBinViewBuilder;
+use vortex_array::dtype::DType;
+use vortex_array::dtype::Nullability;
+use vortex_array::serde::SerializeOptions;
+use vortex_buffer::ByteBuffer;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+use vortex_session::VortexSession;
+use vortex_session::registry::ReadContext;
+
+use crate::IntoLayout;
+use crate::LayoutRef;
+use crate::LayoutStrategy;
+use
crate::layouts::array_tree::ArrayTreeLayout;
+use crate::layouts::array_tree::flat::ArrayTreeFlat;
+use crate::layouts::array_tree::flat::ArrayTreeFlatLayout;
+use crate::layouts::flat::FlatLayout;
+use crate::layouts::flat::writer::FlatLayoutStrategy;
+use crate::segments::SegmentSinkRef;
+use crate::sequence::SendableSequentialStream;
+use crate::sequence::SequencePointer;
+use crate::sequence::SequentialArrayStreamExt;
+
+/// Builds the two cooperating strategies used for array tree collection.
+///
+/// Returns `(collector, leaf)`:
+/// - `leaf` takes the place of [`FlatLayoutStrategy`] in the data pipeline. It serializes each
+///   chunk and attaches a compact flatbuffer to the layout it returns.
+/// - `collector` wraps the data pipeline. Once the data is written it gathers the compact
+///   flatbuffers from the layout tree and writes them through `array_trees_strategy`.
+///
+/// The returned collector has no data child yet; attach one with
+/// [`ArrayTreeCollectorStrategy::wrap`] before writing.
+pub fn writer(
+    flat: FlatLayoutStrategy,
+    array_trees_strategy: Arc<dyn LayoutStrategy>,
+) -> (ArrayTreeCollectorStrategy, ArrayTreeFlatStrategy) {
+    // Chunk indices handed out by the leaf start at zero.
+    let leaf = ArrayTreeFlatStrategy {
+        flat,
+        chunk_counter: Arc::new(AtomicUsize::new(0)),
+    };
+    let collector = ArrayTreeCollectorStrategy {
+        child: None,
+        array_trees_strategy,
+    };
+    (collector, leaf)
+}
+
+/// Leaf strategy (TX) that replaces [`FlatLayoutStrategy`].
+///
+/// For each chunk, it serializes the array using the settings of the inner
+/// [`FlatLayoutStrategy`] (allowed encodings and padding), also produces a compact flatbuffer
+/// (encoding tree + buffer descriptors, no stats), and returns an
+/// [`ArrayTreeFlatLayout`] with the compact tree attached.
+#[derive(Clone)] +pub struct ArrayTreeFlatStrategy { + flat: FlatLayoutStrategy, + chunk_counter: Arc, +} + +#[async_trait] +impl LayoutStrategy for ArrayTreeFlatStrategy { + async fn write_stream( + &self, + ctx: ArrayContext, + segment_sink: SegmentSinkRef, + mut stream: SendableSequentialStream, + _eof: SequencePointer, + session: &VortexSession, + ) -> VortexResult { + let ctx = ctx.clone(); + let Some(chunk) = stream.next().await else { + vortex_bail!("array tree flat layout needs a single chunk"); + }; + let (sequence_id, chunk) = chunk?; + + let row_count = chunk.len() as u64; + + // Normalize if needed (delegate to flat strategy's normalization). + let chunk = if let Some(allowed) = &self.flat.allowed_encodings { + use vortex_array::normalize::NormalizeOptions; + use vortex_array::normalize::Operation; + chunk.normalize(&mut NormalizeOptions { + allowed, + operation: Operation::Error, + })? + } else { + chunk + }; + + // Produce the compact flatbuffer (no stats, with buffer descriptors). + let compact_tree = chunk.serialize_array_tree( + &ctx, + session, + &SerializeOptions { + offset: 0, + include_padding: self.flat.include_padding, + }, + )?; + + // Full serialization (with stats) for the data segment. + let buffers = chunk.serialize( + &ctx, + session, + &SerializeOptions { + offset: 0, + include_padding: self.flat.include_padding, + }, + )?; + assert!(buffers.len() >= 2); + let segment_id = segment_sink.write(sequence_id, buffers).await?; + + let None = stream.next().await else { + vortex_bail!("array tree flat layout received stream with more than a single chunk"); + }; + + let chunk_idx = self.chunk_counter.fetch_add(1, Ordering::Relaxed); + + Ok(ArrayTreeFlatLayout::new( + FlatLayout::new( + row_count, + stream.dtype().clone(), + segment_id, + ReadContext::new(ctx.to_ids()), + ), + chunk_idx, + compact_tree, + ) + .into_layout()) + } + + fn buffered_bytes(&self) -> u64 { + 0 + } +} + +/// Collector strategy (RX) that wraps the data pipeline. 
+/// +/// After the data child completes, walks the returned layout tree to extract compact flatbuffers +/// from all [`ArrayTreeFlatLayout`] leaves, builds a VarBin array, and writes it as an +/// auxiliary child. +pub struct ArrayTreeCollectorStrategy { + child: Option>, + array_trees_strategy: Arc, +} + +impl ArrayTreeCollectorStrategy { + /// Sets the data child pipeline that this collector wraps. + pub fn wrap(mut self, child: impl LayoutStrategy) -> Self { + self.child = Some(Arc::new(child)); + self + } +} + +#[async_trait] +impl LayoutStrategy for ArrayTreeCollectorStrategy { + async fn write_stream( + &self, + ctx: ArrayContext, + segment_sink: SegmentSinkRef, + stream: SendableSequentialStream, + mut eof: SequencePointer, + session: &VortexSession, + ) -> VortexResult { + let Some(child) = self.child.as_ref() else { + vortex_bail!("ArrayTreeCollectorStrategy must have a child set via wrap()") + }; + + // Data segments get earlier sequence IDs than array tree segments. + let data_eof = eof.split_off(); + + let data_layout = child + .write_stream( + ctx.clone(), + Arc::clone(&segment_sink), + stream, + data_eof, + session, + ) + .await?; + + // Walk the layout tree to collect compact flatbuffers from ArrayTreeFlatLayout leaves. + let mut compact_trees: Vec<(usize, ByteBuffer)> = Vec::new(); + for layout_ref in data_layout.depth_first_traversal() { + let layout_ref = layout_ref?; + if let Some(atf) = layout_ref.as_opt::() + && let Some(tree) = atf.compact_tree() + { + compact_trees.push((atf.chunk_idx(), tree.clone())); + } + } + + // Sort by chunk index to ensure deterministic order. + compact_trees.sort_by_key(|(idx, _)| *idx); + + // Build a VarBin array of compact flatbuffers. 
+ let dtype = DType::Binary(Nullability::NonNullable); + let mut builder = VarBinViewBuilder::with_capacity(dtype.clone(), compact_trees.len()); + for (_, tree) in &compact_trees { + builder.append_value(tree.as_slice()); + } + let array_trees_array = builder.finish().into_array(); + + // Write the VarBin array via the array_trees strategy. + let trees_stream = array_trees_array + .to_array_stream() + .sequenced(eof.split_off()); + let array_trees_layout = self + .array_trees_strategy + .write_stream(ctx, segment_sink, trees_stream, eof, session) + .await?; + + Ok(ArrayTreeLayout::new(data_layout, array_trees_layout).into_layout()) + } + + fn buffered_bytes(&self) -> u64 { + self.child.as_ref().map(|c| c.buffered_bytes()).unwrap_or(0) + + self.array_trees_strategy.buffered_bytes() + } +} diff --git a/vortex-layout/src/layouts/flat/writer.rs b/vortex-layout/src/layouts/flat/writer.rs index da250414951..17f6983082e 100644 --- a/vortex-layout/src/layouts/flat/writer.rs +++ b/vortex-layout/src/layouts/flat/writer.rs @@ -30,7 +30,6 @@ use crate::IntoLayout; use crate::LayoutRef; use crate::LayoutStrategy; use crate::layouts::flat::FlatLayout; -use crate::layouts::flat::flat_layout_inline_array_node; use crate::segments::SegmentSinkRef; use crate::sequence::SendableSequentialStream; use crate::sequence::SequencePointer; @@ -169,19 +168,16 @@ impl LayoutStrategy for FlatLayoutStrategy { )?; // there is at least the flatbuffer and the length assert!(buffers.len() >= 2); - let array_node = - flat_layout_inline_array_node().then(|| buffers[buffers.len() - 2].clone()); let segment_id = segment_sink.write(sequence_id, buffers).await?; let None = stream.next().await else { vortex_bail!("flat layout received stream with more than a single chunk"); }; - Ok(FlatLayout::new_with_metadata( + Ok(FlatLayout::new( row_count, stream.dtype().clone(), segment_id, ReadContext::new(ctx.to_ids()), - array_node, ) .into_layout()) } diff --git a/vortex-layout/src/layouts/mod.rs 
b/vortex-layout/src/layouts/mod.rs index 18df5b8f347..c483020c029 100644 --- a/vortex-layout/src/layouts/mod.rs +++ b/vortex-layout/src/layouts/mod.rs @@ -8,6 +8,7 @@ use futures::future::Shared; use vortex_array::ArrayRef; use vortex_error::SharedVortexResult; +pub mod array_tree; pub mod buffered; pub mod chunked; pub mod collect; diff --git a/vortex-layout/src/session.rs b/vortex-layout/src/session.rs index 370fe391ca0..8a388dc3d46 100644 --- a/vortex-layout/src/session.rs +++ b/vortex-layout/src/session.rs @@ -9,6 +9,8 @@ use vortex_session::SessionVar; use vortex_session::registry::Registry; use crate::LayoutEncodingRef; +use crate::layouts::array_tree::ArrayTreeFlatLayoutEncoding; +use crate::layouts::array_tree::ArrayTreeLayoutEncoding; use crate::layouts::chunked::ChunkedLayoutEncoding; use crate::layouts::dict::DictLayoutEncoding; use crate::layouts::flat::FlatLayoutEncoding; @@ -52,6 +54,14 @@ impl Default for LayoutSession { layouts.register(StructLayoutEncoding.id(), StructLayoutEncoding.as_ref()); layouts.register(ZonedLayoutEncoding.id(), ZonedLayoutEncoding.as_ref()); layouts.register(DictLayoutEncoding.id(), DictLayoutEncoding.as_ref()); + layouts.register( + ArrayTreeLayoutEncoding.id(), + ArrayTreeLayoutEncoding.as_ref(), + ); + layouts.register( + ArrayTreeFlatLayoutEncoding.id(), + ArrayTreeFlatLayoutEncoding.as_ref(), + ); Self { registry: layouts } } From 6991c01168cc21344bfbec066874393532e71608 Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Fri, 15 May 2026 15:48:27 +0100 Subject: [PATCH 02/10] layout builder overrides Signed-off-by: Onur Satici --- vortex-cuda/src/layout.rs | 1 + vortex-layout/public-api.lock | 86 ++++++--- vortex-layout/src/layout.rs | 30 ++- vortex-layout/src/layouts/array_tree/flat.rs | 105 +++------- vortex-layout/src/layouts/array_tree/mod.rs | 182 ++++++++++++++---- .../src/layouts/array_tree/reader.rs | 56 +++--- .../src/layouts/array_tree/writer.rs | 85 ++++---- vortex-layout/src/layouts/chunked/mod.rs | 3 + 
vortex-layout/src/layouts/chunked/reader.rs | 3 + vortex-layout/src/layouts/dict/mod.rs | 3 + vortex-layout/src/layouts/dict/reader.rs | 15 +- vortex-layout/src/layouts/flat/mod.rs | 11 +- vortex-layout/src/layouts/foreign/mod.rs | 4 +- vortex-layout/src/layouts/struct_/mod.rs | 3 + vortex-layout/src/layouts/struct_/reader.rs | 3 + vortex-layout/src/layouts/zoned/mod.rs | 3 + vortex-layout/src/layouts/zoned/reader.rs | 3 + vortex-layout/src/reader.rs | 100 +++++++++- vortex-layout/src/vtable.rs | 7 + 19 files changed, 472 insertions(+), 231 deletions(-) diff --git a/vortex-cuda/src/layout.rs b/vortex-cuda/src/layout.rs index 858bb87e0cc..20ab3f3a50b 100644 --- a/vortex-cuda/src/layout.rs +++ b/vortex-cuda/src/layout.rs @@ -180,6 +180,7 @@ impl VTable for CudaFlat { name: Arc, segment_source: Arc, session: &VortexSession, + _ctx: &vortex::layout::LayoutReaderContext, ) -> VortexResult { Ok(Arc::new(CudaFlatReader { layout: layout.clone(), diff --git a/vortex-layout/public-api.lock b/vortex-layout/public-api.lock index 06bc6f53f26..a0f7546c481 100644 --- a/vortex-layout/public-api.lock +++ b/vortex-layout/public-api.lock @@ -80,7 +80,7 @@ pub fn vortex_layout::layouts::array_tree::ArrayTree::metadata(&Self::Layout) -> pub fn vortex_layout::layouts::array_tree::ArrayTree::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::array_tree::ArrayTree::row_count(&Self::Layout) -> u64 @@ -92,17 +92,11 @@ pub struct vortex_layout::layouts::array_tree::ArrayTreeFlatLayout impl vortex_layout::layouts::array_tree::ArrayTreeFlatLayout -pub fn 
vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::chunk_idx(&self) -> usize - pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::compact_tree(&self) -> core::option::Option<&vortex_buffer::ByteBuffer> pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::inner(&self) -> &vortex_layout::layouts::flat::FlatLayout -pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::new(vortex_layout::layouts::flat::FlatLayout, usize, vortex_buffer::ByteBuffer) -> Self - -pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::set_source(&self, alloc::sync::Arc) - -pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::source(&self) -> core::option::Option<&alloc::sync::Arc> +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::new(vortex_layout::layouts::flat::FlatLayout, vortex_buffer::ByteBuffer) -> Self impl core::clone::Clone for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout @@ -150,6 +144,8 @@ pub struct vortex_layout::layouts::array_tree::ArrayTreeLayout impl vortex_layout::layouts::array_tree::ArrayTreeLayout +pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::array_trees_dtype() -> vortex_array::dtype::DType + pub fn vortex_layout::layouts::array_tree::ArrayTreeLayout::new(vortex_layout::LayoutRef, vortex_layout::LayoutRef) -> Self impl core::clone::Clone for vortex_layout::layouts::array_tree::ArrayTreeLayout @@ -198,7 +194,7 @@ pub struct vortex_layout::layouts::array_tree::ArrayTreesSource impl vortex_layout::layouts::array_tree::ArrayTreesSource -pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::array_future(&self) -> vortex_layout::layouts::SharedArrayFuture +pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::get_for_segment(&self, vortex_layout::segments::SegmentId) -> vortex_layout::layouts::array_tree::SharedSegmentBufferFuture pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::new(vortex_layout::LayoutReaderRef) -> Self @@ -206,6 +202,8 @@ impl 
core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreesSource pub fn vortex_layout::layouts::array_tree::ArrayTreesSource::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub type vortex_layout::layouts::array_tree::SharedSegmentBufferFuture = futures_util::future::future::shared::Shared>> + pub mod vortex_layout::layouts::buffered pub struct vortex_layout::layouts::buffered::BufferedStrategy @@ -276,7 +274,7 @@ pub fn vortex_layout::layouts::chunked::Chunked::metadata(&Self::Layout) -> Self pub fn vortex_layout::layouts::chunked::Chunked::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::chunked::Chunked::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::chunked::Chunked::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::chunked::Chunked::row_count(&Self::Layout) -> u64 @@ -494,7 +492,7 @@ pub fn vortex_layout::layouts::dict::Dict::metadata(&Self::Layout) -> Self::Meta pub fn vortex_layout::layouts::dict::Dict::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::dict::Dict::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::dict::Dict::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::dict::Dict::row_count(&Self::Layout) -> u64 @@ -660,7 +658,7 @@ pub fn vortex_layout::layouts::flat::Flat::metadata(&Self::Layout) -> Self::Meta pub fn vortex_layout::layouts::flat::Flat::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::flat::Flat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, 
&vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::flat::Flat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::flat::Flat::row_count(&Self::Layout) -> u64 @@ -860,7 +858,7 @@ pub fn vortex_layout::layouts::struct_::Struct::metadata(&Self::Layout) -> Self: pub fn vortex_layout::layouts::struct_::Struct::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::struct_::Struct::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::struct_::Struct::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::struct_::Struct::row_count(&Self::Layout) -> u64 @@ -1024,7 +1022,7 @@ pub fn vortex_layout::layouts::zoned::Zoned::metadata(&Self::Layout) -> Self::Me pub fn vortex_layout::layouts::zoned::Zoned::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::zoned::Zoned::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::zoned::Zoned::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::zoned::Zoned::row_count(&Self::Layout) -> u64 @@ -1642,7 +1640,7 @@ pub fn vortex_layout::vtable::VTable::metadata(&Self::Layout) -> Self::Metadata pub fn vortex_layout::vtable::VTable::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::vtable::VTable::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn 
vortex_layout::vtable::VTable::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::vtable::VTable::row_count(&Self::Layout) -> u64 @@ -1674,7 +1672,7 @@ pub fn vortex_layout::layouts::array_tree::ArrayTree::metadata(&Self::Layout) -> pub fn vortex_layout::layouts::array_tree::ArrayTree::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::array_tree::ArrayTree::row_count(&Self::Layout) -> u64 @@ -1706,7 +1704,7 @@ pub fn vortex_layout::layouts::chunked::Chunked::metadata(&Self::Layout) -> Self pub fn vortex_layout::layouts::chunked::Chunked::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::chunked::Chunked::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::chunked::Chunked::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::chunked::Chunked::row_count(&Self::Layout) -> u64 @@ -1738,7 +1736,7 @@ pub fn vortex_layout::layouts::dict::Dict::metadata(&Self::Layout) -> Self::Meta pub fn vortex_layout::layouts::dict::Dict::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::dict::Dict::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn 
vortex_layout::layouts::dict::Dict::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::dict::Dict::row_count(&Self::Layout) -> u64 @@ -1770,7 +1768,7 @@ pub fn vortex_layout::layouts::flat::Flat::metadata(&Self::Layout) -> Self::Meta pub fn vortex_layout::layouts::flat::Flat::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::flat::Flat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::flat::Flat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::flat::Flat::row_count(&Self::Layout) -> u64 @@ -1802,7 +1800,7 @@ pub fn vortex_layout::layouts::struct_::Struct::metadata(&Self::Layout) -> Self: pub fn vortex_layout::layouts::struct_::Struct::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::struct_::Struct::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::struct_::Struct::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::struct_::Struct::row_count(&Self::Layout) -> u64 @@ -1834,7 +1832,7 @@ pub fn vortex_layout::layouts::zoned::Zoned::metadata(&Self::Layout) -> Self::Me pub fn vortex_layout::layouts::zoned::Zoned::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::zoned::Zoned::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::zoned::Zoned::new_reader(&Self::Layout, alloc::sync::Arc, 
alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::zoned::Zoned::row_count(&Self::Layout) -> u64 @@ -1902,6 +1900,8 @@ pub fn vortex_layout::LayoutAdapter::nchildren(&self) -> usize pub fn vortex_layout::LayoutAdapter::new_reader(&self, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::LayoutAdapter::new_reader_in_ctx(&self, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + pub fn vortex_layout::LayoutAdapter::row_count(&self) -> u64 pub fn vortex_layout::LayoutAdapter::segment_ids(&self) -> alloc::vec::Vec @@ -1922,13 +1922,35 @@ pub fn vortex_layout::LayoutEncodingAdapter::build(&self, &vortex_array::dtyp pub fn vortex_layout::LayoutEncodingAdapter::id(&self) -> vortex_layout::LayoutEncodingId +pub struct vortex_layout::LayoutReaderContext + +impl vortex_layout::LayoutReaderContext + +pub fn vortex_layout::LayoutReaderContext::get_override(&self, &vortex_layout::LayoutId) -> core::option::Option + +pub fn vortex_layout::LayoutReaderContext::new() -> Self + +pub fn vortex_layout::LayoutReaderContext::with_override(&self, vortex_layout::LayoutId, vortex_layout::ReaderBuilder) -> Self + +impl core::clone::Clone for vortex_layout::LayoutReaderContext + +pub fn vortex_layout::LayoutReaderContext::clone(&self) -> vortex_layout::LayoutReaderContext + +impl core::default::Default for vortex_layout::LayoutReaderContext + +pub fn vortex_layout::LayoutReaderContext::default() -> vortex_layout::LayoutReaderContext + +impl core::fmt::Debug for vortex_layout::LayoutReaderContext + +pub fn vortex_layout::LayoutReaderContext::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub struct vortex_layout::LazyReaderChildren impl vortex_layout::LazyReaderChildren pub fn vortex_layout::LazyReaderChildren::get(&self, 
usize) -> vortex_error::VortexResult<&vortex_layout::LayoutReaderRef> -pub fn vortex_layout::LazyReaderChildren::new(alloc::sync::Arc, alloc::vec::Vec, alloc::vec::Vec>, alloc::sync::Arc, vortex_session::VortexSession) -> Self +pub fn vortex_layout::LazyReaderChildren::new(alloc::sync::Arc, alloc::vec::Vec, alloc::vec::Vec>, alloc::sync::Arc, vortex_session::VortexSession, vortex_layout::LayoutReaderContext) -> Self pub struct vortex_layout::SplitRange @@ -2026,6 +2048,8 @@ pub fn vortex_layout::Layout::nchildren(&self) -> usize pub fn vortex_layout::Layout::new_reader(&self, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::Layout::new_reader_in_ctx(&self, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + pub fn vortex_layout::Layout::row_count(&self) -> u64 pub fn vortex_layout::Layout::segment_ids(&self) -> alloc::vec::Vec @@ -2052,6 +2076,8 @@ pub fn vortex_layout::LayoutAdapter::nchildren(&self) -> usize pub fn vortex_layout::LayoutAdapter::new_reader(&self, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::LayoutAdapter::new_reader_in_ctx(&self, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + pub fn vortex_layout::LayoutAdapter::row_count(&self) -> u64 pub fn vortex_layout::LayoutAdapter::segment_ids(&self) -> alloc::vec::Vec @@ -2232,7 +2258,7 @@ pub fn vortex_layout::VTable::metadata(&Self::Layout) -> Self::Metadata pub fn vortex_layout::VTable::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::VTable::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::VTable::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, 
&vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::VTable::row_count(&Self::Layout) -> u64 @@ -2264,7 +2290,7 @@ pub fn vortex_layout::layouts::array_tree::ArrayTree::metadata(&Self::Layout) -> pub fn vortex_layout::layouts::array_tree::ArrayTree::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::array_tree::ArrayTree::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::array_tree::ArrayTree::row_count(&Self::Layout) -> u64 @@ -2296,7 +2322,7 @@ pub fn vortex_layout::layouts::chunked::Chunked::metadata(&Self::Layout) -> Self pub fn vortex_layout::layouts::chunked::Chunked::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::chunked::Chunked::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::chunked::Chunked::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::chunked::Chunked::row_count(&Self::Layout) -> u64 @@ -2328,7 +2354,7 @@ pub fn vortex_layout::layouts::dict::Dict::metadata(&Self::Layout) -> Self::Meta pub fn vortex_layout::layouts::dict::Dict::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::dict::Dict::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::dict::Dict::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, 
&vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::dict::Dict::row_count(&Self::Layout) -> u64 @@ -2360,7 +2386,7 @@ pub fn vortex_layout::layouts::flat::Flat::metadata(&Self::Layout) -> Self::Meta pub fn vortex_layout::layouts::flat::Flat::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::flat::Flat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::flat::Flat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::flat::Flat::row_count(&Self::Layout) -> u64 @@ -2392,7 +2418,7 @@ pub fn vortex_layout::layouts::struct_::Struct::metadata(&Self::Layout) -> Self: pub fn vortex_layout::layouts::struct_::Struct::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::struct_::Struct::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::struct_::Struct::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn vortex_layout::layouts::struct_::Struct::row_count(&Self::Layout) -> u64 @@ -2424,7 +2450,7 @@ pub fn vortex_layout::layouts::zoned::Zoned::metadata(&Self::Layout) -> Self::Me pub fn vortex_layout::layouts::zoned::Zoned::nchildren(&Self::Layout) -> usize -pub fn vortex_layout::layouts::zoned::Zoned::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_layout::layouts::zoned::Zoned::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult pub fn 
vortex_layout::layouts::zoned::Zoned::row_count(&Self::Layout) -> u64 @@ -2449,3 +2475,5 @@ pub type vortex_layout::LayoutId = vortex_session::registry::Id pub type vortex_layout::LayoutReaderRef = alloc::sync::Arc pub type vortex_layout::LayoutRef = alloc::sync::Arc + +pub type vortex_layout::ReaderBuilder = alloc::sync::Arc<(dyn core::ops::function::Fn(&dyn vortex_layout::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + core::marker::Send + core::marker::Sync)> diff --git a/vortex-layout/src/layout.rs b/vortex-layout/src/layout.rs index 11f0afd629c..d0274845487 100644 --- a/vortex-layout/src/layout.rs +++ b/vortex-layout/src/layout.rs @@ -19,6 +19,7 @@ use vortex_session::registry::Id; use crate::LayoutEncodingId; use crate::LayoutEncodingRef; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::VTable; use crate::display::DisplayLayoutTree; @@ -63,12 +64,30 @@ pub trait Layout: 'static + Send + Sync + Debug + private::Sealed { /// Get the segment IDs for this layout. fn segment_ids(&self) -> Vec; - fn new_reader( + /// Construct a new reader for this layout, using the given dependency context. + /// + /// Implementations check `ctx` for an override registered against this layout's encoding + /// ID before dispatching to the layout's default [`crate::VTable::new_reader`]. + fn new_reader_in_ctx( &self, name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult; + + /// Convenience: construct a new reader for this layout using a fresh, empty context. + /// + /// Top-level callers (file open, tests) typically use this. Recursive calls inside + /// layout implementations should use [`Self::new_reader_in_ctx`] to propagate `ctx`. 
+ fn new_reader( + &self, + name: Arc, + segment_source: Arc, + session: &VortexSession, + ) -> VortexResult { + self.new_reader_in_ctx(name, segment_source, session, &LayoutReaderContext::new()) + } } pub trait IntoLayout { @@ -306,13 +325,18 @@ impl Layout for LayoutAdapter { V::segment_ids(&self.0) } - fn new_reader( + fn new_reader_in_ctx( &self, name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult { - V::new_reader(&self.0, name, segment_source, session) + let id = V::encoding(&self.0).id(); + if let Some(builder) = ctx.get_override(&id) { + return builder(self, name, segment_source, session, ctx); + } + V::new_reader(&self.0, name, segment_source, session, ctx) } } diff --git a/vortex-layout/src/layouts/array_tree/flat.rs b/vortex-layout/src/layouts/array_tree/flat.rs index f12faebff6e..2b87af01894 100644 --- a/vortex-layout/src/layouts/array_tree/flat.rs +++ b/vortex-layout/src/layouts/array_tree/flat.rs @@ -2,14 +2,10 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::sync::Arc; -use std::sync::OnceLock; -use vortex_array::DeserializeMetadata; -use vortex_array::SerializeMetadata; +use vortex_array::EmptyMetadata; use vortex_array::dtype::DType; -use vortex_array::dtype::TryFromBytes; use vortex_buffer::ByteBuffer; -use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_panic; @@ -19,12 +15,11 @@ use vortex_session::registry::ReadContext; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::VTable; use crate::children::LayoutChildren; -use crate::layouts::array_tree::ArrayTreesSource; -use crate::layouts::array_tree::reader::ArrayTreeFlatReader; use crate::layouts::flat::FlatLayout; use crate::segments::SegmentId; use crate::segments::SegmentSource; @@ -36,44 +31,34 @@ vtable!(ArrayTreeFlat); 
#[derive(Debug)] pub struct ArrayTreeFlatLayoutEncoding; -/// A flat layout variant that stores its compact encoding tree separately from the data segment. +/// A flat layout variant that retrieves its compact encoding tree from a sibling layout's +/// VarBin payload rather than from the data segment trailer. /// /// At write time, the compact flatbuffer (encoding tree + buffer descriptors, no stats) is -/// stored in this layout and later collected by [`super::ArrayTreeLayout`] into a shared VarBin -/// array. +/// attached to this layout in memory and later collected by [`super::ArrayTreeLayout`] into +/// a struct array keyed by segment ID. /// -/// At read time, the compact flatbuffer is retrieved from the shared [`ArrayTreesSource`] -/// (injected by the parent [`super::ArrayTreeLayout`]'s reader-construction walk) rather than -/// being parsed from the data segment. This avoids fetching the segment for decode planning -/// and prevents device-to-host copies for device-resident buffers. +/// At read time, this layout's reader looks up its compact tree in a shared +/// [`super::ArrayTreesSource`] using its own [`SegmentId`]. Construction requires that an +/// ancestor [`super::ArrayTreeLayout`] has registered a reader-builder override against +/// this encoding's ID — this layout has no useful default reader. #[derive(Clone, Debug)] pub struct ArrayTreeFlatLayout { inner: FlatLayout, - chunk_idx: usize, /// The compact flatbuffer produced at write time. Not persisted — only used to communicate /// between the leaf strategy and the collector strategy via the layout tree. compact_tree: Option, - /// Shared source for compact flatbuffers, injected by the parent [`super::ArrayTreeLayout`] - /// during reader construction. - source: OnceLock>, } impl ArrayTreeFlatLayout { /// Creates a new layout at write time with a compact flatbuffer. 
- pub fn new(inner: FlatLayout, chunk_idx: usize, compact_tree: ByteBuffer) -> Self { + pub fn new(inner: FlatLayout, compact_tree: ByteBuffer) -> Self { Self { inner, - chunk_idx, compact_tree: Some(compact_tree), - source: OnceLock::new(), } } - /// Returns the chunk index of this layout in the array trees VarBin. - pub fn chunk_idx(&self) -> usize { - self.chunk_idx - } - /// Returns the compact flatbuffer, if available (write-time only). pub fn compact_tree(&self) -> Option<&ByteBuffer> { self.compact_tree.as_ref() @@ -83,45 +68,12 @@ impl ArrayTreeFlatLayout { pub fn inner(&self) -> &FlatLayout { &self.inner } - - /// Sets the shared array trees source. Called by the parent [`super::ArrayTreeLayout`] - /// during the reader-construction injection walk. - pub fn set_source(&self, source: Arc) { - // Ignore if already set (e.g., in tests or double-init scenarios). - drop(self.source.set(source)); - } - - /// Returns the shared array trees source, if set. - pub fn source(&self) -> Option<&Arc> { - self.source.get() - } -} - -/// Metadata for [`ArrayTreeFlatLayout`]: stores the chunk index. 
-#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ArrayTreeFlatMetadata { - pub chunk_idx: u32, -} - -impl SerializeMetadata for ArrayTreeFlatMetadata { - fn serialize(self) -> Vec { - self.chunk_idx.to_le_bytes().to_vec() - } -} - -impl DeserializeMetadata for ArrayTreeFlatMetadata { - type Output = Self; - - fn deserialize(metadata: &[u8]) -> VortexResult { - let chunk_idx = u32::try_from_le_bytes(&metadata[0..4])?; - Ok(Self { chunk_idx }) - } } impl VTable for ArrayTreeFlat { type Layout = ArrayTreeFlatLayout; type Encoding = ArrayTreeFlatLayoutEncoding; - type Metadata = ArrayTreeFlatMetadata; + type Metadata = EmptyMetadata; fn id(_encoding: &Self::Encoding) -> LayoutId { LayoutId::new_static("vortex.array_tree_flat") @@ -139,10 +91,8 @@ impl VTable for ArrayTreeFlat { layout.inner.dtype() } - fn metadata(layout: &Self::Layout) -> Self::Metadata { - ArrayTreeFlatMetadata { - chunk_idx: u32::try_from(layout.chunk_idx).vortex_expect("chunk_idx must fit in u32"), - } + fn metadata(_layout: &Self::Layout) -> Self::Metadata { + EmptyMetadata } fn segment_ids(layout: &Self::Layout) -> Vec { @@ -162,24 +112,27 @@ impl VTable for ArrayTreeFlat { } fn new_reader( - layout: &Self::Layout, - name: Arc, - segment_source: Arc, - session: &VortexSession, + _layout: &Self::Layout, + _name: Arc, + _segment_source: Arc, + _session: &VortexSession, + _ctx: &LayoutReaderContext, ) -> VortexResult { - Ok(Arc::new(ArrayTreeFlatReader::new( - layout.clone(), - name, - segment_source, - session.clone(), - ))) + // ArrayTreeFlatLayout has no useful default reader. It exists to be intercepted by an + // ancestor ArrayTreeLayout that registers a reader-builder override carrying the + // shared ArrayTreesSource. If the dispatcher reached this method, no such ancestor + // was present in the layout tree. 
+ vortex_bail!( + "ArrayTreeFlatLayout requires an ancestor ArrayTreeLayout to register a reader \ + builder override; this layout cannot be read on its own" + ) } fn build( _encoding: &Self::Encoding, dtype: &DType, row_count: u64, - metadata: &ArrayTreeFlatMetadata, + _metadata: &EmptyMetadata, segment_ids: Vec, _children: &dyn LayoutChildren, ctx: &ReadContext, @@ -189,9 +142,7 @@ impl VTable for ArrayTreeFlat { } Ok(ArrayTreeFlatLayout { inner: FlatLayout::new(row_count, dtype.clone(), segment_ids[0], ctx.clone()), - chunk_idx: metadata.chunk_idx as usize, compact_tree: None, - source: OnceLock::new(), }) } } diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs index 6e96d762868..d3be5f47210 100644 --- a/vortex-layout/src/layouts/array_tree/mod.rs +++ b/vortex-layout/src/layouts/array_tree/mod.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Array tree layout: stores compact encoding tree flatbuffers (without stats) as a VarBin -//! vortex array alongside the data layout, enabling decode planning and sub-segment random -//! access without fetching data segments. +//! Array tree layout: stores compact encoding tree flatbuffers (without stats) as a struct +//! array keyed by segment ID, alongside the data layout. Enables decode planning and +//! sub-segment random access without fetching data segments. 
mod flat; mod reader; @@ -15,27 +15,40 @@ use std::sync::OnceLock; use futures::FutureExt; use vortex_array::EmptyMetadata; use vortex_array::MaskFuture; +use vortex_array::arrays::Primitive; +use vortex_array::arrays::Struct; +use vortex_array::arrays::VarBinView; +use vortex_array::arrays::struct_::StructArrayExt; use vortex_array::dtype::DType; +use vortex_array::dtype::FieldName; use vortex_array::dtype::Nullability; +use vortex_array::dtype::PType; +use vortex_array::dtype::StructFields; use vortex_array::expr::root; +use vortex_buffer::ByteBuffer; +use vortex_error::SharedVortexResult; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_err; use vortex_error::vortex_panic; use vortex_session::VortexSession; use vortex_session::registry::ReadContext; +use vortex_utils::aliases::hash_map::HashMap; pub use self::flat::ArrayTreeFlatLayout; pub use self::flat::ArrayTreeFlatLayoutEncoding; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::VTable; use crate::children::LayoutChildren; use crate::children::OwnedLayoutChildren; use crate::layouts::array_tree::flat::ArrayTreeFlat; +use crate::layouts::array_tree::reader::ArrayTreeFlatReader; use crate::layouts::array_tree::reader::ArrayTreeReader; use crate::segments::SegmentId; use crate::segments::SegmentSource; @@ -47,15 +60,16 @@ vtable!(ArrayTree); #[derive(Debug)] pub struct ArrayTreeLayoutEncoding; -/// Collects compact encoding tree flatbuffers from [`ArrayTreeFlatLayout`] descendants and stores -/// them as a VarBin array in an auxiliary child layout. +/// Collects compact encoding tree flatbuffers from [`ArrayTreeFlatLayout`] descendants and +/// stores them as a struct array (`{segment_id, compact_tree}`) in an auxiliary child layout. 
/// /// # Children /// /// - Child 0 (`Transparent "data"`): The actual data layout tree (may contain any intermediate /// layouts like `ChunkedLayout`, `DictLayout`, etc., with [`ArrayTreeFlatLayout`] at the leaves). -/// - Child 1 (`Auxiliary "array_trees"`): A VarBin array of compact `Array` flatbuffers, one per -/// [`ArrayTreeFlatLayout`] leaf in depth-first order. +/// - Child 1 (`Auxiliary "array_trees"`): A struct array with two fields: +/// - `segment_id: u32` — the segment ID of the data leaf +/// - `compact_tree: bytes` — the compact encoding-tree flatbuffer for that leaf #[derive(Clone, Debug)] pub struct ArrayTreeLayout { dtype: DType, @@ -70,6 +84,24 @@ impl ArrayTreeLayout { children: OwnedLayoutChildren::layout_children(vec![data, array_trees]), } } + + /// Returns the dtype of the auxiliary `array_trees` child. + pub fn array_trees_dtype() -> DType { + DType::Struct( + StructFields::new( + vec![ + FieldName::from("segment_id"), + FieldName::from("compact_tree"), + ] + .into(), + vec![ + DType::Primitive(PType::U32, Nullability::NonNullable), + DType::Binary(Nullability::NonNullable), + ], + ), + Nullability::NonNullable, + ) + } } impl VTable for ArrayTree { @@ -108,9 +140,7 @@ impl VTable for ArrayTree { fn child(layout: &Self::Layout, idx: usize) -> VortexResult { match idx { 0 => layout.children.child(0, &layout.dtype), - 1 => layout - .children - .child(1, &DType::Binary(Nullability::NonNullable)), + 1 => layout.children.child(1, &Self::Layout::array_trees_dtype()), _ => vortex_bail!("ArrayTreeLayout has 2 children, got index {}", idx), } } @@ -128,29 +158,45 @@ impl VTable for ArrayTree { name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult { - // Walk the data child to find all ArrayTreeFlatLayouts and inject the array_trees source. 
- let data_child = Self::child(layout, 0)?; + // Construct the array_trees auxiliary reader using the unmodified incoming context — + // the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides. let array_trees_child = Self::child(layout, 1)?; - - // Create a reader for the array_trees VarBin child so the source can lazily read it. - let trees_reader = array_trees_child.new_reader( + let trees_reader = array_trees_child.new_reader_in_ctx( Arc::from(format!("{name}/array_trees")), Arc::clone(&segment_source), session, + ctx, )?; let source = Arc::new(ArrayTreesSource::new(trees_reader)); - // Inject the shared source into all ArrayTreeFlatLayout descendants. - for layout_ref in data_child.depth_first_traversal() { - let layout_ref = layout_ref?; - if let Some(atf) = layout_ref.as_opt::() { - atf.set_source(Arc::clone(&source)); - } - } + // Derive a context that intercepts ArrayTreeFlat construction with our source-injecting + // builder. The data subtree (and any nested layouts within it) sees this context, so + // any ArrayTreeFlat descendant — no matter how deep — gets the source. + let derived_ctx = ctx.with_override( + ArrayTreeFlat::id(&ArrayTreeFlatLayoutEncoding), + Arc::new(move |layout, name, segs, sess, _ctx| { + let atf = layout + .as_opt::() + .vortex_expect("ArrayTreeFlat override applied to wrong layout encoding"); + Ok(Arc::new(ArrayTreeFlatReader::new( + atf.clone(), + name, + segs, + sess.clone(), + Arc::clone(&source), + ))) + }), + ); - // Create a transparent reader that delegates to the data child. - let data_reader = data_child.new_reader(Arc::clone(&name), segment_source, session)?; + let data_child = Self::child(layout, 0)?; + let data_reader = data_child.new_reader_in_ctx( + Arc::clone(&name), + segment_source, + session, + &derived_ctx, + )?; Ok(Arc::new(ArrayTreeReader::new(name, data_reader))) } @@ -181,17 +227,25 @@ impl VTable for ArrayTree { } } -/// Shared source for compact array tree flatbuffers. 
+/// Shared source for compact array tree flatbuffers, keyed by [`SegmentId`]. /// -/// Holds a reader for the array_trees child layout and provides lazy shared access -/// to the decoded VarBin array. The first reader to need it triggers the read; all -/// subsequent readers reuse the shared result. +/// Holds a reader for the array_trees child layout. On first lookup, materializes the full +/// struct array and builds a `HashMap` for direct lookup. The map is +/// shared across all leaves of the parent [`ArrayTreeLayout`] via a `OnceLock`-cached future. pub struct ArrayTreesSource { reader: LayoutReaderRef, - /// Lazily initialized shared future for the full VarBin array. - array: OnceLock, + /// Lazily initialized shared future for the segment-keyed lookup map. + map: OnceLock, } +type SharedSegmentMapFuture = futures::future::Shared< + futures::future::BoxFuture<'static, SharedVortexResult>>>, +>; + +/// Future returned by [`ArrayTreesSource::get_for_segment`]. +pub type SharedSegmentBufferFuture = + futures::future::Shared>>; + impl std::fmt::Debug for ArrayTreesSource { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ArrayTreesSource").finish_non_exhaustive() @@ -203,18 +257,37 @@ impl ArrayTreesSource { pub fn new(reader: LayoutReaderRef) -> Self { Self { reader, - array: OnceLock::new(), + map: OnceLock::new(), } } - /// Returns a shared future that resolves to the full VarBin array of compact trees. - pub fn array_future(&self) -> SharedArrayFuture { - self.array + /// Returns a future that resolves to the compact-tree byte buffer for the given data-leaf + /// segment ID. + /// + /// The first call triggers materialization of the entire struct array and the + /// segment-id-keyed lookup map; subsequent calls reuse the cached map. 
+ pub fn get_for_segment(&self, segment_id: SegmentId) -> SharedSegmentBufferFuture { + let map_fut = self.map_future(); + async move { + let map = map_fut.await?; + map.get(&segment_id).cloned().ok_or_else(|| { + Arc::new(vortex_err!( + "no compact array tree found for segment id {}", + *segment_id + )) + }) + } + .boxed() + .shared() + } + + fn map_future(&self) -> SharedSegmentMapFuture { + self.map .get_or_init(|| { let row_count = self.reader.row_count(); let reader = Arc::clone(&self.reader); async move { - reader + let array = reader .projection_evaluation( &(0..row_count), &root(), @@ -222,9 +295,11 @@ impl ArrayTreesSource { usize::try_from(row_count) .vortex_expect("row count must fit in usize"), ), - )? + ) + .map_err(Arc::new)? .await - .map_err(Arc::new) + .map_err(Arc::new)?; + build_segment_map(array).map(Arc::new).map_err(Arc::new) } .boxed() .shared() @@ -233,4 +308,35 @@ impl ArrayTreesSource { } } -use crate::layouts::SharedArrayFuture; +/// Decode the array_trees struct array into a `HashMap`. 
+fn build_segment_map( + array: vortex_array::ArrayRef, +) -> VortexResult> { + let struct_array = array + .try_downcast::() + .map_err(|_| vortex_err!("array_trees is not a Struct array"))?; + + let segment_ids_field = struct_array + .unmasked_field_by_name_opt("segment_id") + .ok_or_else(|| vortex_err!("array_trees missing 'segment_id' field"))?; + let trees_field = struct_array + .unmasked_field_by_name_opt("compact_tree") + .ok_or_else(|| vortex_err!("array_trees missing 'compact_tree' field"))?; + + let segment_ids = segment_ids_field + .clone() + .try_downcast::() + .map_err(|_| vortex_err!("array_trees 'segment_id' field is not Primitive"))?; + let segment_ids = segment_ids.as_slice::(); + + let trees = trees_field + .clone() + .try_downcast::() + .map_err(|_| vortex_err!("array_trees 'compact_tree' field is not a VarBinView"))?; + + let mut map = HashMap::with_capacity(segment_ids.len()); + for (idx, &seg) in segment_ids.iter().enumerate() { + map.insert(SegmentId::from(seg), trees.bytes_at(idx)); + } + Ok(map) +} diff --git a/vortex-layout/src/layouts/array_tree/reader.rs b/vortex-layout/src/layouts/array_tree/reader.rs index b697b51163d..13ba90dc2e0 100644 --- a/vortex-layout/src/layouts/array_tree/reader.rs +++ b/vortex-layout/src/layouts/array_tree/reader.rs @@ -9,7 +9,6 @@ use std::sync::Arc; use futures::FutureExt; use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; -use vortex_array::arrays::VarBinView; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; @@ -22,6 +21,7 @@ use vortex_session::VortexSession; use crate::LayoutReader; use crate::LayoutReaderRef; use crate::layouts::SharedArrayFuture; +use crate::layouts::array_tree::ArrayTreesSource; use crate::layouts::array_tree::flat::ArrayTreeFlatLayout; use crate::reader::ArrayFuture; use crate::reader::SplitRange; @@ -29,9 +29,10 @@ use crate::segments::SegmentSource; /// Transparent reader for [`super::ArrayTreeLayout`]. 
/// -/// Delegates all operations to the data child reader. The array_trees auxiliary child -/// is consumed at construction time (by [`super::ArrayTreeLayout::new_reader`]) to inject -/// compact flatbuffers into [`ArrayTreeFlatLayout`] descendants. +/// Delegates all operations to the data child reader. The array_trees auxiliary child is +/// consumed at construction time (by `ArrayTreeLayout`'s `new_reader`) to register a +/// reader-builder override that injects the shared source into all +/// [`ArrayTreeFlatLayout`] descendants. pub struct ArrayTreeReader { name: Arc, data_reader: LayoutReaderRef, @@ -105,14 +106,16 @@ const EXPR_EVAL_THRESHOLD: f64 = 0.2; /// Reader for [`ArrayTreeFlatLayout`]. /// -/// Similar to [`super::super::flat::reader::FlatReader`] but obtains its compact encoding tree -/// from the shared [`super::ArrayTreesSource`] rather than from inline layout metadata or -/// the data segment. +/// Constructed only via the reader-builder override registered by an ancestor +/// [`super::ArrayTreeLayout`]. Looks up its compact encoding tree from the shared +/// [`ArrayTreesSource`] keyed by its own segment ID, then concurrently fetches the data +/// segment to produce a decoded array. pub struct ArrayTreeFlatReader { layout: ArrayTreeFlatLayout, name: Arc, segment_source: Arc, session: VortexSession, + source: Arc, } impl ArrayTreeFlatReader { @@ -121,51 +124,36 @@ impl ArrayTreeFlatReader { name: Arc, segment_source: Arc, session: VortexSession, + source: Arc, ) -> Self { Self { layout, name, segment_source, session, + source, } } - /// Register the segment request and return a future that resolves to the deserialized array. - /// - /// If a shared [`super::ArrayTreesSource`] has been injected, the compact flatbuffer is - /// obtained from there (concurrently with the data segment fetch). Otherwise falls back - /// to parsing from the segment (like FlatReader). 
+ /// Resolve the compact tree from the shared source and the data segment from the segment + /// source concurrently, then combine them into a decoded array. fn array_future(&self) -> SharedArrayFuture { let row_count = usize::try_from(self.layout.inner().row_count()) .vortex_expect("row count must fit in usize"); - let segment_fut = self - .segment_source - .request(self.layout.inner().segment_id()); + let segment_id = self.layout.inner().segment_id(); + let segment_fut = self.segment_source.request(segment_id); + let compact_tree_fut = self.source.get_for_segment(segment_id); + let ctx = self.layout.inner().array_ctx().clone(); let session = self.session.clone(); let dtype = self.layout.inner().dtype().clone(); - // If a source has been injected, resolve the compact tree from it. - // Otherwise, fall back to parsing from the segment (like FlatReader). - let source_future = self.layout.source().map(|s| s.array_future()); - let chunk_idx = self.layout.chunk_idx(); - async move { - let segment = segment_fut.await?; - let parts = if let Some(source_future) = source_future { - // Resolve the VarBin array of compact trees (shared, read once). - let trees_array = source_future.await?; - let trees = trees_array.try_downcast::().map_err(|_| { - Arc::new(vortex_error::vortex_err!( - "array_trees child is not a VarBinView array" - )) - })?; - let compact_tree = trees.bytes_at(chunk_idx); - SerializedArray::from_flatbuffer_and_segment(compact_tree, segment)? - } else { - SerializedArray::try_from(segment)? 
- }; + let segment_fut = async move { segment_fut.await.map_err(Arc::new) }; + let (segment, compact_tree) = futures::try_join!(segment_fut, compact_tree_fut)?; + let parts = SerializedArray::from_flatbuffer_and_segment(compact_tree, segment) + .map_err(Arc::new)?; parts .decode(&dtype, row_count, &ctx, &session) .map_err(Arc::new) diff --git a/vortex-layout/src/layouts/array_tree/writer.rs b/vortex-layout/src/layouts/array_tree/writer.rs index c521e02ecd2..e1025ff43b9 100644 --- a/vortex-layout/src/layouts/array_tree/writer.rs +++ b/vortex-layout/src/layouts/array_tree/writer.rs @@ -2,18 +2,21 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::sync::Arc; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; use async_trait::async_trait; use futures::StreamExt as _; use vortex_array::ArrayContext; use vortex_array::IntoArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; use vortex_array::builders::ArrayBuilder; use vortex_array::builders::VarBinViewBuilder; use vortex_array::dtype::DType; +use vortex_array::dtype::FieldName; use vortex_array::dtype::Nullability; use vortex_array::serde::SerializeOptions; +use vortex_array::validity::Validity; +use vortex_buffer::Buffer; use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -28,6 +31,7 @@ use crate::layouts::array_tree::flat::ArrayTreeFlat; use crate::layouts::array_tree::flat::ArrayTreeFlatLayout; use crate::layouts::flat::FlatLayout; use crate::layouts::flat::writer::FlatLayoutStrategy; +use crate::segments::SegmentId; use crate::segments::SegmentSinkRef; use crate::sequence::SendableSequentialStream; use crate::sequence::SequencePointer; @@ -37,18 +41,15 @@ use crate::sequence::SequentialArrayStreamExt; /// /// Returns `(collector, leaf)` where: /// - `leaf` replaces [`FlatLayoutStrategy`] in the data pipeline — it serializes chunks and -/// produces compact flatbuffers. 
-/// - `collector` wraps the data pipeline — after data is written, it collects compact flatbuffers -/// from the layout tree and writes them as a VarBin array. +/// produces compact flatbuffers attached to [`ArrayTreeFlatLayout`]. +/// - `collector` wraps the data pipeline — after data is written, it walks the layout tree to +/// collect compact flatbuffers from all [`ArrayTreeFlatLayout`] leaves and writes them as a +/// struct array (`{segment_id, compact_tree}`) via the configured `array_trees_strategy`. pub fn writer( flat: FlatLayoutStrategy, array_trees_strategy: Arc, ) -> (ArrayTreeCollectorStrategy, ArrayTreeFlatStrategy) { - let chunk_counter = Arc::new(AtomicUsize::new(0)); - let leaf = ArrayTreeFlatStrategy { - flat, - chunk_counter, - }; + let leaf = ArrayTreeFlatStrategy { flat }; let collector = ArrayTreeCollectorStrategy { child: None, array_trees_strategy, @@ -58,13 +59,12 @@ pub fn writer( /// Leaf strategy (TX) that replaces [`FlatLayoutStrategy`]. /// -/// For each chunk, it delegates serialization to the inner [`FlatLayoutStrategy`], also produces -/// a compact flatbuffer (encoding tree + buffer descriptors, no stats), and returns an -/// [`ArrayTreeFlatLayout`] with the compact tree attached. +/// For each chunk, it produces both the compact flatbuffer (encoding tree + buffer +/// descriptors, no stats) and the full data segment, and returns an [`ArrayTreeFlatLayout`] +/// with the compact tree attached for later collection. #[derive(Clone)] pub struct ArrayTreeFlatStrategy { flat: FlatLayoutStrategy, - chunk_counter: Arc, } #[async_trait] @@ -107,7 +107,7 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { }, )?; - // Full serialization (with stats) for the data segment. + // Full serialization for the data segment. 
let buffers = chunk.serialize( &ctx, session, @@ -123,8 +123,6 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { vortex_bail!("array tree flat layout received stream with more than a single chunk"); }; - let chunk_idx = self.chunk_counter.fetch_add(1, Ordering::Relaxed); - Ok(ArrayTreeFlatLayout::new( FlatLayout::new( row_count, @@ -132,7 +130,6 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { segment_id, ReadContext::new(ctx.to_ids()), ), - chunk_idx, compact_tree, ) .into_layout()) @@ -145,9 +142,10 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { /// Collector strategy (RX) that wraps the data pipeline. /// -/// After the data child completes, walks the returned layout tree to extract compact flatbuffers -/// from all [`ArrayTreeFlatLayout`] leaves, builds a VarBin array, and writes it as an -/// auxiliary child. +/// After the data child completes, walks the returned layout tree to extract compact +/// flatbuffers and segment IDs from all [`ArrayTreeFlatLayout`] leaves, builds a struct +/// array of `{segment_id, compact_tree}`, and writes it as an auxiliary child via the +/// configured `array_trees_strategy`. pub struct ArrayTreeCollectorStrategy { child: Option>, array_trees_strategy: Arc, @@ -188,29 +186,48 @@ impl LayoutStrategy for ArrayTreeCollectorStrategy { ) .await?; - // Walk the layout tree to collect compact flatbuffers from ArrayTreeFlatLayout leaves. - let mut compact_trees: Vec<(usize, ByteBuffer)> = Vec::new(); + // Walk the layout tree to collect (segment_id, compact_tree) pairs from + // ArrayTreeFlatLayout leaves. + let mut entries: Vec<(SegmentId, ByteBuffer)> = Vec::new(); for layout_ref in data_layout.depth_first_traversal() { let layout_ref = layout_ref?; if let Some(atf) = layout_ref.as_opt::() && let Some(tree) = atf.compact_tree() { - compact_trees.push((atf.chunk_idx(), tree.clone())); + entries.push((atf.inner().segment_id(), tree.clone())); } } - // Sort by chunk index to ensure deterministic order. 
- compact_trees.sort_by_key(|(idx, _)| *idx); + // Sort by segment ID so the on-disk order matches segment-write order — this gives + // good locality and predictable lookup-table layout. + entries.sort_by_key(|(seg, _)| *seg); - // Build a VarBin array of compact flatbuffers. - let dtype = DType::Binary(Nullability::NonNullable); - let mut builder = VarBinViewBuilder::with_capacity(dtype.clone(), compact_trees.len()); - for (_, tree) in &compact_trees { - builder.append_value(tree.as_slice()); - } - let array_trees_array = builder.finish().into_array(); + // Build a struct array of {segment_id: u32, compact_tree: bytes}. + let nrows = entries.len(); + let segment_ids: Buffer = entries.iter().map(|(seg, _)| **seg).collect(); + let segment_ids_array = + PrimitiveArray::new(segment_ids, Validity::NonNullable).into_array(); - // Write the VarBin array via the array_trees strategy. + let mut tree_builder = + VarBinViewBuilder::with_capacity(DType::Binary(Nullability::NonNullable), nrows); + for (_, tree) in &entries { + tree_builder.append_value(tree.as_slice()); + } + let trees_array = tree_builder.finish().into_array(); + + let array_trees_array = StructArray::try_new( + vec![ + FieldName::from("segment_id"), + FieldName::from("compact_tree"), + ] + .into(), + vec![segment_ids_array, trees_array], + nrows, + Validity::NonNullable, + )? + .into_array(); + + // Write the struct array via the array_trees strategy. 
let trees_stream = array_trees_array .to_array_stream() .sequenced(eof.split_off()); diff --git a/vortex-layout/src/layouts/chunked/mod.rs b/vortex-layout/src/layouts/chunked/mod.rs index 82fcb650607..e8dd3c06dc0 100644 --- a/vortex-layout/src/layouts/chunked/mod.rs +++ b/vortex-layout/src/layouts/chunked/mod.rs @@ -16,6 +16,7 @@ use vortex_session::registry::ReadContext; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::VTable; @@ -74,12 +75,14 @@ impl VTable for Chunked { name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult { Ok(Arc::new(ChunkedReader::new( layout.clone(), name, segment_source, session, + ctx.clone(), ))) } diff --git a/vortex-layout/src/layouts/chunked/reader.rs b/vortex-layout/src/layouts/chunked/reader.rs index 94ab3272cd7..486b04a7313 100644 --- a/vortex-layout/src/layouts/chunked/reader.rs +++ b/vortex-layout/src/layouts/chunked/reader.rs @@ -25,6 +25,7 @@ use vortex_error::vortex_panic; use vortex_mask::Mask; use vortex_session::VortexSession; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LazyReaderChildren; use crate::layouts::chunked::ChunkedLayout; @@ -47,6 +48,7 @@ impl ChunkedReader { name: Arc, segment_source: Arc, session: &VortexSession, + ctx: LayoutReaderContext, ) -> Self { let nchildren = layout.nchildren(); @@ -66,6 +68,7 @@ impl ChunkedReader { names, segment_source, session.clone(), + ctx, ); Self { diff --git a/vortex-layout/src/layouts/dict/mod.rs b/vortex-layout/src/layouts/dict/mod.rs index 7928b447fa9..c91921c5e20 100644 --- a/vortex-layout/src/layouts/dict/mod.rs +++ b/vortex-layout/src/layouts/dict/mod.rs @@ -24,6 +24,7 @@ use vortex_session::registry::ReadContext; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use 
crate::LayoutRef; use crate::VTable; @@ -92,12 +93,14 @@ impl VTable for Dict { name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult { Ok(Arc::new(DictReader::try_new( layout.clone(), name, segment_source, session.clone(), + ctx.clone(), )?)) } diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 96f12d53ece..b6377ee5a2f 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -31,6 +31,7 @@ use vortex_utils::aliases::dash_map::DashMap; use super::DictLayout; use crate::LayoutReader; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::SplitRange; use crate::layouts::SharedArrayFuture; @@ -58,17 +59,21 @@ impl DictReader { name: Arc, segment_source: Arc, session: VortexSession, + ctx: LayoutReaderContext, ) -> VortexResult { let values_len = usize::try_from(layout.values.row_count())?; - let values = layout.values.new_reader( + let values = layout.values.new_reader_in_ctx( format!("{name}.values").into(), Arc::clone(&segment_source), &session, + &ctx, + )?; + let codes = layout.codes.new_reader_in_ctx( + format!("{name}.codes").into(), + segment_source, + &session, + &ctx, )?; - let codes = - layout - .codes - .new_reader(format!("{name}.codes").into(), segment_source, &session)?; Ok(Self { layout, diff --git a/vortex-layout/src/layouts/flat/mod.rs b/vortex-layout/src/layouts/flat/mod.rs index b167e633170..8bf12e7be29 100644 --- a/vortex-layout/src/layouts/flat/mod.rs +++ b/vortex-layout/src/layouts/flat/mod.rs @@ -4,9 +4,7 @@ mod reader; pub mod writer; -use std::env; use std::sync::Arc; -use std::sync::LazyLock; use vortex_array::DeserializeMetadata; use vortex_array::ProstMetadata; @@ -21,6 +19,7 @@ use vortex_session::registry::ReadContext; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use 
crate::LayoutRef; use crate::VTable; @@ -30,13 +29,6 @@ use crate::segments::SegmentId; use crate::segments::SegmentSource; use crate::vtable; -/// Check if inline array node is enabled. -pub(super) fn flat_layout_inline_array_node() -> bool { - static FLAT_LAYOUT_INLINE_ARRAY_NODE: LazyLock = - LazyLock::new(|| env::var("FLAT_LAYOUT_INLINE_ARRAY_NODE").is_ok_and(|v| v == "1")); - *FLAT_LAYOUT_INLINE_ARRAY_NODE -} - vtable!(Flat); impl VTable for Flat { @@ -87,6 +79,7 @@ impl VTable for Flat { name: Arc, segment_source: Arc, session: &VortexSession, + _ctx: &LayoutReaderContext, ) -> VortexResult { Ok(Arc::new(FlatReader::new( layout.clone(), diff --git a/vortex-layout/src/layouts/foreign/mod.rs b/vortex-layout/src/layouts/foreign/mod.rs index 9abacc8b26e..6004fdc015c 100644 --- a/vortex-layout/src/layouts/foreign/mod.rs +++ b/vortex-layout/src/layouts/foreign/mod.rs @@ -17,6 +17,7 @@ use crate::LayoutChildren; use crate::LayoutEncoding; use crate::LayoutEncodingId; use crate::LayoutEncodingRef; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::segments::SegmentId; @@ -166,11 +167,12 @@ impl Layout for ForeignLayout { self.segment_ids.clone() } - fn new_reader( + fn new_reader_in_ctx( &self, _name: Arc, _segment_source: Arc, _session: &VortexSession, + _ctx: &LayoutReaderContext, ) -> VortexResult { vortex_bail!( "Cannot read unknown layout encoding '{}'", diff --git a/vortex-layout/src/layouts/struct_/mod.rs b/vortex-layout/src/layouts/struct_/mod.rs index 29843f096d8..4b9e2ab39a7 100644 --- a/vortex-layout/src/layouts/struct_/mod.rs +++ b/vortex-layout/src/layouts/struct_/mod.rs @@ -25,6 +25,7 @@ use vortex_session::registry::ReadContext; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::VTable; @@ -114,12 +115,14 @@ impl VTable for Struct { name: Arc, segment_source: Arc, session: &VortexSession, + 
ctx: &LayoutReaderContext, ) -> VortexResult { Ok(Arc::new(StructReader::try_new( layout.clone(), name, segment_source, session.session(), + ctx.clone(), )?)) } diff --git a/vortex-layout/src/layouts/struct_/reader.rs b/vortex-layout/src/layouts/struct_/reader.rs index 156c7456ed6..75f2369c114 100644 --- a/vortex-layout/src/layouts/struct_/reader.rs +++ b/vortex-layout/src/layouts/struct_/reader.rs @@ -41,6 +41,7 @@ use vortex_utils::aliases::hash_map::HashMap; use crate::ArrayFuture; use crate::LayoutReader; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LazyReaderChildren; use crate::SplitRange; @@ -68,6 +69,7 @@ impl StructReader { name: Arc, segment_source: Arc, session: VortexSession, + ctx: LayoutReaderContext, ) -> VortexResult { let struct_dt = layout.struct_fields(); @@ -99,6 +101,7 @@ impl StructReader { names, Arc::clone(&segment_source), session.clone(), + ctx, ); // Create an expanded root expression that contains all fields of the struct. diff --git a/vortex-layout/src/layouts/zoned/mod.rs b/vortex-layout/src/layouts/zoned/mod.rs index 697e55e968c..c8e5bcd9af9 100644 --- a/vortex-layout/src/layouts/zoned/mod.rs +++ b/vortex-layout/src/layouts/zoned/mod.rs @@ -42,6 +42,7 @@ use vortex_session::registry::ReadContext; use crate::LayoutChildType; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::VTable; @@ -115,12 +116,14 @@ impl VTable for Zoned { name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult { Ok(Arc::new(ZonedReader::try_new( layout.clone(), name, segment_source, session.clone(), + ctx.clone(), )?)) } diff --git a/vortex-layout/src/layouts/zoned/reader.rs b/vortex-layout/src/layouts/zoned/reader.rs index a73ed6a547c..13cd0a9d40a 100644 --- a/vortex-layout/src/layouts/zoned/reader.rs +++ b/vortex-layout/src/layouts/zoned/reader.rs @@ -21,6 +21,7 @@ use vortex_mask::Mask; use 
vortex_session::VortexSession; use crate::LayoutReader; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LazyReaderChildren; use crate::SplitRange; @@ -42,6 +43,7 @@ impl ZonedReader { name: Arc, segment_source: Arc, session: VortexSession, + ctx: LayoutReaderContext, ) -> VortexResult { let dtypes = vec![ layout.dtype.clone(), @@ -54,6 +56,7 @@ impl ZonedReader { names, Arc::clone(&segment_source), session.clone(), + ctx, )); Ok(Self { diff --git a/vortex-layout/src/reader.rs b/vortex-layout/src/reader.rs index b40339e3946..6708dca4b41 100644 --- a/vortex-layout/src/reader.rs +++ b/vortex-layout/src/reader.rs @@ -20,7 +20,10 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_mask::Mask; use vortex_session::VortexSession; +use vortex_utils::aliases::hash_map::HashMap; +use crate::Layout; +use crate::LayoutId; use crate::children::LayoutChildren; use crate::segments::SegmentSource; @@ -181,6 +184,7 @@ pub struct LazyReaderChildren { names: Vec>, segment_source: Arc, session: VortexSession, + ctx: LayoutReaderContext, // TODO(ngates): we may want a hash map of some sort here? cache: Vec>, } @@ -192,6 +196,7 @@ impl LazyReaderChildren { names: Vec>, segment_source: Arc, session: VortexSession, + ctx: LayoutReaderContext, ) -> Self { let nchildren = children.nchildren(); let cache = (0..nchildren).map(|_| OnceCell::new()).collect(); @@ -201,6 +206,7 @@ impl LazyReaderChildren { names, segment_source, session, + ctx, cache, } } @@ -213,11 +219,103 @@ impl LazyReaderChildren { self.cache[idx].get_or_try_init(|| { let dtype = &self.dtypes[idx]; let child = self.children.child(idx, dtype)?; - child.new_reader( + child.new_reader_in_ctx( Arc::clone(&self.names[idx]), Arc::clone(&self.segment_source), &self.session, + &self.ctx, ) }) } } + +/// A function that constructs a [`LayoutReader`] for a layout, given the same arguments as +/// [`crate::VTable::new_reader`]. 
+/// +/// Used as a value in [`LayoutReaderContext`] to override how readers are constructed for a +/// particular [`LayoutId`], allowing parent layouts to inject dependencies (such as shared +/// sources) into the reader construction of descendant leaves without each descendant having +/// to know about the parent's data. +pub type ReaderBuilder = Arc< + dyn Fn( + &dyn Layout, + Arc, + Arc, + &VortexSession, + &LayoutReaderContext, + ) -> VortexResult + + Send + + Sync, +>; + +/// Per-reader-tree dependency context, threaded through [`crate::VTable::new_reader`]. +/// +/// Holds a registry of [`ReaderBuilder`] overrides keyed by [`LayoutId`]. When a layout +/// reader is being constructed, the dispatcher checks this context first — if an override +/// is registered for the layout's encoding ID, the override is invoked instead of the +/// default [`crate::VTable::new_reader`] implementation. +/// +/// Contexts are layered: [`Self::with_override`] returns a derived context that overlays the +/// original. The original is unchanged, allowing concurrent reader-tree constructions to +/// each have their own derived context without races. +/// +/// Contexts are cheap to clone (shared via `Arc`), so they can be captured by lazy children +/// helpers and survive any single stack frame. +#[derive(Clone, Default)] +pub struct LayoutReaderContext { + inner: Arc, +} + +#[derive(Default)] +struct ContextInner { + overrides: HashMap, + parent: Option>, +} + +impl LayoutReaderContext { + /// Creates a new, empty context. + pub fn new() -> Self { + Self::default() + } + + /// Returns a new context that overlays this one with an override for `id`. + /// + /// The original context is unchanged; descendants constructed via the returned context + /// will see the override. Lookups in the returned context check the new override first, + /// then fall through to the parent chain. 
+ pub fn with_override(&self, id: LayoutId, builder: ReaderBuilder) -> Self { + let mut overrides = HashMap::new(); + overrides.insert(id, builder); + Self { + inner: Arc::new(ContextInner { + overrides, + parent: Some(Arc::clone(&self.inner)), + }), + } + } + + /// Returns the most-recently-overridden [`ReaderBuilder`] for `id`, or `None` if no + /// override is registered for that ID anywhere in the chain. + pub fn get_override(&self, id: &LayoutId) -> Option { + let mut current = Some(&*self.inner); + while let Some(c) = current { + if let Some(builder) = c.overrides.get(id) { + return Some(Arc::clone(builder)); + } + current = c.parent.as_deref(); + } + None + } +} + +impl std::fmt::Debug for LayoutReaderContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LayoutReaderContext") + .field( + "overrides", + &self.inner.overrides.keys().collect::>(), + ) + .field("has_parent", &self.inner.parent.is_some()) + .finish() + } +} diff --git a/vortex-layout/src/vtable.rs b/vortex-layout/src/vtable.rs index 9bb83fcf643..019ce9e9981 100644 --- a/vortex-layout/src/vtable.rs +++ b/vortex-layout/src/vtable.rs @@ -19,6 +19,7 @@ use crate::LayoutChildType; use crate::LayoutEncoding; use crate::LayoutEncodingRef; use crate::LayoutId; +use crate::LayoutReaderContext; use crate::LayoutReaderRef; use crate::LayoutRef; use crate::children::LayoutChildren; @@ -58,11 +59,17 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug { fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType; /// Create a new reader for the layout. + /// + /// `ctx` is the dependency context for the reader-tree being constructed. Implementations + /// that recursively construct child readers must pass `ctx` (or a derived context) through + /// to those calls; see [`LayoutReaderContext::with_override`] for how parents inject + /// dependencies into descendant reader construction. 
fn new_reader( layout: &Self::Layout, name: Arc, segment_source: Arc, session: &VortexSession, + ctx: &LayoutReaderContext, ) -> VortexResult; /// Construct a new [`Layout`] from the provided parts. From 8c4f36070946f8e50ad298cb378b40068549dfed Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Mon, 18 May 2026 12:16:01 +0100 Subject: [PATCH 03/10] tests Signed-off-by: Onur Satici --- vortex-file/src/tests.rs | 156 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 93efccb20e3..54723b25957 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -1984,3 +1984,159 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { Ok(()) } + +#[tokio::test] +#[cfg_attr(miri, ignore)] +async fn test_segment_ordering_array_trees_consolidated_and_after_data() -> VortexResult<()> { + // Multi-column struct with enough rows to produce chunked data, so each column will have + // many ArrayTreeFlat leaves. The collector should consolidate their compact trees into a + // single segment per ArrayTreeLayout. + let n = 100_000; + let values: Vec<&str> = (0..n).map(|i| ["alpha", "beta", "gamma"][i % 3]).collect(); + let strings = VarBinArray::from(values).into_array(); + let numbers = PrimitiveArray::from_iter(0..n as i32).into_array(); + let floats = PrimitiveArray::from_iter((0..n).map(|i| i as f64 * 0.1)).into_array(); + + let st = StructArray::from_fields(&[ + ("strings", strings), + ("numbers", numbers), + ("floats", floats), + ]) + .unwrap(); + + let mut buf = ByteBufferMut::empty(); + let summary = SESSION + .write_options() + .write(&mut buf, st.into_array().to_array_stream()) + .await?; + + let footer = summary.footer(); + let segment_specs = footer.segment_map(); + let root = footer.layout(); + + // Walk the layout tree and validate every ArrayTreeLayout we find. + // + // For each ArrayTreeLayout we assert two invariants: + // 1. 
**Consolidation:** the auxiliary `array_trees` child (child idx 1) writes exactly + // one segment — all compact flatbuffers from the leaves should land in a single + // contiguous payload, not be scattered across the file. + // 2. **Per-column ordering:** every data segment under child 0 appears before the + // array_trees segment under child 1. + fn check_array_tree_layouts( + layout: &dyn Layout, + segment_specs: &[SegmentSpec], + found_any: &mut bool, + ) { + if layout.encoding_id().as_ref() == "vortex.array_tree" { + *found_any = true; + + let data_child = layout.child(0).unwrap(); + let array_trees_child = layout.child(1).unwrap(); + + let data_offsets = collect_segment_offsets(data_child.as_ref(), segment_specs); + let array_trees_offsets = + collect_segment_offsets(array_trees_child.as_ref(), segment_specs); + + assert_eq!( + array_trees_offsets.len(), + 1, + "array_tree: auxiliary child must consolidate to exactly 1 segment, got {} segments at offsets {:?}", + array_trees_offsets.len(), + array_trees_offsets, + ); + + assert!( + !data_offsets.is_empty(), + "array_tree: data child must have at least one segment" + ); + + assert_offsets_ordered( + &data_offsets, + &array_trees_offsets, + "array_tree: all data segments should come before the array_trees segment", + ); + } + + for child in layout.children().unwrap() { + check_array_tree_layouts(child.as_ref(), segment_specs, found_any); + } + } + + let mut found_any = false; + check_array_tree_layouts(root.as_ref(), segment_specs, &mut found_any); + assert!( + found_any, + "test setup expected the default write strategy to produce at least one ArrayTreeLayout" + ); + + Ok(()) +} + +#[tokio::test] +#[cfg_attr(miri, ignore)] +async fn test_segment_ordering_array_trees_before_zones() -> VortexResult<()> { + // The default write strategy wraps every column in `ZonedStrategy { data: ArrayTree, zones }`. 
+ // We assert per-Zoned-layout that the array_trees segment (sitting inside the data child) + // appears before every zone-map segment in the same column. + let n = 100_000; + let values: Vec<&str> = (0..n).map(|i| ["alpha", "beta", "gamma"][i % 3]).collect(); + let strings = VarBinArray::from(values).into_array(); + let numbers = PrimitiveArray::from_iter(0..n as i32).into_array(); + let floats = PrimitiveArray::from_iter((0..n).map(|i| i as f64 * 0.1)).into_array(); + + let st = StructArray::from_fields(&[ + ("strings", strings), + ("numbers", numbers), + ("floats", floats), + ]) + .unwrap(); + + let mut buf = ByteBufferMut::empty(); + let summary = SESSION + .write_options() + .write(&mut buf, st.into_array().to_array_stream()) + .await?; + + let footer = summary.footer(); + let segment_specs = footer.segment_map(); + let root = footer.layout(); + + fn check_zoned_with_array_tree( + layout: &dyn Layout, + segment_specs: &[SegmentSpec], + found_any: &mut bool, + ) { + if layout.encoding_id().as_ref() == "vortex.stats" { + let data_child = layout.child(0).unwrap(); + let zones_child = layout.child(1).unwrap(); + + if data_child.encoding_id().as_ref() == "vortex.array_tree" { + *found_any = true; + let array_trees_offsets = collect_segment_offsets( + data_child.child(1).unwrap().as_ref(), + segment_specs, + ); + let zones_offsets = collect_segment_offsets(zones_child.as_ref(), segment_specs); + + assert_offsets_ordered( + &array_trees_offsets, + &zones_offsets, + "zoned wrapping array_tree: the array_trees segment should come before zone-map segments", + ); + } + } + + for child in layout.children().unwrap() { + check_zoned_with_array_tree(child.as_ref(), segment_specs, found_any); + } + } + + let mut found_any = false; + check_zoned_with_array_tree(root.as_ref(), segment_specs, &mut found_any); + assert!( + found_any, + "test setup expected the default write strategy to produce at least one Zoned wrapping an ArrayTree" + ); + + Ok(()) +} From 
085c2504e6c49a1b04ecbf66c7b64ec627b0ad1b Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Mon, 18 May 2026 13:55:15 +0100 Subject: [PATCH 04/10] leaves send array trees to the parent instead of shared state Signed-off-by: Onur Satici --- vortex-file/src/tests.rs | 6 +- vortex-layout/src/layouts/array_tree/flat.rs | 27 ++----- .../src/layouts/array_tree/writer.rs | 79 +++++++++++-------- 3 files changed, 55 insertions(+), 57 deletions(-) diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 54723b25957..1509d966ca0 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -2112,10 +2112,8 @@ async fn test_segment_ordering_array_trees_before_zones() -> VortexResult<()> { if data_child.encoding_id().as_ref() == "vortex.array_tree" { *found_any = true; - let array_trees_offsets = collect_segment_offsets( - data_child.child(1).unwrap().as_ref(), - segment_specs, - ); + let array_trees_offsets = + collect_segment_offsets(data_child.child(1).unwrap().as_ref(), segment_specs); let zones_offsets = collect_segment_offsets(zones_child.as_ref(), segment_specs); assert_offsets_ordered( diff --git a/vortex-layout/src/layouts/array_tree/flat.rs b/vortex-layout/src/layouts/array_tree/flat.rs index 2b87af01894..fe35588ffbc 100644 --- a/vortex-layout/src/layouts/array_tree/flat.rs +++ b/vortex-layout/src/layouts/array_tree/flat.rs @@ -5,7 +5,6 @@ use std::sync::Arc; use vortex_array::EmptyMetadata; use vortex_array::dtype::DType; -use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_panic; @@ -32,11 +31,11 @@ vtable!(ArrayTreeFlat); pub struct ArrayTreeFlatLayoutEncoding; /// A flat layout variant that retrieves its compact encoding tree from a sibling layout's -/// VarBin payload rather than from the data segment trailer. +/// payload rather than from the data segment trailer. 
/// -/// At write time, the compact flatbuffer (encoding tree + buffer descriptors, no stats) is -/// attached to this layout in memory and later collected by [`super::ArrayTreeLayout`] into -/// a struct array keyed by segment ID. +/// At write time, the compact flatbuffer is produced by the leaf strategy and pushed into a +/// side channel shared with the collector strategy — the layout itself just carries the same +/// state as a vanilla [`FlatLayout`]. /// /// At read time, this layout's reader looks up its compact tree in a shared /// [`super::ArrayTreesSource`] using its own [`SegmentId`]. Construction requires that an @@ -45,23 +44,12 @@ pub struct ArrayTreeFlatLayoutEncoding; #[derive(Clone, Debug)] pub struct ArrayTreeFlatLayout { inner: FlatLayout, - /// The compact flatbuffer produced at write time. Not persisted — only used to communicate - /// between the leaf strategy and the collector strategy via the layout tree. - compact_tree: Option, } impl ArrayTreeFlatLayout { - /// Creates a new layout at write time with a compact flatbuffer. - pub fn new(inner: FlatLayout, compact_tree: ByteBuffer) -> Self { - Self { - inner, - compact_tree: Some(compact_tree), - } - } - - /// Returns the compact flatbuffer, if available (write-time only). - pub fn compact_tree(&self) -> Option<&ByteBuffer> { - self.compact_tree.as_ref() + /// Creates a new layout from the inner flat layout. + pub fn new(inner: FlatLayout) -> Self { + Self { inner } } /// Returns the inner flat layout. 
@@ -142,7 +130,6 @@ impl VTable for ArrayTreeFlat { } Ok(ArrayTreeFlatLayout { inner: FlatLayout::new(row_count, dtype.clone(), segment_ids[0], ctx.clone()), - compact_tree: None, }) } } diff --git a/vortex-layout/src/layouts/array_tree/writer.rs b/vortex-layout/src/layouts/array_tree/writer.rs index e1025ff43b9..75f0c7858f9 100644 --- a/vortex-layout/src/layouts/array_tree/writer.rs +++ b/vortex-layout/src/layouts/array_tree/writer.rs @@ -5,6 +5,7 @@ use std::sync::Arc; use async_trait::async_trait; use futures::StreamExt as _; +use parking_lot::Mutex; use vortex_array::ArrayContext; use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; @@ -27,7 +28,6 @@ use crate::IntoLayout; use crate::LayoutRef; use crate::LayoutStrategy; use crate::layouts::array_tree::ArrayTreeLayout; -use crate::layouts::array_tree::flat::ArrayTreeFlat; use crate::layouts::array_tree::flat::ArrayTreeFlatLayout; use crate::layouts::flat::FlatLayout; use crate::layouts::flat::writer::FlatLayoutStrategy; @@ -37,22 +37,35 @@ use crate::sequence::SendableSequentialStream; use crate::sequence::SequencePointer; use crate::sequence::SequentialArrayStreamExt; +/// Side channel for shipping `(segment_id, compact_tree)` pairs from leaf strategies to the +/// collector strategy. +/// +/// Each leaf pushes after `segment_sink.write` resolves (so the leaf's `SequenceId` has been +/// dropped before we touch the sink). The collector drains the sink only after the entire +/// data subtree has completed, which means every leaf has already pushed. +type Sink = Arc>>; + /// Creates a cooperating pair of strategies for array tree collection. /// /// Returns `(collector, leaf)` where: -/// - `leaf` replaces [`FlatLayoutStrategy`] in the data pipeline — it serializes chunks and -/// produces compact flatbuffers attached to [`ArrayTreeFlatLayout`]. 
-/// - `collector` wraps the data pipeline — after data is written, it walks the layout tree to -/// collect compact flatbuffers from all [`ArrayTreeFlatLayout`] leaves and writes them as a -/// struct array (`{segment_id, compact_tree}`) via the configured `array_trees_strategy`. +/// - `leaf` replaces [`FlatLayoutStrategy`] in the data pipeline — it serializes chunks, +/// produces compact flatbuffers, and pushes them onto the shared sink. +/// - `collector` wraps the data pipeline — after data is written, it drains the sink and +/// writes the collected pairs as a struct array (`{segment_id, compact_tree}`) via the +/// configured `array_trees_strategy`. pub fn writer( flat: FlatLayoutStrategy, array_trees_strategy: Arc, ) -> (ArrayTreeCollectorStrategy, ArrayTreeFlatStrategy) { - let leaf = ArrayTreeFlatStrategy { flat }; + let sink: Sink = Arc::new(Mutex::new(Vec::new())); + let leaf = ArrayTreeFlatStrategy { + flat, + sink: Arc::clone(&sink), + }; let collector = ArrayTreeCollectorStrategy { child: None, array_trees_strategy, + sink, }; (collector, leaf) } @@ -60,11 +73,12 @@ pub fn writer( /// Leaf strategy (TX) that replaces [`FlatLayoutStrategy`]. /// /// For each chunk, it produces both the compact flatbuffer (encoding tree + buffer -/// descriptors, no stats) and the full data segment, and returns an [`ArrayTreeFlatLayout`] -/// with the compact tree attached for later collection. +/// descriptors, no stats) and the full data segment, then pushes `(segment_id, compact_tree)` +/// onto the shared sink for the collector to consume. #[derive(Clone)] pub struct ArrayTreeFlatStrategy { flat: FlatLayoutStrategy, + sink: Sink, } #[async_trait] @@ -117,21 +131,28 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { }, )?; assert!(buffers.len() >= 2); + + // IMPORTANT ORDERING CONSTRAINT: write the segment first, then push to the sink. + // + // `segment_sink.write` consumes our `SequenceId` and only drops it on return. 
Pushing + // to the sink before that point would risk holding the sink mutex while later leaves + // are blocked on `SequenceId::collapse`, creating a dependency from "later leaf is + // ready to write" → "earlier leaf must drop its SequenceId" → "earlier leaf must + // finish its sink push" → mutex contention with the later leaf. Doing the push after + // `await?` resolves means our SequenceId is already gone before we touch the sink. let segment_id = segment_sink.write(sequence_id, buffers).await?; + self.sink.lock().push((segment_id, compact_tree)); let None = stream.next().await else { vortex_bail!("array tree flat layout received stream with more than a single chunk"); }; - Ok(ArrayTreeFlatLayout::new( - FlatLayout::new( - row_count, - stream.dtype().clone(), - segment_id, - ReadContext::new(ctx.to_ids()), - ), - compact_tree, - ) + Ok(ArrayTreeFlatLayout::new(FlatLayout::new( + row_count, + stream.dtype().clone(), + segment_id, + ReadContext::new(ctx.to_ids()), + )) .into_layout()) } @@ -142,13 +163,13 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { /// Collector strategy (RX) that wraps the data pipeline. /// -/// After the data child completes, walks the returned layout tree to extract compact -/// flatbuffers and segment IDs from all [`ArrayTreeFlatLayout`] leaves, builds a struct -/// array of `{segment_id, compact_tree}`, and writes it as an auxiliary child via the -/// configured `array_trees_strategy`. +/// After the data child completes, drains the shared sink and writes the collected +/// `(segment_id, compact_tree)` pairs as a struct array via the configured +/// `array_trees_strategy`. pub struct ArrayTreeCollectorStrategy { child: Option>, array_trees_strategy: Arc, + sink: Sink, } impl ArrayTreeCollectorStrategy { @@ -186,17 +207,9 @@ impl LayoutStrategy for ArrayTreeCollectorStrategy { ) .await?; - // Walk the layout tree to collect (segment_id, compact_tree) pairs from - // ArrayTreeFlatLayout leaves. 
- let mut entries: Vec<(SegmentId, ByteBuffer)> = Vec::new(); - for layout_ref in data_layout.depth_first_traversal() { - let layout_ref = layout_ref?; - if let Some(atf) = layout_ref.as_opt::() - && let Some(tree) = atf.compact_tree() - { - entries.push((atf.inner().segment_id(), tree.clone())); - } - } + // By the time the data subtree future resolves, every leaf has finished its + // `segment_sink.write().await?` and pushed onto the sink. Drain it now. + let mut entries = std::mem::take(&mut *self.sink.lock()); // Sort by segment ID so the on-disk order matches segment-write order — this gives // good locality and predictable lookup-table layout. From a63ae1b74642014f42d3f5e4ddf56b4ae1a35fc4 Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Mon, 18 May 2026 14:29:47 +0100 Subject: [PATCH 05/10] off by default, restore env var and deprecate Signed-off-by: Onur Satici --- vortex-file/public-api.lock | 2 + vortex-file/src/strategy.rs | 49 ++++++--- vortex-file/src/tests.rs | 8 ++ vortex-layout/public-api.lock | 106 +++++++++++++++++++- vortex-layout/src/display.rs | 28 +++++- vortex-layout/src/layouts/array_tree/mod.rs | 2 +- vortex-layout/src/layouts/flat/mod.rs | 30 ++++++ vortex-layout/src/layouts/flat/writer.rs | 12 ++- 8 files changed, 214 insertions(+), 23 deletions(-) diff --git a/vortex-file/public-api.lock b/vortex-file/public-api.lock index bb2a9b00d81..97723eb97e8 100644 --- a/vortex-file/public-api.lock +++ b/vortex-file/public-api.lock @@ -348,6 +348,8 @@ pub fn vortex_file::WriteStrategyBuilder::build(self) -> alloc::sync::Arc) -> Self +pub fn vortex_file::WriteStrategyBuilder::with_array_tree(self, bool) -> Self + pub fn vortex_file::WriteStrategyBuilder::with_btrblocks_builder(self, vortex_btrblocks::builder::BtrBlocksCompressorBuilder) -> Self pub fn vortex_file::WriteStrategyBuilder::with_compressor(self, C) -> Self diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 4f895aaf7f1..89a8c2237af 100644 --- 
a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -144,6 +144,7 @@ pub struct WriteStrategyBuilder { field_writers: HashMap>, allow_encodings: Option>, flat_strategy: Option>, + array_tree: bool, } impl Default for WriteStrategyBuilder { @@ -156,6 +157,7 @@ impl Default for WriteStrategyBuilder { field_writers: HashMap::new(), allow_encodings: Some(ALLOWED_ENCODINGS.clone()), flat_strategy: None, + array_tree: false, } } } @@ -188,11 +190,31 @@ impl WriteStrategyBuilder { /// /// By default, this uses [`FlatLayoutStrategy`]. This can be used to substitute a custom /// layout strategy, e.g. one that inlines constant array buffers for GPU reads. + /// + /// Passing a custom flat strategy implicitly disables the array-tree outlining feature + /// (see [`Self::with_array_tree`]), since the custom strategy owns the leaf format. pub fn with_flat_strategy(mut self, flat: Arc) -> Self { self.flat_strategy = Some(flat); self } + /// Enable array-tree outlining: each chunk's encoding tree (without per-chunk statistics) + /// is collected into a single auxiliary segment per column rather than being inlined + /// alongside the chunk's data. + /// + /// Disabled by default. When enabled, the written file uses two encodings that older + /// readers will not understand: [`vortex_layout::layouts::array_tree::ArrayTreeFlatLayout`] + /// at the data leaves and a wrapping [`vortex_layout::layouts::array_tree::ArrayTreeLayout`] + /// that owns the consolidated auxiliary segment. Once you opt in, files written by this + /// builder require a reader that recognizes both encodings. + /// + /// Has no effect if a custom flat strategy is provided via + /// [`Self::with_flat_strategy`] — the user-supplied leaf format wins. + pub fn with_array_tree(mut self, array_tree: bool) -> Self { + self.array_tree = array_tree; + self + } + /// Override the default [`BtrBlocksCompressorBuilder`] used for compression. 
/// /// The builder is finalized during [`build`](Self::build), producing two compressors: one for @@ -221,21 +243,22 @@ impl WriteStrategyBuilder { Arc::new(FlatLayoutStrategy::default()) }; - // Build the data pipeline leaf. When the user provides a custom flat strategy, use it - // directly — they own the leaf format and array tree wrapping does not apply. - // Otherwise, create a TX/RX pair for array tree collection. - let (data_leaf, array_tree_collector): (Arc, _) = - if self.flat_strategy.is_some() { - (Arc::clone(&flat), None) + // Build the data pipeline leaf. Array-tree outlining requires both opt-in via + // `with_array_tree(true)` AND no custom flat strategy (the user's strategy owns the + // leaf format in that case). + let array_tree_enabled = self.array_tree && self.flat_strategy.is_none(); + let (data_leaf, array_tree_collector): (Arc, _) = if !array_tree_enabled + { + (Arc::clone(&flat), None) + } else { + let data_flat = if let Some(allow_encodings) = &self.allow_encodings { + FlatLayoutStrategy::default().with_allow_encodings(allow_encodings.clone()) } else { - let data_flat = if let Some(allow_encodings) = &self.allow_encodings { - FlatLayoutStrategy::default().with_allow_encodings(allow_encodings.clone()) - } else { - FlatLayoutStrategy::default() - }; - let (collector, leaf) = writer::writer(data_flat, Arc::clone(&flat)); - (Arc::new(leaf), Some(collector)) + FlatLayoutStrategy::default() }; + let (collector, leaf) = writer::writer(data_flat, Arc::clone(&flat)); + (Arc::new(leaf), Some(collector)) + }; // 7. 
for each chunk create a flat layout let chunked = ChunkedLayoutStrategy::new(data_leaf); diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 1509d966ca0..563f5f6ff99 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -2005,8 +2005,12 @@ async fn test_segment_ordering_array_trees_consolidated_and_after_data() -> Vort .unwrap(); let mut buf = ByteBufferMut::empty(); + let strategy = crate::WriteStrategyBuilder::default() + .with_array_tree(true) + .build(); let summary = SESSION .write_options() + .with_strategy(strategy) .write(&mut buf, st.into_array().to_array_stream()) .await?; @@ -2092,8 +2096,12 @@ async fn test_segment_ordering_array_trees_before_zones() -> VortexResult<()> { .unwrap(); let mut buf = ByteBufferMut::empty(); + let strategy = crate::WriteStrategyBuilder::default() + .with_array_tree(true) + .build(); let summary = SESSION .write_options() + .with_strategy(strategy) .write(&mut buf, st.into_array().to_array_stream()) .await?; diff --git a/vortex-layout/public-api.lock b/vortex-layout/public-api.lock index a0f7546c481..731b10f2ab9 100644 --- a/vortex-layout/public-api.lock +++ b/vortex-layout/public-api.lock @@ -88,15 +88,51 @@ pub fn vortex_layout::layouts::array_tree::ArrayTree::segment_ids(&Self::Layout) pub fn vortex_layout::layouts::array_tree::ArrayTree::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> +pub struct vortex_layout::layouts::array_tree::ArrayTreeFlat + +impl core::fmt::Debug for vortex_layout::layouts::array_tree::ArrayTreeFlat + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_layout::VTable for vortex_layout::layouts::array_tree::ArrayTreeFlat + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Encoding = vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Layout = 
vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Metadata = vortex_array::metadata::EmptyMetadata + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::build(&Self::Encoding, &vortex_array::dtype::DType, u64, &vortex_array::metadata::EmptyMetadata, alloc::vec::Vec, &dyn vortex_layout::LayoutChildren, &vortex_session::registry::ReadContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::child(&Self::Layout, usize) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::child_type(&Self::Layout, usize) -> vortex_layout::LayoutChildType + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::dtype(&Self::Layout) -> &vortex_array::dtype::DType + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::encoding(&Self::Layout) -> vortex_layout::LayoutEncodingRef + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::id(&Self::Encoding) -> vortex_layout::LayoutId + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::metadata(&Self::Layout) -> Self::Metadata + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::nchildren(&Self::Layout) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::row_count(&Self::Layout) -> u64 + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::segment_ids(&Self::Layout) -> alloc::vec::Vec + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> + pub struct vortex_layout::layouts::array_tree::ArrayTreeFlatLayout impl vortex_layout::layouts::array_tree::ArrayTreeFlatLayout -pub fn 
vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::compact_tree(&self) -> core::option::Option<&vortex_buffer::ByteBuffer> - pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::inner(&self) -> &vortex_layout::layouts::flat::FlatLayout -pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::new(vortex_layout::layouts::flat::FlatLayout, vortex_buffer::ByteBuffer) -> Self +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlatLayout::new(vortex_layout::layouts::flat::FlatLayout) -> Self impl core::clone::Clone for vortex_layout::layouts::array_tree::ArrayTreeFlatLayout @@ -1680,6 +1716,38 @@ pub fn vortex_layout::layouts::array_tree::ArrayTree::segment_ids(&Self::Layout) pub fn vortex_layout::layouts::array_tree::ArrayTree::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_layout::VTable for vortex_layout::layouts::array_tree::ArrayTreeFlat + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Encoding = vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Layout = vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Metadata = vortex_array::metadata::EmptyMetadata + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::build(&Self::Encoding, &vortex_array::dtype::DType, u64, &vortex_array::metadata::EmptyMetadata, alloc::vec::Vec, &dyn vortex_layout::LayoutChildren, &vortex_session::registry::ReadContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::child(&Self::Layout, usize) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::child_type(&Self::Layout, usize) -> vortex_layout::LayoutChildType + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::dtype(&Self::Layout) -> &vortex_array::dtype::DType + +pub fn 
vortex_layout::layouts::array_tree::ArrayTreeFlat::encoding(&Self::Layout) -> vortex_layout::LayoutEncodingRef + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::id(&Self::Encoding) -> vortex_layout::LayoutId + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::metadata(&Self::Layout) -> Self::Metadata + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::nchildren(&Self::Layout) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::row_count(&Self::Layout) -> u64 + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::segment_ids(&Self::Layout) -> alloc::vec::Vec + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_layout::VTable for vortex_layout::layouts::chunked::Chunked pub type vortex_layout::layouts::chunked::Chunked::Encoding = vortex_layout::layouts::chunked::ChunkedLayoutEncoding @@ -2298,6 +2366,38 @@ pub fn vortex_layout::layouts::array_tree::ArrayTree::segment_ids(&Self::Layout) pub fn vortex_layout::layouts::array_tree::ArrayTree::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_layout::VTable for vortex_layout::layouts::array_tree::ArrayTreeFlat + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Encoding = vortex_layout::layouts::array_tree::ArrayTreeFlatLayoutEncoding + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Layout = vortex_layout::layouts::array_tree::ArrayTreeFlatLayout + +pub type vortex_layout::layouts::array_tree::ArrayTreeFlat::Metadata = vortex_array::metadata::EmptyMetadata + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::build(&Self::Encoding, &vortex_array::dtype::DType, u64, 
&vortex_array::metadata::EmptyMetadata, alloc::vec::Vec, &dyn vortex_layout::LayoutChildren, &vortex_session::registry::ReadContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::child(&Self::Layout, usize) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::child_type(&Self::Layout, usize) -> vortex_layout::LayoutChildType + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::dtype(&Self::Layout) -> &vortex_array::dtype::DType + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::encoding(&Self::Layout) -> vortex_layout::LayoutEncodingRef + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::id(&Self::Encoding) -> vortex_layout::LayoutId + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::metadata(&Self::Layout) -> Self::Metadata + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::nchildren(&Self::Layout) -> usize + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::new_reader(&Self::Layout, alloc::sync::Arc, alloc::sync::Arc, &vortex_session::VortexSession, &vortex_layout::LayoutReaderContext) -> vortex_error::VortexResult + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::row_count(&Self::Layout) -> u64 + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::segment_ids(&Self::Layout) -> alloc::vec::Vec + +pub fn vortex_layout::layouts::array_tree::ArrayTreeFlat::with_children(&mut Self::Layout, alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_layout::VTable for vortex_layout::layouts::chunked::Chunked pub type vortex_layout::layouts::chunked::Chunked::Encoding = vortex_layout::layouts::chunked::ChunkedLayoutEncoding diff --git a/vortex-layout/src/display.rs b/vortex-layout/src/display.rs index 9e4574d5614..d6f51e8f1e2 100644 --- a/vortex-layout/src/display.rs +++ b/vortex-layout/src/display.rs @@ -10,11 +10,24 @@ use vortex_error::VortexResult; use vortex_utils::aliases::hash_map::HashMap; use 
crate::LayoutRef; +use crate::layouts::array_tree::ArrayTreeFlat; use crate::layouts::flat::Flat; use crate::layouts::flat::FlatLayout; use crate::segments::SegmentId; use crate::segments::SegmentSource; +/// Returns the inner [`FlatLayout`] that owns a data segment, regardless of whether the +/// layout is a plain [`Flat`] or an [`ArrayTreeFlat`] (which wraps a [`FlatLayout`]). +/// +/// Used by display routines that want to render leaf-layout buffer info uniformly across +/// both encodings — the on-disk data segment shape is identical. +fn as_flat_view(layout: &LayoutRef) -> Option<&FlatLayout> { + if let Some(flat) = layout.as_opt::() { + return Some(flat); + } + layout.as_opt::().map(|atf| atf.inner()) +} + /// Display the layout as a tree, fetching segment sizes from the segment source. /// /// # Warning @@ -49,13 +62,18 @@ pub(super) async fn display_tree_with_segment_sizes( }) } -/// Collect segment IDs that need to be fetched (those without inline array_tree). +/// Collect segment IDs that need to be fetched. +/// +/// For a [`Flat`] with an inline array_tree (the deprecated env-var path), buffer info can be +/// parsed directly from the layout metadata — we skip those. Otherwise we fetch the data +/// segment and parse its trailing flatbuffer. [`ArrayTreeFlat`] leaves have the same on-disk +/// shape as a plain [`Flat`] (their compact tree is stored separately in the parent's +/// auxiliary child), so we treat them the same here. 
fn collect_segments_to_fetch( layout: &LayoutRef, segment_ids: &mut Vec, ) -> VortexResult<()> { - // For FlatLayout, only add if there's no inline array_tree - if let Some(flat_layout) = layout.as_opt::() { + if let Some(flat_layout) = as_flat_view(layout) { if flat_layout.array_tree().is_none() { segment_ids.push(flat_layout.segment_id()); } @@ -146,8 +164,8 @@ impl DisplayLayoutTree { node_parts.push(format!("rows: {}", layout.row_count())); } - // For FlatLayout, show buffer info - if let Some(flat_layout) = layout.as_opt::() { + // For FlatLayout (and ArrayTreeFlat which wraps one), show buffer info + if let Some(flat_layout) = as_flat_view(&layout) { node_parts.push(format_flat_layout_buffers( flat_layout, self.segment_buffer_sizes.as_ref(), diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs index d3be5f47210..90ff5b58088 100644 --- a/vortex-layout/src/layouts/array_tree/mod.rs +++ b/vortex-layout/src/layouts/array_tree/mod.rs @@ -36,6 +36,7 @@ use vortex_session::VortexSession; use vortex_session::registry::ReadContext; use vortex_utils::aliases::hash_map::HashMap; +pub use self::flat::ArrayTreeFlat; pub use self::flat::ArrayTreeFlatLayout; pub use self::flat::ArrayTreeFlatLayoutEncoding; use crate::LayoutChildType; @@ -47,7 +48,6 @@ use crate::LayoutRef; use crate::VTable; use crate::children::LayoutChildren; use crate::children::OwnedLayoutChildren; -use crate::layouts::array_tree::flat::ArrayTreeFlat; use crate::layouts::array_tree::reader::ArrayTreeFlatReader; use crate::layouts::array_tree::reader::ArrayTreeReader; use crate::segments::SegmentId; diff --git a/vortex-layout/src/layouts/flat/mod.rs b/vortex-layout/src/layouts/flat/mod.rs index 8bf12e7be29..a2deb4cebc6 100644 --- a/vortex-layout/src/layouts/flat/mod.rs +++ b/vortex-layout/src/layouts/flat/mod.rs @@ -4,7 +4,9 @@ mod reader; pub mod writer; +use std::env; use std::sync::Arc; +use std::sync::LazyLock; use vortex_array::DeserializeMetadata; 
use vortex_array::ProstMetadata; @@ -29,6 +31,34 @@ use crate::segments::SegmentId; use crate::segments::SegmentSource; use crate::vtable; +/// Returns `true` if the `FLAT_LAYOUT_INLINE_ARRAY_NODE` environment variable is set to `1`, +/// instructing the flat writer to inline each chunk's compact encoding tree as a trailing +/// buffer in its data segment. +/// +/// # Deprecation +/// +/// This knob is retained for backward compatibility with files and tooling that depend on the +/// inline encoding-tree footer. The supported path forward is to opt in to the +/// `ArrayTreeLayout` outlining feature on the file write strategy +/// (`WriteStrategyBuilder::with_array_tree(true)`), which consolidates encoding trees into a +/// single auxiliary segment per column rather than scattering them across data segments. +/// A one-shot warning is emitted on the first read of the env var so the deprecation is +/// visible to operators. +pub(super) fn flat_layout_inline_array_node() -> bool { + static FLAT_LAYOUT_INLINE_ARRAY_NODE: LazyLock = LazyLock::new(|| { + let enabled = env::var("FLAT_LAYOUT_INLINE_ARRAY_NODE").is_ok_and(|v| v == "1"); + if enabled { + tracing::warn!( + "FLAT_LAYOUT_INLINE_ARRAY_NODE is deprecated: prefer enabling ArrayTreeLayout \ + outlining via WriteStrategyBuilder::with_array_tree(true). The env var path \ + will be removed in a future release." 
+ ); + } + enabled + }); + *FLAT_LAYOUT_INLINE_ARRAY_NODE +} + vtable!(Flat); impl VTable for Flat { diff --git a/vortex-layout/src/layouts/flat/writer.rs b/vortex-layout/src/layouts/flat/writer.rs index 17f6983082e..f2a6867a931 100644 --- a/vortex-layout/src/layouts/flat/writer.rs +++ b/vortex-layout/src/layouts/flat/writer.rs @@ -168,16 +168,26 @@ impl LayoutStrategy for FlatLayoutStrategy { )?; // there is at least the flatbuffer and the length assert!(buffers.len() >= 2); + + // DEPRECATED: when the FLAT_LAYOUT_INLINE_ARRAY_NODE env var is set, capture the + // trailing compact array-tree flatbuffer (second-to-last buffer in the serialized + // form) so we can persist it in this layout's metadata. Prefer + // `WriteStrategyBuilder::with_array_tree(true)` for new code; see + // [`super::flat_layout_inline_array_node`] for details. + let inline_array_tree = + super::flat_layout_inline_array_node().then(|| buffers[buffers.len() - 2].clone()); + let segment_id = segment_sink.write(sequence_id, buffers).await?; let None = stream.next().await else { vortex_bail!("flat layout received stream with more than a single chunk"); }; - Ok(FlatLayout::new( + Ok(FlatLayout::new_with_metadata( row_count, stream.dtype().clone(), segment_id, ReadContext::new(ctx.to_ids()), + inline_array_tree, ) .into_layout()) } From 0ad36898897b339ef4a952c168f0932b7b7c93a7 Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Mon, 18 May 2026 14:44:24 +0100 Subject: [PATCH 06/10] compress the array tree segments --- vortex-file/src/strategy.rs | 44 ++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/vortex-file/src/strategy.rs b/vortex-file/src/strategy.rs index 89a8c2237af..04d1329254a 100644 --- a/vortex-file/src/strategy.rs +++ b/vortex-file/src/strategy.rs @@ -243,6 +243,27 @@ impl WriteStrategyBuilder { Arc::new(FlatLayoutStrategy::default()) }; + // Data compressor: excludes IntDictScheme because DictStrategy (step 3 below) already + // 
dictionary-encodes columns; allowing it here would redundantly dictionary-encode the + // integer codes produced by that earlier step. + let data_compressor: Arc = match &self.compressor { + CompressorConfig::BtrBlocks(builder) => Arc::new( + builder + .clone() + .exclude_schemes([IntDictScheme.id()]) + .build(), + ), + CompressorConfig::Opaque(compressor) => Arc::clone(compressor), + }; + // Stats compressor: used for zone-map tables, dict values, and array-tree consolidated + // segments. + let stats_compressor: Arc = match &self.compressor { + CompressorConfig::BtrBlocks(builder) => Arc::new(builder.clone().build()), + CompressorConfig::Opaque(compressor) => Arc::clone(compressor), + }; + let compress_then_flat = CompressingStrategy::new(Arc::clone(&flat), stats_compressor); + let compress_then_flat_arc: Arc = Arc::new(compress_then_flat.clone()); + // Build the data pipeline leaf. Array-tree outlining requires both opt-in via // `with_array_tree(true)` AND no custom flat strategy (the user's strategy owns the // leaf format in that case). @@ -256,7 +277,9 @@ impl WriteStrategyBuilder { } else { FlatLayoutStrategy::default() }; - let (collector, leaf) = writer::writer(data_flat, Arc::clone(&flat)); + // Use the compressed flat strategy for the consolidated array-trees segment — the + // struct of (segment_id, compact_tree) dict-encodes and compresses well. + let (collector, leaf) = writer::writer(data_flat, Arc::clone(&compress_then_flat_arc)); (Arc::new(leaf), Some(collector)) }; @@ -266,18 +289,6 @@ impl WriteStrategyBuilder { let buffered = BufferedStrategy::new(chunked, 2 * ONE_MEG); // 2MB // 5. compress each chunk. - // Exclude IntDictScheme from the data compressor because DictStrategy (step 3) already - // dictionary-encodes columns. Allowing IntDictScheme here would redundantly - // dictionary-encode the integer codes produced by that earlier step. 
- let data_compressor: Arc = match &self.compressor { - CompressorConfig::BtrBlocks(builder) => Arc::new( - builder - .clone() - .exclude_schemes([IntDictScheme.id()]) - .build(), - ), - CompressorConfig::Opaque(compressor) => Arc::clone(compressor), - }; let compressing = CompressingStrategy::new(buffered, data_compressor); // 4. prior to compression, coalesce up to a minimum size @@ -297,13 +308,6 @@ impl WriteStrategyBuilder { }, ); - // 2.1. | 3.1. compress stats tables and dict values. - let stats_compressor: Arc = match self.compressor { - CompressorConfig::BtrBlocks(builder) => Arc::new(builder.build()), - CompressorConfig::Opaque(compressor) => compressor, - }; - let compress_then_flat = CompressingStrategy::new(flat, stats_compressor); - // 3. apply dict encoding or fallback let dict = DictStrategy::new( coalescing.clone(), From a5f6e715d1828984607043c7620b11877e1b241e Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Mon, 18 May 2026 15:12:49 +0100 Subject: [PATCH 07/10] vortex web to work with array tree layouts Signed-off-by: Onur Satici --- vortex-layout/src/layouts/array_tree/flat.rs | 11 +- vortex-layout/src/layouts/array_tree/mod.rs | 128 +++++++++++------- vortex-web/crate/src/wasm.rs | 120 ++++++++++++---- vortex-web/src/App.tsx | 6 +- .../src/components/detail/TreemapPane.tsx | 4 +- vortex-web/src/components/swimlane/utils.ts | 14 +- 6 files changed, 201 insertions(+), 82 deletions(-) diff --git a/vortex-layout/src/layouts/array_tree/flat.rs b/vortex-layout/src/layouts/array_tree/flat.rs index fe35588ffbc..a6f6f9bfdb1 100644 --- a/vortex-layout/src/layouts/array_tree/flat.rs +++ b/vortex-layout/src/layouts/array_tree/flat.rs @@ -40,7 +40,10 @@ pub struct ArrayTreeFlatLayoutEncoding; /// At read time, this layout's reader looks up its compact tree in a shared /// [`super::ArrayTreesSource`] using its own [`SegmentId`]. 
Construction requires that an /// ancestor [`super::ArrayTreeLayout`] has registered a reader-builder override against -/// this encoding's ID — this layout has no useful default reader. +/// this encoding's ID — this layout has no useful default reader. Tools that need to +/// construct readers at arbitrary points in the layout tree (explorers, debuggers) should +/// use [`super::ArrayTreeLayout::derive_reader_ctx`] to build a context that registers the +/// override before descending to the leaf. #[derive(Clone, Debug)] pub struct ArrayTreeFlatLayout { inner: FlatLayout, @@ -109,10 +112,12 @@ impl VTable for ArrayTreeFlat { // ArrayTreeFlatLayout has no useful default reader. It exists to be intercepted by an // ancestor ArrayTreeLayout that registers a reader-builder override carrying the // shared ArrayTreesSource. If the dispatcher reached this method, no such ancestor - // was present in the layout tree. + // was present in the layout tree — see `ArrayTreeLayout::derive_reader_ctx` for the + // helper tools should call when starting reader construction below the root. 
vortex_bail!( "ArrayTreeFlatLayout requires an ancestor ArrayTreeLayout to register a reader \ - builder override; this layout cannot be read on its own" + builder override; call ArrayTreeLayout::derive_reader_ctx on each ArrayTreeLayout \ + ancestor before constructing a reader for this layout" ) } diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs index 90ff5b58088..579f8e805f5 100644 --- a/vortex-layout/src/layouts/array_tree/mod.rs +++ b/vortex-layout/src/layouts/array_tree/mod.rs @@ -14,10 +14,12 @@ use std::sync::OnceLock; use futures::FutureExt; use vortex_array::EmptyMetadata; +use vortex_array::Executable; use vortex_array::MaskFuture; -use vortex_array::arrays::Primitive; -use vortex_array::arrays::Struct; -use vortex_array::arrays::VarBinView; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_array::arrays::VarBinViewArray; use vortex_array::arrays::struct_::StructArrayExt; use vortex_array::dtype::DType; use vortex_array::dtype::FieldName; @@ -102,6 +104,53 @@ impl ArrayTreeLayout { Nullability::NonNullable, ) } + + /// Build a [`LayoutReaderContext`] that overlays `ctx` with a source-injecting builder + /// override for this layout's [`ArrayTreeFlat`] descendants. + /// + /// The returned context, when used to construct a reader on a descendant layout, will + /// satisfy `ArrayTreeFlat`'s requirement for an injected [`ArrayTreesSource`]. Used by: + /// - The normal [`crate::VTable::new_reader`] dispatch on `ArrayTreeLayout` (production path). + /// - Tools that construct readers at arbitrary points in the layout tree (explorers, + /// debuggers) — they should walk from the root to the target node, calling this method + /// for each `ArrayTreeLayout` ancestor on the path so the accumulated ctx carries the + /// right override when the leaf is finally constructed. 
+ pub fn derive_reader_ctx( + &self, + name: &str, + segment_source: Arc, + session: &VortexSession, + ctx: &LayoutReaderContext, + ) -> VortexResult { + // Construct the array_trees auxiliary reader using the unmodified incoming context — + // the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides. + let array_trees_child = self + .children + .child(1, &Self::array_trees_dtype())?; + let trees_reader = array_trees_child.new_reader_in_ctx( + Arc::from(format!("{name}/array_trees")), + segment_source, + session, + ctx, + )?; + let source = Arc::new(ArrayTreesSource::new(trees_reader, session.clone())); + + Ok(ctx.with_override( + ArrayTreeFlat::id(&ArrayTreeFlatLayoutEncoding), + Arc::new(move |layout, name, segs, sess, _ctx| { + let atf = layout + .as_opt::() + .vortex_expect("ArrayTreeFlat override applied to wrong layout encoding"); + Ok(Arc::new(ArrayTreeFlatReader::new( + atf.clone(), + name, + segs, + sess.clone(), + Arc::clone(&source), + ))) + }), + )) + } } impl VTable for ArrayTree { @@ -160,36 +209,8 @@ impl VTable for ArrayTree { session: &VortexSession, ctx: &LayoutReaderContext, ) -> VortexResult { - // Construct the array_trees auxiliary reader using the unmodified incoming context — - // the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides. - let array_trees_child = Self::child(layout, 1)?; - let trees_reader = array_trees_child.new_reader_in_ctx( - Arc::from(format!("{name}/array_trees")), - Arc::clone(&segment_source), - session, - ctx, - )?; - let source = Arc::new(ArrayTreesSource::new(trees_reader)); - - // Derive a context that intercepts ArrayTreeFlat construction with our source-injecting - // builder. The data subtree (and any nested layouts within it) sees this context, so - // any ArrayTreeFlat descendant — no matter how deep — gets the source. 
- let derived_ctx = ctx.with_override( - ArrayTreeFlat::id(&ArrayTreeFlatLayoutEncoding), - Arc::new(move |layout, name, segs, sess, _ctx| { - let atf = layout - .as_opt::() - .vortex_expect("ArrayTreeFlat override applied to wrong layout encoding"); - Ok(Arc::new(ArrayTreeFlatReader::new( - atf.clone(), - name, - segs, - sess.clone(), - Arc::clone(&source), - ))) - }), - ); - + let derived_ctx = + layout.derive_reader_ctx(&name, Arc::clone(&segment_source), session, ctx)?; let data_child = Self::child(layout, 0)?; let data_reader = data_child.new_reader_in_ctx( Arc::clone(&name), @@ -234,6 +255,10 @@ impl VTable for ArrayTree { /// shared across all leaves of the parent [`ArrayTreeLayout`] via a `OnceLock`-cached future. pub struct ArrayTreesSource { reader: LayoutReaderRef, + /// Session used to construct execution contexts when canonicalizing the array_trees + /// struct (its fields may be in compressed encodings depending on how the writer's + /// `array_trees_strategy` is configured). + session: VortexSession, /// Lazily initialized shared future for the segment-keyed lookup map. map: OnceLock, } @@ -253,10 +278,11 @@ impl std::fmt::Debug for ArrayTreesSource { } impl ArrayTreesSource { - /// Creates a new source backed by the given array_trees reader. - pub fn new(reader: LayoutReaderRef) -> Self { + /// Creates a new source backed by the given array_trees reader and session. + pub fn new(reader: LayoutReaderRef, session: VortexSession) -> Self { Self { reader, + session, map: OnceLock::new(), } } @@ -286,6 +312,7 @@ impl ArrayTreesSource { .get_or_init(|| { let row_count = self.reader.row_count(); let reader = Arc::clone(&self.reader); + let session = self.session.clone(); async move { let array = reader .projection_evaluation( @@ -299,7 +326,10 @@ impl ArrayTreesSource { .map_err(Arc::new)? 
.await .map_err(Arc::new)?; - build_segment_map(array).map(Arc::new).map_err(Arc::new) + let mut ctx = session.create_execution_ctx(); + build_segment_map(array, &mut ctx) + .map(Arc::new) + .map_err(Arc::new) } .boxed() .shared() @@ -309,30 +339,30 @@ impl ArrayTreesSource { } /// Decode the array_trees struct array into a `HashMap`. +/// +/// The struct array's columns may be in compressed encodings (bitpacked `segment_id`, dict +/// `compact_tree`, etc.) when read from a file whose array-trees strategy applies compression, +/// so we canonicalize each field via [`Executable::execute`] before downcasting to the +/// concrete typed array. fn build_segment_map( array: vortex_array::ArrayRef, + ctx: &mut vortex_array::ExecutionCtx, ) -> VortexResult> { - let struct_array = array - .try_downcast::() - .map_err(|_| vortex_err!("array_trees is not a Struct array"))?; + let struct_array = StructArray::execute(array, ctx)?; let segment_ids_field = struct_array .unmasked_field_by_name_opt("segment_id") - .ok_or_else(|| vortex_err!("array_trees missing 'segment_id' field"))?; + .ok_or_else(|| vortex_err!("array_trees missing 'segment_id' field"))? + .clone(); let trees_field = struct_array .unmasked_field_by_name_opt("compact_tree") - .ok_or_else(|| vortex_err!("array_trees missing 'compact_tree' field"))?; + .ok_or_else(|| vortex_err!("array_trees missing 'compact_tree' field"))? 
+ .clone(); - let segment_ids = segment_ids_field - .clone() - .try_downcast::() - .map_err(|_| vortex_err!("array_trees 'segment_id' field is not Primitive"))?; + let segment_ids = PrimitiveArray::execute(segment_ids_field, ctx)?; let segment_ids = segment_ids.as_slice::(); - let trees = trees_field - .clone() - .try_downcast::() - .map_err(|_| vortex_err!("array_trees 'compact_tree' field is not a VarBinView"))?; + let trees = VarBinViewArray::execute(trees_field, ctx)?; let mut map = HashMap::with_capacity(segment_ids.len()); for (idx, &seg) in segment_ids.iter().enumerate() { diff --git a/vortex-web/crate/src/wasm.rs b/vortex-web/crate/src/wasm.rs index 05e328e5341..b260dd5302d 100644 --- a/vortex-web/crate/src/wasm.rs +++ b/vortex-web/crate/src/wasm.rs @@ -36,8 +36,12 @@ use vortex::file::VortexFile; use vortex::io::CoalesceConfig; use vortex::io::VortexReadAt; use vortex::layout::LayoutChildType; +use vortex::layout::LayoutReaderContext; use vortex::layout::LayoutRef; +use vortex::layout::layouts::array_tree::ArrayTree; +use vortex::layout::layouts::array_tree::ArrayTreeFlat; use vortex::layout::layouts::flat::Flat; +use vortex::layout::layouts::flat::FlatLayout; use vortex::layout::scan::scan_builder::ScanBuilder; use vortex::session::VortexSession; use vortex::session::registry::ReadContext; @@ -150,7 +154,7 @@ pub async fn open_vortex_file(file: web_sys::File) -> Result Option { - if let Some(flat) = layout.as_opt::() { + if let Some(flat) = as_flat_view(layout) { return Some(flat.array_ctx().clone()); } if let Ok(children) = layout.children() { @@ -163,6 +167,20 @@ fn find_array_read_ctx(layout: &LayoutRef) -> Option { None } +/// Returns the inner [`FlatLayout`] backing this layout, whether it's a plain [`Flat`] or an +/// [`ArrayTreeFlat`] (which wraps a `FlatLayout` and stores its compact encoding tree in a +/// sibling `array_trees` segment). 
+/// +/// The two encodings produce identical on-disk data segments — the only difference is where +/// the compact encoding tree lives — so the web explorer can treat them uniformly for +/// segment fetches, encoding-tree extraction, and previews. +fn as_flat_view(layout: &LayoutRef) -> Option<&FlatLayout> { + if let Some(flat) = layout.as_opt::() { + return Some(flat); + } + layout.as_opt::().map(|atf| atf.inner()) +} + /// A handle to an opened Vortex file, exposing metadata to JavaScript. #[wasm_bindgen] pub struct VortexFileHandle { @@ -288,12 +306,20 @@ impl VortexFileHandle { node_id: &str, row_limit: u32, ) -> Result { - let layout = find_layout_by_id(self.vxf.footer().layout(), node_id) - .ok_or_else(|| JsValue::from_str(&format!("Layout node not found: {node_id}")))?; - let segment_source = self.vxf.segment_source(); + + // Walk root → target, accumulating any reader-builder overrides that + // `ArrayTreeLayout` ancestors need to register so descendants like `ArrayTreeFlat` + // can construct without bailing on a missing source. + let (layout, ctx) = find_layout_with_ctx( + self.vxf.footer().layout(), + node_id, + Some((&segment_source, &self.session)), + ) + .ok_or_else(|| JsValue::from_str(&format!("Layout node not found: {node_id}")))?; + let reader = layout - .new_reader(node_id.into(), segment_source, &self.session) + .new_reader_in_ctx(node_id.into(), segment_source, &self.session, &ctx) .map_err(|e| JsValue::from_str(&e.to_string()))?; let stream = ScanBuilder::new(self.session.clone(), reader) @@ -332,10 +358,12 @@ impl VortexFileHandle { Ok(js_sys::Uint8Array::from(buf.as_slice())) } - /// Fetch the array encoding tree for a flat layout node. + /// Fetch the array encoding tree for a flat-style layout node. /// /// Finds the layout by node ID, reads the segment, fully decodes the array /// to extract dtype, child names, and buffer names from the encoding vtables. 
+ /// Works for both [`Flat`] and [`ArrayTreeFlat`] — both share the same data-segment + /// shape. pub async fn fetch_encoding_tree(&self, node_id: String) -> Result { let ctx = self .array_read_ctx @@ -345,9 +373,8 @@ impl VortexFileHandle { let layout = find_layout_by_id(self.vxf.footer().layout(), &node_id) .ok_or_else(|| JsValue::from_str(&format!("Layout node not found: {node_id}")))?; - let flat = layout - .as_opt::() - .ok_or_else(|| JsValue::from_str("Node is not a flat layout"))?; + let flat = as_flat_view(&layout) + .ok_or_else(|| JsValue::from_str("Node is not a flat-style layout"))?; let segment_id = flat.segment_id(); let dtype = layout.dtype().clone(); @@ -393,9 +420,8 @@ impl VortexFileHandle { JsValue::from_str(&format!("Layout node not found: {layout_node_id}")) })?; - let flat = layout - .as_opt::() - .ok_or_else(|| JsValue::from_str("Node is not a flat layout"))?; + let flat = as_flat_view(&layout) + .ok_or_else(|| JsValue::from_str("Node is not a flat-style layout"))?; let segment_id = flat.segment_id(); let dtype = layout.dtype().clone(); @@ -460,9 +486,8 @@ impl VortexFileHandle { JsValue::from_str(&format!("Layout node not found: {layout_node_id}")) })?; - let flat = layout - .as_opt::() - .ok_or_else(|| JsValue::from_str("Node is not a flat layout"))?; + let flat = as_flat_view(&layout) + .ok_or_else(|| JsValue::from_str("Node is not a flat-style layout"))?; let segment_id = flat.segment_id(); let dtype = layout.dtype().clone(); @@ -577,8 +602,14 @@ fn build_layout_tree( } } - // For flat layouts, extract the array encoding tree if available. - let array_encoding_tree = layout.as_opt::().and_then(|flat| { + // For flat-style layouts (Flat or ArrayTreeFlat), extract the inline array encoding tree + // if available. + // + // Today only the deprecated env-var Flat path actually populates `array_tree()`. 
For + // `ArrayTreeFlat` the compact tree lives in the sibling `array_trees` auxiliary segment, + // not inline — `array_tree()` returns `None`, and the encoding tree is fetched on demand + // via `fetch_encoding_tree` instead. + let array_encoding_tree = as_flat_view(&layout).and_then(|flat| { let tree_buf = flat.array_tree()?; let ctx = flat.array_ctx(); let parts = SerializedArray::from_array_tree(tree_buf.as_ref().to_vec()).ok()?; @@ -678,31 +709,68 @@ fn build_array_encoding_tree_from_array( /// IDs match the format: "root.field_name.chunked.[0]" where each segment /// corresponds to a `LayoutChildType::name()`. fn find_layout_by_id(root: &LayoutRef, node_id: &str) -> Option { + find_layout_with_ctx(root, node_id, None).map(|(layout, _)| layout) +} + +/// Like [`find_layout_by_id`], but also builds a [`LayoutReaderContext`] that registers a +/// source-injecting builder override for every [`ArrayTree`] ancestor on the path from +/// `root` to the target. +/// +/// This is required when the explorer constructs a reader at a deep node: the default +/// reader-construction path on [`ArrayTreeFlat`] bails because it relies on an ancestor's +/// `new_reader` call to register the override. By walking the path explicitly and calling +/// [`ArrayTreeLayout::derive_reader_ctx`] on every ancestor we encounter, the returned ctx +/// satisfies the leaf's requirement when the caller eventually invokes +/// `target.new_reader_in_ctx(..., &ctx)`. +/// +/// Pass `Some((segment_source, session))` to actually derive the context. When passed +/// `None`, the function still navigates the tree and returns an empty context — useful for +/// callers that only need the layout reference. 
+fn find_layout_with_ctx( + root: &LayoutRef, + node_id: &str, + derive: Option<(&Arc, &VortexSession)>, +) -> Option<(LayoutRef, LayoutReaderContext)> { let segments: Vec<&str> = node_id.split('.').collect(); if segments.is_empty() || segments[0] != "root" { return None; } + + let mut current = root.clone(); + let mut current_id = String::from("root"); + let mut ctx = LayoutReaderContext::new(); + + let maybe_derive = |layout: &LayoutRef, name: &str, ctx: &LayoutReaderContext| { + let Some((segment_source, session)) = derive else { + return Some(ctx.clone()); + }; + layout.as_opt::().and_then(|atl| { + atl.derive_reader_ctx(name, Arc::clone(segment_source), session, ctx).ok() + }).or_else(|| Some(ctx.clone())) + }; + + ctx = maybe_derive(&current, &current_id, &ctx)?; if segments.len() == 1 { - return Some(root.clone()); + return Some((current, ctx)); } - let mut current = root.clone(); for seg in &segments[1..] { let children = current.children().ok()?; - let mut found = false; + let mut found = None; for (i, child) in children.into_iter().enumerate() { let name = current.child_type(i).name(); if name.as_ref() == *seg { - current = child; - found = true; + found = Some(child); break; } } - if !found { - return None; - } + let child = found?; + current = child; + current_id.push('.'); + current_id.push_str(seg); + ctx = maybe_derive(&current, &current_id, &ctx)?; } - Some(current) + Some((current, ctx)) } /// Downgrade Arrow `*View` types to their non-view equivalents so the JS diff --git a/vortex-web/src/App.tsx b/vortex-web/src/App.tsx index 07b52b120fe..d2ef4a61b7b 100644 --- a/vortex-web/src/App.tsx +++ b/vortex-web/src/App.tsx @@ -85,7 +85,11 @@ function App() { setFileState((prev) => { if (!prev) return prev; const node = findNodeById(prev.layoutTree, nodeId); - if (!node || node.encoding !== 'vortex.flat') return prev; + if ( + !node || + (node.encoding !== 'vortex.flat' && node.encoding !== 'vortex.array_tree_flat') + ) + return prev; if (node.children.some((c) => c.isArrayNode))
return prev; const arrayChildren = arrayTreeToLayoutChildren(arrayTree, node); diff --git a/vortex-web/src/components/detail/TreemapPane.tsx b/vortex-web/src/components/detail/TreemapPane.tsx index 55418a972c9..36f9df1380b 100644 --- a/vortex-web/src/components/detail/TreemapPane.tsx +++ b/vortex-web/src/components/detail/TreemapPane.tsx @@ -56,7 +56,9 @@ function buildTree(node: LayoutTreeNode, segmentMap: Map !c.isArrayNode); diff --git a/vortex-web/src/components/swimlane/utils.ts b/vortex-web/src/components/swimlane/utils.ts index 605ebbfd30d..bfb3b26153e 100644 --- a/vortex-web/src/components/swimlane/utils.ts +++ b/vortex-web/src/components/swimlane/utils.ts @@ -15,6 +15,8 @@ export const ENCODING_STYLES: Record = 'vortex.struct': { color: '#5971FD', label: 'struct' }, 'vortex.chunked': { color: '#CEE562', label: 'chunked' }, 'vortex.flat': { color: '#2CB9D1', label: 'flat' }, + 'vortex.array_tree_flat': { color: '#2CB9D1', label: 'array-tree-flat' }, + 'vortex.array_tree': { color: '#A87CC4', label: 'array-tree' }, 'vortex.dict': { color: '#EEB3E1', label: 'dict' }, 'vortex.zonemap': { color: '#FB863D', label: 'zonemap' }, 'vortex.fsst': { color: '#EEB3E1', label: 'fsst' }, @@ -557,10 +559,18 @@ export function arrayTreeToLayoutChildren( } /** - * Check if a layout node is a flat layout that can have array children. + * Check if a layout node is a flat-style layout that can have array children. + * + * Treats both `vortex.flat` and `vortex.array_tree_flat` as leaves with a single decodable + * data segment — they share the same on-disk segment shape, the only difference is whether + * the compact encoding tree lives inline (legacy env-var Flat) or in a sibling auxiliary + * segment (ArrayTreeFlat). 
*/ export function isFlatLayout(node: LayoutTreeNode): boolean { - return node.encoding === 'vortex.flat' && !node.isArrayNode; + return ( + (node.encoding === 'vortex.flat' || node.encoding === 'vortex.array_tree_flat') && + !node.isArrayNode + ); } /** From 20d8b2e5efbeae8362b3beaed7c73bc1d7defda1 Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Mon, 18 May 2026 16:20:03 +0100 Subject: [PATCH 08/10] tests, fix wasm swallowed error Signed-off-by: Onur Satici --- vortex-file/src/tests.rs | 130 ++++++++++++++------ vortex-layout/src/display.rs | 2 +- vortex-layout/src/layouts/array_tree/mod.rs | 4 +- vortex-web/crate/src/wasm.rs | 55 ++++++--- 4 files changed, 131 insertions(+), 60 deletions(-) diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 563f5f6ff99..fd308fe7537 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -1910,37 +1910,28 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { check_zoned_ordering(root.as_ref(), segment_specs); - // Additionally: all zone map segments across all columns should appear after - // all data segments across all columns. Array tree segments (if present) appear - // between data and zones. + // Additionally: every zone-map segment across all columns must appear after every data + // segment across all columns. This holds even with cross-column interleaving inside the + // metadata phase because the root writer splits the sequence universe — chunk data uses + // IDs derived from `ptr` and all metadata (zones, etc.) derives from `eof`, with + // `ptr < eof` so all data segments globally precede all metadata segments. + // + // Per-column `data < array_trees < zones` ordering for the new `ArrayTreeLayout` is + // covered separately by the array-tree-specific ordering tests. 
let mut all_data_offsets = Vec::new(); - let mut all_array_tree_offsets = Vec::new(); let mut all_zones_offsets = Vec::new(); fn collect_all_zoned( layout: &dyn Layout, segment_specs: &[SegmentSpec], all_data: &mut Vec, - all_array_trees: &mut Vec, all_zones: &mut Vec, ) { if layout.encoding_id().as_ref() == "vortex.stats" { - // child 0 = data (may contain array_tree layouts), child 1 = zones - let data_child = layout.child(0).unwrap(); - // If the data child is an array_tree layout, split its segments. - if data_child.encoding_id().as_ref() == "vortex.array_tree" { - // child 0 = actual data, child 1 = array_trees auxiliary - all_data.extend(collect_segment_offsets( - data_child.child(0).unwrap().as_ref(), - segment_specs, - )); - all_array_trees.extend(collect_segment_offsets( - data_child.child(1).unwrap().as_ref(), - segment_specs, - )); - } else { - all_data.extend(collect_segment_offsets(data_child.as_ref(), segment_specs)); - } + all_data.extend(collect_segment_offsets( + layout.child(0).unwrap().as_ref(), + segment_specs, + )); all_zones.extend(collect_segment_offsets( layout.child(1).unwrap().as_ref(), segment_specs, @@ -1948,13 +1939,7 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { return; } for child in layout.children().unwrap() { - collect_all_zoned( - child.as_ref(), - segment_specs, - all_data, - all_array_trees, - all_zones, - ); + collect_all_zoned(child.as_ref(), segment_specs, all_data, all_zones); } } @@ -1962,24 +1947,13 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { root.as_ref(), segment_specs, &mut all_data_offsets, - &mut all_array_tree_offsets, &mut all_zones_offsets, ); - // The root writer splits the sequence universe into two: data chunks use IDs from `ptr` - // and all metadata (array trees, zones) derive from `eof`. Since ptr < eof, all data - // segments are globally before all metadata segments. 
- // - // Within the eof universe, per-column ordering guarantees array_trees < zones within - // each column, but cross-column interleaving means we cannot assert - // all_array_trees < all_zones globally. - let mut all_metadata_offsets = all_array_tree_offsets; - all_metadata_offsets.extend(&all_zones_offsets); - assert_offsets_ordered( &all_data_offsets, - &all_metadata_offsets, - "global: all data segments should come before all metadata segments (array trees + zone maps)", + &all_zones_offsets, + "global: all data segments should come before all zone-map segments", ); Ok(()) @@ -2146,3 +2120,77 @@ async fn test_segment_ordering_array_trees_before_zones() -> VortexResult<()> { Ok(()) } + +#[tokio::test] +#[cfg_attr(miri, ignore)] +async fn test_roundtrip_array_tree_layout() -> VortexResult<()> { + // End-to-end coverage of the new ArrayTreeLayout: write a multi-column struct with + // `with_array_tree(true)`, then read it back through the override-registration path and + // assert the data matches. 
Exercises: + // - ArrayTreeCollectorStrategy collecting compact trees via the side-channel sink + // - ArrayTreeLayout::new_reader deriving a ctx with the source-injecting override + // - ArrayTreeFlatReader looking up its compact tree from ArrayTreesSource by segment_id + // - SerializedArray::from_flatbuffer_and_segment decoding the data segment + let mut ctx = SESSION.create_execution_ctx(); + + let n = 10_000; + let strings_in: Vec<&str> = (0..n).map(|i| ["alpha", "beta", "gamma"][i % 3]).collect(); + let strings = VarBinArray::from(strings_in.clone()).into_array(); + let numbers_in: Vec = (0..n as i32).collect(); + let numbers = PrimitiveArray::from_iter(numbers_in.iter().copied()).into_array(); + + let st = StructArray::from_fields(&[("strings", strings), ("numbers", numbers)])?.into_array(); + let dtype = st.dtype().clone(); + + let mut buf = ByteBufferMut::empty(); + let strategy = crate::WriteStrategyBuilder::default() + .with_array_tree(true) + .build(); + SESSION + .write_options() + .with_strategy(strategy) + .write(&mut buf, st.to_array_stream()) + .await?; + + // Sanity-check that we actually wrote ArrayTreeLayout nodes — otherwise the test would + // silently pass on the default code path. + let file = SESSION.open_options().open_buffer(buf)?; + fn has_array_tree(layout: &dyn Layout) -> bool { + if layout.encoding_id().as_ref() == "vortex.array_tree" { + return true; + } + layout + .children() + .map(|cs| cs.iter().any(|c| has_array_tree(c.as_ref()))) + .unwrap_or(false) + } + assert!( + has_array_tree(file.footer().layout().as_ref()), + "test expected ArrayTreeLayout in the written file" + ); + + // Read back and assert structure + data round-trip cleanly. 
+ let result = file.scan()?.into_array_stream()?.read_all().await?; + assert_eq!(result.len(), n); + assert_eq!(result.dtype(), &dtype); + + let struct_array = result.execute::(&mut ctx)?; + + let read_numbers = struct_array.unmasked_field_by_name("numbers").cloned()?; + let expected_numbers = PrimitiveArray::from_iter(numbers_in.iter().copied()).into_array(); + assert_arrays_eq!(read_numbers, expected_numbers); + + let read_strings = struct_array + .unmasked_field_by_name("strings") + .cloned()? + .execute::(&mut ctx)? + .with_iterator(|iter| { + iter.map(|s| s.map(|st| unsafe { String::from_utf8_unchecked(st.to_vec()) })) + .collect::>() + }); + let expected_strings: Vec> = + strings_in.iter().map(|s| Some((*s).to_string())).collect(); + assert_eq!(read_strings, expected_strings); + + Ok(()) +} diff --git a/vortex-layout/src/display.rs b/vortex-layout/src/display.rs index d6f51e8f1e2..3d2a8bb5a0a 100644 --- a/vortex-layout/src/display.rs +++ b/vortex-layout/src/display.rs @@ -262,7 +262,7 @@ mod tests { /// Test display_tree for a struct layout (fallback rendering, no inline array_tree). #[test] - fn test_display_tree_inline_array_tree() { + fn test_display_tree_struct_layout_fallback() { block_on(|handle| async move { let session = SESSION.clone().with_handle(handle); let ctx = ArrayContext::empty(); diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs index 579f8e805f5..587bfeea780 100644 --- a/vortex-layout/src/layouts/array_tree/mod.rs +++ b/vortex-layout/src/layouts/array_tree/mod.rs @@ -124,9 +124,7 @@ impl ArrayTreeLayout { ) -> VortexResult { // Construct the array_trees auxiliary reader using the unmodified incoming context — // the array_trees subtree is a vanilla struct of (u32, bytes) and needs no overrides. 
- let array_trees_child = self - .children - .child(1, &Self::array_trees_dtype())?; + let array_trees_child = self.children.child(1, &Self::array_trees_dtype())?; let trees_reader = array_trees_child.new_reader_in_ctx( Arc::from(format!("{name}/array_trees")), segment_source, diff --git a/vortex-web/crate/src/wasm.rs b/vortex-web/crate/src/wasm.rs index b260dd5302d..201101bbada 100644 --- a/vortex-web/crate/src/wasm.rs +++ b/vortex-web/crate/src/wasm.rs @@ -316,6 +316,7 @@ impl VortexFileHandle { node_id, Some((&segment_source, &self.session)), ) + .map_err(|e| JsValue::from_str(&e.to_string()))? .ok_or_else(|| JsValue::from_str(&format!("Layout node not found: {node_id}")))?; let reader = layout @@ -709,7 +710,12 @@ fn build_array_encoding_tree_from_array( /// IDs match the format: "root.field_name.chunked.[0]" where each segment /// corresponds to a `LayoutChildType::name()`. fn find_layout_by_id(root: &LayoutRef, node_id: &str) -> Option { - find_layout_with_ctx(root, node_id, None).map(|(layout, _)| layout) + match find_layout_with_ctx(root, node_id, None) { + Ok(Some((layout, _))) => Some(layout), + Ok(None) => None, + // No derivation requested → derive_reader_ctx is never called, so this can't fail. + Err(_) => None, + } } /// Like [`find_layout_by_id`], but also builds a [`LayoutReaderContext`] that registers a @@ -726,36 +732,55 @@ fn find_layout_by_id(root: &LayoutRef, node_id: &str) -> Option { /// Pass `Some((segment_source, session))` to actually derive the context. When passed /// `None`, the function still navigates the tree and returns an empty context — useful for /// callers that only need the layout reference. +/// +/// Return shape: +/// - `Ok(Some((layout, ctx)))` — target found. +/// - `Ok(None)` — `node_id` does not resolve to a layout in the tree (bad path). +/// - `Err(e)` — `derive_reader_ctx` failed on some ancestor (e.g. the trees segment is +/// unreadable). 
Surfaces the real cause instead of silently producing a ctx that would +/// make the leaf bail with "missing override". fn find_layout_with_ctx( root: &LayoutRef, node_id: &str, - derive: Option<(&Arc, &VortexSession)>, -) -> Option<(LayoutRef, LayoutReaderContext)> { + derive: Option<( + &Arc, + &VortexSession, + )>, +) -> VortexResult> { let segments: Vec<&str> = node_id.split('.').collect(); if segments.is_empty() || segments[0] != "root" { - return None; + return Ok(None); } let mut current = root.clone(); let mut current_id = String::from("root"); let mut ctx = LayoutReaderContext::new(); - let maybe_derive = |layout: &LayoutRef, name: &str, ctx: &LayoutReaderContext| { + // For every layout we walk through, if it's an ArrayTree and we have the deps to derive, + // overlay its source-injecting override onto the ctx; any other layout is a passthrough. + let try_derive = |layout: &LayoutRef, + name: &str, + ctx: &LayoutReaderContext| + -> VortexResult { let Some((segment_source, session)) = derive else { - return Some(ctx.clone()); + return Ok(ctx.clone()); }; - layout.as_opt::().and_then(|atl| { - atl.derive_reader_ctx(name, Arc::clone(segment_source), session, ctx).ok() - }).or_else(|| Some(ctx.clone())) + match layout.as_opt::() { + Some(atl) => atl.derive_reader_ctx(name, Arc::clone(segment_source), session, ctx), + None => Ok(ctx.clone()), + } }; - ctx = maybe_derive(&current, &current_id, &ctx)?; + ctx = try_derive(&current, &current_id, &ctx)?; if segments.len() == 1 { - return Some((current, ctx)); + return Ok(Some((current, ctx))); } for seg in &segments[1..]
{ - let children = current.children().ok()?; + let children = match current.children() { + Ok(c) => c, + Err(_) => return Ok(None), + }; let mut found = None; for (i, child) in children.into_iter().enumerate() { let name = current.child_type(i).name(); @@ -764,13 +789,13 @@ fn find_layout_with_ctx( break; } } - let child = found?; + let Some(child) = found else { return Ok(None) }; current = child; current_id.push('.'); current_id.push_str(seg); - ctx = maybe_derive(&current, &current_id, &ctx)?; + ctx = try_derive(&current, &current_id, &ctx)?; } - Some((current, ctx)) + Ok(Some((current, ctx))) } /// Downgrade Arrow `*View` types to their non-view equivalents so the JS From 6bba33adfa394d66fda5ef2f20f8dd106730257f Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Tue, 19 May 2026 15:40:05 +0100 Subject: [PATCH 09/10] array tree array to be a columnar vortex array, not fb bytes Signed-off-by: Onur Satici --- vortex-array/src/serde.rs | 580 ++++++++++++++++++ vortex-layout/src/layouts/array_tree/flat.rs | 39 +- vortex-layout/src/layouts/array_tree/mod.rs | 261 +++++++- .../src/layouts/array_tree/reader.rs | 8 +- .../src/layouts/array_tree/writer.rs | 342 ++++++++--- 5 files changed, 1110 insertions(+), 120 deletions(-) diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index 8c1614610af..b8d2fa87659 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -761,3 +761,583 @@ impl TryFrom for SerializedArray { Self::try_from(value.try_to_host_sync()?) } } + +// ============================================================================= +// Columnar serialization (parallel to SerializedArray, no flatbuffer involved) +// ============================================================================= +// +// `SerializedArray` parses a per-chunk flatbuffer (`fba::Array`) and navigates it via +// vtables.
That format lives in the data segment's trailing buffer for `FlatLayout` and +// inside the `array_trees` auxiliary segment of `ArrayTreeLayout` files written by older +// builds. +// +// `ColumnarSerializedArray` is the parallel decode entry point for `ArrayTreeLayout` files +// where the consolidated `array_trees` segment uses a columnar struct-of-Lists encoding +// instead of opaque flatbuffer blobs. The plugin contract (`ArrayChildren` trait + +// `plugin.deserialize(dtype, len, metadata, buffers, children, session)`) doesn't care +// which source the metadata/buffers/children come from, so this type implements the same +// decode flow without ever constructing or parsing a flatbuffer. + +/// Per-node statistics from the consolidated columnar form. `None` means the +/// writer didn't persist any stats for that node. +pub type ColumnarNodeStats = Option; + +/// Per-chunk slice of the columnar consolidated tree, shared by all `ColumnarSerializedArray` +/// nodes within the chunk via `Arc`. +/// +/// Every `Vec` here is "per node" in pre-order traversal of the encoding tree, except +/// `buffer_padding` / `buffer_alignment_exponent` / `buffer_length`, which are flat across all nodes +/// (each node owns a contiguous run of `buffers_per_node[i]` entries). The `subtree_sizes` +/// and `buffer_offsets` fields are precomputed at materialization time for O(1) navigation. +#[derive(Debug)] +pub struct ColumnarChunkData { + /// Encoding id (as an interned u16 in the file's `ArrayContext`) per node. + pub encoding_ids: Vec, + /// Number of direct children of each node. + pub child_counts: Vec, + /// Opaque encoding-specific metadata bytes per node. + pub node_metadata: Vec, + /// Number of buffers owned by each node (their descriptors live in the flat arrays + /// below, starting at `buffer_offsets[i]`). + pub buffers_per_node: Vec, + /// Per-buffer descriptors, concatenated across all nodes in the same pre-order.
+ pub buffer_padding: Vec, + pub buffer_alignment_exponent: Vec, + pub buffer_length: Vec, + /// Per-node statistics. + pub stats: Vec, + /// Cumulative subtree size starting at each node (subtree_sizes[i] == 1 + sum of + /// subtree sizes of direct children of i). Precomputed for O(1) `child(idx)`. + pub subtree_sizes: Vec, + /// Cumulative buffer count up to each node (buffer_offsets[i] == sum of + /// buffers_per_node[0..i]). Precomputed for O(1) buffer slicing. + pub buffer_offsets: Vec, +} + +impl ColumnarChunkData { + /// Compute `subtree_sizes` from `child_counts` via a single right-to-left pass. + /// + /// This works because in pre-order traversal, a node's subtree occupies a contiguous + /// range of indices starting at the node, and a node's subtree size is determined by + /// itself + the sum of its children's subtree sizes. + fn compute_subtree_sizes(child_counts: &[u8]) -> Vec { + let n = child_counts.len(); + let mut sizes = vec![0u32; n]; + // Right-to-left: when we visit node i, all its descendants have already been + // visited. Walk children by stepping forward by the previously-computed subtree + // size of each child. + for i in (0..n).rev() { + let mut total = 1u32; + let mut cursor = i + 1; + for _ in 0..child_counts[i] { + let child_size = sizes[cursor]; + total += child_size; + cursor += child_size as usize; + } + sizes[i] = total; + } + sizes + } + + /// Compute `buffer_offsets` as a prefix sum of `buffers_per_node`. + fn compute_buffer_offsets(buffers_per_node: &[u16]) -> Vec { + let mut offsets = Vec::with_capacity(buffers_per_node.len()); + let mut acc = 0u32; + for &n in buffers_per_node { + offsets.push(acc); + acc += n as u32; + } + offsets + } + + /// Construct a chunk with auto-computed `subtree_sizes` and `buffer_offsets`. 
+ #[allow(clippy::too_many_arguments)] + pub fn new( + encoding_ids: Vec, + child_counts: Vec, + node_metadata: Vec, + buffers_per_node: Vec, + buffer_padding: Vec, + buffer_alignment_exponent: Vec, + buffer_length: Vec, + stats: Vec, + ) -> VortexResult { + let n = encoding_ids.len(); + if child_counts.len() != n + || node_metadata.len() != n + || buffers_per_node.len() != n + || stats.len() != n + { + vortex_bail!( + "ColumnarChunkData per-node columns must all have length {} (got encoding={}, child_counts={}, node_metadata={}, buffers_per_node={}, stats={})", + n, + encoding_ids.len(), + child_counts.len(), + node_metadata.len(), + buffers_per_node.len(), + stats.len(), + ); + } + let total_buffers: usize = buffers_per_node.iter().map(|&b| b as usize).sum(); + if buffer_padding.len() != total_buffers + || buffer_alignment_exponent.len() != total_buffers + || buffer_length.len() != total_buffers + { + vortex_bail!( + "ColumnarChunkData per-buffer columns must all have length {} (got padding={}, alignment={}, length={})", + total_buffers, + buffer_padding.len(), + buffer_alignment_exponent.len(), + buffer_length.len(), + ); + } + let subtree_sizes = Self::compute_subtree_sizes(&child_counts); + let buffer_offsets = Self::compute_buffer_offsets(&buffers_per_node); + Ok(Self { + encoding_ids, + child_counts, + node_metadata, + buffers_per_node, + buffer_padding, + buffer_alignment_exponent, + buffer_length, + stats, + subtree_sizes, + buffer_offsets, + }) + } + + /// Number of nodes in the tree. + pub fn nnodes(&self) -> usize { + self.encoding_ids.len() + } +} + +/// Parallel to [`SerializedArray`] but sourced from a columnar representation of the +/// encoding tree rather than a flatbuffer. +/// +/// Holds a per-chunk `Arc` plus a `node_index` that identifies the +/// current node within the tree. `child(idx)` returns a new `ColumnarSerializedArray` +/// pointing at the requested child by computing the child's pre-order index from +/// `subtree_sizes`. 
+/// +/// `decode()` performs the same plugin dispatch as `SerializedArray::decode`, just sourcing +/// metadata/buffers/stats from the columnar chunk data. +#[derive(Clone)] +pub struct ColumnarSerializedArray { + chunk: Arc, + node_index: usize, + buffers: Arc<[BufferHandle]>, +} + +impl Debug for ColumnarSerializedArray { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ColumnarSerializedArray") + .field("encoding_id", &self.encoding_id()) + .field("node_index", &self.node_index) + .field("nchildren", &self.nchildren()) + .field("nbuffers", &self.nbuffers()) + .finish() + } +} + +impl ColumnarSerializedArray { + /// Construct a new root-level `ColumnarSerializedArray` for a chunk. + pub fn new(chunk: Arc, buffers: Arc<[BufferHandle]>) -> VortexResult { + if chunk.nnodes() == 0 { + vortex_bail!("ColumnarChunkData must have at least one node"); + } + Ok(Self { + chunk, + node_index: 0, + buffers, + }) + } + + /// Slice the data-buffer prefix of a segment into per-buffer handles using the + /// descriptors in `chunk`, then construct a root-level `ColumnarSerializedArray`. + /// + /// Works for segments produced by both [`SegmentMode::Inline`] (which appends a + /// flatbuffer + length suffix after the data buffers) and [`SegmentMode::DataOnly`]: + /// the chunk's descriptors only describe the data prefix, so the trailing inline + /// flatbuffer (if any) is simply ignored. 
+ pub fn from_segment_and_chunk( + segment: BufferHandle, + chunk: Arc, + ) -> VortexResult { + let segment = segment.ensure_aligned(Alignment::none())?; + let n_buffers = chunk.buffer_length.len(); + let mut handles: Vec = Vec::with_capacity(n_buffers); + let mut offset = 0; + for i in 0..n_buffers { + offset += chunk.buffer_padding[i] as usize; + let buffer_len = chunk.buffer_length[i] as usize; + let alignment = Alignment::from_exponent(chunk.buffer_alignment_exponent[i]); + let buffer = segment.slice(offset..(offset + buffer_len)); + handles.push(buffer.ensure_aligned(alignment)?); + offset += buffer_len; + } + Self::new(chunk, Arc::from(handles)) + } + + /// Returns the encoding id (as the interned `u16` in the file's `ArrayContext`) of the + /// current node. + pub fn encoding_id(&self) -> u16 { + self.chunk.encoding_ids[self.node_index] + } + + /// Returns the metadata bytes for the current node. + pub fn metadata(&self) -> &[u8] { + self.chunk.node_metadata[self.node_index].as_slice() + } + + /// Returns the number of direct children of the current node. + pub fn nchildren(&self) -> usize { + self.chunk.child_counts[self.node_index] as usize + } + + /// Returns a `ColumnarSerializedArray` pointing at the `idx`th direct child of the + /// current node. + pub fn child(&self, idx: usize) -> ColumnarSerializedArray { + let n_children = self.nchildren(); + if idx >= n_children { + vortex_panic!( + "Invalid child index {} for node with {} children", + idx, + n_children + ); + } + // Children are laid out in pre-order immediately after the current node. The first + // child is at node_index + 1; each subsequent child sits at the previous child's + // index + that child's subtree size. + let mut cursor = self.node_index + 1; + for _ in 0..idx { + cursor += self.chunk.subtree_sizes[cursor] as usize; + } + Self { + chunk: Arc::clone(&self.chunk), + node_index: cursor, + buffers: Arc::clone(&self.buffers), + } + } + + /// Number of buffers owned by the current node. 
+ pub fn nbuffers(&self) -> usize { + self.chunk.buffers_per_node[self.node_index] as usize + } + + /// Return the slice of buffer handles owned by the current node. + fn node_buffers(&self) -> VortexResult<&[BufferHandle]> { + let start = self.chunk.buffer_offsets[self.node_index] as usize; + let count = self.nbuffers(); + self.buffers.get(start..start + count).ok_or_else(|| { + vortex_err!( + "buffer indices {}..{} out of range for {} buffers", + start, + start + count, + self.buffers.len(), + ) + }) + } + + /// Decode this node into an `ArrayRef` using the same plugin contract as + /// [`SerializedArray::decode`]. + pub fn decode( + &self, + dtype: &DType, + len: usize, + ctx: &ReadContext, + session: &VortexSession, + ) -> VortexResult { + let encoding_idx = self.encoding_id(); + let encoding_id = ctx + .resolve(encoding_idx) + .ok_or_else(|| vortex_err!("Unknown encoding index: {}", encoding_idx))?; + let plugin = session + .arrays() + .registry() + .find(&encoding_id) + .ok_or_else(|| vortex_err!("Unknown encoding: {}", encoding_id))?; + + let buffers = self.node_buffers()?; + let children = ColumnarSerializedArrayChildren { + ser: self, + ctx, + session, + }; + + let decoded = + plugin.deserialize(dtype, len, self.metadata(), buffers, &children, session)?; + + assert_eq!( + decoded.len(), + len, + "Array decoded from {} has incorrect length {}, expected {}", + encoding_id, + decoded.len(), + len + ); + assert_eq!( + decoded.dtype(), + dtype, + "Array decoded from {} has incorrect dtype {}, expected {}", + encoding_id, + decoded.dtype(), + dtype, + ); + assert!( + plugin.is_supported_encoding(&decoded.encoding_id()), + "Array decoded from {} has incorrect encoding {}", + encoding_id, + decoded.encoding_id(), + ); + + // Populate statistics from the columnar chunk data. 
+ if let Some(stats) = &self.chunk.stats[self.node_index] { + decoded.statistics().set_iter(stats.clone().into_iter()); + } + + Ok(decoded) + } +} + +/// Determines the on-disk shape of an array tree leaf's data segment when used alongside +/// the columnar consolidated array_trees segment. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SegmentMode { + /// Compat mode. Data buffers + combined flatbuffer (tree + per-node stats) + u32 + /// length suffix. Layout-compatible with `FlatLayout`; any reader that parses a flat + /// segment will work standalone, with the columnar consolidated acting as a + /// performance optimization. + Inline, + /// Skip-inline mode. Data buffers only, no trailing flatbuffer, no length suffix. + /// Segments are not self-contained — the consolidated columnar array_trees segment + /// is the only source of encoding metadata. Smaller files but only readable through + /// the array-tree-aware reader path. + DataOnly, +} + +/// Walk `array` once and produce both: +/// 1. The data segment buffer list (shape depends on `mode`). +/// 2. A `ColumnarChunkData` capturing the encoding tree + per-node stats + buffer +/// descriptors in the columnar form consumed by [`ColumnarSerializedArray`]. +/// +/// This is the writer-side entry point used by `ArrayTreeFlatStrategy`. The single walk +/// avoids the previous "serialize for inline AND serialize_array_tree for side channel" +/// double traversal. +pub fn serialize_with_columnar_chunk( + array: &ArrayRef, + ctx: &ArrayContext, + session: &VortexSession, + options: &SerializeOptions, + mode: SegmentMode, +) -> VortexResult<(Vec, ColumnarChunkData)> { + // Single DFS walk: collect per-node columnar data and all buffers in pre-order. 
+ let mut encoding_ids = Vec::new(); + let mut child_counts = Vec::new(); + let mut node_metadata = Vec::new(); + let mut buffers_per_node = Vec::new(); + let mut stats = Vec::new(); + let mut array_buffers = Vec::new(); + + for node in array.depth_first_traversal() { + let encoding_idx = ctx.intern(&node.encoding_id()).ok_or_else(|| { + vortex_err!("Array encoding {} not permitted by ctx", node.encoding_id()) + })?; + encoding_ids.push(encoding_idx); + + let n_children = u8::try_from(node.nchildren()) + .map_err(|_| vortex_err!("Array node has more than u8::MAX children"))?; + child_counts.push(n_children); + + let metadata_bytes = session.array_serialize(&node)?.ok_or_else(|| { + vortex_err!( + "Array {} does not support serialization", + node.encoding_id() + ) + })?; + node_metadata.push(ByteBuffer::from(metadata_bytes)); + + let node_bufs = node.buffers(); + let n_buffers = u16::try_from(node_bufs.len()) + .map_err(|_| vortex_err!("Array node has more than u16::MAX buffers"))?; + buffers_per_node.push(n_buffers); + + // Capture per-node stats. `to_owned` snapshots the current StatsSet contents; we + // store `None` when no stats are present so the read side can distinguish "no + // stats persisted" from "all stats happen to be empty". + let stats_set = node.statistics().to_owned(); + stats.push(if stats_set.is_empty() { + None + } else { + Some(stats_set) + }); + + array_buffers.extend(node_bufs); + } + + // Per-buffer descriptors, computed with the same padding rules as `serialize()` so + // the columnar form points at the same byte offsets the inline path uses. 
+ let fb_buffers = collect_buffer_descriptors(&array_buffers, options)?; + let buffer_padding: Vec = fb_buffers.iter().map(|b| b.padding()).collect(); + let buffer_alignment_exponent: Vec = + fb_buffers.iter().map(|b| b.alignment_exponent()).collect(); + let buffer_length: Vec = fb_buffers.iter().map(|b| b.length()).collect(); + + let chunk = ColumnarChunkData::new( + encoding_ids, + child_counts, + node_metadata, + buffers_per_node, + buffer_padding, + buffer_alignment_exponent, + buffer_length, + stats, + )?; + + // Assemble the segment buffer list. The data-buffer prefix is identical between + // Inline and DataOnly modes; the modes differ only in whether the trailing combined + // flatbuffer + length suffix is appended. + let max_alignment = array_buffers + .iter() + .map(|buf| buf.alignment()) + .chain(iter::once(FlatBuffer::alignment())) + .max() + .unwrap_or_else(FlatBuffer::alignment); + let zeros = ByteBuffer::zeroed(*max_alignment); + + let mut buffers = vec![ByteBuffer::zeroed_aligned(0, max_alignment)]; + let mut pos = options.offset; + for buffer in &array_buffers { + if options.include_padding { + let padding = pos.next_multiple_of(*buffer.alignment()) - pos; + if padding > 0 { + pos += padding; + buffers.push(zeros.slice(0..padding)); + } + } + pos += buffer.len(); + buffers.push(buffer.clone().aligned(Alignment::none())); + } + + if matches!(mode, SegmentMode::Inline) { + // Inline path: append the combined flatbuffer (tree + stats) and a u32 length + // suffix so the segment is parseable by the standard `SerializedArray::try_from` + // path. Bytes here are byte-identical to today's `ArrayRef::serialize` output. 
+ let fb_buffer = build_array_flatbuffer(ctx, session, array, fb_buffers, false)?; + let fb_length = fb_buffer.len(); + if options.include_padding { + let padding = pos.next_multiple_of(*FlatBuffer::alignment()) - pos; + if padding > 0 { + buffers.push(zeros.slice(0..padding)); + } + } + buffers.push(fb_buffer); + buffers.push(ByteBuffer::from( + u32::try_from(fb_length) + .map_err(|_| { + vortex_err!( + "Array metadata flatbuffer must fit into u32 for serialization. Array encoding tree is too large." + ) + })? + .to_le_bytes() + .to_vec(), + )); + } + + Ok((buffers, chunk)) +} + +struct ColumnarSerializedArrayChildren<'a> { + ser: &'a ColumnarSerializedArray, + ctx: &'a ReadContext, + session: &'a VortexSession, +} + +impl ArrayChildren for ColumnarSerializedArrayChildren<'_> { + fn get(&self, index: usize, dtype: &DType, len: usize) -> VortexResult { + self.ser + .child(index) + .decode(dtype, len, self.ctx, self.session) + } + + fn len(&self) -> usize { + self.ser.nchildren() + } +} + +#[cfg(test)] +mod columnar_tests { + use super::*; + + /// Tree shape: + /// 0 (root, 2 children) + /// ├── 1 (leaf) + /// └── 2 (1 child) + /// └── 3 (leaf) + /// Subtree sizes: [4, 1, 2, 1]. + #[test] + fn subtree_sizes_basic() -> VortexResult<()> { + let child_counts = vec![2u8, 0, 1, 0]; + let sizes = ColumnarChunkData::compute_subtree_sizes(&child_counts); + assert_eq!(sizes, vec![4, 1, 2, 1]); + Ok(()) + } + + /// Single-node tree. + #[test] + fn subtree_sizes_leaf() -> VortexResult<()> { + let sizes = ColumnarChunkData::compute_subtree_sizes(&[0u8]); + assert_eq!(sizes, vec![1]); + Ok(()) + } + + /// Deeply nested tree (left-skewed): + /// 0 -> 1 -> 2 -> 3 (leaf) + /// Subtree sizes: [4, 3, 2, 1]. 
+ #[test] + fn subtree_sizes_skewed() -> VortexResult<()> { + let sizes = ColumnarChunkData::compute_subtree_sizes(&[1u8, 1, 1, 0]); + assert_eq!(sizes, vec![4, 3, 2, 1]); + Ok(()) + } + + #[test] + fn buffer_offsets_basic() { + let offsets = ColumnarChunkData::compute_buffer_offsets(&[2u16, 0, 3, 1]); + assert_eq!(offsets, vec![0, 2, 2, 5]); + } + + /// Child navigation: from root (idx 0) of a tree + /// 0 [2 children] + /// ├── 1 [leaf] + /// └── 2 [1 child] + /// └── 3 [leaf] + /// expect child(0) -> node 1, child(1) -> node 2. Then from node 2, child(0) -> node 3. + #[test] + fn child_navigation() -> VortexResult<()> { + let chunk = Arc::new(ColumnarChunkData::new( + vec![0u16, 1, 2, 3], + vec![2u8, 0, 1, 0], + vec![ByteBuffer::empty(); 4], + vec![0u16; 4], + vec![], + vec![], + vec![], + vec![None; 4], + )?); + let root = ColumnarSerializedArray::new(chunk, Arc::new([]))?; + assert_eq!(root.encoding_id(), 0); + assert_eq!(root.nchildren(), 2); + let c0 = root.child(0); + assert_eq!(c0.encoding_id(), 1); + assert_eq!(c0.nchildren(), 0); + let c1 = root.child(1); + assert_eq!(c1.encoding_id(), 2); + assert_eq!(c1.nchildren(), 1); + let c1c0 = c1.child(0); + assert_eq!(c1c0.encoding_id(), 3); + assert_eq!(c1c0.nchildren(), 0); + Ok(()) + } +} diff --git a/vortex-layout/src/layouts/array_tree/flat.rs b/vortex-layout/src/layouts/array_tree/flat.rs index a6f6f9bfdb1..b49031c1b95 100644 --- a/vortex-layout/src/layouts/array_tree/flat.rs +++ b/vortex-layout/src/layouts/array_tree/flat.rs @@ -3,8 +3,10 @@ use std::sync::Arc; +use parking_lot::Mutex; use vortex_array::EmptyMetadata; use vortex_array::dtype::DType; +use vortex_array::serde::ColumnarChunkData; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_panic; @@ -47,18 +49,42 @@ pub struct ArrayTreeFlatLayoutEncoding; #[derive(Clone, Debug)] pub struct ArrayTreeFlatLayout { inner: FlatLayout, + /// Transient write-time state: the leaf strategy attaches its [`ColumnarChunkData`] 
for + /// the collector to pluck via [`Self::take_chunk`]. Wrapped in `Mutex>` so the + /// collector can take ownership cheaply during its post-write walk. Read-path + /// construction (via the layout's `build` method) leaves this `None`; the field is never + /// serialized to disk. + chunk: Arc>>, } impl ArrayTreeFlatLayout { - /// Creates a new layout from the inner flat layout. + /// Creates a new layout from the inner flat layout without any attached chunk data. pub fn new(inner: FlatLayout) -> Self { - Self { inner } + Self { + inner, + chunk: Arc::new(Mutex::new(None)), + } + } + + /// Creates a new layout from the inner flat layout with attached transient + /// [`ColumnarChunkData`]. Used only by the array-tree writer; the chunk is consumed by + /// the collector and never serialized. + pub fn with_chunk(inner: FlatLayout, chunk: ColumnarChunkData) -> Self { + Self { + inner, + chunk: Arc::new(Mutex::new(Some(chunk))), + } } /// Returns the inner flat layout. pub fn inner(&self) -> &FlatLayout { &self.inner } + + /// Take ownership of any attached transient chunk data, leaving `None` behind. 
+ pub fn take_chunk(&self) -> Option { + self.chunk.lock().take() + } } impl VTable for ArrayTreeFlat { @@ -133,8 +159,11 @@ impl VTable for ArrayTreeFlat { if segment_ids.len() != 1 { vortex_bail!("ArrayTreeFlatLayout must have exactly one segment ID"); } - Ok(ArrayTreeFlatLayout { - inner: FlatLayout::new(row_count, dtype.clone(), segment_ids[0], ctx.clone()), - }) + Ok(ArrayTreeFlatLayout::new(FlatLayout::new( + row_count, + dtype.clone(), + segment_ids[0], + ctx.clone(), + ))) } } diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs index 587bfeea780..26865f3dec9 100644 --- a/vortex-layout/src/layouts/array_tree/mod.rs +++ b/vortex-layout/src/layouts/array_tree/mod.rs @@ -17,9 +17,12 @@ use vortex_array::EmptyMetadata; use vortex_array::Executable; use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; +use vortex_array::arrays::ListViewArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::arrays::VarBinViewArray; +use vortex_array::arrays::list::ListArrayExt; +use vortex_array::arrays::listview::list_from_list_view; use vortex_array::arrays::struct_::StructArrayExt; use vortex_array::dtype::DType; use vortex_array::dtype::FieldName; @@ -27,6 +30,7 @@ use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; use vortex_array::dtype::StructFields; use vortex_array::expr::root; +use vortex_array::serde::ColumnarChunkData; use vortex_buffer::ByteBuffer; use vortex_error::SharedVortexResult; use vortex_error::VortexExpect; @@ -88,20 +92,115 @@ impl ArrayTreeLayout { } /// Returns the dtype of the auxiliary `array_trees` child. 
+ /// + /// The consolidated form is a struct array with one row per data-segment chunk: + /// ```text + /// Struct { + /// segment_id: u32, + /// nodes: List, + /// buffers: List, + /// } + /// ``` + /// Each row's `nodes` list traverses the chunk's encoding tree in pre-order; each + /// row's `buffers` list concatenates per-node buffer descriptors in the same order. pub fn array_trees_dtype() -> DType { + let nn = Nullability::NonNullable; + let nullable = Nullability::Nullable; + let prim = |p: PType, n: Nullability| DType::Primitive(p, n); + + let node_struct = DType::Struct( + StructFields::new( + vec![ + FieldName::from("encoding_id"), + FieldName::from("child_count"), + FieldName::from("metadata"), + FieldName::from("buffers_per_node"), + FieldName::from("stat_min"), + FieldName::from("stat_min_precision"), + FieldName::from("stat_max"), + FieldName::from("stat_max_precision"), + FieldName::from("stat_sum"), + FieldName::from("stat_null_count"), + FieldName::from("stat_nan_count"), + FieldName::from("stat_uncompressed_size_in_bytes"), + FieldName::from("stat_is_constant"), + FieldName::from("stat_is_sorted"), + FieldName::from("stat_is_strict_sorted"), + ] + .into(), + vec![ + prim(PType::U16, nn), + prim(PType::U8, nn), + DType::Binary(nn), + prim(PType::U16, nn), + DType::Binary(nullable), + prim(PType::U8, nullable), + DType::Binary(nullable), + prim(PType::U8, nullable), + DType::Binary(nullable), + prim(PType::U64, nullable), + prim(PType::U64, nullable), + prim(PType::U64, nullable), + DType::Bool(nullable), + DType::Bool(nullable), + DType::Bool(nullable), + ], + ), + nn, + ); + + let buffer_struct = DType::Struct( + StructFields::new( + vec![ + FieldName::from("padding"), + FieldName::from("alignment_exponent"), + FieldName::from("length"), + ] + .into(), + vec![ + prim(PType::U16, nn), + prim(PType::U8, nn), + prim(PType::U32, nn), + ], + ), + nn, + ); + DType::Struct( StructFields::new( vec![ FieldName::from("segment_id"), - 
FieldName::from("compact_tree"), + FieldName::from("nodes"), + FieldName::from("buffers"), ] .into(), vec![ - DType::Primitive(PType::U32, Nullability::NonNullable), - DType::Binary(Nullability::NonNullable), + prim(PType::U32, nn), + DType::List(Arc::new(node_struct), nn), + DType::List(Arc::new(buffer_struct), nn), ], ), - Nullability::NonNullable, + nn, ) } @@ -262,12 +361,16 @@ pub struct ArrayTreesSource { } type SharedSegmentMapFuture = futures::future::Shared< - futures::future::BoxFuture<'static, SharedVortexResult>>>, + futures::future::BoxFuture< + 'static, + SharedVortexResult>>>, + >, >; /// Future returned by [`ArrayTreesSource::get_for_segment`]. -pub type SharedSegmentBufferFuture = - futures::future::Shared>>; +pub type SharedSegmentChunkFuture = futures::future::Shared< + futures::future::BoxFuture<'static, SharedVortexResult>>, +>; impl std::fmt::Debug for ArrayTreesSource { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -285,18 +388,18 @@ impl ArrayTreesSource { } } - /// Returns a future that resolves to the compact-tree byte buffer for the given data-leaf - /// segment ID. + /// Returns a future that resolves to the per-chunk columnar tree data for the given + /// data-leaf segment ID. /// - /// The first call triggers materialization of the entire struct array and the - /// segment-id-keyed lookup map; subsequent calls reuse the cached map. - pub fn get_for_segment(&self, segment_id: SegmentId) -> SharedSegmentBufferFuture { + /// The first call triggers materialization of the entire consolidated struct array and + /// the segment-id-keyed lookup map; subsequent calls reuse the cached map. 
+ pub fn get_for_segment(&self, segment_id: SegmentId) -> SharedSegmentChunkFuture { let map_fut = self.map_future(); async move { let map = map_fut.await?; map.get(&segment_id).cloned().ok_or_else(|| { Arc::new(vortex_err!( - "no compact array tree found for segment id {}", + "no columnar array tree found for segment id {}", *segment_id )) }) @@ -336,35 +439,143 @@ impl ArrayTreesSource { } } -/// Decode the array_trees struct array into a `HashMap`. +/// Decode the array_trees consolidated struct array into a per-segment lookup of +/// `ColumnarChunkData`. +/// +/// The consolidated struct layout is documented on [`ArrayTreeLayout::array_trees_dtype`]. +/// Each field may be in a compressed encoding (bitpacked `segment_id`, dict-coded +/// metadata, etc.) when read from a file whose array-trees strategy applies compression, so +/// we canonicalize each field via [`Executable::execute`] before downcasting. /// -/// The struct array's columns may be in compressed encodings (bitpacked `segment_id`, dict -/// `compact_tree`, etc.) when read from a file whose array-trees strategy applies compression, -/// so we canonicalize each field via [`Executable::execute`] before downcasting to the -/// concrete typed array. +/// Stats are not yet hydrated — they are written as all-null today (see the writer's +/// `build_consolidated_struct`). When stat columns are populated, this function will need +/// to materialize them into the `Vec>` accepted by `ColumnarChunkData`. fn build_segment_map( array: vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx, -) -> VortexResult> { +) -> VortexResult>> { let struct_array = StructArray::execute(array, ctx)?; let segment_ids_field = struct_array .unmasked_field_by_name_opt("segment_id") .ok_or_else(|| vortex_err!("array_trees missing 'segment_id' field"))? .clone(); - let trees_field = struct_array - .unmasked_field_by_name_opt("compact_tree") - .ok_or_else(|| vortex_err!("array_trees missing 'compact_tree' field"))? 
+ let nodes_field = struct_array + .unmasked_field_by_name_opt("nodes") + .ok_or_else(|| vortex_err!("array_trees missing 'nodes' field"))? + .clone(); + let buffers_field = struct_array + .unmasked_field_by_name_opt("buffers") + .ok_or_else(|| vortex_err!("array_trees missing 'buffers' field"))? .clone(); let segment_ids = PrimitiveArray::execute(segment_ids_field, ctx)?; let segment_ids = segment_ids.as_slice::(); - let trees = VarBinViewArray::execute(trees_field, ctx)?; + // ---- Nodes list ---- + + let nodes_list = list_from_list_view(nodes_field.execute::(ctx)?)?; + let nodes_inner = nodes_list.elements().clone(); + let nodes_inner_struct = StructArray::execute(nodes_inner, ctx)?; + + let encoding_id_all = PrimitiveArray::execute( + nodes_inner_struct + .unmasked_field_by_name_opt("encoding_id") + .ok_or_else(|| vortex_err!("nodes struct missing 'encoding_id' field"))? + .clone(), + ctx, + )?; + let encoding_id_all = encoding_id_all.as_slice::(); + + let child_count_all = PrimitiveArray::execute( + nodes_inner_struct + .unmasked_field_by_name_opt("child_count") + .ok_or_else(|| vortex_err!("nodes struct missing 'child_count' field"))? + .clone(), + ctx, + )?; + let child_count_all = child_count_all.as_slice::(); + + let buffers_per_node_all = PrimitiveArray::execute( + nodes_inner_struct + .unmasked_field_by_name_opt("buffers_per_node") + .ok_or_else(|| vortex_err!("nodes struct missing 'buffers_per_node' field"))? + .clone(), + ctx, + )?; + let buffers_per_node_all = buffers_per_node_all.as_slice::(); + + let metadata_all = VarBinViewArray::execute( + nodes_inner_struct + .unmasked_field_by_name_opt("metadata") + .ok_or_else(|| vortex_err!("nodes struct missing 'metadata' field"))? 
+ .clone(), + ctx, + )?; + + // ---- Buffers list ---- + + let buffers_list = list_from_list_view(buffers_field.execute::(ctx)?)?; + let buffers_inner = buffers_list.elements().clone(); + let buffers_inner_struct = StructArray::execute(buffers_inner, ctx)?; + + let padding_all = PrimitiveArray::execute( + buffers_inner_struct + .unmasked_field_by_name_opt("padding") + .ok_or_else(|| vortex_err!("buffers struct missing 'padding' field"))? + .clone(), + ctx, + )?; + let padding_all = padding_all.as_slice::(); + + let alignment_all = PrimitiveArray::execute( + buffers_inner_struct + .unmasked_field_by_name_opt("alignment_exponent") + .ok_or_else(|| vortex_err!("buffers struct missing 'alignment_exponent' field"))? + .clone(), + ctx, + )?; + let alignment_all = alignment_all.as_slice::(); + + let length_all = PrimitiveArray::execute( + buffers_inner_struct + .unmasked_field_by_name_opt("length") + .ok_or_else(|| vortex_err!("buffers struct missing 'length' field"))? + .clone(), + ctx, + )?; + let length_all = length_all.as_slice::(); let mut map = HashMap::with_capacity(segment_ids.len()); - for (idx, &seg) in segment_ids.iter().enumerate() { - map.insert(SegmentId::from(seg), trees.bytes_at(idx)); + for (row, &seg) in segment_ids.iter().enumerate() { + let n_start = nodes_list.offset_at(row)?; + let n_end = nodes_list.offset_at(row + 1)?; + let b_start = buffers_list.offset_at(row)?; + let b_end = buffers_list.offset_at(row + 1)?; + + let encoding_ids = encoding_id_all[n_start..n_end].to_vec(); + let child_counts = child_count_all[n_start..n_end].to_vec(); + let buffers_per_node = buffers_per_node_all[n_start..n_end].to_vec(); + let node_metadata: Vec = + (n_start..n_end).map(|j| metadata_all.bytes_at(j)).collect(); + + let buffer_padding = padding_all[b_start..b_end].to_vec(); + let buffer_alignment_exponent = alignment_all[b_start..b_end].to_vec(); + let buffer_length = length_all[b_start..b_end].to_vec(); + + let stats = vec![None; n_end - n_start]; + + let chunk = 
ColumnarChunkData::new( + encoding_ids, + child_counts, + node_metadata, + buffers_per_node, + buffer_padding, + buffer_alignment_exponent, + buffer_length, + stats, + )?; + map.insert(SegmentId::from(seg), Arc::new(chunk)); } Ok(map) } diff --git a/vortex-layout/src/layouts/array_tree/reader.rs b/vortex-layout/src/layouts/array_tree/reader.rs index 13ba90dc2e0..ee87c76e2c4 100644 --- a/vortex-layout/src/layouts/array_tree/reader.rs +++ b/vortex-layout/src/layouts/array_tree/reader.rs @@ -12,7 +12,7 @@ use vortex_array::VortexSessionExecute; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; -use vortex_array::serde::SerializedArray; +use vortex_array::serde::ColumnarSerializedArray; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_mask::Mask; @@ -143,7 +143,7 @@ impl ArrayTreeFlatReader { let segment_id = self.layout.inner().segment_id(); let segment_fut = self.segment_source.request(segment_id); - let compact_tree_fut = self.source.get_for_segment(segment_id); + let chunk_fut = self.source.get_for_segment(segment_id); let ctx = self.layout.inner().array_ctx().clone(); let session = self.session.clone(); @@ -151,8 +151,8 @@ impl ArrayTreeFlatReader { async move { let segment_fut = async move { segment_fut.await.map_err(Arc::new) }; - let (segment, compact_tree) = futures::try_join!(segment_fut, compact_tree_fut)?; - let parts = SerializedArray::from_flatbuffer_and_segment(compact_tree, segment) + let (segment, chunk) = futures::try_join!(segment_fut, chunk_fut)?; + let parts = ColumnarSerializedArray::from_segment_and_chunk(segment, chunk) .map_err(Arc::new)?; parts .decode(&dtype, row_count, &ctx, &session) diff --git a/vortex-layout/src/layouts/array_tree/writer.rs b/vortex-layout/src/layouts/array_tree/writer.rs index 75f0c7858f9..8858fbc936a 100644 --- a/vortex-layout/src/layouts/array_tree/writer.rs +++ b/vortex-layout/src/layouts/array_tree/writer.rs @@ -5,22 +5,27 @@ use 
std::sync::Arc; use async_trait::async_trait; use futures::StreamExt as _; -use parking_lot::Mutex; use vortex_array::ArrayContext; +use vortex_array::ArrayRef; use vortex_array::IntoArray; +use vortex_array::arrays::BoolArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; +use vortex_array::arrays::list::ListArray; use vortex_array::builders::ArrayBuilder; use vortex_array::builders::VarBinViewBuilder; use vortex_array::dtype::DType; -use vortex_array::dtype::FieldName; use vortex_array::dtype::Nullability; +use vortex_array::serde::ColumnarChunkData; +use vortex_array::serde::SegmentMode; use vortex_array::serde::SerializeOptions; +use vortex_array::serde::serialize_with_columnar_chunk; use vortex_array::validity::Validity; +use vortex_buffer::BitBufferMut; use vortex_buffer::Buffer; -use vortex_buffer::ByteBuffer; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_err; use vortex_session::VortexSession; use vortex_session::registry::ReadContext; @@ -28,6 +33,7 @@ use crate::IntoLayout; use crate::LayoutRef; use crate::LayoutStrategy; use crate::layouts::array_tree::ArrayTreeLayout; +use crate::layouts::array_tree::flat::ArrayTreeFlat; use crate::layouts::array_tree::flat::ArrayTreeFlatLayout; use crate::layouts::flat::FlatLayout; use crate::layouts::flat::writer::FlatLayoutStrategy; @@ -37,48 +43,37 @@ use crate::sequence::SendableSequentialStream; use crate::sequence::SequencePointer; use crate::sequence::SequentialArrayStreamExt; -/// Side channel for shipping `(segment_id, compact_tree)` pairs from leaf strategies to the -/// collector strategy. -/// -/// Each leaf pushes after `segment_sink.write` resolves (so the leaf's `SequenceId` has been -/// dropped before we touch the sink). The collector drains the sink only after the entire -/// data subtree has completed, which means every leaf has already pushed. 
-type Sink = Arc>>; - /// Creates a cooperating pair of strategies for array tree collection. /// -/// Returns `(collector, leaf)` where: -/// - `leaf` replaces [`FlatLayoutStrategy`] in the data pipeline — it serializes chunks, -/// produces compact flatbuffers, and pushes them onto the shared sink. -/// - `collector` wraps the data pipeline — after data is written, it drains the sink and -/// writes the collected pairs as a struct array (`{segment_id, compact_tree}`) via the -/// configured `array_trees_strategy`. +/// Returns `(collector, leaf)` where the leaf replaces [`FlatLayoutStrategy`] in the data +/// pipeline and attaches per-chunk [`ColumnarChunkData`] to each [`ArrayTreeFlatLayout`] it +/// produces. The collector wraps the data pipeline, walks the resulting data subtree to +/// extract those attached chunks, and writes them as a columnar struct array via the +/// configured `array_trees_strategy`. Attaching chunk data to the leaf layout (rather than +/// using a shared sink) keeps every collector invocation scoped to its own subtree, which +/// matters because [`crate::layouts::table::TableStrategy`] writes columns concurrently and +/// a shared sink would mix leaves across collectors. pub fn writer( flat: FlatLayoutStrategy, array_trees_strategy: Arc, ) -> (ArrayTreeCollectorStrategy, ArrayTreeFlatStrategy) { - let sink: Sink = Arc::new(Mutex::new(Vec::new())); - let leaf = ArrayTreeFlatStrategy { - flat, - sink: Arc::clone(&sink), - }; + let leaf = ArrayTreeFlatStrategy { flat }; let collector = ArrayTreeCollectorStrategy { child: None, array_trees_strategy, - sink, }; (collector, leaf) } /// Leaf strategy (TX) that replaces [`FlatLayoutStrategy`]. /// -/// For each chunk, it produces both the compact flatbuffer (encoding tree + buffer -/// descriptors, no stats) and the full data segment, then pushes `(segment_id, compact_tree)` -/// onto the shared sink for the collector to consume. 
+/// Walks each chunk's array tree once via [`serialize_with_columnar_chunk`] to produce both +/// the data-segment buffers (no inline trailing flatbuffer — see [`SegmentMode::DataOnly`]) +/// and a [`ColumnarChunkData`] which is attached to the returned [`ArrayTreeFlatLayout`] as +/// transient write-time state for the collector to extract. #[derive(Clone)] pub struct ArrayTreeFlatStrategy { flat: FlatLayoutStrategy, - sink: Sink, } #[async_trait] @@ -111,48 +106,40 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { chunk }; - // Produce the compact flatbuffer (no stats, with buffer descriptors). - let compact_tree = chunk.serialize_array_tree( + // Single walk: produce data-only segment buffers + columnar chunk data for the + // collector to consume. No trailing flatbuffer in the segment — the consolidated + // columnar array_trees segment is the sole source of encoding metadata for this + // leaf. + let (buffers, columnar_chunk) = serialize_with_columnar_chunk( + &chunk, &ctx, session, &SerializeOptions { offset: 0, include_padding: self.flat.include_padding, }, + SegmentMode::DataOnly, )?; - // Full serialization for the data segment. - let buffers = chunk.serialize( - &ctx, - session, - &SerializeOptions { - offset: 0, - include_padding: self.flat.include_padding, - }, - )?; - assert!(buffers.len() >= 2); - // IMPORTANT ORDERING CONSTRAINT: write the segment first, then push to the sink. - // - // `segment_sink.write` consumes our `SequenceId` and only drops it on return. Pushing - // to the sink before that point would risk holding the sink mutex while later leaves - // are blocked on `SequenceId::collapse`, creating a dependency from "later leaf is - // ready to write" → "earlier leaf must drop its SequenceId" → "earlier leaf must - // finish its sink push" → mutex contention with the later leaf. Doing the push after - // `await?` resolves means our SequenceId is already gone before we touch the sink. 
+ // `segment_sink.write` consumes our `SequenceId` and returns the id of the written + // segment. The columnar chunk is not pushed anywhere here; it is attached to the + // layout returned below, after the write has resolved. let segment_id = segment_sink.write(sequence_id, buffers).await?; - self.sink.lock().push((segment_id, compact_tree)); let None = stream.next().await else { vortex_bail!("array tree flat layout received stream with more than a single chunk"); }; - Ok(ArrayTreeFlatLayout::new(FlatLayout::new( - row_count, - stream.dtype().clone(), - segment_id, - ReadContext::new(ctx.to_ids()), - )) + Ok(ArrayTreeFlatLayout::with_chunk( + FlatLayout::new( + row_count, + stream.dtype().clone(), + segment_id, + ReadContext::new(ctx.to_ids()), + ), + columnar_chunk, + ) .into_layout()) } @@ -163,13 +150,14 @@ impl LayoutStrategy for ArrayTreeFlatStrategy { /// Collector strategy (RX) that wraps the data pipeline. /// -/// After the data child completes, drains the shared sink and writes the collected -/// `(segment_id, compact_tree)` pairs as a struct array via the configured +/// After the data child completes, walks the resulting data subtree to extract each +/// [`ArrayTreeFlatLayout`] leaf's attached [`ColumnarChunkData`], then serializes the +/// per-chunk data into one row-per-chunk struct array matching +/// [`ArrayTreeLayout::array_trees_dtype`] and writes it via the configured /// `array_trees_strategy`. pub struct ArrayTreeCollectorStrategy { child: Option>, array_trees_strategy: Arc, - sink: Sink, } impl ArrayTreeCollectorStrategy { @@ -207,38 +195,25 @@ impl LayoutStrategy for ArrayTreeCollectorStrategy { ) .await?; - // By the time the data subtree future resolves, every leaf has finished its - // `segment_sink.write().await?` and pushed onto the sink. Drain it now. - let mut entries = std::mem::take(&mut *self.sink.lock()); + // Walk the data subtree to extract per-leaf columnar chunk data.
Each + // `ArrayTreeFlatLayout` leaf carries its chunk attached as transient write-time + // state (not serialized to disk). This per-column walk keeps the collector's view + // scoped to its own leaves, even when columns write concurrently — unlike a + // shared sink which would mix leaves across collector invocations. + let mut entries: Vec<(SegmentId, ColumnarChunkData)> = Vec::new(); + for layout_ref in data_layout.depth_first_traversal() { + let layout_ref = layout_ref?; + if let Some(atf) = layout_ref.as_opt::() + && let Some(chunk) = atf.take_chunk() + { + entries.push((atf.inner().segment_id(), chunk)); + } + } - // Sort by segment ID so the on-disk order matches segment-write order — this gives - // good locality and predictable lookup-table layout. + // Sort by segment ID so the on-disk row order matches segment-write order. entries.sort_by_key(|(seg, _)| *seg); - // Build a struct array of {segment_id: u32, compact_tree: bytes}. - let nrows = entries.len(); - let segment_ids: Buffer = entries.iter().map(|(seg, _)| **seg).collect(); - let segment_ids_array = - PrimitiveArray::new(segment_ids, Validity::NonNullable).into_array(); - - let mut tree_builder = - VarBinViewBuilder::with_capacity(DType::Binary(Nullability::NonNullable), nrows); - for (_, tree) in &entries { - tree_builder.append_value(tree.as_slice()); - } - let trees_array = tree_builder.finish().into_array(); - - let array_trees_array = StructArray::try_new( - vec![ - FieldName::from("segment_id"), - FieldName::from("compact_tree"), - ] - .into(), - vec![segment_ids_array, trees_array], - nrows, - Validity::NonNullable, - )? - .into_array(); + let array_trees_array = build_consolidated_struct(&entries)?; // Write the struct array via the array_trees strategy. 
let trees_stream = array_trees_array @@ -257,3 +232,198 @@ impl LayoutStrategy for ArrayTreeCollectorStrategy { + self.array_trees_strategy.buffered_bytes() } } + +/// Assemble the consolidated columnar struct array from a sorted list of per-chunk entries. +/// +/// One row per chunk. The `nodes` and `buffers` List columns are built by +/// concatenating each chunk's per-node / per-buffer values and recording offsets per row. +/// +/// **Stats are intentionally written as all-null in this initial implementation** — the +/// columnar schema has nullable stat columns ready to receive stats, but populating them +/// requires bridging the existing `StatsSet`/`ScalarValue` serialization to typed columns. +/// That's a focused follow-up; for now the consolidated carries tree shape + metadata + +/// buffer descriptors, which is sufficient for the new reader path to decode every chunk. +fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> VortexResult { + let nrows = entries.len(); + let nn = Nullability::NonNullable; + + // segment_id column. 
+ let segment_ids: Buffer = entries.iter().map(|(seg, _)| **seg).collect(); + let segment_ids_array = PrimitiveArray::new(segment_ids, Validity::NonNullable).into_array(); + + let total_nodes: usize = entries.iter().map(|(_, c)| c.nnodes()).sum(); + let total_buffers: usize = entries.iter().map(|(_, c)| c.buffer_padding.len()).sum(); + + // ---- Per-node columns ---- + + let mut encoding_ids: Vec = Vec::with_capacity(total_nodes); + let mut child_counts: Vec = Vec::with_capacity(total_nodes); + let mut buffers_per_node: Vec = Vec::with_capacity(total_nodes); + let mut metadata_builder = VarBinViewBuilder::with_capacity(DType::Binary(nn), total_nodes); + + let mut nodes_offsets: Vec = Vec::with_capacity(nrows + 1); + nodes_offsets.push(0); + let mut nodes_cumulative: i32 = 0; + + for (_, chunk) in entries { + for i in 0..chunk.nnodes() { + encoding_ids.push(chunk.encoding_ids[i]); + child_counts.push(chunk.child_counts[i]); + buffers_per_node.push(chunk.buffers_per_node[i]); + metadata_builder.append_value(chunk.node_metadata[i].as_slice()); + } + nodes_cumulative += i32::try_from(chunk.nnodes()) + .map_err(|_| vortex_err!("array tree node count overflows i32 offsets"))?; + nodes_offsets.push(nodes_cumulative); + } + + let encoding_id_arr = + PrimitiveArray::new(Buffer::from(encoding_ids), Validity::NonNullable).into_array(); + let child_count_arr = + PrimitiveArray::new(Buffer::from(child_counts), Validity::NonNullable).into_array(); + let buffers_per_node_arr = + PrimitiveArray::new(Buffer::from(buffers_per_node), Validity::NonNullable).into_array(); + let metadata_arr = metadata_builder.finish().into_array(); + + // All-null stat columns. Placeholder values per row to satisfy the typed-column shape. 
+ let stat_binary = || -> VortexResult { all_null_binary(total_nodes) }; + let stat_u8 = || -> VortexResult { all_null_primitive::(total_nodes) }; + let stat_u64 = || -> VortexResult { all_null_primitive::(total_nodes) }; + let stat_bool = || -> VortexResult { all_null_bool(total_nodes) }; + + let node_names: Vec<&str> = vec![ + "encoding_id", + "child_count", + "metadata", + "buffers_per_node", + "stat_min", + "stat_min_precision", + "stat_max", + "stat_max_precision", + "stat_sum", + "stat_null_count", + "stat_nan_count", + "stat_uncompressed_size_in_bytes", + "stat_is_constant", + "stat_is_sorted", + "stat_is_strict_sorted", + ]; + let node_inner_struct = StructArray::try_new( + node_names.into(), + vec![ + encoding_id_arr, + child_count_arr, + metadata_arr, + buffers_per_node_arr, + stat_binary()?, + stat_u8()?, + stat_binary()?, + stat_u8()?, + stat_binary()?, + stat_u64()?, + stat_u64()?, + stat_u64()?, + stat_bool()?, + stat_bool()?, + stat_bool()?, + ], + total_nodes, + Validity::NonNullable, + )? + .into_array(); + + let nodes_offsets_arr = + PrimitiveArray::new(Buffer::from(nodes_offsets), Validity::NonNullable).into_array(); + let nodes_list = + ListArray::try_new(node_inner_struct, nodes_offsets_arr, Validity::NonNullable)? 
+ .into_array(); + + // ---- Per-buffer columns ---- + + let mut buffer_padding: Vec = Vec::with_capacity(total_buffers); + let mut buffer_alignment_exp: Vec = Vec::with_capacity(total_buffers); + let mut buffer_length: Vec = Vec::with_capacity(total_buffers); + + let mut buffers_offsets: Vec = Vec::with_capacity(nrows + 1); + buffers_offsets.push(0); + let mut buffers_cumulative: i32 = 0; + + for (_, chunk) in entries { + buffer_padding.extend_from_slice(&chunk.buffer_padding); + buffer_alignment_exp.extend_from_slice(&chunk.buffer_alignment_exponent); + buffer_length.extend_from_slice(&chunk.buffer_length); + buffers_cumulative += i32::try_from(chunk.buffer_padding.len()) + .map_err(|_| vortex_err!("array tree buffer count overflows i32 offsets"))?; + buffers_offsets.push(buffers_cumulative); + } + + let padding_arr = + PrimitiveArray::new(Buffer::from(buffer_padding), Validity::NonNullable).into_array(); + let alignment_arr = + PrimitiveArray::new(Buffer::from(buffer_alignment_exp), Validity::NonNullable).into_array(); + let length_arr = + PrimitiveArray::new(Buffer::from(buffer_length), Validity::NonNullable).into_array(); + + let buffer_names: Vec<&str> = vec!["padding", "alignment_exponent", "length"]; + let buffer_inner_struct = StructArray::try_new( + buffer_names.into(), + vec![padding_arr, alignment_arr, length_arr], + total_buffers, + Validity::NonNullable, + )? + .into_array(); + + let buffers_offsets_arr = + PrimitiveArray::new(Buffer::from(buffers_offsets), Validity::NonNullable).into_array(); + let buffers_list = ListArray::try_new( + buffer_inner_struct, + buffers_offsets_arr, + Validity::NonNullable, + )? + .into_array(); + + // ---- Outer struct (one row per chunk) ---- + + let outer_names: Vec<&str> = vec!["segment_id", "nodes", "buffers"]; + let outer = StructArray::try_new( + outer_names.into(), + vec![segment_ids_array, nodes_list, buffers_list], + nrows, + Validity::NonNullable, + )? 
+ .into_array(); + + Ok(outer) +} + +/// Build an all-null primitive column of the given length with the right typed dtype. +fn all_null_primitive( + n: usize, +) -> VortexResult { + let values: Vec = vec![T::default(); n]; + let mut validity = BitBufferMut::with_capacity(n); + for _ in 0..n { + validity.append(false); + } + let validity_arr = BoolArray::new(validity.freeze(), Validity::NonNullable).into_array(); + Ok(PrimitiveArray::new(Buffer::from(values), Validity::Array(validity_arr)).into_array()) +} + +fn all_null_bool(n: usize) -> VortexResult { + let mut bits = BitBufferMut::with_capacity(n); + let mut validity = BitBufferMut::with_capacity(n); + for _ in 0..n { + bits.append(false); + validity.append(false); + } + let validity_arr = BoolArray::new(validity.freeze(), Validity::NonNullable).into_array(); + Ok(BoolArray::new(bits.freeze(), Validity::Array(validity_arr)).into_array()) +} + +fn all_null_binary(n: usize) -> VortexResult { + let mut builder = VarBinViewBuilder::with_capacity(DType::Binary(Nullability::Nullable), n); + for _ in 0..n { + builder.append_null(); + } + Ok(builder.finish().into_array()) +} From 7ae710ac9ed6a20456145c59083e38363d9e823e Mon Sep 17 00:00:00 2001 From: Onur Satici Date: Tue, 19 May 2026 16:04:43 +0100 Subject: [PATCH 10/10] add in array stats Signed-off-by: Onur Satici --- vortex-array/src/serde.rs | 253 +++++++++++++++++- vortex-layout/src/layouts/array_tree/mod.rs | 152 ++++++++++- .../src/layouts/array_tree/writer.rs | 230 ++++++++++++---- 3 files changed, 578 insertions(+), 57 deletions(-) diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index b8d2fa87659..414ae577149 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -778,9 +778,173 @@ impl TryFrom for SerializedArray { // which source the metadata/buffers/children come from, so this type implements the same // decode flow without ever constructing or parsing a flatbuffer. 
+/// Per-stat raw blob with the inline flatbuffer's precision flag preserved. +/// +/// `bytes` is `ScalarValue::to_proto_bytes`-encoded. Pairs with the precision flag because +/// `min` / `max` in `fba::ArrayStats` track exact vs. inexact independently of the value. +#[derive(Debug, Clone)] +pub struct RawStatValue { + pub bytes: ByteBuffer, + pub exact: bool, +} + +/// Raw, dtype-agnostic snapshot of a node's statistics. Stored in [`ColumnarChunkData`] so +/// the columnar consolidated array_trees segment can carry stats without needing per-node +/// dtypes at materialization time. Conversion to a typed [`StatsSet`] happens at decode +/// time via [`Self::to_stats_set`], when the dtype is known. +/// +/// Mirrors the field set of `fba::ArrayStats` exactly so the columnar and inline paths +/// produce equivalent decoded `StatsSet`s. +#[derive(Debug, Clone, Default)] +pub struct RawNodeStats { + pub min: Option, + pub max: Option, + pub sum: Option, + pub null_count: Option, + pub nan_count: Option, + pub uncompressed_size_in_bytes: Option, + pub is_constant: Option, + pub is_sorted: Option, + pub is_strict_sorted: Option, +} + +impl RawNodeStats { + /// True when no stat slot is populated. Used by the writer to decide whether to record + /// `None` for the entire node (and emit nulls in every stat column). + pub fn is_empty(&self) -> bool { + self.min.is_none() + && self.max.is_none() + && self.sum.is_none() + && self.null_count.is_none() + && self.nan_count.is_none() + && self.uncompressed_size_in_bytes.is_none() + && self.is_constant.is_none() + && self.is_sorted.is_none() + && self.is_strict_sorted.is_none() + } + + /// Snapshot a typed [`StatsSet`] into raw form, mirroring the same selection / + /// precision handling as the inline flatbuffer writer. 
+ pub fn from_stats_set(stats: &StatsSet) -> Self { + use crate::dtype::Nullability; + use crate::dtype::PType; + use crate::expr::stats::Precision; + use crate::expr::stats::Stat; + + let raw_value = |p: Precision| RawStatValue { + exact: p.is_exact(), + bytes: ByteBuffer::from(crate::scalar::ScalarValue::to_proto_bytes::>(Some( + &p.into_inner(), + ))), + }; + + let bool_dtype = DType::Bool(Nullability::NonNullable); + let u64_dtype: DType = PType::U64.into(); + + Self { + min: stats.get(Stat::Min).map(raw_value), + max: stats.get(Stat::Max).map(raw_value), + sum: stats + .get(Stat::Sum) + .and_then(Precision::as_exact) + .map(|sum| { + ByteBuffer::from(crate::scalar::ScalarValue::to_proto_bytes::>(Some( + &sum, + ))) + }), + null_count: stats + .get_as::(Stat::NullCount, &u64_dtype) + .and_then(Precision::as_exact), + nan_count: stats + .get_as::(Stat::NaNCount, &u64_dtype) + .and_then(Precision::as_exact), + uncompressed_size_in_bytes: stats + .get_as::(Stat::UncompressedSizeInBytes, &u64_dtype) + .and_then(Precision::as_exact), + is_constant: stats + .get_as::(Stat::IsConstant, &bool_dtype) + .and_then(Precision::as_exact), + is_sorted: stats + .get_as::(Stat::IsSorted, &bool_dtype) + .and_then(Precision::as_exact), + is_strict_sorted: stats + .get_as::(Stat::IsStrictSorted, &bool_dtype) + .and_then(Precision::as_exact), + } + } + + /// Hydrate into a typed [`StatsSet`] for a given array dtype, mirroring + /// [`StatsSet::from_flatbuffer`] (same per-stat dtype lookup, same precision handling). + pub fn to_stats_set(&self, dtype: &DType, session: &VortexSession) -> VortexResult { + use crate::expr::stats::Precision; + use crate::expr::stats::Stat; + use crate::scalar::ScalarValue; + + let mut set = StatsSet::default(); + + if let Some(raw) = &self.min + && let Some(stat_dtype) = Stat::Min.dtype(dtype) + && let Some(value) = + ScalarValue::from_proto_bytes(raw.bytes.as_slice(), &stat_dtype, session)? 
+ { + set.set( + Stat::Min, + if raw.exact { + Precision::Exact(value) + } else { + Precision::Inexact(value) + }, + ); + } + if let Some(raw) = &self.max + && let Some(stat_dtype) = Stat::Max.dtype(dtype) + && let Some(value) = + ScalarValue::from_proto_bytes(raw.bytes.as_slice(), &stat_dtype, session)? + { + set.set( + Stat::Max, + if raw.exact { + Precision::Exact(value) + } else { + Precision::Inexact(value) + }, + ); + } + if let Some(raw) = &self.sum + && let Some(stat_dtype) = Stat::Sum.dtype(dtype) + && let Some(value) = + ScalarValue::from_proto_bytes(raw.as_slice(), &stat_dtype, session)? + { + set.set(Stat::Sum, Precision::Exact(value)); + } + if let Some(v) = self.null_count { + set.set(Stat::NullCount, Precision::Exact(ScalarValue::from(v))); + } + if let Some(v) = self.nan_count { + set.set(Stat::NaNCount, Precision::Exact(ScalarValue::from(v))); + } + if let Some(v) = self.uncompressed_size_in_bytes { + set.set( + Stat::UncompressedSizeInBytes, + Precision::Exact(ScalarValue::from(v)), + ); + } + if let Some(v) = self.is_constant { + set.set(Stat::IsConstant, Precision::Exact(ScalarValue::from(v))); + } + if let Some(v) = self.is_sorted { + set.set(Stat::IsSorted, Precision::Exact(ScalarValue::from(v))); + } + if let Some(v) = self.is_strict_sorted { + set.set(Stat::IsStrictSorted, Precision::Exact(ScalarValue::from(v))); + } + Ok(set) + } +} + /// Per-node statistics from the consolidated columnar consolidated form. `None` means the /// writer didn't persist any stats for that node. -pub type ColumnarNodeStats = Option; +pub type ColumnarNodeStats = Option; /// Per-chunk slice of the columnar consolidated tree, shared by all `ColumnarSerializedArray` /// nodes within the chunk via `Arc`. @@ -1092,9 +1256,11 @@ impl ColumnarSerializedArray { decoded.encoding_id(), ); - // Populate statistics from the columnar chunk data. 
- if let Some(stats) = &self.chunk.stats[self.node_index] { - decoded.statistics().set_iter(stats.clone().into_iter()); + // Populate statistics from the columnar chunk data. Hydrate the raw blob now that + // we know the array's dtype. + if let Some(raw) = &self.chunk.stats[self.node_index] { + let stats_set = raw.to_stats_set(dtype, session)?; + decoded.statistics().set_iter(stats_set.into_iter()); } Ok(decoded) @@ -1163,15 +1329,19 @@ pub fn serialize_with_columnar_chunk( .map_err(|_| vortex_err!("Array node has more than u16::MAX buffers"))?; buffers_per_node.push(n_buffers); - // Capture per-node stats. `to_owned` snapshots the current StatsSet contents; we - // store `None` when no stats are present so the read side can distinguish "no - // stats persisted" from "all stats happen to be empty". + // Capture per-node stats. Snapshot the current StatsSet contents and lower them to + // raw form so the consolidated array_trees segment can carry them without per-node + // dtypes; the decode path rehydrates with the correct dtype. `None` is recorded + // when the node has no stats so the read side can distinguish "no stats persisted" + // from "all stats happen to be empty". let stats_set = node.statistics().to_owned(); - stats.push(if stats_set.is_empty() { + let raw = if stats_set.is_empty() { None } else { - Some(stats_set) - }); + let raw = RawNodeStats::from_stats_set(&stats_set); + if raw.is_empty() { None } else { Some(raw) } + }; + stats.push(raw); array_buffers.extend(node_bufs); } @@ -1308,6 +1478,69 @@ mod columnar_tests { assert_eq!(offsets, vec![0, 2, 2, 5]); } + /// Round-trip a populated `StatsSet` through `RawNodeStats::from_stats_set` and + /// `to_stats_set` to confirm the dtype-agnostic raw form preserves the same selection + /// of stats and their values. 
+ #[test] + fn raw_node_stats_roundtrip_i32() -> VortexResult<()> { + use crate::LEGACY_SESSION; + use crate::dtype::Nullability; + use crate::dtype::PType; + use crate::expr::stats::Precision; + use crate::expr::stats::Stat; + use crate::scalar::ScalarValue; + + let dtype = DType::Primitive(PType::I32, Nullability::NonNullable); + let mut set = StatsSet::default(); + set.set(Stat::Min, Precision::Exact(ScalarValue::from(-3i32))); + set.set(Stat::Max, Precision::Inexact(ScalarValue::from(42i32))); + set.set(Stat::Sum, Precision::Exact(ScalarValue::from(100i64))); + set.set(Stat::NullCount, Precision::Exact(ScalarValue::from(7u64))); + set.set(Stat::IsConstant, Precision::Exact(ScalarValue::from(false))); + set.set(Stat::IsSorted, Precision::Exact(ScalarValue::from(true))); + + let raw = RawNodeStats::from_stats_set(&set); + assert!(!raw.is_empty()); + // Precision is preserved on min/max. + assert!(raw.min.as_ref().unwrap().exact); + assert!(!raw.max.as_ref().unwrap().exact); + + let back = raw.to_stats_set(&dtype, &LEGACY_SESSION)?; + assert_eq!( + back.get_as::(Stat::Min, &dtype), + Some(Precision::Exact(-3)) + ); + assert_eq!( + back.get_as::(Stat::Max, &dtype), + Some(Precision::Inexact(42)) + ); + assert_eq!( + back.get_as::(Stat::NullCount, &PType::U64.into()), + Some(Precision::Exact(7)) + ); + assert_eq!( + back.get_as::(Stat::IsConstant, &DType::Bool(Nullability::NonNullable)), + Some(Precision::Exact(false)) + ); + assert_eq!( + back.get_as::(Stat::IsSorted, &DType::Bool(Nullability::NonNullable)), + Some(Precision::Exact(true)) + ); + // IsStrictSorted wasn't set; should remain absent. + assert!( + back.get(Stat::IsStrictSorted).is_none(), + "unset stats stay unset" + ); + Ok(()) + } + + /// Empty stats stay empty across the round trip. 
+ #[test] + fn raw_node_stats_empty() { + let raw = RawNodeStats::from_stats_set(&StatsSet::default()); + assert!(raw.is_empty()); + } + /// Child navigation: from root (idx 0) of a tree /// 0 [2 children] /// ├── 1 [leaf] diff --git a/vortex-layout/src/layouts/array_tree/mod.rs b/vortex-layout/src/layouts/array_tree/mod.rs index 26865f3dec9..f6c2e6b2c6e 100644 --- a/vortex-layout/src/layouts/array_tree/mod.rs +++ b/vortex-layout/src/layouts/array_tree/mod.rs @@ -17,10 +17,12 @@ use vortex_array::EmptyMetadata; use vortex_array::Executable; use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; +use vortex_array::arrays::BoolArray; use vortex_array::arrays::ListViewArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::arrays::VarBinViewArray; +use vortex_array::arrays::bool::BoolArrayExt; use vortex_array::arrays::list::ListArrayExt; use vortex_array::arrays::listview::list_from_list_view; use vortex_array::arrays::struct_::StructArrayExt; @@ -31,6 +33,8 @@ use vortex_array::dtype::PType; use vortex_array::dtype::StructFields; use vortex_array::expr::root; use vortex_array::serde::ColumnarChunkData; +use vortex_array::serde::RawNodeStats; +use vortex_array::serde::RawStatValue; use vortex_buffer::ByteBuffer; use vortex_error::SharedVortexResult; use vortex_error::VortexExpect; @@ -38,6 +42,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; use vortex_error::vortex_panic; +use vortex_mask::Mask; use vortex_session::VortexSession; use vortex_session::registry::ReadContext; use vortex_utils::aliases::hash_map::HashMap; @@ -513,6 +518,29 @@ fn build_segment_map( ctx, )?; + // ---- Per-node stat columns (nullable) ---- + + let stat_min = NullableBin::from_field(&nodes_inner_struct, "stat_min", ctx)?; + let stat_max = NullableBin::from_field(&nodes_inner_struct, "stat_max", ctx)?; + let stat_sum = NullableBin::from_field(&nodes_inner_struct, "stat_sum", 
ctx)?; + let stat_min_prec = + NullablePrim::::from_field(&nodes_inner_struct, "stat_min_precision", ctx)?; + let stat_max_prec = + NullablePrim::::from_field(&nodes_inner_struct, "stat_max_precision", ctx)?; + let stat_null_count = + NullablePrim::::from_field(&nodes_inner_struct, "stat_null_count", ctx)?; + let stat_nan_count = + NullablePrim::::from_field(&nodes_inner_struct, "stat_nan_count", ctx)?; + let stat_uncompressed = NullablePrim::::from_field( + &nodes_inner_struct, + "stat_uncompressed_size_in_bytes", + ctx, + )?; + let stat_is_constant = NullableBool::from_field(&nodes_inner_struct, "stat_is_constant", ctx)?; + let stat_is_sorted = NullableBool::from_field(&nodes_inner_struct, "stat_is_sorted", ctx)?; + let stat_is_strict_sorted = + NullableBool::from_field(&nodes_inner_struct, "stat_is_strict_sorted", ctx)?; + // ---- Buffers list ---- let buffers_list = list_from_list_view(buffers_field.execute::(ctx)?)?; @@ -563,7 +591,28 @@ fn build_segment_map( let buffer_alignment_exponent = alignment_all[b_start..b_end].to_vec(); let buffer_length = length_all[b_start..b_end].to_vec(); - let stats = vec![None; n_end - n_start]; + let stats: Vec> = (n_start..n_end) + .map(|j| { + let raw = RawNodeStats { + min: stat_min.at(j).map(|bytes| RawStatValue { + bytes, + exact: stat_min_prec.at(j).map(precision_to_exact).unwrap_or(true), + }), + max: stat_max.at(j).map(|bytes| RawStatValue { + bytes, + exact: stat_max_prec.at(j).map(precision_to_exact).unwrap_or(true), + }), + sum: stat_sum.at(j), + null_count: stat_null_count.at(j), + nan_count: stat_nan_count.at(j), + uncompressed_size_in_bytes: stat_uncompressed.at(j), + is_constant: stat_is_constant.at(j), + is_sorted: stat_is_sorted.at(j), + is_strict_sorted: stat_is_strict_sorted.at(j), + }; + if raw.is_empty() { None } else { Some(raw) } + }) + .collect(); let chunk = ColumnarChunkData::new( encoding_ids, @@ -579,3 +628,104 @@ fn build_segment_map( } Ok(map) } + +/// Mirror of the 
`PRECISION_EXACT`/`PRECISION_INEXACT` mapping in the writer. Only `0` +/// (`PRECISION_EXACT`) decodes as exact; every other value, including precision values +/// this reader does not recognize, decodes as inexact. That is the conservative side for +/// stat consumers: a reader that thinks an inexact value is exact may make a bad pruning +/// decision, whereas treating an exact value as inexact is merely pessimistic. +fn precision_to_exact(p: u8) -> bool { + p == 0 +} + +/// Materialized nullable binary column (e.g. `stat_min`). +struct NullableBin { + arr: VarBinViewArray, + validity: Mask, +} + +impl NullableBin { + fn from_field( + parent: &StructArray, + name: &str, + ctx: &mut vortex_array::ExecutionCtx, + ) -> VortexResult { + let field = parent + .unmasked_field_by_name_opt(name) + .ok_or_else(|| vortex_err!("nodes struct missing '{}' field", name))? + .clone(); + let arr = VarBinViewArray::execute(field, ctx)?; + let validity = arr + .as_ref() + .validity()? + .execute_mask(arr.as_ref().len(), ctx)?; + Ok(Self { arr, validity }) + } + fn at(&self, idx: usize) -> Option { + self.validity.value(idx).then(|| self.arr.bytes_at(idx)) + } +} + +/// Materialized nullable primitive column (e.g. `stat_null_count`). +struct NullablePrim { + arr: PrimitiveArray, + validity: Mask, + _phantom: std::marker::PhantomData, +} + +impl NullablePrim { + fn from_field( + parent: &StructArray, + name: &str, + ctx: &mut vortex_array::ExecutionCtx, + ) -> VortexResult { + let field = parent + .unmasked_field_by_name_opt(name) + .ok_or_else(|| vortex_err!("nodes struct missing '{}' field", name))? + .clone(); + let arr = PrimitiveArray::execute(field, ctx)?; + let validity = arr + .as_ref() + .validity()? 
+ .execute_mask(arr.as_ref().len(), ctx)?; + Ok(Self { + arr, + validity, + _phantom: std::marker::PhantomData, + }) + } + fn at(&self, idx: usize) -> Option { + self.validity + .value(idx) + .then(|| self.arr.as_slice::()[idx]) + } +} + +/// Materialized nullable bool column (e.g. `stat_is_constant`). +struct NullableBool { + bits: vortex_buffer::BitBuffer, + validity: Mask, +} + +impl NullableBool { + fn from_field( + parent: &StructArray, + name: &str, + ctx: &mut vortex_array::ExecutionCtx, + ) -> VortexResult { + let field = parent + .unmasked_field_by_name_opt(name) + .ok_or_else(|| vortex_err!("nodes struct missing '{}' field", name))? + .clone(); + let arr = BoolArray::execute(field, ctx)?; + let bits = arr.to_bit_buffer(); + let validity = arr + .as_ref() + .validity()? + .execute_mask(arr.as_ref().len(), ctx)?; + Ok(Self { bits, validity }) + } + fn at(&self, idx: usize) -> Option { + self.validity.value(idx).then(|| self.bits.value(idx)) + } +} diff --git a/vortex-layout/src/layouts/array_tree/writer.rs b/vortex-layout/src/layouts/array_tree/writer.rs index 8858fbc936a..80d6672eb86 100644 --- a/vortex-layout/src/layouts/array_tree/writer.rs +++ b/vortex-layout/src/layouts/array_tree/writer.rs @@ -17,6 +17,7 @@ use vortex_array::builders::VarBinViewBuilder; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::serde::ColumnarChunkData; +use vortex_array::serde::RawNodeStats; use vortex_array::serde::SegmentMode; use vortex_array::serde::SerializeOptions; use vortex_array::serde::serialize_with_columnar_chunk; @@ -237,15 +238,12 @@ impl LayoutStrategy for ArrayTreeCollectorStrategy { /// /// One row per chunk. The `nodes` and `buffers` List columns are built by /// concatenating each chunk's per-node / per-buffer values and recording offsets per row. 
-/// -/// **Stats are intentionally written as all-null in this initial implementation** — the -/// columnar schema has nullable stat columns ready to receive stats, but populating them -/// requires bridging the existing `StatsSet`/`ScalarValue` serialization to typed columns. -/// That's a focused follow-up; for now the consolidated carries tree shape + metadata + -/// buffer descriptors, which is sufficient for the new reader path to decode every chunk. +/// Stats are hydrated from each node's `RawNodeStats` into typed nullable columns mirroring +/// the schema on [`ArrayTreeLayout::array_trees_dtype`]. fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> VortexResult { let nrows = entries.len(); let nn = Nullability::NonNullable; + let nullable = Nullability::Nullable; // segment_id column. let segment_ids: Buffer = entries.iter().map(|(seg, _)| **seg).collect(); @@ -261,6 +259,21 @@ fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> Vort let mut buffers_per_node: Vec = Vec::with_capacity(total_nodes); let mut metadata_builder = VarBinViewBuilder::with_capacity(DType::Binary(nn), total_nodes); + // Nullable stat column accumulators. For each binary stat we use a nullable + // `VarBinViewBuilder`; for primitive/bool stats we accumulate (values, validity) + // separately and assemble at finish time. 
+ let mut min_builder = VarBinViewBuilder::with_capacity(DType::Binary(nullable), total_nodes); + let mut max_builder = VarBinViewBuilder::with_capacity(DType::Binary(nullable), total_nodes); + let mut sum_builder = VarBinViewBuilder::with_capacity(DType::Binary(nullable), total_nodes); + let mut min_prec = NullableValues::::with_capacity(total_nodes); + let mut max_prec = NullableValues::::with_capacity(total_nodes); + let mut null_count = NullableValues::::with_capacity(total_nodes); + let mut nan_count = NullableValues::::with_capacity(total_nodes); + let mut uncompressed_size = NullableValues::::with_capacity(total_nodes); + let mut is_constant = NullableBools::with_capacity(total_nodes); + let mut is_sorted = NullableBools::with_capacity(total_nodes); + let mut is_strict_sorted = NullableBools::with_capacity(total_nodes); + let mut nodes_offsets: Vec = Vec::with_capacity(nrows + 1); nodes_offsets.push(0); let mut nodes_cumulative: i32 = 0; @@ -271,6 +284,22 @@ fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> Vort child_counts.push(chunk.child_counts[i]); buffers_per_node.push(chunk.buffers_per_node[i]); metadata_builder.append_value(chunk.node_metadata[i].as_slice()); + + let raw: Option<&RawNodeStats> = chunk.stats[i].as_ref(); + append_stat_columns( + raw, + &mut min_builder, + &mut max_builder, + &mut sum_builder, + &mut min_prec, + &mut max_prec, + &mut null_count, + &mut nan_count, + &mut uncompressed_size, + &mut is_constant, + &mut is_sorted, + &mut is_strict_sorted, + ); } nodes_cumulative += i32::try_from(chunk.nnodes()) .map_err(|_| vortex_err!("array tree node count overflows i32 offsets"))?; @@ -285,12 +314,6 @@ fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> Vort PrimitiveArray::new(Buffer::from(buffers_per_node), Validity::NonNullable).into_array(); let metadata_arr = metadata_builder.finish().into_array(); - // All-null stat columns. 
Placeholder values per row to satisfy the typed-column shape. - let stat_binary = || -> VortexResult { all_null_binary(total_nodes) }; - let stat_u8 = || -> VortexResult { all_null_primitive::(total_nodes) }; - let stat_u64 = || -> VortexResult { all_null_primitive::(total_nodes) }; - let stat_bool = || -> VortexResult { all_null_bool(total_nodes) }; - let node_names: Vec<&str> = vec![ "encoding_id", "child_count", @@ -315,17 +338,17 @@ fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> Vort child_count_arr, metadata_arr, buffers_per_node_arr, - stat_binary()?, - stat_u8()?, - stat_binary()?, - stat_u8()?, - stat_binary()?, - stat_u64()?, - stat_u64()?, - stat_u64()?, - stat_bool()?, - stat_bool()?, - stat_bool()?, + min_builder.finish().into_array(), + min_prec.finish(), + max_builder.finish().into_array(), + max_prec.finish(), + sum_builder.finish().into_array(), + null_count.finish(), + nan_count.finish(), + uncompressed_size.finish(), + is_constant.finish(), + is_sorted.finish(), + is_strict_sorted.finish(), ], total_nodes, Validity::NonNullable, @@ -396,34 +419,149 @@ fn build_consolidated_struct(entries: &[(SegmentId, ColumnarChunkData)]) -> Vort Ok(outer) } -/// Build an all-null primitive column of the given length with the right typed dtype. -fn all_null_primitive( - n: usize, -) -> VortexResult { - let values: Vec = vec![T::default(); n]; - let mut validity = BitBufferMut::with_capacity(n); - for _ in 0..n { - validity.append(false); +/// Mapping of `RawStatValue::exact` onto the u8 used in our columnar schema. Stable on disk +/// — old readers will assume `0 = Exact, 1 = Inexact`. +const PRECISION_EXACT: u8 = 0; +const PRECISION_INEXACT: u8 = 1; + +/// Push one node's stats onto every per-stat column. Nulls are pushed wherever the +/// `RawNodeStats` slot is `None` (or `raw` itself is `None`). 
+#[allow(clippy::too_many_arguments)] +fn append_stat_columns( + raw: Option<&RawNodeStats>, + min: &mut VarBinViewBuilder, + max: &mut VarBinViewBuilder, + sum: &mut VarBinViewBuilder, + min_prec: &mut NullableValues, + max_prec: &mut NullableValues, + null_count: &mut NullableValues, + nan_count: &mut NullableValues, + uncompressed_size: &mut NullableValues, + is_constant: &mut NullableBools, + is_sorted: &mut NullableBools, + is_strict_sorted: &mut NullableBools, +) { + match raw { + Some(raw) => { + match &raw.min { + Some(rv) => { + min.append_value(rv.bytes.as_slice()); + min_prec.push(if rv.exact { + PRECISION_EXACT + } else { + PRECISION_INEXACT + }); + } + None => { + min.append_null(); + min_prec.push_null(); + } + } + match &raw.max { + Some(rv) => { + max.append_value(rv.bytes.as_slice()); + max_prec.push(if rv.exact { + PRECISION_EXACT + } else { + PRECISION_INEXACT + }); + } + None => { + max.append_null(); + max_prec.push_null(); + } + } + match &raw.sum { + Some(b) => sum.append_value(b.as_slice()), + None => sum.append_null(), + } + null_count.push_opt(raw.null_count); + nan_count.push_opt(raw.nan_count); + uncompressed_size.push_opt(raw.uncompressed_size_in_bytes); + is_constant.push_opt(raw.is_constant); + is_sorted.push_opt(raw.is_sorted); + is_strict_sorted.push_opt(raw.is_strict_sorted); + } + None => { + min.append_null(); + max.append_null(); + sum.append_null(); + min_prec.push_null(); + max_prec.push_null(); + null_count.push_null(); + nan_count.push_null(); + uncompressed_size.push_null(); + is_constant.push_null(); + is_sorted.push_null(); + is_strict_sorted.push_null(); + } } - let validity_arr = BoolArray::new(validity.freeze(), Validity::NonNullable).into_array(); - Ok(PrimitiveArray::new(Buffer::from(values), Validity::Array(validity_arr)).into_array()) } -fn all_null_bool(n: usize) -> VortexResult { - let mut bits = BitBufferMut::with_capacity(n); - let mut validity = BitBufferMut::with_capacity(n); - for _ in 0..n { - 
bits.append(false); - validity.append(false); +/// Accumulator for a nullable primitive column. +struct NullableValues { + values: Vec, + validity: BitBufferMut, +} + +impl NullableValues { + fn with_capacity(cap: usize) -> Self { + Self { + values: Vec::with_capacity(cap), + validity: BitBufferMut::with_capacity(cap), + } + } + fn push(&mut self, v: T) { + self.values.push(v); + self.validity.append(true); } - let validity_arr = BoolArray::new(validity.freeze(), Validity::NonNullable).into_array(); - Ok(BoolArray::new(bits.freeze(), Validity::Array(validity_arr)).into_array()) + fn push_null(&mut self) { + self.values.push(T::default()); + self.validity.append(false); + } + fn push_opt(&mut self, v: Option) { + match v { + Some(v) => self.push(v), + None => self.push_null(), + } + } + fn finish(self) -> ArrayRef { + let validity_arr = + BoolArray::new(self.validity.freeze(), Validity::NonNullable).into_array(); + PrimitiveArray::new(Buffer::from(self.values), Validity::Array(validity_arr)).into_array() + } +} + +/// Accumulator for a nullable bool column. 
+struct NullableBools { + bits: BitBufferMut, + validity: BitBufferMut, } -fn all_null_binary(n: usize) -> VortexResult { - let mut builder = VarBinViewBuilder::with_capacity(DType::Binary(Nullability::Nullable), n); - for _ in 0..n { - builder.append_null(); +impl NullableBools { + fn with_capacity(cap: usize) -> Self { + Self { + bits: BitBufferMut::with_capacity(cap), + validity: BitBufferMut::with_capacity(cap), + } + } + fn push(&mut self, v: bool) { + self.bits.append(v); + self.validity.append(true); + } + fn push_null(&mut self) { + self.bits.append(false); + self.validity.append(false); + } + fn push_opt(&mut self, v: Option) { + match v { + Some(v) => self.push(v), + None => self.push_null(), + } + } + fn finish(self) -> ArrayRef { + let validity_arr = + BoolArray::new(self.validity.freeze(), Validity::NonNullable).into_array(); + BoolArray::new(self.bits.freeze(), Validity::Array(validity_arr)).into_array() } - Ok(builder.finish().into_array()) }