From 1b9d7c0e97676ca64a825fa5805e0b7d22e6be8b Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Sat, 16 May 2026 18:51:54 +0000 Subject: [PATCH 1/3] bench(lance-core): expose row-cardinality scaling of RowAddrMask Add a criterion benchmark suite targeting RowAddrMask / RowAddrTreeMap that quantifies the cost of operations whose work is fundamentally range-shaped but currently goes through per-row Partial(RoaringBitmap) representation. Six groups: insert_range_single_run - producer cost: insert one range into_addr_iter_single_run - consumer cost: walk every row addr next_range_iter_single_run - achievable cost via Iter::next_range intersect_two_runs - set op on two range-shaped masks mask_to_offset_ranges_inner_loop - end-to-end slow path observed in IS NULL trace (495 ms / 889 ms) insert_runs_constant_cardinality - many small runs vs one big run Each varies dataset size while holding number-of-ranges fixed at 1, so linear scaling in N reveals where row count dominates the cost. Headline finding (10M-row inputs): into_addr_iter: 19.4 ms per-bit walk next_range iter: 1.72 us per-run walk (~11000x faster) The next_range/iter delta represents the speedup an alternate range-aware iterator could surface to callers. The roaring crate already represents the data as run-encoded containers; the RowAddrMask public API does not expose them. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + rust/lance-core/Cargo.toml | 5 + rust/lance-core/benches/row_addr_mask.rs | 213 +++++++++++++++++++++++ 3 files changed, 219 insertions(+) create mode 100644 rust/lance-core/benches/row_addr_mask.rs diff --git a/Cargo.lock b/Cargo.lock index ef75449d11c..56e2c808a5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4485,6 +4485,7 @@ dependencies = [ "async-trait", "byteorder", "bytes", + "criterion", "datafusion-common", "datafusion-sql", "deepsize", diff --git a/rust/lance-core/Cargo.toml b/rust/lance-core/Cargo.toml index 9dff4b001a4..ccab121b43f 100644 --- a/rust/lance-core/Cargo.toml +++ b/rust/lance-core/Cargo.toml @@ -48,11 +48,16 @@ log.workspace = true libc = { version = "0.2" } [dev-dependencies] +criterion.workspace = true proptest.workspace = true rstest.workspace = true [features] datafusion = ["dep:datafusion-common", "dep:datafusion-sql"] +[[bench]] +name = "row_addr_mask" +harness = false + [lints] workspace = true diff --git a/rust/lance-core/benches/row_addr_mask.rs b/rust/lance-core/benches/row_addr_mask.rs new file mode 100644 index 00000000000..40fe396dbc8 --- /dev/null +++ b/rust/lance-core/benches/row_addr_mask.rs @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Benchmarks for `RowAddrMask` / `RowAddrTreeMap`. +//! +//! These benchmarks are deliberately structured to expose the row-cardinality +//! scaling weakness of the current per-row bitmap representation. Producers +//! (e.g. scalar-index `search` implementations) and consumers (e.g. +//! `mask_to_offset_ranges`) are frequently range-shaped, but every operation +//! must round-trip through `Partial(RoaringBitmap)` and therefore costs O(N) +//! in the number of rows, not O(R) in the number of distinct ranges. +//! +//! Each benchmark varies the number of rows while keeping the number of +//! ranges fixed at 1. A range-aware representation should make these +//! near-constant time; today they are linear in N. +//! +//! Run with `cargo bench -p lance-core --bench row_addr_mask`. + +use std::ops::Range; + +use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +use lance_core::utils::mask::{RowAddrMask, RowAddrTreeMap}; + +/// Row counts we sweep across. Chosen to cover the realistic range of +/// matches a zonemap produces for an `IS NULL`-like predicate on a single +/// fragment: a few thousand rows up through tens of millions. +const ROW_COUNTS: &[u64] = &[10_000, 100_000, 1_000_000, 10_000_000]; + +fn make_range_mask(num_rows: u64) -> RowAddrTreeMap { + // Build a mask covering a single contiguous run in fragment 0. + // This is the exact shape a scalar-index search produces when it + // determines a contiguous chunk of zones matches. + let mut map = RowAddrTreeMap::new(); + map.insert_range(0..num_rows); + map +} + +/// Producer cost: building a mask from one contiguous Range. +/// +/// Today this is O(N) — every bit gets inserted into a roaring bitmap. +/// With a range-aware representation it would be O(1) (push a single run). +fn bench_insert_range(c: &mut Criterion) { + let mut group = c.benchmark_group("insert_range_single_run"); + for &n in ROW_COUNTS { + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| { + let mut map = RowAddrTreeMap::new(); + map.insert_range(0..n); + std::hint::black_box(map); + }); + }); + } + group.finish(); +} + +/// Consumer cost: iterating every row address in a dense mask. +/// +/// `into_addr_iter` walks set bits one at a time. For a contiguous run +/// of N rows this is O(N) — even though the rows are trivially +/// representable as a single Range. This is what `mask_to_offset_ranges` +/// does after intersecting with a source segment: it pays per-row +/// iteration cost only to immediately collapse the addresses back into +/// ranges via `GroupingIterator`. +fn bench_iter_addrs(c: &mut Criterion) { + let mut group = c.benchmark_group("into_addr_iter_single_run"); + for &n in ROW_COUNTS { + let map = make_range_mask(n); + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + // SAFETY: the map only contains Partial selections; no Full entries. + let count: u64 = unsafe { map.clone().into_addr_iter() }.count() as u64; + std::hint::black_box(count); + }); + }); + } + group.finish(); +} + +/// Best-achievable iteration over the same data. +/// +/// `Iter::next_range` walks the bitmap's run containers in O(num_runs). +/// For a single contiguous run this should be ~constant time — the +/// public `RowAddrMask` API gives no way to surface that today, so the +/// performance is currently inaccessible to callers. Comparing this to +/// `into_addr_iter_single_run` quantifies the speedup a range-aware +/// representation could deliver to consumers. +fn bench_iter_runs(c: &mut Criterion) { + let mut group = c.benchmark_group("next_range_iter_single_run"); + for &n in ROW_COUNTS { + // Use the same underlying roaring bitmap shape that `make_range_mask` + // produces internally (one fragment, one contiguous run). + let mut bitmap = roaring::RoaringBitmap::new(); + bitmap.insert_range(0..(n as u32)); + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let mut iter = bitmap.iter(); + let mut runs: u64 = 0; + while iter.next_range().is_some() { + runs += 1; + } + std::hint::black_box(runs); + }); + }); + } + group.finish(); +} + +/// Set intersection of two range-shaped masks. +/// +/// Both inputs are single contiguous runs that overlap in their middle +/// half (so the output is itself a single contiguous run). With per-row +/// bitmaps this is O(N) — the entire bitmap participates in the AND. +/// With ranges it would be O(1). +fn bench_intersect_ranges(c: &mut Criterion) { + let mut group = c.benchmark_group("intersect_two_runs"); + for &n in ROW_COUNTS { + let lhs = make_range_mask(n); + let rhs_range = (n / 4)..(3 * n / 4); + let mut rhs = RowAddrTreeMap::new(); + rhs.insert_range(rhs_range); + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let mut tmp = lhs.clone(); + tmp &= &rhs; + std::hint::black_box(tmp); + }); + }); + } + group.finish(); +} + +/// Full round trip: build a source range bitmap, AND with a mask, iterate +/// each surviving bit. This is the exact slow path of +/// `mask_to_offset_ranges` in `lance-table/src/rowids.rs:387`. Profiling +/// a 10M-row zonemap `IS NULL` query showed this consuming ~55% of the +/// hot-loop time (~495 ms of 889 ms). The benchmark separates the +/// per-row producer/consumer cost from the rest of the scan pipeline so +/// it can be tracked in isolation. +fn bench_range_to_ranges_round_trip(c: &mut Criterion) { + let mut group = c.benchmark_group("mask_to_offset_ranges_inner_loop"); + for &n in ROW_COUNTS { + // The mask selects the back half of a 2N-row fragment. + let mask_range = n..(2 * n); + let mask = RowAddrMask::AllowList(RowAddrTreeMap::from(mask_range)); + // The source segment covers the whole fragment. + let src: Range = 0..(2 * n); + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + // Mimic the slow path: materialize source range, AND with mask, + // iterate to count survivors (a stand-in for whatever the + // consumer actually does — e.g. GroupingIterator). + let mut ids = RowAddrTreeMap::from(src.clone()); + ids.mask(&mask); + let count = unsafe { ids.into_addr_iter() }.count(); + std::hint::black_box(count); + }); + }); + } + group.finish(); +} + +/// Many small runs vs one big run with the same total cardinality. +/// +/// A range-aware representation should be O(num_runs), so the +/// `single_run` case should be ~K times faster than the `K_runs` case. +/// Today they are essentially equal: the cost is dictated by the number +/// of rows, not the number of runs. +fn bench_runs_vs_rows(c: &mut Criterion) { + let total_rows: u64 = 1_000_000; + let mut group = c.benchmark_group("insert_runs_constant_cardinality"); + + group.throughput(Throughput::Elements(total_rows)); + group.bench_function("single_run_1M", |b| { + b.iter(|| { + let mut map = RowAddrTreeMap::new(); + map.insert_range(0..total_rows); + std::hint::black_box(map); + }); + }); + + for k in [10u64, 100, 1_000, 10_000] { + let run_size = total_rows / k; + // Stride between runs is 2 * run_size so the bitmap is half full. + let stride = run_size * 2; + group.bench_function(format!("{k}_runs_1M_total"), |b| { + b.iter(|| { + let mut map = RowAddrTreeMap::new(); + for i in 0..k { + let start = i * stride; + map.insert_range(start..(start + run_size)); + } + std::hint::black_box(map); + }); + }); + } + group.finish(); +} + +criterion_group!( + benches, + bench_insert_range, + bench_iter_addrs, + bench_iter_runs, + bench_intersect_ranges, + bench_range_to_ranges_round_trip, + bench_runs_vs_rows, +); +criterion_main!(benches); From 57e9e2bba0ebc003fbf098929ea8d2935a99d1b7 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Sat, 16 May 2026 19:16:34 +0000 Subject: [PATCH 2/3] feat(mask): add range-aware Runs variant + insert_run / iter_runs APIs Adds a third RowAddrSelection variant, `Runs(Vec>)`, that stores a per-fragment selection as a sorted, non-overlapping, non-adjacent list of run-length-encoded ranges. This is the backwards- compatible step toward a range-aware row-address mask: existing producers and consumers keep working unchanged, while new range-shaped callers can sidestep the per-row roaring bitmap that today dominates mask construction and iteration cost. New APIs on RowAddrTreeMap: insert_run(fragment_id, run) Range-shaped producer counterpart to insert(value) / insert_range. O(1) amortized when the run extends or is adjacent to the last entry (the common case for in-order producers like scalar-index zone searches). Merges into existing Runs preserving invariants. Falls back to Partial-bitmap inserts when the existing entry is already Partial (so scalar inserts never silently re-shape data). iter_runs() -> Iterator<(u32, RangeInclusive)> Range-shaped consumer counterpart to into_addr_iter. Yields one item per contiguous run, not per row. For `Runs` entries the runs are emitted directly; for `Partial` entries roaring's Iter::next_range surfaces the bitmap's internal run encoding. Panics on `Full` (same contract as into_addr_iter). canonicalize_to_partial(fragment_id) Force a Runs entry into its equivalent Partial form. Useful for callers that need raw bitmap access via get_fragment_bitmap. Compatibility: * Every existing match site on RowAddrSelection grew a Runs arm that either handles the variant natively (len, contains, row_addrs, iter_runs, into_addr_iter, serialize_into, etc.) or inflates to Partial via the private into_partial_bitmap helper for ops not yet range-aware (insert, remove, BitOr/BitAnd/Sub, FromIterator, Extend). All 97 existing mask tests pass unchanged. * On-disk format is unchanged: serialize_into inflates Runs to its equivalent bitmap before writing, so readers built against older versions continue to load. deserialize_from always yields Partial. * Hot paths use itertools::Either rather than Box so the new variant adds no dyn-dispatch cost to the existing Partial iteration path. Verified by criterion: into_addr_iter at 10M rows is 19.9 ms before and after. Benchmark deltas (single contiguous run, vs the pre-existing APIs documented in commit 1b9d7c0e9): Producer (insert one run of N rows): insert_range insert_run speedup N = 10K 54 ns 31 ns 1.8x N = 100K 67 ns 31 ns 2.2x N = 1M 543 ns 31 ns 17.7x N = 10M 6,499 ns 31 ns 210x Consumer (iterate selection of N rows): into_addr_iter iter_runs speedup N = 10K 19,396 ns 6.3 ns 3,078x N = 100K 193,111 ns 6.4 ns 30,209x N = 1M 1,943,641 ns 6.3 ns 306,879x N = 10M 19,871,915 ns 6.3 ns 3,154,000x Many runs (1M total cardinality, K runs): insert_range insert_run speedup K = 1 608 ns 32 ns 19.2x K = 10 827 ns 199 ns 4.2x K = 100 3,123 ns 769 ns 4.1x K = 1,000 28,416 ns 5,680 ns 5.0x K = 10,000 272,891 ns 49,155 ns 5.6x 11 new unit tests cover invariant preservation, mixed-variant set ops, serialization round-trip, and degradation rules (insert into Partial collapses to Partial, insert into Full is no-op). filtered_read.rs gains a Runs arm in the existing FilteredReadPlan consumer at line 1606 so callers wiring the new producer through that path are not blocked. Co-Authored-By: Claude Opus 4.7 (1M context) --- rust/lance-core/benches/row_addr_mask.rs | 79 +++ rust/lance-core/src/utils/mask.rs | 699 +++++++++++++++++++---- rust/lance/src/io/exec/filtered_read.rs | 4 + 3 files changed, 675 insertions(+), 107 deletions(-) diff --git a/rust/lance-core/benches/row_addr_mask.rs b/rust/lance-core/benches/row_addr_mask.rs index 40fe396dbc8..d9de844284a 100644 --- a/rust/lance-core/benches/row_addr_mask.rs +++ b/rust/lance-core/benches/row_addr_mask.rs @@ -201,13 +201,92 @@ fn bench_runs_vs_rows(c: &mut Criterion) { group.finish(); } +/// New producer API: `insert_run` stores the run as-is without inflating to +/// bits. Compare against `insert_range_single_run` to see the savings on +/// the producer side. +fn bench_insert_run(c: &mut Criterion) { + let mut group = c.benchmark_group("insert_run_single_run"); + for &n in ROW_COUNTS { + let end = (n - 1) as u32; + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 0..=end); + std::hint::black_box(map); + }); + }); + } + group.finish(); +} + +/// New consumer API: `iter_runs` walks runs directly. For maps built via +/// `insert_run` it is O(num_runs); for maps built via `insert_range` it +/// falls back to roaring's `Iter::next_range` which is O(num_run_containers). +/// Compare against `into_addr_iter_single_run` for the speedup callers see. +fn bench_iter_runs_consumer(c: &mut Criterion) { + let mut group = c.benchmark_group("iter_runs_single_run"); + for &n in ROW_COUNTS { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 0..=(n - 1) as u32); + group.throughput(Throughput::Elements(n)); + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, _| { + b.iter(|| { + let mut runs: u64 = 0; + for _ in unsafe { map.iter_runs() } { + runs += 1; + } + std::hint::black_box(runs); + }); + }); + } + group.finish(); +} + +/// Producer scaling under realistic many-small-runs input. Compare against +/// the `insert_runs_constant_cardinality` group: `insert_run` should scale +/// with run count alone, while `insert_range` pays per-bit cost. +fn bench_insert_run_many(c: &mut Criterion) { + let total_rows: u64 = 1_000_000; + let mut group = c.benchmark_group("insert_run_constant_cardinality"); + + group.throughput(Throughput::Elements(total_rows)); + group.bench_function("single_run_1M", |b| { + b.iter(|| { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 0..=(total_rows as u32 - 1)); + std::hint::black_box(map); + }); + }); + + for k in [10u64, 100, 1_000, 10_000] { + let run_size = total_rows / k; + let stride = run_size * 2; + group.bench_function(format!("{k}_runs_1M_total"), |b| { + b.iter(|| { + let mut map = RowAddrTreeMap::new(); + for i in 0..k { + let start = (i * stride) as u32; + let end = start + run_size as u32 - 1; + map.insert_run(0, start..=end); + } + std::hint::black_box(map); + }); + }); + } + group.finish(); +} + criterion_group!( benches, bench_insert_range, + bench_insert_run, bench_iter_addrs, bench_iter_runs, + bench_iter_runs_consumer, bench_intersect_ranges, bench_range_to_ranges_round_trip, bench_runs_vs_rows, + bench_insert_run_many, ); criterion_main!(benches); diff --git a/rust/lance-core/src/utils/mask.rs b/rust/lance-core/src/utils/mask.rs index 0ee1b5d17fa..de7bc9cdfaa 100644 --- a/rust/lance-core/src/utils/mask.rs +++ b/rust/lance-core/src/utils/mask.rs @@ -296,8 +296,27 @@ pub struct RowAddrTreeMap { #[derive(Clone, Debug, PartialEq)] pub enum RowAddrSelection { + /// The entire fragment is selected. The fragment size is unknown at this + /// layer; callers that need to enumerate concrete row addresses must + /// supply it externally. Full, + /// An explicit per-row roaring bitmap of selected row offsets. Partial(RoaringBitmap), + /// A sorted, non-overlapping, non-adjacent list of row-offset ranges + /// (inclusive on both ends). + /// + /// This is a range-shaped alternative to `Partial` that lets producers + /// emit and consumers walk runs in O(num_runs) rather than O(num_rows). + /// It is semantically equivalent to a `Partial` containing the union of + /// the ranges, and existing operations that don't natively understand + /// `Runs` transparently inflate it to a roaring bitmap. + /// + /// Invariants (must be upheld by constructors): + /// * Vec is non-empty (empty selection should not be stored). + /// * Each `RangeInclusive` has `start <= end`. + /// * Ranges are sorted by `start`, non-overlapping, and non-adjacent + /// (so that two ranges `a..=b` and `c..=d` always satisfy `c > b + 1`). + Runs(Vec>), } impl DeepSizeOf for RowAddrSelection { @@ -305,6 +324,7 @@ impl DeepSizeOf for RowAddrSelection { match self { Self::Full => 0, Self::Partial(bitmap) => bitmap.serialized_size(), + Self::Runs(runs) => runs.capacity() * std::mem::size_of::>(), } } } @@ -312,24 +332,120 @@ impl DeepSizeOf for RowAddrSelection { impl RowAddrSelection { fn union_all(selections: &[&Self]) -> Self { let mut is_full = false; + // Inflate any Runs entries into a temporary roaring bitmap so the + // existing per-row union machinery can operate on a uniform input. + let inflated: Vec = selections + .iter() + .filter_map(|selection| match selection { + Self::Full => { + is_full = true; + None + } + Self::Partial(bitmap) => Some(bitmap.clone()), + Self::Runs(runs) => Some(runs_to_bitmap(runs)), + }) + .collect(); + if is_full { + return Self::Full; + } + Self::Partial(inflated.iter().union()) + } - let res = Self::Partial( - selections - .iter() - .filter_map(|selection| match selection { - Self::Full => { - is_full = true; - None - } - Self::Partial(bitmap) => Some(bitmap), - }) - .union(), - ); + /// Materialize `self` as a `RoaringBitmap` of selected row offsets. + /// + /// `Partial` returns a clone of its bitmap; `Runs` inflates into a bitmap. + /// `Full` panics — the size is unknown at this layer (mirrors the + /// `into_addr_iter` invariant). + #[track_caller] + fn to_partial_bitmap(&self) -> RoaringBitmap { + match self { + Self::Full => panic!("Cannot materialize Full as a bitmap; size unknown"), + Self::Partial(bitmap) => bitmap.clone(), + Self::Runs(runs) => runs_to_bitmap(runs), + } + } - if is_full { Self::Full } else { res } + /// Consume `self` and return a `RoaringBitmap` of selected row offsets. + /// + /// `Partial` returns its bitmap without cloning. See [`to_partial_bitmap`] + /// for the by-reference variant. + #[track_caller] + fn into_partial_bitmap(self) -> RoaringBitmap { + match self { + Self::Full => panic!("Cannot materialize Full as a bitmap; size unknown"), + Self::Partial(bitmap) => bitmap, + Self::Runs(runs) => runs_to_bitmap(&runs), + } } } +fn runs_to_bitmap(runs: &[RangeInclusive]) -> RoaringBitmap { + let mut bitmap = RoaringBitmap::new(); + for run in runs { + bitmap.insert_range(*run.start()..=*run.end()); + } + bitmap +} + +/// Merge a new range into a sorted, non-overlapping, non-adjacent vector +/// of inclusive ranges, preserving the invariants of `RowAddrSelection::Runs`. +fn insert_run_into_vec(runs: &mut Vec>, new: RangeInclusive) { + let new_start = *new.start(); + let new_end = *new.end(); + debug_assert!(new_start <= new_end); + + // Fast path: append when the new run starts strictly after the last run. + // This is the common producer pattern (e.g. zonemap iterating zones in + // order). Adjacent runs merge into one. + if let Some(last) = runs.last_mut() { + if new_start > *last.end() { + if new_start == last.end().saturating_add(1) && *last.end() != u32::MAX { + *last = *last.start()..=new_end.max(*last.end()); + } else { + runs.push(new); + } + return; + } + } else { + runs.push(new); + return; + } + + // General case: find the first run whose end is >= new_start - 1, then + // merge the overlapping/adjacent suffix into a single span. + let mut merged_start = new_start; + let mut merged_end = new_end; + // Index of the first run that overlaps or is adjacent to `new`. + let lo = runs + .iter() + .position(|r| { + let r_end_plus_one = if *r.end() == u32::MAX { + u32::MAX + } else { + *r.end() + 1 + }; + r_end_plus_one >= new_start + }) + .unwrap_or(runs.len()); + // Index one past the last run that overlaps `new`. + let mut hi = lo; + while hi < runs.len() { + let r = &runs[hi]; + let new_end_plus_one = if new_end == u32::MAX { + u32::MAX + } else { + new_end + 1 + }; + if *r.start() > new_end_plus_one { + break; + } + merged_start = merged_start.min(*r.start()); + merged_end = merged_end.max(*r.end()); + hi += 1; + } + runs.splice(lo..hi, std::iter::once(merged_start..=merged_end)); +} + impl RowSetOps for RowAddrTreeMap { type Row = u64; @@ -343,6 +459,11 @@ impl RowSetOps for RowAddrTreeMap { .map(|row_addr_selection| match row_addr_selection { RowAddrSelection::Full => None, RowAddrSelection::Partial(indices) => Some(indices.len()), + RowAddrSelection::Runs(runs) => Some( + runs.iter() + .map(|r| u64::from(*r.end() - *r.start()) + 1) + .sum(), + ), }) .try_fold(0_u64, |acc, next| next.map(|next| next + acc)) } @@ -350,7 +471,8 @@ impl RowSetOps for RowAddrTreeMap { fn remove(&mut self, row: Self::Row) -> bool { let upper = (row >> 32) as u32; let lower = row as u32; - match self.inner.get_mut(&upper) { + let entry = self.inner.get_mut(&upper); + match entry { None => false, Some(RowAddrSelection::Full) => { let mut set = RoaringBitmap::full(); @@ -365,6 +487,18 @@ impl RowSetOps for RowAddrTreeMap { } removed } + Some(sel @ RowAddrSelection::Runs(_)) => { + // Conservative: inflate to Partial so the same code path runs. + let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) + .into_partial_bitmap(); + let removed = bitmap.remove(lower); + if bitmap.is_empty() { + self.inner.remove(&upper); + } else { + self.inner.insert(upper, RowAddrSelection::Partial(bitmap)); + } + removed + } } } @@ -375,6 +509,17 @@ impl RowSetOps for RowAddrTreeMap { None => false, Some(RowAddrSelection::Full) => true, Some(RowAddrSelection::Partial(fragment_set)) => fragment_set.contains(lower), + Some(RowAddrSelection::Runs(runs)) => runs + .binary_search_by(|r| { + if *r.end() < lower { + std::cmp::Ordering::Less + } else if *r.start() > lower { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Equal + } + }) + .is_ok(), } } @@ -435,18 +580,28 @@ impl RowAddrTreeMap { /// If there are any "full fragment" items then this can't be calculated and None /// is returned pub fn row_addrs(&self) -> Option + '_> { - let inner_iters = self + use itertools::Either; + let inner_iters: Vec<_> = self .inner .iter() - .filter_map(|(frag_id, row_addr_selection)| match row_addr_selection { - RowAddrSelection::Full => None, - RowAddrSelection::Partial(bitmap) => Some( - bitmap - .iter() - .map(|row_offset| RowAddress::new_from_parts(*frag_id, row_offset)), - ), + .filter_map(|(frag_id, row_addr_selection)| { + match row_addr_selection { + RowAddrSelection::Full => None, + RowAddrSelection::Partial(bitmap) => Some(Either::Left( + bitmap + .iter() + .map(|row_offset| RowAddress::new_from_parts(*frag_id, row_offset)), + )), + RowAddrSelection::Runs(runs) => { + let frag_id = *frag_id; + Some(Either::Right(runs.iter().flat_map(move |run| { + (*run.start()..=*run.end()) + .map(move |row_offset| RowAddress::new_from_parts(frag_id, row_offset)) + }))) + } + } }) - .collect::>(); + .collect(); if inner_iters.len() != self.inner.len() { None } else { @@ -478,6 +633,16 @@ impl RowAddrTreeMap { } Some(RowAddrSelection::Full) => false, Some(RowAddrSelection::Partial(set)) => set.insert(row_addr), + Some(sel @ RowAddrSelection::Runs(_)) => { + // Single-row inserts always inflate Runs back to Partial; the + // representation is for range-shaped producers, not scalar + // inserts. + let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) + .into_partial_bitmap(); + let inserted = bitmap.insert(row_addr); + *sel = RowAddrSelection::Partial(bitmap); + inserted + } } } @@ -522,6 +687,15 @@ impl RowAddrTreeMap { Some(RowAddrSelection::Partial(set)) => { count += set.insert_range(start..=end); } + Some(sel @ RowAddrSelection::Runs(_)) => { + // Inflate to Partial so the count-tracking logic matches + // the existing semantics. Callers wanting to stay on Runs + // should use insert_run. + let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) + .into_partial_bitmap(); + count += bitmap.insert_range(start..=end); + *sel = RowAddrSelection::Partial(bitmap); + } } start_high += 1; start_low = 0; @@ -530,6 +704,39 @@ impl RowAddrTreeMap { count } + /// Insert a contiguous run of row offsets for one fragment in O(1) amortized. + /// + /// This is the producer-side counterpart to [`Self::iter_runs`]: it lets + /// callers whose results are naturally range-shaped (scalar-index zone + /// searches, deletion ranges, fragment chunks) preserve that shape + /// downstream instead of materializing every bit. + /// + /// Semantics: + /// * If the fragment is `Full`, the call is a no-op. + /// * If the fragment is `Partial`, the range is inserted into the + /// existing bitmap (no representation change). + /// * If the fragment is empty or already `Runs`, the run is merged into + /// `Runs`, preserving the sorted / non-overlapping / non-adjacent + /// invariants. + /// + /// `run` must satisfy `start <= end`. + pub fn insert_run(&mut self, fragment_id: u32, run: RangeInclusive) { + debug_assert!(run.start() <= run.end()); + match self.inner.get_mut(&fragment_id) { + None => { + self.inner + .insert(fragment_id, RowAddrSelection::Runs(vec![run])); + } + Some(RowAddrSelection::Full) => {} + Some(RowAddrSelection::Partial(bitmap)) => { + bitmap.insert_range(*run.start()..=*run.end()); + } + Some(RowAddrSelection::Runs(runs)) => { + insert_run_into_vec(runs, run); + } + } + } + /// Add a bitmap for a single fragment pub fn insert_bitmap(&mut self, fragment: u32, bitmap: RoaringBitmap) { self.inner @@ -546,6 +753,23 @@ impl RowAddrTreeMap { None => None, Some(RowAddrSelection::Full) => None, Some(RowAddrSelection::Partial(set)) => Some(set), + // Runs has no native bitmap to reference; callers wanting one + // should canonicalize first via `to_partial_in_place`. + Some(RowAddrSelection::Runs(_)) => None, + } + } + + /// Force the entry for `fragment_id` (if any) into its `Partial` form. + /// + /// Useful for callers that need direct bitmap access via + /// [`Self::get_fragment_bitmap`] but might have a `Runs` entry produced + /// by [`Self::insert_run`]. No-op for `None`, `Full`, and already-Partial. + pub fn canonicalize_to_partial(&mut self, fragment_id: u32) { + if let Some(sel) = self.inner.get_mut(&fragment_id) + && matches!(sel, RowAddrSelection::Runs(_)) + { + let bitmap = std::mem::replace(sel, RowAddrSelection::Full).into_partial_bitmap(); + *sel = RowAddrSelection::Partial(bitmap); } } @@ -566,14 +790,20 @@ impl RowAddrTreeMap { } /// Compute the serialized size of the set. + /// + /// `Runs` entries are accounted for as if they were the equivalent + /// `Partial` bitmap so this value matches what `serialize_into` will + /// actually write. pub fn serialized_size(&self) -> usize { // Starts at 4 because of the u32 num_entries let mut size = 4; for set in self.inner.values() { // Each entry is 8 bytes for the fragment id and the bitmap size size += 8; - if let RowAddrSelection::Partial(set) = set { - size += set.serialized_size(); + match set { + RowAddrSelection::Full => {} + RowAddrSelection::Partial(bitmap) => size += bitmap.serialized_size(), + RowAddrSelection::Runs(runs) => size += runs_to_bitmap(runs).serialized_size(), } } size @@ -592,15 +822,27 @@ impl RowAddrTreeMap { /// * \[u8\]: bitmap /// /// If bitmap size is zero then the entire fragment is selected. + /// + /// `Runs` entries are inflated into their equivalent `Partial` bitmap + /// before serialization so the on-disk format remains identical and + /// older readers can still load the data. pub fn serialize_into(&self, mut writer: W) -> Result<()> { writer.write_u32::(self.inner.len() as u32)?; for (fragment, set) in &self.inner { writer.write_u32::(*fragment)?; - if let RowAddrSelection::Partial(set) = set { - writer.write_u32::(set.serialized_size() as u32)?; - set.serialize_into(&mut writer)?; - } else { - writer.write_u32::(0)?; + match set { + RowAddrSelection::Full => { + writer.write_u32::(0)?; + } + RowAddrSelection::Partial(set) => { + writer.write_u32::(set.serialized_size() as u32)?; + set.serialize_into(&mut writer)?; + } + RowAddrSelection::Runs(runs) => { + let bitmap = runs_to_bitmap(runs); + writer.write_u32::(bitmap.serialized_size() as u32)?; + bitmap.serialize_into(&mut writer)?; + } } } Ok(()) @@ -645,18 +887,77 @@ impl RowAddrTreeMap { /// # Safety /// /// This is unsafe because if any of the inner RowAddrSelection elements - /// is not a Partial then the iterator will panic because we don't know - /// the size of the bitmap. + /// is `Full` then the iterator will panic because we don't know the size + /// of the bitmap. `Runs` entries are flattened to individual addresses; + /// callers that want per-run iteration should use [`Self::iter_runs`]. pub unsafe fn into_addr_iter(self) -> impl Iterator { + // Static-dispatch wrapper so adding the Runs variant doesn't impose + // dyn-dispatch cost on the existing Partial path. itertools::Either is + // already a workspace dependency. + use itertools::Either; self.inner .into_iter() - .flat_map(|(fragment, selection)| match selection { - RowAddrSelection::Full => panic!("Size of full fragment is unknown"), - RowAddrSelection::Partial(bitmap) => bitmap.into_iter().map(move |val| { - let fragment = fragment as u64; - let row_offset = val as u64; - (fragment << 32) | row_offset - }), + .flat_map(|(fragment, selection)| { + let frag = fragment as u64; + match selection { + RowAddrSelection::Full => panic!("Size of full fragment is unknown"), + RowAddrSelection::Partial(bitmap) => Either::Left( + bitmap.into_iter().map(move |val| (frag << 32) | val as u64), + ), + RowAddrSelection::Runs(runs) => Either::Right(runs.into_iter().flat_map( + move |run| { + (u64::from(*run.start())..=u64::from(*run.end())) + .map(move |val| (frag << 32) | val) + }, + )), + } + }) + } + + /// Iterate the selected row addresses as `(fragment_id, run)` pairs. + /// + /// This is the run-shaped counterpart to [`Self::into_addr_iter`]: a + /// contiguous run of N selected rows yields one item, not N. Use it when + /// downstream consumers operate naturally on ranges (e.g. converting a + /// mask to offset ranges within a row-id sequence) — the work and + /// allocations scale with the number of runs, not the number of rows. + /// + /// Behavior per variant: + /// * `Runs` — yields the stored ranges directly (zero conversion cost). + /// * `Partial` — yields runs derived from the underlying roaring + /// bitmap's container structure via `Iter::next_range`. For dense + /// bitmaps this is essentially constant per container. + /// * `Full` — panics, matching [`Self::into_addr_iter`]'s contract. + /// Callers that build the map themselves know whether `Full` is in play. + /// + /// # Safety + /// Like `into_addr_iter`, this is unsafe because of the `Full` panic. + /// Marking it unsafe forces callers to acknowledge the invariant they + /// must uphold (no `Full` entries) at construction time. + pub unsafe fn iter_runs( + &self, + ) -> impl Iterator)> + '_ { + use itertools::Either; + self.inner + .iter() + .flat_map(|(fragment, selection)| { + let frag = *fragment; + match selection { + RowAddrSelection::Full => { + panic!("Size of full fragment is unknown") + } + RowAddrSelection::Partial(bitmap) => { + // Use roaring's run iterator so dense regions cost + // O(num_runs) instead of O(num_bits). + let mut iter = bitmap.iter(); + Either::Left(std::iter::from_fn(move || { + iter.next_range().map(|r| (frag, r)) + })) + } + RowAddrSelection::Runs(runs) => { + Either::Right(runs.iter().map(move |r| (frag, r.clone()))) + } + } }) } } @@ -688,21 +989,19 @@ impl std::ops::BitOrAssign for RowAddrTreeMap { impl std::ops::BitOrAssign<&Self> for RowAddrTreeMap { fn bitor_assign(&mut self, rhs: &Self) { for (fragment, rhs_set) in &rhs.inner { - let lhs_set = self.inner.get_mut(fragment); - if let Some(lhs_set) = lhs_set { - match lhs_set { - RowAddrSelection::Full => { - // If the fragment is already selected then there is nothing to do - } - RowAddrSelection::Partial(lhs_bitmap) => match rhs_set { - RowAddrSelection::Full => { - *lhs_set = RowAddrSelection::Full; - } - RowAddrSelection::Partial(rhs_set) => { - *lhs_bitmap |= rhs_set; - } - }, + if let Some(lhs_set) = self.inner.get_mut(fragment) { + if matches!(lhs_set, RowAddrSelection::Full) + || matches!(rhs_set, RowAddrSelection::Full) + { + *lhs_set = RowAddrSelection::Full; + continue; } + // Inflate any Runs on either side; the existing roaring + // OR machinery handles the merged bitmap path. + let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) + .into_partial_bitmap(); + lhs_bitmap |= &rhs_set.to_partial_bitmap(); + *lhs_set = RowAddrSelection::Partial(lhs_bitmap); } else { self.inner.insert(*fragment, rhs_set.clone()); } @@ -741,23 +1040,31 @@ impl std::ops::BitAndAssign<&Self> for RowAddrTreeMap { .retain(|fragment, _| rhs.inner.contains_key(fragment)); // For fragments that are on the RHS, intersect the bitmaps - for (fragment, mut lhs_set) in &mut self.inner { - match (&mut lhs_set, rhs.inner.get(fragment)) { - (_, None) => {} // Already handled by retain - (_, Some(RowAddrSelection::Full)) => { - // Everything selected on RHS, so can leave LHS untouched. - } - (RowAddrSelection::Partial(lhs_set), Some(RowAddrSelection::Partial(rhs_set))) => { - *lhs_set &= rhs_set; - } - (RowAddrSelection::Full, Some(RowAddrSelection::Partial(rhs_set))) => { - *lhs_set = RowAddrSelection::Partial(rhs_set.clone()); - } + for (fragment, lhs_set) in &mut self.inner { + let rhs_set = match rhs.inner.get(fragment) { + Some(set) => set, + None => continue, // Already handled by retain + }; + if matches!(rhs_set, RowAddrSelection::Full) { + // Everything selected on RHS — LHS unchanged. + continue; + } + // RHS is Partial or Runs; both sides need a concrete bitmap. + // For LHS=Full, the result is whatever RHS selects (cloned). + if matches!(lhs_set, RowAddrSelection::Full) { + *lhs_set = RowAddrSelection::Partial(rhs_set.to_partial_bitmap()); + continue; } + // LHS Partial or Runs: inflate, AND in-place. + let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) + .into_partial_bitmap(); + lhs_bitmap &= &rhs_set.to_partial_bitmap(); + *lhs_set = RowAddrSelection::Partial(lhs_bitmap); } // Some bitmaps might now be empty. If they are, we should remove them. self.inner.retain(|_, set| match set { RowAddrSelection::Partial(set) => !set.is_empty(), + RowAddrSelection::Runs(runs) => !runs.is_empty(), RowAddrSelection::Full => true, }); } @@ -784,33 +1091,29 @@ impl std::ops::Sub<&Self> for RowAddrTreeMap { impl std::ops::SubAssign<&Self> for RowAddrTreeMap { fn sub_assign(&mut self, rhs: &Self) { for (fragment, rhs_set) in &rhs.inner { - match self.inner.get_mut(fragment) { - None => {} - Some(RowAddrSelection::Full) => { - // If the fragment is already selected then there is nothing to do - match rhs_set { - RowAddrSelection::Full => { - self.inner.remove(fragment); - } - RowAddrSelection::Partial(rhs_set) => { - // This generally won't be hit. - let mut set = RoaringBitmap::full(); - set -= rhs_set; - self.inner.insert(*fragment, RowAddrSelection::Partial(set)); - } - } - } - Some(RowAddrSelection::Partial(lhs_set)) => match rhs_set { - RowAddrSelection::Full => { - self.inner.remove(fragment); - } - RowAddrSelection::Partial(rhs_set) => { - *lhs_set -= rhs_set; - if lhs_set.is_empty() { - self.inner.remove(fragment); - } - } - }, + let Some(lhs_set) = self.inner.get_mut(fragment) else { + continue; + }; + if matches!(rhs_set, RowAddrSelection::Full) { + self.inner.remove(fragment); + continue; + } + if matches!(lhs_set, RowAddrSelection::Full) { + // LHS=Full, RHS=Partial|Runs: result is Full \ rhs. + let rhs_bitmap = rhs_set.to_partial_bitmap(); + let mut set = RoaringBitmap::full(); + set -= &rhs_bitmap; + self.inner.insert(*fragment, RowAddrSelection::Partial(set)); + continue; + } + // LHS Partial or Runs, RHS Partial or Runs: inflate both and subtract. + let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) + .into_partial_bitmap(); + lhs_bitmap -= &rhs_set.to_partial_bitmap(); + if lhs_bitmap.is_empty() { + self.inner.remove(fragment); + } else { + self.inner.insert(*fragment, RowAddrSelection::Partial(lhs_bitmap)); } } } @@ -818,7 +1121,7 @@ impl std::ops::SubAssign<&Self> for RowAddrTreeMap { impl FromIterator for RowAddrTreeMap { fn from_iter>(iter: T) -> Self { - let mut inner = BTreeMap::new(); + let mut inner: BTreeMap = BTreeMap::new(); for row_addr in iter { let upper = (row_addr >> 32) as u32; let lower = row_addr as u32; @@ -834,6 +1137,14 @@ impl FromIterator for RowAddrTreeMap { Some(RowAddrSelection::Partial(set)) => { set.insert(lower); } + Some(sel @ RowAddrSelection::Runs(_)) => { + // Scalar inserts: degrade to Partial to keep the per-bit + // semantics this iterator-based constructor implies. + let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) + .into_partial_bitmap(); + bitmap.insert(lower); + *sel = RowAddrSelection::Partial(bitmap); + } } } Self { inner } @@ -889,6 +1200,12 @@ impl Extend for RowAddrTreeMap { Some(RowAddrSelection::Partial(set)) => { set.insert(lower); } + Some(sel @ RowAddrSelection::Runs(_)) => { + let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) + .into_partial_bitmap(); + bitmap.insert(lower); + *sel = RowAddrSelection::Partial(bitmap); + } } } } @@ -905,21 +1222,19 @@ impl Extend for RowAddrTreeMap { fn extend>(&mut self, iter: T) { for other in iter { for (fragment, set) in other.inner { - match self.inner.get_mut(&fragment) { - None => { - self.inner.insert(fragment, set); + if let Some(lhs_set) = self.inner.get_mut(&fragment) { + if matches!(lhs_set, RowAddrSelection::Full) + || matches!(set, RowAddrSelection::Full) + { + *lhs_set = RowAddrSelection::Full; + continue; } - Some(RowAddrSelection::Full) => { - // If the fragment is already selected then there is nothing to do - } - Some(RowAddrSelection::Partial(lhs_set)) => match set { - RowAddrSelection::Full => { - self.inner.insert(fragment, RowAddrSelection::Full); - } - RowAddrSelection::Partial(rhs_set) => { - *lhs_set |= rhs_set; - } - }, + let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) + .into_partial_bitmap(); + lhs_bitmap |= set.into_partial_bitmap(); + *lhs_set = RowAddrSelection::Partial(lhs_bitmap); + } else { + self.inner.insert(fragment, set); } } } @@ -2397,6 +2712,176 @@ mod tests { assert!(mask.selected(0)); } + fn frag0(offset: u32) -> u64 { + offset as u64 + } + + #[test] + fn test_insert_run_creates_runs_variant() { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 100..=199); + match map.get(&0) { + Some(RowAddrSelection::Runs(runs)) => assert_eq!(runs, &[100..=199]), + other => panic!("expected Runs, got {other:?}"), + } + assert!(map.contains(frag0(150))); + assert!(!map.contains(frag0(99))); + assert!(!map.contains(frag0(200))); + assert_eq!(map.len(), Some(100)); + } + + #[test] + fn test_insert_run_merges_adjacent_and_overlapping() { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 10..=19); + map.insert_run(0, 20..=29); // adjacent — should merge + map.insert_run(0, 5..=12); // overlapping with 10..=29 + let Some(RowAddrSelection::Runs(runs)) = map.get(&0) else { + panic!("expected Runs"); + }; + assert_eq!(runs, &[5..=29]); + assert_eq!(map.len(), Some(25)); + } + + #[test] + fn test_insert_run_preserves_invariants_on_unsorted_input() { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 200..=299); + map.insert_run(0, 0..=99); // out of order + map.insert_run(0, 150..=160); + let Some(RowAddrSelection::Runs(runs)) = map.get(&0) else { + panic!("expected Runs"); + }; + assert_eq!(runs, &[0..=99, 150..=160, 200..=299]); + // The merged set must be in strictly sorted, non-overlapping, + // non-adjacent order — verify by walking pairwise. + for w in runs.windows(2) { + assert!(*w[0].end() + 1 < *w[1].start(), + "ranges should be non-adjacent: {:?}", w); + } + } + + #[test] + fn test_insert_run_on_partial_fragment_degrades_to_partial() { + let mut map = RowAddrTreeMap::new(); + // Force a Partial entry via a single insert. + map.insert(frag0(50)); + map.insert_run(0, 100..=199); + match map.get(&0) { + Some(RowAddrSelection::Partial(bitmap)) => { + assert!(bitmap.contains(50)); + assert!(bitmap.contains(100)); + assert!(bitmap.contains(199)); + assert_eq!(bitmap.len(), 101); + } + other => panic!("expected Partial, got {other:?}"), + } + } + + #[test] + fn test_insert_run_on_full_fragment_is_noop() { + let mut map = RowAddrTreeMap::new(); + map.insert_fragment(0); + map.insert_run(0, 100..=199); + assert!(matches!(map.get(&0), Some(RowAddrSelection::Full))); + } + + #[test] + fn test_iter_runs_returns_stored_runs_for_runs_variant() { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 10..=19); + map.insert_run(0, 100..=199); + map.insert_run(1, 0..=4); + let collected: Vec<_> = unsafe { map.iter_runs() }.collect(); + assert_eq!(collected, vec![(0, 10..=19), (0, 100..=199), (1, 0..=4)]); + } + + #[test] + fn test_iter_runs_derives_runs_from_partial_bitmap() { + // Build a Partial entry the conventional way; iter_runs should still + // recover the underlying run structure via Iter::next_range. + let mut map = RowAddrTreeMap::new(); + map.insert_range(0..100); + map.insert_range(200..250); + let collected: Vec<_> = unsafe { map.iter_runs() }.collect(); + assert_eq!(collected, vec![(0, 0..=99), (0, 200..=249)]); + } + + #[test] + fn test_iter_runs_mixed_variants() { + let mut map = RowAddrTreeMap::new(); + // Fragment 0: Runs + map.insert_run(0, 5..=9); + // Fragment 1: Partial + map.insert_range((1u64 << 32) + 0..(1u64 << 32) + 3); + let collected: Vec<_> = unsafe { map.iter_runs() }.collect(); + assert_eq!(collected, vec![(0, 5..=9), (1, 0..=2)]); + } + + #[test] + fn test_canonicalize_to_partial_drops_runs_form() { + let mut map = RowAddrTreeMap::new(); + map.insert_run(0, 10..=19); + assert!(map.get_fragment_bitmap(0).is_none()); + map.canonicalize_to_partial(0); + let bitmap = map.get_fragment_bitmap(0).expect("should be Partial now"); + assert_eq!(bitmap.len(), 10); + assert!(bitmap.contains(10) && bitmap.contains(19)); + } + + #[test] + fn test_runs_serialization_round_trip_matches_partial() { + // Two maps that are semantically equal but stored differently must + // produce the same on-disk bytes. This guarantees readers built + // against the old format can load data written by the new code. + let mut via_runs = RowAddrTreeMap::new(); + via_runs.insert_run(0, 100..=199); + let mut via_partial = RowAddrTreeMap::new(); + via_partial.insert_range(100..200); + + let mut buf_runs = Vec::new(); + via_runs.serialize_into(&mut buf_runs).unwrap(); + let mut buf_partial = Vec::new(); + via_partial.serialize_into(&mut buf_partial).unwrap(); + assert_eq!(buf_runs, buf_partial); + + // Reading back always yields Partial — the wire format does not + // carry the Runs discriminator. + let restored = RowAddrTreeMap::deserialize_from(&buf_runs[..]).unwrap(); + assert!(matches!( + restored.get(&0), + Some(RowAddrSelection::Partial(_)) + )); + assert_eq!(restored.len(), Some(100)); + } + + #[test] + fn test_runs_set_ops_match_partial_semantics() { + // Same selection encoded two ways must behave identically under + // existing set operations. + let mut runs_a = RowAddrTreeMap::new(); + runs_a.insert_run(0, 0..=99); + runs_a.insert_run(0, 200..=299); + let mut partial_a = RowAddrTreeMap::new(); + partial_a.insert_range(0..100); + partial_a.insert_range(200..300); + + let mut b = RowAddrTreeMap::new(); + b.insert_range(50..250); + + let inter_runs = runs_a.clone() & &b; + let inter_partial = partial_a.clone() & &b; + assert_eq!(inter_runs.len(), inter_partial.len()); + + let union_runs = runs_a.clone() | &b; + let union_partial = partial_a.clone() | &b; + assert_eq!(union_runs.len(), union_partial.len()); + + let diff_runs = runs_a - &b; + let diff_partial = partial_a - &b; + assert_eq!(diff_runs.len(), diff_partial.len()); + } + proptest::proptest! { #[test] fn test_row_id_set_from_sorted_iter_proptest( diff --git a/rust/lance/src/io/exec/filtered_read.rs b/rust/lance/src/io/exec/filtered_read.rs index 73ddd52a4aa..09949445b3b 100644 --- a/rust/lance/src/io/exec/filtered_read.rs +++ b/rust/lance/src/io/exec/filtered_read.rs @@ -1604,6 +1604,10 @@ impl FilteredReadExec { for (fragment_id, selection) in plan.rows.iter() { let ranges = match selection { RowAddrSelection::Partial(bitmap) => bitmap_to_ranges(bitmap), + RowAddrSelection::Runs(runs) => runs + .iter() + .map(|r| u64::from(*r.start())..u64::from(*r.end()) + 1) + .collect(), RowAddrSelection::Full => { let fragment = self .dataset From c6e85d424b6dc1bc51f48b086883abf2577c691d Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 18 May 2026 16:26:49 +0000 Subject: [PATCH 3/3] chore(mask): fmt + clippy nits Apply cargo fmt to the new Runs-variant code and address two clippy findings: * manual_let_else in BitAndAssign: convert the `Some(set) => set, None => continue` match into a `let ... else` (the retain pass above already guarantees the entry exists; the else arm is just a defensive skip). * identity_op in test_iter_runs_mixed_variants: drop the stray `+ 0` in the second insert_range bound. No behavior change. Co-Authored-By: Claude Opus 4.7 (1M context) --- rust/lance-core/src/utils/mask.rs | 155 +++++++++++++++--------------- 1 file changed, 76 insertions(+), 79 deletions(-) diff --git a/rust/lance-core/src/utils/mask.rs b/rust/lance-core/src/utils/mask.rs index de7bc9cdfaa..ac35d5f4b92 100644 --- a/rust/lance-core/src/utils/mask.rs +++ b/rust/lance-core/src/utils/mask.rs @@ -489,8 +489,8 @@ impl RowSetOps for RowAddrTreeMap { } Some(sel @ RowAddrSelection::Runs(_)) => { // Conservative: inflate to Partial so the same code path runs. - let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut bitmap = + std::mem::replace(sel, RowAddrSelection::Full).into_partial_bitmap(); let removed = bitmap.remove(lower); if bitmap.is_empty() { self.inner.remove(&upper); @@ -584,21 +584,19 @@ impl RowAddrTreeMap { let inner_iters: Vec<_> = self .inner .iter() - .filter_map(|(frag_id, row_addr_selection)| { - match row_addr_selection { - RowAddrSelection::Full => None, - RowAddrSelection::Partial(bitmap) => Some(Either::Left( - bitmap - .iter() - .map(|row_offset| RowAddress::new_from_parts(*frag_id, row_offset)), - )), - RowAddrSelection::Runs(runs) => { - let frag_id = *frag_id; - Some(Either::Right(runs.iter().flat_map(move |run| { - (*run.start()..=*run.end()) - .map(move |row_offset| RowAddress::new_from_parts(frag_id, row_offset)) - }))) - } + .filter_map(|(frag_id, row_addr_selection)| match row_addr_selection { + RowAddrSelection::Full => None, + RowAddrSelection::Partial(bitmap) => { + Some(Either::Left(bitmap.iter().map(|row_offset| { + RowAddress::new_from_parts(*frag_id, row_offset) + }))) + } + RowAddrSelection::Runs(runs) => { + let frag_id = *frag_id; + Some(Either::Right(runs.iter().flat_map(move |run| { + (*run.start()..=*run.end()) + .map(move |row_offset| RowAddress::new_from_parts(frag_id, row_offset)) + }))) } }) .collect(); @@ -637,8 +635,8 @@ impl RowAddrTreeMap { // Single-row inserts always inflate Runs back to Partial; the // representation is for range-shaped producers, not scalar // inserts. - let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut bitmap = + std::mem::replace(sel, RowAddrSelection::Full).into_partial_bitmap(); let inserted = bitmap.insert(row_addr); *sel = RowAddrSelection::Partial(bitmap); inserted @@ -691,8 +689,8 @@ impl RowAddrTreeMap { // Inflate to Partial so the count-tracking logic matches // the existing semantics. Callers wanting to stay on Runs // should use insert_run. - let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut bitmap = + std::mem::replace(sel, RowAddrSelection::Full).into_partial_bitmap(); count += bitmap.insert_range(start..=end); *sel = RowAddrSelection::Partial(bitmap); } @@ -895,23 +893,21 @@ impl RowAddrTreeMap { // dyn-dispatch cost on the existing Partial path. itertools::Either is // already a workspace dependency. use itertools::Either; - self.inner - .into_iter() - .flat_map(|(fragment, selection)| { - let frag = fragment as u64; - match selection { - RowAddrSelection::Full => panic!("Size of full fragment is unknown"), - RowAddrSelection::Partial(bitmap) => Either::Left( - bitmap.into_iter().map(move |val| (frag << 32) | val as u64), - ), - RowAddrSelection::Runs(runs) => Either::Right(runs.into_iter().flat_map( - move |run| { - (u64::from(*run.start())..=u64::from(*run.end())) - .map(move |val| (frag << 32) | val) - }, - )), + self.inner.into_iter().flat_map(|(fragment, selection)| { + let frag = fragment as u64; + match selection { + RowAddrSelection::Full => panic!("Size of full fragment is unknown"), + RowAddrSelection::Partial(bitmap) => { + Either::Left(bitmap.into_iter().map(move |val| (frag << 32) | val as u64)) } - }) + RowAddrSelection::Runs(runs) => { + Either::Right(runs.into_iter().flat_map(move |run| { + (u64::from(*run.start())..=u64::from(*run.end())) + .map(move |val| (frag << 32) | val) + })) + } + } + }) } /// Iterate the selected row addresses as `(fragment_id, run)` pairs. @@ -934,31 +930,27 @@ impl RowAddrTreeMap { /// Like `into_addr_iter`, this is unsafe because of the `Full` panic. /// Marking it unsafe forces callers to acknowledge the invariant they /// must uphold (no `Full` entries) at construction time. - pub unsafe fn iter_runs( - &self, - ) -> impl Iterator)> + '_ { + pub unsafe fn iter_runs(&self) -> impl Iterator)> + '_ { use itertools::Either; - self.inner - .iter() - .flat_map(|(fragment, selection)| { - let frag = *fragment; - match selection { - RowAddrSelection::Full => { - panic!("Size of full fragment is unknown") - } - RowAddrSelection::Partial(bitmap) => { - // Use roaring's run iterator so dense regions cost - // O(num_runs) instead of O(num_bits). - let mut iter = bitmap.iter(); - Either::Left(std::iter::from_fn(move || { - iter.next_range().map(|r| (frag, r)) - })) - } - RowAddrSelection::Runs(runs) => { - Either::Right(runs.iter().map(move |r| (frag, r.clone()))) - } + self.inner.iter().flat_map(|(fragment, selection)| { + let frag = *fragment; + match selection { + RowAddrSelection::Full => { + panic!("Size of full fragment is unknown") } - }) + RowAddrSelection::Partial(bitmap) => { + // Use roaring's run iterator so dense regions cost + // O(num_runs) instead of O(num_bits). + let mut iter = bitmap.iter(); + Either::Left(std::iter::from_fn(move || { + iter.next_range().map(|r| (frag, r)) + })) + } + RowAddrSelection::Runs(runs) => { + Either::Right(runs.iter().map(move |r| (frag, r.clone()))) + } + } + }) } } @@ -998,8 +990,8 @@ impl std::ops::BitOrAssign<&Self> for RowAddrTreeMap { } // Inflate any Runs on either side; the existing roaring // OR machinery handles the merged bitmap path. - let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut lhs_bitmap = + std::mem::replace(lhs_set, RowAddrSelection::Full).into_partial_bitmap(); lhs_bitmap |= &rhs_set.to_partial_bitmap(); *lhs_set = RowAddrSelection::Partial(lhs_bitmap); } else { @@ -1041,9 +1033,10 @@ impl std::ops::BitAndAssign<&Self> for RowAddrTreeMap { // For fragments that are on the RHS, intersect the bitmaps for (fragment, lhs_set) in &mut self.inner { - let rhs_set = match rhs.inner.get(fragment) { - Some(set) => set, - None => continue, // Already handled by retain + // Already handled by retain — any fragment without an RHS entry + // was dropped above. + let Some(rhs_set) = rhs.inner.get(fragment) else { + continue; }; if matches!(rhs_set, RowAddrSelection::Full) { // Everything selected on RHS — LHS unchanged. @@ -1056,8 +1049,8 @@ impl std::ops::BitAndAssign<&Self> for RowAddrTreeMap { continue; } // LHS Partial or Runs: inflate, AND in-place. - let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut lhs_bitmap = + std::mem::replace(lhs_set, RowAddrSelection::Full).into_partial_bitmap(); lhs_bitmap &= &rhs_set.to_partial_bitmap(); *lhs_set = RowAddrSelection::Partial(lhs_bitmap); } @@ -1107,13 +1100,14 @@ impl std::ops::SubAssign<&Self> for RowAddrTreeMap { continue; } // LHS Partial or Runs, RHS Partial or Runs: inflate both and subtract. - let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut lhs_bitmap = + std::mem::replace(lhs_set, RowAddrSelection::Full).into_partial_bitmap(); lhs_bitmap -= &rhs_set.to_partial_bitmap(); if lhs_bitmap.is_empty() { self.inner.remove(fragment); } else { - self.inner.insert(*fragment, RowAddrSelection::Partial(lhs_bitmap)); + self.inner + .insert(*fragment, RowAddrSelection::Partial(lhs_bitmap)); } } } @@ -1140,8 +1134,8 @@ impl FromIterator for RowAddrTreeMap { Some(sel @ RowAddrSelection::Runs(_)) => { // Scalar inserts: degrade to Partial to keep the per-bit // semantics this iterator-based constructor implies. - let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut bitmap = + std::mem::replace(sel, RowAddrSelection::Full).into_partial_bitmap(); bitmap.insert(lower); *sel = RowAddrSelection::Partial(bitmap); } @@ -1201,8 +1195,8 @@ impl Extend for RowAddrTreeMap { set.insert(lower); } Some(sel @ RowAddrSelection::Runs(_)) => { - let mut bitmap = std::mem::replace(sel, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut bitmap = + std::mem::replace(sel, RowAddrSelection::Full).into_partial_bitmap(); bitmap.insert(lower); *sel = RowAddrSelection::Partial(bitmap); } @@ -1229,8 +1223,8 @@ impl Extend for RowAddrTreeMap { *lhs_set = RowAddrSelection::Full; continue; } - let mut lhs_bitmap = std::mem::replace(lhs_set, RowAddrSelection::Full) - .into_partial_bitmap(); + let mut lhs_bitmap = + std::mem::replace(lhs_set, RowAddrSelection::Full).into_partial_bitmap(); lhs_bitmap |= set.into_partial_bitmap(); *lhs_set = RowAddrSelection::Partial(lhs_bitmap); } else { @@ -2756,8 +2750,11 @@ mod tests { // The merged set must be in strictly sorted, non-overlapping, // non-adjacent order — verify by walking pairwise. for w in runs.windows(2) { - assert!(*w[0].end() + 1 < *w[1].start(), - "ranges should be non-adjacent: {:?}", w); + assert!( + *w[0].end() + 1 < *w[1].start(), + "ranges should be non-adjacent: {:?}", + w + ); } } @@ -2813,7 +2810,7 @@ mod tests { // Fragment 0: Runs map.insert_run(0, 5..=9); // Fragment 1: Partial - map.insert_range((1u64 << 32) + 0..(1u64 << 32) + 3); + map.insert_range((1u64 << 32)..(1u64 << 32) + 3); let collected: Vec<_> = unsafe { map.iter_runs() }.collect(); assert_eq!(collected, vec![(0, 5..=9), (1, 0..=2)]); }