From 6c8496f87b228a132e8319c28b9ec835f1763c16 Mon Sep 17 00:00:00 2001
From: Abanoub Doss
Date: Thu, 21 May 2026 08:36:15 -0500
Subject: [PATCH] =?UTF-8?q?test(vortex-fastlanes):=20repro=20for=20ABA-14?=
 =?UTF-8?q?=20=E2=80=94=20ZeroBytes=20scheme=20reports=20nbytes()=3D=3D0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an ignored regression test that reproduces the bug described in
ABA-14: a BitPackedArray produced from an all-zero input selects
bit_width=0 (cheapest for all-zero data), which causes
`bitpack_primitive` to short-circuit to `Buffer::<T>::empty()`. The
VTable exposes that buffer as the sole physical buffer, so
`Array::nbytes()` reports 0 for an array of 1024 logical elements.

Downstream: `vortex-compressor/src/estimate.rs` maps `after_nbytes==0`
from sample compression to `EstimateScore::ZeroBytes`, which
`EstimateScore::is_valid` treats as ineligible for scheme selection —
the scheme that achieves perfect compression of all-zero data is
silently excluded from the compressor's selection.

No fix is included pending a semantics decision: should `nbytes()` for
a zero-bit-width scheme report virtual uncompressed size or actual byte
size?
See https://linear.app/abanoubdoss/issue/ABA-14

Co-Authored-By: Claude Opus 4.7 (1M context)
Signed-off-by: Abanoub Doss
---
 .../src/bitpacking/array/bitpack_compress.rs | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs
index 2e517819059..6dd1452b904 100644
--- a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs
+++ b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs
@@ -650,4 +650,55 @@ mod test {
         assert_arrays_eq!(chunk_offsets, PrimitiveArray::from_iter([0u64]));
         Ok(())
     }
+
+    /// Repro for ABA-14: a `BitPackedArray` produced from an all-zero input
+    /// calls `bitpack_primitive` with `bit_width == 0`, which short-circuits to
+    /// `Buffer::<T>::empty()` (bitpack_compress.rs:147-149). The VTable exposes
+    /// that buffer as the sole physical buffer, so `Array::nbytes()` — which
+    /// sums buffer lengths — reports 0 despite the array representing 1024
+    /// logical elements.
+    ///
+    /// Downstream: `vortex-compressor/src/estimate.rs` maps `after_nbytes == 0`
+    /// from sample compression to `EstimateScore::ZeroBytes`, which
+    /// `EstimateScore::is_valid` treats as ineligible for scheme selection
+    /// (line 142-149). The scheme that achieves *perfect* compression of all-zero
+    /// data is therefore silently excluded from the compressor's decision.
+    ///
+    /// Open question for upstream: should `nbytes()` for a zero-bit-width scheme
+    /// report virtual uncompressed size or actual byte size? See
+    /// https://linear.app/abanoubdoss/issue/ABA-14
+    #[test]
+    #[ignore = "demonstrates ABA-14; see https://linear.app/abanoubdoss/issue/ABA-14"]
+    fn issue_aba14_bitpack_zerobytes_reports_nonzero_nbytes() -> VortexResult<()> {
+        let mut ctx = SESSION.create_execution_ctx();
+
+        // 1024 zeros: exactly one FastLanes chunk, unambiguously non-empty input.
+        // `bitpack_to_best_bit_width` will select bit_width=0 (zero packed bits
+        // is cheapest for all-zero data), triggering the bug.
+        let arr = PrimitiveArray::new(
+            Buffer::<u32>::copy_from(&vec![0u32; 1024]),
+            Validity::NonNullable,
+        );
+        assert_eq!(arr.len(), 1024, "harness: input length must be 1024");
+
+        let bp = bitpack_to_best_bit_width(&arr, &mut ctx)?;
+        let bit_width = bp.bit_width();
+        let encoded = bp.into_array();
+        let nbytes = encoded.nbytes();
+
+        // A BitPackedArray representing 1024 logical values cannot honestly
+        // report zero bytes — even at bit_width==0 there is metadata overhead.
+        assert!(
+            nbytes > 0,
+            "ABA-14 reproduced: BitPacked(1024 zeros) reports nbytes={nbytes} \
+             with bit_width={bit_width}. Bug site: \
+             encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs:147-149 \
+             (Buffer::empty() short-circuit for bit_width==0). Downstream: \
+             vortex-compressor/src/estimate.rs maps this to EstimateScore::ZeroBytes \
+             which is ineligible for scheme selection — perfect compression is \
+             silently rejected."
+        );
+
+        Ok(())
+    }
 }