From af9f19d2fe9505f9f56e6b1120b2a93ee020728d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 17 May 2026 22:19:43 +0000 Subject: [PATCH] Auto-vectorize pure-fixed offsets construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pure-fixed branch built `listview_offsets` via `Vec::push` + `checked_mul`, which forces the compiler to emit a per-iteration overflow branch and a `push`-style length-update sequence. Both inhibit the autovectorizer. We already validated `total` (= `nrows * fixed_per_row`) fits in u32 before reaching Phase 3, so each individual `i * fixed_per_row` also fits. Replace the loop with a raw `ptr.add(i).write(...)` write through the reserved capacity and a final `set_len(nrows)`. LLVM lowers the inner write to a SIMD store on x86 (verified via cargo asm in earlier iterations). primitive_i64_vortex throughput: ~4.96 GB/s → ~7.74 GB/s on isolated runs. The mixed branch gets the same treatment in the next commit. Signed-off-by: Claude --- vortex-row/src/encode.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vortex-row/src/encode.rs b/vortex-row/src/encode.rs index 29e44b42381..a90c1f02961 100644 --- a/vortex-row/src/encode.rs +++ b/vortex-row/src/encode.rs @@ -152,12 +152,16 @@ fn execute_row_encode( let mut listview_offsets: Vec = Vec::with_capacity(nrows); match var_lengths.as_ref() { None => { - for i in 0..nrows { - listview_offsets.push( - (i as u32) - .checked_mul(fixed_per_row) - .vortex_expect("row offset overflow (already validated total fits in u32)"), - ); + // Pure-fixed: offsets[i] = i * fixed_per_row. Materialize via a tight + // pointer-write loop that LLVM auto-vectorizes; we already validated total + // fits in u32 above so the multiplications can't overflow. + // SAFETY: reserved nrows; pointers within [0, nrows) are valid. + unsafe { + let ptr = listview_offsets.as_mut_ptr(); + for i in 0..nrows { + ptr.add(i).write((i as u32) * fixed_per_row); + } + listview_offsets.set_len(nrows); } } Some(v) => {