Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions vortex-row/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ version = { workspace = true }
workspace = true

[dependencies]
smallvec = { workspace = true }
vortex-error = { workspace = true }
vortex-session = { workspace = true }
156 changes: 156 additions & 0 deletions vortex-row/public-api.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,159 @@
pub mod vortex_row

pub mod vortex_row::options

pub struct vortex_row::options::RowEncodeOptions

pub vortex_row::options::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>

impl vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::options::SortField>) -> Self

impl core::clone::Clone for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::clone(&self) -> vortex_row::options::RowEncodeOptions

impl core::cmp::Eq for vortex_row::options::RowEncodeOptions

impl core::cmp::PartialEq for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::eq(&self, &vortex_row::options::RowEncodeOptions) -> bool

impl core::fmt::Debug for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::fmt::Display for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::hash::Hash for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)

impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions

pub struct vortex_row::options::SortField

pub vortex_row::options::SortField::descending: bool

pub vortex_row::options::SortField::nulls_first: bool

impl vortex_row::options::SortField

pub fn vortex_row::options::SortField::new(bool, bool) -> Self

pub fn vortex_row::options::SortField::non_null_sentinel(&self) -> u8

pub fn vortex_row::options::SortField::null_sentinel(&self) -> u8

impl core::clone::Clone for vortex_row::options::SortField

pub fn vortex_row::options::SortField::clone(&self) -> vortex_row::options::SortField

impl core::cmp::Eq for vortex_row::options::SortField

impl core::cmp::PartialEq for vortex_row::options::SortField

pub fn vortex_row::options::SortField::eq(&self, &vortex_row::options::SortField) -> bool

impl core::default::Default for vortex_row::options::SortField

pub fn vortex_row::options::SortField::default() -> Self

impl core::fmt::Debug for vortex_row::options::SortField

pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::fmt::Display for vortex_row::options::SortField

pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::hash::Hash for vortex_row::options::SortField

pub fn vortex_row::options::SortField::hash<__H: core::hash::Hasher>(&self, &mut __H)

impl core::marker::Copy for vortex_row::options::SortField

impl core::marker::StructuralPartialEq for vortex_row::options::SortField

pub const vortex_row::options::FIELDS_INLINE: usize

pub struct vortex_row::RowEncodeOptions

pub vortex_row::RowEncodeOptions::fields: smallvec::SmallVec<[vortex_row::options::SortField; 4]>

impl vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::new(impl core::iter::traits::collect::IntoIterator<Item = vortex_row::options::SortField>) -> Self

impl core::clone::Clone for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::clone(&self) -> vortex_row::options::RowEncodeOptions

impl core::cmp::Eq for vortex_row::options::RowEncodeOptions

impl core::cmp::PartialEq for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::eq(&self, &vortex_row::options::RowEncodeOptions) -> bool

impl core::fmt::Debug for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::fmt::Display for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::hash::Hash for vortex_row::options::RowEncodeOptions

pub fn vortex_row::options::RowEncodeOptions::hash<__H: core::hash::Hasher>(&self, &mut __H)

impl core::marker::StructuralPartialEq for vortex_row::options::RowEncodeOptions

pub struct vortex_row::SortField

pub vortex_row::SortField::descending: bool

pub vortex_row::SortField::nulls_first: bool

impl vortex_row::options::SortField

pub fn vortex_row::options::SortField::new(bool, bool) -> Self

pub fn vortex_row::options::SortField::non_null_sentinel(&self) -> u8

pub fn vortex_row::options::SortField::null_sentinel(&self) -> u8

impl core::clone::Clone for vortex_row::options::SortField

pub fn vortex_row::options::SortField::clone(&self) -> vortex_row::options::SortField

impl core::cmp::Eq for vortex_row::options::SortField

impl core::cmp::PartialEq for vortex_row::options::SortField

pub fn vortex_row::options::SortField::eq(&self, &vortex_row::options::SortField) -> bool

impl core::default::Default for vortex_row::options::SortField

pub fn vortex_row::options::SortField::default() -> Self

impl core::fmt::Debug for vortex_row::options::SortField

pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::fmt::Display for vortex_row::options::SortField

pub fn vortex_row::options::SortField::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::hash::Hash for vortex_row::options::SortField

pub fn vortex_row::options::SortField::hash<__H: core::hash::Hasher>(&self, &mut __H)

impl core::marker::Copy for vortex_row::options::SortField

impl core::marker::StructuralPartialEq for vortex_row::options::SortField

pub fn vortex_row::initialize(&vortex_session::VortexSession)
4 changes: 4 additions & 0 deletions vortex-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
//! Subsequent commits add the encoder, decoder helpers, and per-encoding fast paths.
//! This commit only establishes the crate skeleton and an `initialize` stub.

pub mod options;

pub use options::RowEncodeOptions;
pub use options::SortField;
use vortex_session::VortexSession;

/// Register the row-encoding scalar functions on the given session.
Expand Down
157 changes: 157 additions & 0 deletions vortex-row/src/options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::fmt::Display;
use std::fmt::Formatter;

use smallvec::SmallVec;

/// Sort behaviour applied to a single column by the row-oriented byte encoder.
///
/// Two independent switches are available:
/// - `descending`: when set, the encoded value bytes are bit-inverted, so a
///   lexicographic comparison of the bytes yields the reverse of the natural
///   ordering. The null sentinel byte is NOT inverted, which keeps nulls at the
///   position requested through `nulls_first`.
/// - `nulls_first`: when set, nulls order before non-null values; when clear,
///   nulls order after them. This is realised through the sentinel byte that
///   precedes every value's encoded bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SortField {
    /// When true, the encoded value bytes are bit-inverted so that comparing
    /// them lexicographically gives the reverse of the natural ordering.
    pub descending: bool,
    /// When true, nulls order before non-null values; when false, after them.
    pub nulls_first: bool,
}

impl SortField {
    /// Build a `SortField` from explicitly chosen options.
    pub fn new(descending: bool, nulls_first: bool) -> Self {
        Self {
            descending,
            nulls_first,
        }
    }

    /// Sentinel byte written in front of a non-null value.
    #[inline]
    pub fn non_null_sentinel(&self) -> u8 {
        // Fixed at 0x01 regardless of the options; the null sentinel is picked
        // on either side of it.
        0x01
    }

    /// Sentinel byte written for a null value.
    #[inline]
    pub fn null_sentinel(&self) -> u8 {
        if !self.nulls_first {
            // Larger than the non-null sentinel: nulls order after non-nulls.
            return 0x02;
        }
        // Smaller than the non-null sentinel: nulls order before non-nulls.
        0x00
    }
}

impl Default for SortField {
    /// Ascending order with nulls placed first.
    fn default() -> Self {
        Self::new(false, true)
    }
}

impl Display for SortField {
    /// Renders both options as `descending=<bool>, nulls_first=<bool>`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            descending,
            nulls_first,
        } = self;
        write!(f, "descending={descending}, nulls_first={nulls_first}")
    }
}

/// Inline capacity for [`RowEncodeOptions::fields`], i.e. the `N` of the backing
/// `SmallVec<[SortField; N]>`.
///
/// Up to this many [`SortField`]s are held inline without a heap allocation;
/// beyond that, the storage spills to the heap.
pub const FIELDS_INLINE: usize = 4;

/// Options for the variadic [`RowSize`] and [`RowEncode`] scalar functions:
/// one [`SortField`] per input column.
///
/// Stored in a [`SmallVec`] so that typical 1–4 column keys avoid a heap
/// allocation; longer field lists spill to the heap transparently.
///
/// [`RowSize`]: super::size::RowSize
/// [`RowEncode`]: super::encode::RowEncode
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RowEncodeOptions {
/// Per-column sort fields, in left-to-right column order.
pub fields: SmallVec<[SortField; FIELDS_INLINE]>,
}

impl RowEncodeOptions {
/// Construct a new `RowEncodeOptions` from any iterator of [`SortField`]s.
pub fn new(fields: impl IntoIterator<Item = SortField>) -> Self {
Self {
fields: fields.into_iter().collect(),
}
}
}

impl Display for RowEncodeOptions {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "[")?;
for (i, field) in self.fields.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{}", field)?;
}
write!(f, "]")
}
}

/// Encode a [`RowEncodeOptions`] as a compact byte vector.
///
/// Layout: a 4-byte little-endian field count, followed by two bytes per field
/// (`descending` then `nulls_first`, each written as `0` or `1`).
///
/// # Panics
///
/// Panics (via `vortex_expect`) if the number of fields does not fit in a `u32`.
pub(crate) fn serialize_row_encode_options(opts: &RowEncodeOptions) -> Vec<u8> {
    use vortex_error::VortexExpect;

    let field_count = opts.fields.len();
    let prefix = u32::try_from(field_count)
        .vortex_expect("RowEncodeOptions length must fit in u32")
        .to_le_bytes();

    let mut buf = Vec::with_capacity(prefix.len() + 2 * field_count);
    buf.extend_from_slice(&prefix);
    buf.extend(
        opts.fields
            .iter()
            .flat_map(|field| [u8::from(field.descending), u8::from(field.nulls_first)]),
    );
    buf
}

/// Deserialize a [`RowEncodeOptions`] produced by [`serialize_row_encode_options`].
///
/// Expected layout: a 4-byte little-endian field count `n`, followed by exactly
/// `2 * n` bytes. Each pair of bytes is one field (`descending`, then
/// `nulls_first`); any non-zero byte is read as `true`.
///
/// # Errors
///
/// Returns an error if `bytes` is shorter than the 4-byte length prefix, if the
/// size implied by the prefix cannot be represented as a `usize`, or if
/// `bytes.len()` differs from `4 + 2 * n`.
pub(crate) fn deserialize_row_encode_options(
    bytes: &[u8],
) -> vortex_error::VortexResult<RowEncodeOptions> {
    if bytes.len() < 4 {
        vortex_error::vortex_bail!("RowEncodeOptions metadata must contain a 4-byte length prefix");
    }
    let (prefix, payload) = bytes.split_at(4);
    let n = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;

    // The prefix comes straight from the input, so compute the expected size with
    // checked arithmetic: on 32-bit targets `4 + 2 * n` can overflow `usize`, which
    // would otherwise panic (debug) or wrap and defeat the size check (release).
    let expected = match n
        .checked_mul(2)
        .and_then(|payload_len| payload_len.checked_add(4))
    {
        Some(expected) => expected,
        None => vortex_error::vortex_bail!(
            "RowEncodeOptions metadata declares {} fields, which overflows the addressable size",
            n
        ),
    };
    if bytes.len() != expected {
        vortex_error::vortex_bail!(
            "RowEncodeOptions metadata wrong size: got {}, expected {}",
            bytes.len(),
            expected
        );
    }

    // The size check above guarantees `payload` is exactly `n` two-byte pairs.
    let fields = payload
        .chunks_exact(2)
        .map(|pair| SortField {
            descending: pair[0] != 0,
            nulls_first: pair[1] != 0,
        })
        .collect();
    Ok(RowEncodeOptions { fields })
}
Loading