From 00e6b3f232aec41c21a664f8c9f0ac7944b32063 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Mon, 18 May 2026 15:41:28 +0100 Subject: [PATCH] initial --- .../cpp/include/duckdb_vx/table_function.h | 1 + vortex-duckdb/cpp/table_function.cpp | 15 ++++++++++++ vortex-duckdb/src/convert/dtype.rs | 8 +++++++ vortex-duckdb/src/datasource.rs | 24 +++++++++++++++++++ .../src/duckdb/table_function/mod.rs | 19 +++++++++++++++ 5 files changed, 67 insertions(+) diff --git a/vortex-duckdb/cpp/include/duckdb_vx/table_function.h b/vortex-duckdb/cpp/include/duckdb_vx/table_function.h index 4b60207a036..1524f066ae4 100644 --- a/vortex-duckdb/cpp/include/duckdb_vx/table_function.h +++ b/vortex-duckdb/cpp/include/duckdb_vx/table_function.h @@ -125,6 +125,7 @@ typedef struct { void (*cardinality)(void *bind_data, duckdb_vx_node_statistics *node_stats_out); + void (*pushdown_column_type)(void *bind_data, idx_t id, duckdb_logical_type type); bool (*pushdown_complex_filter)(void *bind_data, duckdb_vx_expr expr, duckdb_vx_error *error_out); void (*to_string)(void *bind_data, duckdb_vx_string_map map); diff --git a/vortex-duckdb/cpp/table_function.cpp b/vortex-duckdb/cpp/table_function.cpp index 92d989d30d4..41e5a77a7f4 100644 --- a/vortex-duckdb/cpp/table_function.cpp +++ b/vortex-duckdb/cpp/table_function.cpp @@ -263,6 +263,20 @@ void function(ClientContext &, TableFunctionInput &input, DataChunk &output) { } } +void type_pushdown(ClientContext &, optional_ptr bind_data, + const unordered_map &new_column_types) { + const auto &bind = bind_data->Cast(); + void *const ffi_bind = bind.ffi_data->DataPtr(); + for (const auto& [id, type] : new_column_types) { + const duckdb_logical_type casted_type = reinterpret_cast( + // This is a flaw of duckdb api which doesn't allow passing const + // LogicalTypes. We guarantee this variable won't be mutated on + // Rust side + const_cast(&type)); + bind.info.vtab.pushdown_column_type(ffi_bind, id, casted_type); + } +} + void c_pushdown_complex_filter(ClientContext &, LogicalGet &, FunctionData *bind_data, @@ -394,6 +408,7 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d tf.filter_prune = true; tf.sampling_pushdown = false; + tf.type_pushdown = type_pushdown; tf.pushdown_complex_filter = c_pushdown_complex_filter; tf.cardinality = c_cardinality; tf.get_partition_info = get_partition_info; diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index 2b7c656be64..0328bb8a821 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -282,6 +282,14 @@ impl TryFrom<&DType> for LogicalType { } } +impl TryFrom<&LogicalTypeRef> for DType { + type Error = VortexError; + + fn try_from(value: &LogicalTypeRef) -> Result { + DType::from_logical_type(value, Nullability::Nullable) + } +} + impl TryFrom for LogicalType { type Error = VortexError; diff --git a/vortex-duckdb/src/datasource.rs b/vortex-duckdb/src/datasource.rs index a7d3eaf6f68..cd36f69035a 100644 --- a/vortex-duckdb/src/datasource.rs +++ b/vortex-duckdb/src/datasource.rs @@ -32,6 +32,7 @@ use vortex::dtype::FieldNames; use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::error::vortex_err; +use vortex::error::vortex_panic; use vortex::expr::Expression; use vortex::expr::and_collect; use vortex::expr::col; @@ -73,6 +74,7 @@ use crate::duckdb::DataChunkRef; use crate::duckdb::DuckdbStringMapRef; use crate::duckdb::ExpressionRef; use crate::duckdb::LogicalType; +use crate::duckdb::LogicalTypeRef; use crate::duckdb::PartitionData; use crate::duckdb::TableFilterSetRef; use crate::duckdb::TableFunction; @@ -118,6 +120,7 @@ pub struct DataSourceBindData { data_source: Arc, filter_exprs: Vec, column_fields: Vec, + //column_casts: Vec<(usize, DType)>, } impl Clone for DataSourceBindData { @@ -569,6 +572,27 @@ impl TableFunction for T { map.push("Filters", &filters.join(" /\\\n")); } } + + fn pushdown_column_type( + bind_data: &mut Self::BindData, + column_id: u64, + new_type: &LogicalTypeRef, + ) { + // TODO virtual column count? + let column_id: usize = column_id.as_(); + if column_id >= bind_data.column_fields.len() { + vortex_panic!("column_id {column_id} >= {}", bind_data.column_fields.len()); + } + // TODO casting? + let field = &mut bind_data.column_fields[column_id]; + let old_dtype = field.dtype.clone(); + // TODO we don't need a copy? + field.logical_type = LogicalType::new(new_type.as_type_id()); + field.dtype = new_type + .try_into() + .vortex_expect("logical type -> dtype conversion failed"); + println!("Cast {} -> {}", old_dtype, field.dtype); + } } /// Extracts DuckDB column names and logical types from a Vortex struct DType. diff --git a/vortex-duckdb/src/duckdb/table_function/mod.rs b/vortex-duckdb/src/duckdb/table_function/mod.rs index 986ac64d100..752e0c842a6 100644 --- a/vortex-duckdb/src/duckdb/table_function/mod.rs +++ b/vortex-duckdb/src/duckdb/table_function/mod.rs @@ -21,6 +21,7 @@ use crate::duckdb::DataChunk; use crate::duckdb::DatabaseRef; use crate::duckdb::Expression; use crate::duckdb::LogicalType; +use crate::duckdb::LogicalTypeRef; use crate::duckdb::Value; use crate::duckdb::client_context::ClientContextRef; use crate::duckdb::data_chunk::DataChunkRef; @@ -103,6 +104,12 @@ pub trait TableFunction: Sized + Debug { /// Return table scanning progress from 0. to 100. fn table_scan_progress(global_state: &Self::GlobalState) -> f64; + fn pushdown_column_type( + bind_data: &mut Self::BindData, + column_id: u64, + new_type: &LogicalTypeRef + ); + /// Pushes down a filter expression to the table function. /// /// Returns `true` if the filter was successfully pushed down (and stored on the bind data), @@ -157,6 +164,7 @@ impl DatabaseRef { function: Some(function::), statistics: Some(statistics::), cardinality: Some(cardinality_callback::), + pushdown_column_type: Some(pushdown_column_type::), pushdown_complex_filter: Some(pushdown_complex_filter_callback::), to_string: Some(to_string_callback::), table_scan_progress: Some(table_scan_progress_callback::), @@ -225,6 +233,17 @@ unsafe extern "C-unwind" fn get_partition_data_callback( out.file_index = data.file_index; } +unsafe extern "C-unwind" fn pushdown_column_type( + bind_data: *mut c_void, + column_id: u64, + new_type: cpp::duckdb_logical_type +) { + let bind_data = + unsafe { bind_data.cast::().as_mut() }.vortex_expect("bind_data null pointer"); + let new_type = unsafe { LogicalType::borrow(new_type) }; + T::pushdown_column_type(bind_data, column_id, new_type); +} + unsafe extern "C-unwind" fn pushdown_complex_filter_callback( bind_data: *mut c_void, expr: cpp::duckdb_vx_expr,