diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 3652506c32672..b1cabc2d0083d 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -48,4 +48,4 @@ jobs: - name: Run audit check # Note: you can ignore specific RUSTSEC issues using the `--ignore` flag ,for example: # run: cargo audit --ignore RUSTSEC-2026-0001 - run: cargo audit --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2024-0014 + run: cargo audit --ignore RUSTSEC-2024-0014 diff --git a/Cargo.lock b/Cargo.lock index 6914453b3da2c..7928950d306b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1768,7 +1768,6 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "paste", "pretty_assertions", "rand 0.9.2", "rand_distr", @@ -1908,7 +1907,6 @@ dependencies = [ "log", "object_store", "parquet", - "paste", "rand 0.9.2", "recursive", "sqlparser", @@ -2164,7 +2162,6 @@ dependencies = [ "indexmap 2.13.0", "insta", "itertools 0.14.0", - "paste", "recursive", "serde_json", "sqlparser", @@ -2179,7 +2176,6 @@ dependencies = [ "indexmap 2.13.0", "insta", "itertools 0.14.0", - "paste", ] [[package]] @@ -2268,7 +2264,6 @@ dependencies = [ "half", "log", "num-traits", - "paste", "rand 0.9.2", ] @@ -2306,7 +2301,6 @@ dependencies = [ "itertools 0.14.0", "itoa", "log", - "paste", "rand 0.9.2", ] @@ -2321,7 +2315,6 @@ dependencies = [ "datafusion-expr", "datafusion-physical-plan", "parking_lot", - "paste", ] [[package]] @@ -2338,7 +2331,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "log", - "paste", ] [[package]] @@ -2404,7 +2396,6 @@ dependencies = [ "insta", "itertools 0.14.0", "parking_lot", - "paste", "petgraph", "rand 0.9.2", "recursive", @@ -2623,7 +2614,6 @@ dependencies = [ "insta", "itertools 0.14.0", "log", - "paste", "recursive", "regex", "rstest", diff --git a/Cargo.toml b/Cargo.toml index a185cd874a013..73e8a61d33d95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -173,7 +173,6 @@ parquet = { version = "58.0.0", default-features = false, features = [ "async", "object_store", ] } -paste = "1.0.15" pbjson = { version = "0.9.0" } pbjson-types = "0.9" # Should match arrow-flight's version of prost. diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index e4ba71e45c661..8758c9555c5c5 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -81,7 +81,6 @@ libc = "0.2.180" log = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } -paste = { workspace = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } tokio = { workspace = true } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 4f681896dfc66..b7a30f868a02b 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -903,76 +903,125 @@ macro_rules! assert_ne_or_internal_err { /// plan_err!("Error {val:?}") /// /// `NAME_ERR` - macro name for wrapping Err(DataFusionError::*) +/// `PREFIXED_NAME_ERR` - underscore-prefixed alias for NAME_ERR (e.g., _plan_err) +/// (Needed to avoid compiler error when using macro in the same crate: `macros from the current crate cannot be referred to by absolute paths`) /// `NAME_DF_ERR` - macro name for wrapping DataFusionError::*. Needed to keep backtrace opportunity /// in construction where DataFusionError::* used directly, like `map_err`, `ok_or_else`, etc +/// `PREFIXED_NAME_DF_ERR` - underscore-prefixed alias for NAME_DF_ERR (e.g., _plan_datafusion_err). +/// (Needed to avoid compiler error when using macro in the same crate: `macros from the current crate cannot be referred to by absolute paths`) macro_rules! make_error { - ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR); }; - (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => { - ::paste::paste!{ - /// Macro wraps `$ERR` to add backtrace feature - #[macro_export] - macro_rules! $NAME_DF_ERR { - ($d($d args:expr),* $d(; diagnostic=$d DIAG:expr)?) => {{ - let err =$crate::DataFusionError::$ERR( - ::std::format!( - "{}{}", - ::std::format!($d($d args),*), - $crate::DataFusionError::get_back_trace(), - ).into() - ); - $d ( - let err = err.with_diagnostic($d DIAG); - )? - err - } - } + ($NAME_ERR:ident, $PREFIXED_NAME_ERR:ident, $NAME_DF_ERR:ident, $PREFIXED_NAME_DF_ERR:ident, $ERR:ident) => { + make_error!(@inner ($), $NAME_ERR, $PREFIXED_NAME_ERR, $NAME_DF_ERR, $PREFIXED_NAME_DF_ERR, $ERR); + }; + (@inner ($d:tt), $NAME_ERR:ident, $PREFIXED_NAME_ERR:ident, $NAME_DF_ERR:ident, $PREFIXED_NAME_DF_ERR:ident, $ERR:ident) => { + /// Macro wraps `$ERR` to add backtrace feature + #[macro_export] + macro_rules! $NAME_DF_ERR { + ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{ + let err = $crate::DataFusionError::$ERR( + ::std::format!( + "{}{}", + ::std::format!($d($d args),*), + $crate::DataFusionError::get_back_trace(), + ).into() + ); + $d ( + let err = err.with_diagnostic($d DIAG); + )? + err + }} } - /// Macro wraps Err(`$ERR`) to add backtrace feature - #[macro_export] - macro_rules! $NAME_ERR { - ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{ - let err = $crate::[<_ $NAME_DF_ERR>]!($d($d args),*); - $d ( - let err = err.with_diagnostic($d DIAG); - )? - Err(err) - - }} - } - - - #[doc(hidden)] - pub use $NAME_ERR as [<_ $NAME_ERR>]; - #[doc(hidden)] - pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>]; + /// Macro wraps Err(`$ERR`) to add backtrace feature + #[macro_export] + macro_rules! $NAME_ERR { + ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{ + let err = $crate::$PREFIXED_NAME_DF_ERR!($d($d args),*); + $d ( + let err = err.with_diagnostic($d DIAG); + )? + Err(err) + }} } + + #[doc(hidden)] + pub use $NAME_ERR as $PREFIXED_NAME_ERR; + #[doc(hidden)] + pub use $NAME_DF_ERR as $PREFIXED_NAME_DF_ERR; }; } // Exposes a macro to create `DataFusionError::Plan` with optional backtrace -make_error!(plan_err, plan_datafusion_err, Plan); +make_error!( + plan_err, + _plan_err, + plan_datafusion_err, + _plan_datafusion_err, + Plan +); // Exposes a macro to create `DataFusionError::Internal` with optional backtrace -make_error!(internal_err, internal_datafusion_err, Internal); +make_error!( + internal_err, + _internal_err, + internal_datafusion_err, + _internal_datafusion_err, + Internal +); // Exposes a macro to create `DataFusionError::NotImplemented` with optional backtrace -make_error!(not_impl_err, not_impl_datafusion_err, NotImplemented); +make_error!( + not_impl_err, + _not_impl_err, + not_impl_datafusion_err, + _not_impl_datafusion_err, + NotImplemented +); // Exposes a macro to create `DataFusionError::Execution` with optional backtrace -make_error!(exec_err, exec_datafusion_err, Execution); +make_error!( + exec_err, + _exec_err, + exec_datafusion_err, + _exec_datafusion_err, + Execution +); // Exposes a macro to create `DataFusionError::Configuration` with optional backtrace -make_error!(config_err, config_datafusion_err, Configuration); +make_error!( + config_err, + _config_err, + config_datafusion_err, + _config_datafusion_err, + Configuration +); // Exposes a macro to create `DataFusionError::Substrait` with optional backtrace -make_error!(substrait_err, substrait_datafusion_err, Substrait); +make_error!( + substrait_err, + _substrait_err, + substrait_datafusion_err, + _substrait_datafusion_err, + Substrait +); // Exposes a macro to create `DataFusionError::ResourcesExhausted` with optional backtrace -make_error!(resources_err, resources_datafusion_err, ResourcesExhausted); +make_error!( + resources_err, + _resources_err, + resources_datafusion_err, + _resources_datafusion_err, + ResourcesExhausted +); // Exposes a macro to create `DataFusionError::Ffi` with optional backtrace -make_error!(ffi_err, ffi_datafusion_err, Ffi); +make_error!( + ffi_err, + _ffi_err, + ffi_datafusion_err, + _ffi_datafusion_err, + Ffi +); // Exposes a macro to create `DataFusionError::SQL` with optional backtrace #[macro_export] diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index be507e0691a19..326b791a2f624 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -172,7 +172,6 @@ bytes = { workspace = true } env_logger = { workspace = true } glob = { workspace = true } insta = { workspace = true } -paste = { workspace = true } pretty_assertions = "1.0" rand = { workspace = true, features = ["small_rng"] } rand_distr = "0.5" diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 6d49e0bcc676e..a41803191ad05 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -366,281 +366,367 @@ async fn prune_date64() { } macro_rules! int_tests { - ($bits:expr) => { - paste::item! { - #[tokio::test] - // null count min max - // page-0 0 -5 -1 - // page-1 0 -4 0 - // page-2 0 0 4 - // page-3 0 5 9 - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where i{} < 1", $bits), - Some(0), - Some(5), - 11, - 5, - ) - .await; - // result of sql "SELECT * FROM t where i < 1" is same as - // "SELECT * FROM t where -i > -1" - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where -i{} > -1", $bits), - Some(0), - Some(5), - 11, - 5, - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where i{} > 8", $bits), - Some(0), - Some(15), - 1, - 5, - ) - .await; - - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where -i{} < -8", $bits), - Some(0), - Some(15), - 1, - 5, - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where i{} = 1", $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - #[tokio::test] - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where abs(i{}) = 1 and i{} = 1", $bits, $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where abs(i{}) = 1", $bits), - Some(0), - Some(0), - 3, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where i{}+1 = 1", $bits), - Some(0), - Some(0), - 2, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where 1-i{} > 1", $bits), - Some(0), - Some(0), - 9, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where in (1)" - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where i{} in (1)", $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where not in (1)" prune nothing - test_prune( - Scenario::Int, - &format!("SELECT * FROM t where i{} not in (1)", $bits), - Some(0), - Some(0), - 19, - 5 - ) - .await; - } + ($bits:expr, $fn_lt:ident, $fn_gt:ident, $fn_eq:ident, $fn_scalar_fun_and_eq:ident, $fn_scalar_fun:ident, $fn_complex_expr:ident, $fn_complex_expr_subtract:ident, $fn_eq_in_list:ident, $fn_eq_in_list_negated:ident) => { + #[tokio::test] + // null count min max + // page-0 0 -5 -1 + // page-1 0 -4 0 + // page-2 0 0 4 + // page-3 0 5 9 + async fn $fn_lt() { + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where i{} < 1", $bits), + Some(0), + Some(5), + 11, + 5, + ) + .await; + // result of sql "SELECT * FROM t where i < 1" is same as + // "SELECT * FROM t where -i > -1" + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where -i{} > -1", $bits), + Some(0), + Some(5), + 11, + 5, + ) + .await; } - } + + #[tokio::test] + async fn $fn_gt() { + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where i{} > 8", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where -i{} < -8", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_eq() { + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where i{} = 1", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + #[tokio::test] + async fn $fn_scalar_fun_and_eq() { + test_prune( + Scenario::Int, + &format!( + "SELECT * FROM t where abs(i{}) = 1 and i{} = 1", + $bits, $bits + ), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_scalar_fun() { + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where abs(i{}) = 1", $bits), + Some(0), + Some(0), + 3, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_complex_expr() { + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where i{}+1 = 1", $bits), + Some(0), + Some(0), + 2, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_complex_expr_subtract() { + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where 1-i{} > 1", $bits), + Some(0), + Some(0), + 9, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list() { + // result of sql "SELECT * FROM t where in (1)" + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where i{} in (1)", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list_negated() { + // result of sql "SELECT * FROM t where not in (1)" prune nothing + test_prune( + Scenario::Int, + &format!("SELECT * FROM t where i{} not in (1)", $bits), + Some(0), + Some(0), + 19, + 5, + ) + .await; + } + }; } -int_tests!(8); -int_tests!(16); -int_tests!(32); -int_tests!(64); +int_tests!( + 8, + prune_int8_lt, + prune_int8_gt, + prune_int8_eq, + prune_int8_scalar_fun_and_eq, + prune_int8_scalar_fun, + prune_int8_complex_expr, + prune_int8_complex_expr_subtract, + prune_int8_eq_in_list, + prune_int8_eq_in_list_negated +); +int_tests!( + 16, + prune_int16_lt, + prune_int16_gt, + prune_int16_eq, + prune_int16_scalar_fun_and_eq, + prune_int16_scalar_fun, + prune_int16_complex_expr, + prune_int16_complex_expr_subtract, + prune_int16_eq_in_list, + prune_int16_eq_in_list_negated +); +int_tests!( + 32, + prune_int32_lt, + prune_int32_gt, + prune_int32_eq, + prune_int32_scalar_fun_and_eq, + prune_int32_scalar_fun, + prune_int32_complex_expr, + prune_int32_complex_expr_subtract, + prune_int32_eq_in_list, + prune_int32_eq_in_list_negated +); +int_tests!( + 64, + prune_int64_lt, + prune_int64_gt, + prune_int64_eq, + prune_int64_scalar_fun_and_eq, + prune_int64_scalar_fun, + prune_int64_complex_expr, + prune_int64_complex_expr_subtract, + prune_int64_eq_in_list, + prune_int64_eq_in_list_negated +); macro_rules! uint_tests { - ($bits:expr) => { - paste::item! { - #[tokio::test] - // null count min max - // page-0 0 0 4 - // page-1 0 1 5 - // page-2 0 5 9 - // page-3 0 250 254 - async fn []() { - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where u{} < 6", $bits), - Some(0), - Some(5), - 11, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where u{} > 253", $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where u{} = 6", $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6", $bits, $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where power(u{}, 2) = 25", $bits), - Some(0), - Some(0), - 2, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where u{}+1 = 6", $bits), - Some(0), - Some(0), - 2, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where in (1)" - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where u{} in (6)", $bits), - Some(0), - Some(15), - 1, - 5 - ) - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where not in (6)" prune nothing - test_prune( - Scenario::UInt, - &format!("SELECT * FROM t where u{} not in (6)", $bits), - Some(0), - Some(0), - 19, - 5 - ) - .await; - } + ($bits:expr, $fn_lt:ident, $fn_gt:ident, $fn_eq:ident, $fn_scalar_fun_and_eq:ident, $fn_scalar_fun:ident, $fn_complex_expr:ident, $fn_eq_in_list:ident, $fn_eq_in_list_negated:ident) => { + #[tokio::test] + // null count min max + // page-0 0 0 4 + // page-1 0 1 5 + // page-2 0 5 9 + // page-3 0 250 254 + async fn $fn_lt() { + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where u{} < 6", $bits), + Some(0), + Some(5), + 11, + 5, + ) + .await; } - } + + #[tokio::test] + async fn $fn_gt() { + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where u{} > 253", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_eq() { + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where u{} = 6", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_scalar_fun_and_eq() { + test_prune( + Scenario::UInt, + &format!( + "SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6", + $bits, $bits + ), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_scalar_fun() { + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where power(u{}, 2) = 25", $bits), + Some(0), + Some(0), + 2, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_complex_expr() { + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where u{}+1 = 6", $bits), + Some(0), + Some(0), + 2, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list() { + // result of sql "SELECT * FROM t where in (1)" + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where u{} in (6)", $bits), + Some(0), + Some(15), + 1, + 5, + ) + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list_negated() { + // result of sql "SELECT * FROM t where not in (6)" prune nothing + test_prune( + Scenario::UInt, + &format!("SELECT * FROM t where u{} not in (6)", $bits), + Some(0), + Some(0), + 19, + 5, + ) + .await; + } + }; } -uint_tests!(8); -uint_tests!(16); -uint_tests!(32); -uint_tests!(64); +uint_tests!( + 8, + prune_uint8_lt, + prune_uint8_gt, + prune_uint8_eq, + prune_uint8_scalar_fun_and_eq, + prune_uint8_scalar_fun, + prune_uint8_complex_expr, + prune_uint8_eq_in_list, + prune_uint8_eq_in_list_negated +); +uint_tests!( + 16, + prune_uint16_lt, + prune_uint16_gt, + prune_uint16_eq, + prune_uint16_scalar_fun_and_eq, + prune_uint16_scalar_fun, + prune_uint16_complex_expr, + prune_uint16_eq_in_list, + prune_uint16_eq_in_list_negated +); +uint_tests!( + 32, + prune_uint32_lt, + prune_uint32_gt, + prune_uint32_eq, + prune_uint32_scalar_fun_and_eq, + prune_uint32_scalar_fun, + prune_uint32_complex_expr, + prune_uint32_eq_in_list, + prune_uint32_eq_in_list_negated +); +uint_tests!( + 64, + prune_uint64_lt, + prune_uint64_gt, + prune_uint64_eq, + prune_uint64_scalar_fun_and_eq, + prune_uint64_scalar_fun, + prune_uint64_complex_expr, + prune_uint64_eq_in_list, + prune_uint64_eq_in_list_negated +); #[tokio::test] // null count min max diff --git a/datafusion/core/tests/parquet/row_group_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs index 445ae7e97f228..3ec3541af977a 100644 --- a/datafusion/core/tests/parquet/row_group_pruning.rs +++ b/datafusion/core/tests/parquet/row_group_pruning.rs @@ -399,321 +399,365 @@ async fn prune_disabled() { // https://github.com/apache/datafusion/issues/9779 bug so that tests pass // if and only if Bloom filters on Int8 and Int16 columns are still buggy. macro_rules! int_tests { - ($bits:expr) => { - paste::item! { - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where i{} < 1", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(3)) - .with_pruned_by_stats(Some(1)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(3)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(11) - .test_row_group_prune() - .await; - - // result of sql "SELECT * FROM t where i < 1" is same as - // "SELECT * FROM t where -i > -1" - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where -i{} > -1", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(3)) - .with_pruned_by_stats(Some(1)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(3)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(11) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where i{} = 1", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(1)) - .with_pruned_by_stats(Some(3)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(1)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(1) - .test_row_group_prune() - .await; - } - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where abs(i{}) = 1 and i{} = 1", $bits, $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(1)) - .with_pruned_by_stats(Some(3)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(1)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(1) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where abs(i{}) = 1", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(3) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where i{}+1 = 1", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(2) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where 1-i{} > 1", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(9) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where in (1)" - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where i{} in (1)", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(1)) - .with_pruned_by_stats(Some(3)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(1)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(1) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where in (1000)", prune all - // test whether statistics works - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where i{} in (100)", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(1)) - .with_matched_by_bloom_filter(Some(0)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(0) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where not in (1)" prune nothing - RowGroupPruningTest::new() - .with_scenario(Scenario::Int) - .with_query(&format!("SELECT * FROM t where i{} not in (1)", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(19) - .test_row_group_prune() - .await; - } + ($bits:expr, $fn_lt:ident, $fn_eq:ident, $fn_scalar_fun_and_eq:ident, $fn_scalar_fun:ident, $fn_complex_expr:ident, $fn_complex_expr_subtract:ident, $fn_eq_in_list:ident, $fn_eq_in_list_2:ident, $fn_eq_in_list_negated:ident) => { + #[tokio::test] + async fn $fn_lt() { + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where i{} < 1", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(3)) + .with_pruned_by_stats(Some(1)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; + + // result of sql "SELECT * FROM t where i < 1" is same as + // "SELECT * FROM t where -i > -1" + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where -i{} > -1", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(3)) + .with_pruned_by_stats(Some(1)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq() { + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where i{} = 1", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(1)) + .with_pruned_by_stats(Some(3)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + } + #[tokio::test] + async fn $fn_scalar_fun_and_eq() { + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!( + "SELECT * FROM t where abs(i{}) = 1 and i{} = 1", + $bits, $bits + )) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(1)) + .with_pruned_by_stats(Some(3)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_scalar_fun() { + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where abs(i{}) = 1", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(3) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_complex_expr() { + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where i{}+1 = 1", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_complex_expr_subtract() { + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where 1-i{} > 1", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(9) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list() { + // result of sql "SELECT * FROM t where in (1)" + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where i{} in (1)", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(1)) + .with_pruned_by_stats(Some(3)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list_2() { + // result of sql "SELECT * FROM t where in (1000)", prune all + // test whether statistics works + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where i{} in (100)", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(0)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(1)) + .with_matched_by_bloom_filter(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(0) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list_negated() { + // result of sql "SELECT * FROM t where not in (1)" prune nothing + RowGroupPruningTest::new() + .with_scenario(Scenario::Int) + .with_query(&format!("SELECT * FROM t where i{} not in (1)", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(19) + .test_row_group_prune() + .await; } }; } // int8/int16 are incorrect: https://github.com/apache/datafusion/issues/9779 -int_tests!(32); -int_tests!(64); +int_tests!( + 32, + prune_int32_lt, + prune_int32_eq, + prune_int32_scalar_fun_and_eq, + prune_int32_scalar_fun, + prune_int32_complex_expr, + prune_int32_complex_expr_subtract, + prune_int32_eq_in_list, + prune_int32_eq_in_list_2, + prune_int32_eq_in_list_negated +); +int_tests!( + 64, + prune_int64_lt, + prune_int64_eq, + prune_int64_scalar_fun_and_eq, + prune_int64_scalar_fun, + prune_int64_complex_expr, + prune_int64_complex_expr_subtract, + prune_int64_eq_in_list, + prune_int64_eq_in_list_2, + prune_int64_eq_in_list_negated +); // $bits: number of bits of the integer to test (8, 16, 32, 64) // $correct_bloom_filters: if false, replicates the // https://github.com/apache/datafusion/issues/9779 bug so that tests pass // if and only if Bloom filters on UInt8 and UInt16 columns are still buggy. macro_rules! uint_tests { - ($bits:expr) => { - paste::item! { - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where u{} < 6", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(3)) - .with_pruned_by_stats(Some(1)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(3)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(11) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where u{} = 6", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(1)) - .with_pruned_by_stats(Some(3)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(1)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(1) - .test_row_group_prune() - .await; - } - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6", $bits, $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(1)) - .with_pruned_by_stats(Some(3)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(1)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(1) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where power(u{}, 2) = 25", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(2) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where u{}+1 = 6", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(2) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where in (1)" - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where u{} in (6)", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(1)) - .with_pruned_by_stats(Some(3)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(1)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(1) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where in (1000)", prune all - // test whether statistics works - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where u{} in (100)", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) - .with_pruned_by_stats(Some(4)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(0) - .test_row_group_prune() - .await; - } - - #[tokio::test] - async fn []() { - // result of sql "SELECT * FROM t where not in (1)" prune nothing - RowGroupPruningTest::new() - .with_scenario(Scenario::UInt) - .with_query(&format!("SELECT * FROM t where u{} not in (6)", $bits)) - .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(4)) - .with_pruned_by_stats(Some(0)) - .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(4)) - .with_pruned_by_bloom_filter(Some(0)) - .with_expected_rows(19) - .test_row_group_prune() - .await; - } + ($bits:expr, $fn_lt:ident, $fn_eq:ident, $fn_scalar_fun_and_eq:ident, $fn_scalar_fun:ident, $fn_complex_expr:ident, $fn_eq_in_list:ident, $fn_eq_in_list_2:ident, $fn_eq_in_list_negated:ident) => { + #[tokio::test] + async fn $fn_lt() { + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where u{} < 6", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(3)) + .with_pruned_by_stats(Some(1)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(3)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(11) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq() { + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where u{} = 6", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(1)) + .with_pruned_by_stats(Some(3)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + } + #[tokio::test] + async fn $fn_scalar_fun_and_eq() { + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!( + "SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6", + $bits, $bits + )) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(1)) + .with_pruned_by_stats(Some(3)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_scalar_fun() { + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where power(u{}, 2) = 25", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_complex_expr() { + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where u{}+1 = 6", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(2) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list() { + // result of sql "SELECT * FROM t where in (1)" + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where u{} in (6)", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(1)) + .with_pruned_by_stats(Some(3)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(1)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(1) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list_2() { + // result of sql "SELECT * FROM t where in (1000)", prune all + // test whether statistics works + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where u{} in (100)", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(0)) + .with_pruned_by_stats(Some(4)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(0)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(0) + .test_row_group_prune() + .await; + } + + #[tokio::test] + async fn $fn_eq_in_list_negated() { + // result of sql "SELECT * FROM t where not in (1)" prune nothing + RowGroupPruningTest::new() + .with_scenario(Scenario::UInt) + .with_query(&format!("SELECT * FROM t where u{} not in (6)", $bits)) + .with_expected_errors(Some(0)) + .with_matched_by_stats(Some(4)) + .with_pruned_by_stats(Some(0)) + .with_pruned_files(Some(0)) + .with_matched_by_bloom_filter(Some(4)) + .with_pruned_by_bloom_filter(Some(0)) + .with_expected_rows(19) + .test_row_group_prune() + .await; } }; } // uint8/uint16 are incorrect: https://github.com/apache/datafusion/issues/9779 -uint_tests!(32); -uint_tests!(64); +uint_tests!( + 32, + prune_uint32_lt, + prune_uint32_eq, + prune_uint32_scalar_fun_and_eq, + prune_uint32_scalar_fun, + prune_uint32_complex_expr, + prune_uint32_eq_in_list, + prune_uint32_eq_in_list_2, + prune_uint32_eq_in_list_negated +); +uint_tests!( + 64, + prune_uint64_lt, + prune_uint64_eq, + prune_uint64_scalar_fun_and_eq, + prune_uint64_scalar_fun, + prune_uint64_complex_expr, + prune_uint64_eq_in_list, + prune_uint64_eq_in_list_2, + prune_uint64_eq_in_list_negated +); #[tokio::test] async fn prune_int32_eq_large_in_list() { diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml index d66d8ee8583d0..072c8f14da503 100644 --- a/datafusion/expr-common/Cargo.toml +++ b/datafusion/expr-common/Cargo.toml @@ -45,7 +45,6 @@ arrow = { workspace = true } datafusion-common = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } -paste = { workspace = true } [dev-dependencies] insta = { workspace = true } diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index f93ef3b79595b..0f88723d116f5 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -37,7 +37,7 @@ use datafusion_common::{ }; macro_rules! get_extreme_value { - ($extreme:ident, $value:expr) => { + ($extreme:ident, $DECIMAL128_ARRAY:ident, $DECIMAL256_ARRAY:ident, $value:expr) => { match $value { DataType::UInt8 => ScalarValue::UInt8(Some(u8::$extreme)), DataType::UInt16 => ScalarValue::UInt16(Some(u16::$extreme)), @@ -83,18 +83,12 @@ macro_rules! get_extreme_value { ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::$extreme)) } DataType::Decimal128(precision, scale) => ScalarValue::Decimal128( - Some( - paste::paste! {[<$extreme _DECIMAL128_FOR_EACH_PRECISION>]} - [*precision as usize], - ), + Some($DECIMAL128_ARRAY[*precision as usize]), *precision, *scale, ), DataType::Decimal256(precision, scale) => ScalarValue::Decimal256( - Some( - paste::paste! {[<$extreme _DECIMAL256_FOR_EACH_PRECISION>]} - [*precision as usize], - ), + Some($DECIMAL256_ARRAY[*precision as usize]), *precision, *scale, ), @@ -1162,10 +1156,20 @@ fn handle_overflow( match (UPPER, positive_sign) { (true, true) | (false, false) => ScalarValue::try_from(dt).unwrap(), (true, false) => { - get_extreme_value!(MIN, dt) + get_extreme_value!( + MIN, + MIN_DECIMAL128_FOR_EACH_PRECISION, + MIN_DECIMAL256_FOR_EACH_PRECISION, + dt + ) } (false, true) => { - get_extreme_value!(MAX, dt) + get_extreme_value!( + MAX, + MAX_DECIMAL128_FOR_EACH_PRECISION, + MAX_DECIMAL256_FOR_EACH_PRECISION, + dt + ) } } } @@ -4218,12 +4222,8 @@ mod tests { } macro_rules! capture_mode_change { - ($TYPE:ty) => { - paste::item! { - capture_mode_change_helper!([], - [], - $TYPE); - } + ($TYPE:ty, $TEST_FN_NAME:ident, $CREATE_FN_NAME:ident) => { + capture_mode_change_helper!($TEST_FN_NAME, $CREATE_FN_NAME, $TYPE); }; } @@ -4251,8 +4251,8 @@ mod tests { }; } - capture_mode_change!(f32); - capture_mode_change!(f64); + capture_mode_change!(f32, capture_mode_change_f32, create_interval_f32); + capture_mode_change!(f64, capture_mode_change_f64, create_interval_f64); #[cfg(all( any(target_arch = "x86_64", target_arch = "aarch64"), diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 75aa59595bed5..6990714585001 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -57,7 +57,6 @@ datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } -paste = { workspace = true } recursive = { workspace = true, optional = true } serde_json = { workspace = true } sqlparser = { workspace = true, optional = true } diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index d784169d541ab..26ac16d90d69f 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -41,7 +41,6 @@ use crate::{ macro_rules! create_func { ($UDAF:ty, $AGGREGATE_UDF_FN:ident) => { - paste::paste! { #[doc = concat!("AggregateFunction that returns a [AggregateUDF](crate::AggregateUDF) for [`", stringify!($UDAF), "`]")] pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc { // Singleton instance of [$UDAF], ensures the UDAF is only created once @@ -51,7 +50,6 @@ macro_rules! create_func { }); std::sync::Arc::clone(&INSTANCE) } - } } } diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 39337e44bb051..1ca494e38e07e 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -54,7 +54,6 @@ datafusion-physical-expr-common = { workspace = true } half = { workspace = true } log = { workspace = true } num-traits = { workspace = true } -paste = { workspace = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } diff --git a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs index 6c6bf72838899..0c919a1e5ea74 100644 --- a/datafusion/functions-aggregate/src/macros.rs +++ b/datafusion/functions-aggregate/src/macros.rs @@ -67,7 +67,6 @@ macro_rules! create_func { create_func!($UDAF, $AGGREGATE_UDF_FN, <$UDAF>::default()); }; ($UDAF:ty, $AGGREGATE_UDF_FN:ident, $CREATE:expr) => { - paste::paste! { #[doc = concat!("AggregateFunction that returns a [`AggregateUDF`](datafusion_expr::AggregateUDF) for [`", stringify!($UDAF), "`]")] pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc { // Singleton instance of [$UDAF], ensures the UDAF is only created once @@ -76,7 +75,6 @@ macro_rules! create_func { std::sync::Arc::new(datafusion_expr::AggregateUDF::from($CREATE)) }); std::sync::Arc::clone(&INSTANCE) - } } } } diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 0fdb69e6e7625..5fce3e854eb33 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -61,7 +61,6 @@ hashbrown = { workspace = true } itertools = { workspace = true, features = ["use_std"] } itoa = { workspace = true } log = { workspace = true } -paste = { workspace = true } [dev-dependencies] criterion = { workspace = true, features = ["async_tokio"] } diff --git a/datafusion/functions-nested/src/macros.rs b/datafusion/functions-nested/src/macros.rs index 5380f6b1272d1..5f12113150a40 100644 --- a/datafusion/functions-nested/src/macros.rs +++ b/datafusion/functions-nested/src/macros.rs @@ -50,7 +50,6 @@ macro_rules! make_udf_expr_and_func { make_udf_expr_and_func!($UDF, $EXPR_FN, $($arg)*, $DOC, $SCALAR_UDF_FN, $UDF::new); }; ($UDF:ident, $EXPR_FN:ident, $($arg:ident)*, $DOC:expr, $SCALAR_UDF_FN:ident, $CTOR:path) => { - paste::paste! { // "fluent expr_fn" style function #[doc = $DOC] pub fn $EXPR_FN($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr { @@ -60,13 +59,11 @@ macro_rules! make_udf_expr_and_func { )) } create_func!($UDF, $SCALAR_UDF_FN, $CTOR); - } }; ($UDF:ident, $EXPR_FN:ident, $DOC:expr, $SCALAR_UDF_FN:ident) => { make_udf_expr_and_func!($UDF, $EXPR_FN, $DOC, $SCALAR_UDF_FN, $UDF::new); }; ($UDF:ident, $EXPR_FN:ident, $DOC:expr, $SCALAR_UDF_FN:ident, $CTOR:path) => { - paste::paste! { // "fluent expr_fn" style function #[doc = $DOC] pub fn $EXPR_FN(arg: Vec) -> datafusion_expr::Expr { @@ -76,7 +73,6 @@ macro_rules! make_udf_expr_and_func { )) } create_func!($UDF, $SCALAR_UDF_FN, $CTOR); - } }; } @@ -97,7 +93,6 @@ macro_rules! create_func { create_func!($UDF, $SCALAR_UDF_FN, $UDF::new); }; ($UDF:ident, $SCALAR_UDF_FN:ident, $CTOR:path) => { - paste::paste! { #[doc = concat!("ScalarFunction that returns a [`ScalarUDF`](datafusion_expr::ScalarUDF) for ")] #[doc = stringify!($UDF)] pub fn $SCALAR_UDF_FN() -> std::sync::Arc { @@ -110,6 +105,5 @@ macro_rules! create_func { }); std::sync::Arc::clone(&INSTANCE) } - } }; } diff --git a/datafusion/functions-table/Cargo.toml b/datafusion/functions-table/Cargo.toml index aa401fbd7d4ed..4edb640cb2cf2 100644 --- a/datafusion/functions-table/Cargo.toml +++ b/datafusion/functions-table/Cargo.toml @@ -48,7 +48,6 @@ datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-plan = { workspace = true } parking_lot = { workspace = true } -paste = { workspace = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } diff --git a/datafusion/functions-table/src/lib.rs b/datafusion/functions-table/src/lib.rs index cd9ade041acbf..668e964901c04 100644 --- a/datafusion/functions-table/src/lib.rs +++ b/datafusion/functions-table/src/lib.rs @@ -38,25 +38,27 @@ pub fn all_default_table_functions() -> Vec> { /// Creates a singleton instance of a table function /// - `$module`: A struct implementing `TableFunctionImpl` to create the function from /// - `$name`: The name to give to the created function -/// -/// This is used to ensure creating the list of `TableFunction` only happens once. +/// - `$func_name`: The name of the function to be called +/// This is used to ensure creating the list of `TableFunction` only happens once. #[macro_export] macro_rules! create_udtf_function { - ($module:path, $name:expr) => { - paste::paste! { - pub fn [<$name:lower>]() -> Arc { - static INSTANCE: std::sync::LazyLock> = - std::sync::LazyLock::new(|| { - std::sync::Arc::new(TableFunction::new( - $name.to_string(), - Arc::new($module {}), - )) - }); - std::sync::Arc::clone(&INSTANCE) - } + ($module:expr, $func_name:ident, $name:expr) => { + pub fn $func_name() -> Arc { + static INSTANCE: std::sync::LazyLock> = + std::sync::LazyLock::new(|| { + std::sync::Arc::new(TableFunction::new( + $name.to_string(), + Arc::new($module), + )) + }); + std::sync::Arc::clone(&INSTANCE) } }; } -create_udtf_function!(generate_series::GenerateSeriesFunc, "generate_series"); -create_udtf_function!(generate_series::RangeFunc, "range"); +create_udtf_function!( + generate_series::GenerateSeriesFunc {}, + generate_series, + "generate_series" +); +create_udtf_function!(generate_series::RangeFunc {}, range, "range"); diff --git a/datafusion/functions-window/Cargo.toml b/datafusion/functions-window/Cargo.toml index fae71e180e34c..9c4342adae8fd 100644 --- a/datafusion/functions-window/Cargo.toml +++ b/datafusion/functions-window/Cargo.toml @@ -50,7 +50,6 @@ datafusion-macros = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } log = { workspace = true } -paste = { workspace = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } diff --git a/datafusion/functions-window/src/cume_dist.rs b/datafusion/functions-window/src/cume_dist.rs index dccb9148d2c5e..8e1cb1b1e6639 100644 --- a/datafusion/functions-window/src/cume_dist.rs +++ b/datafusion/functions-window/src/cume_dist.rs @@ -39,6 +39,7 @@ use std::sync::Arc; define_udwf_and_expr!( CumeDist, cume_dist, + cume_dist_udwf, "Calculates the cumulative distribution of a value in a group of values." ); diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 7569dac9ac106..fbb84483e23cf 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -43,6 +43,7 @@ use std::sync::{Arc, LazyLock}; get_or_init_udwf!( Lag, lag, + lag_udwf, "Returns the row value that precedes the current row by a specified \ offset within partition. If no such row exists, then returns the \ default value.", @@ -51,6 +52,7 @@ get_or_init_udwf!( get_or_init_udwf!( Lead, lead, + lead_udwf, "Returns the value from a row that follows the current row by a \ specified offset within the partition. If no such row exists, then \ returns the default value.", diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 890ced90a9a21..aeb54356f8966 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -30,8 +30,8 @@ /// /// * `$UDWF`: The struct which defines the [`Signature`](datafusion_expr::Signature) /// of the user-defined window function. -/// * `$OUT_FN_NAME`: The basename to generate a unique function name like -/// `$OUT_FN_NAME_udwf`. +/// * `$OUT_FN_NAME`: The expression function name +/// `UDWF_FN` : The unique function name /// * `$DOC`: Doc comments for UDWF. /// * (optional) `$CTOR`: Pass a custom constructor. When omitted it /// automatically resolves to `$UDWF::default()`. @@ -52,6 +52,7 @@ /// get_or_init_udwf!( /// SimpleUDWF, /// simple, +/// simple_udwf, /// "Simple user-defined window function doc comment." /// ); /// # @@ -94,16 +95,15 @@ /// ``` #[macro_export] macro_rules! get_or_init_udwf { - ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { - get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $UDWF::default); + ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC, $UDWF::default); }; - ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => { - paste::paste! { + ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr, $CTOR:path) => { #[doc = concat!(" Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($OUT_FN_NAME), "`].")] #[doc = ""] #[doc = concat!(" ", $DOC)] - pub fn [<$OUT_FN_NAME _udwf>]() -> std::sync::Arc { + pub fn $UDWF_FN() -> std::sync::Arc { // Singleton instance of UDWF, ensures it is only created once. static INSTANCE: std::sync::LazyLock> = std::sync::LazyLock::new(|| { @@ -111,7 +111,6 @@ macro_rules! get_or_init_udwf { }); std::sync::Arc::clone(&INSTANCE) } - } }; } @@ -149,6 +148,7 @@ macro_rules! get_or_init_udwf { /// # get_or_init_udwf!( /// # RowNumber, /// # row_number, +/// # row_number_udwf, /// # "Returns a unique row number for each row in window partition beginning at 1." /// # ); /// /// Creates `row_number()` API which has zero parameters: @@ -163,6 +163,7 @@ macro_rules! get_or_init_udwf { /// create_udwf_expr!( /// RowNumber, /// row_number, +/// row_number_udwf, /// "Returns a unique row number for each row in window partition beginning at 1." /// ); /// # @@ -221,7 +222,7 @@ macro_rules! get_or_init_udwf { /// # use datafusion_expr::{col, lit}; /// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # -/// # get_or_init_udwf!(Lead, lead, "user-defined window function"); +/// # get_or_init_udwf!(Lead, lead,lead_udwf, "user-defined window function"); /// # /// /// Creates `lead(expr, offset, default)` with 3 parameters: /// /// @@ -240,6 +241,7 @@ macro_rules! get_or_init_udwf { /// Lead, /// lead, /// [expr, offset, default], +/// lead_udwf, /// "Returns a value evaluated at the row that is offset rows after the current row within the partition." /// ); /// # @@ -298,21 +300,18 @@ macro_rules! get_or_init_udwf { #[macro_export] macro_rules! create_udwf_expr { // zero arguments - ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { - paste::paste! { + ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr) => { #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] #[doc = concat!(" `", stringify!($UDWF), "` user-defined window function.")] #[doc = ""] #[doc = concat!(" ", $DOC)] pub fn $OUT_FN_NAME() -> datafusion_expr::Expr { - [<$OUT_FN_NAME _udwf>]().call(vec![]) + $UDWF_FN().call(vec![]) } - } }; // 1 or more arguments - ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { - paste::paste! { + ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $UDWF_FN:ident, $DOC:expr) => { #[doc = " Create a [`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"] #[doc = concat!(" `", stringify!($UDWF), "` user-defined window function.")] #[doc = ""] @@ -320,10 +319,9 @@ macro_rules! create_udwf_expr { pub fn $OUT_FN_NAME( $($PARAM: datafusion_expr::Expr),+ ) -> datafusion_expr::Expr { - [<$OUT_FN_NAME _udwf>]() + $UDWF_FN() .call(vec![$($PARAM),+]) } - } }; } @@ -374,6 +372,7 @@ macro_rules! create_udwf_expr { /// define_udwf_and_expr!( /// SimpleUDWF, /// simple, +/// simple_udwf, /// "a simple user-defined window function" /// ); /// # @@ -437,6 +436,7 @@ macro_rules! create_udwf_expr { /// define_udwf_and_expr!( /// RowNumber, /// row_number, +/// row_number_udwf, /// "Returns a unique row number for each row in window partition beginning at 1.", /// RowNumber::new // <-- custom constructor /// ); @@ -514,6 +514,7 @@ macro_rules! create_udwf_expr { /// Lead, /// lead, /// [expr, offset, default], // <- 3 parameters +/// lead_udwf, /// "user-defined window function" /// ); /// # @@ -603,6 +604,7 @@ macro_rules! create_udwf_expr { /// Lead, /// lead, /// [expr, offset, default], // <- 3 parameters +/// lead_udwf, /// "user-defined window function", /// Lead::new // <- Custom constructor /// ); @@ -663,29 +665,29 @@ macro_rules! create_udwf_expr { macro_rules! define_udwf_and_expr { // Defines UDWF with default constructor // Defines expression API with zero parameters - ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => { - get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); - create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC); + ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME,$UDWF_FN, $DOC); + create_udwf_expr!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC); }; // Defines UDWF by passing a custom constructor // Defines expression API with zero parameters - ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => { - get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); - create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC); + ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr, $CTOR:path) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC, $CTOR); + create_udwf_expr!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC); }; // Defines UDWF with default constructor // Defines expression API with multiple parameters - ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => { - get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC); - create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC); + ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+],$UDWF_FN:ident, $DOC:expr) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC); + create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $UDWF_FN, $DOC); }; // Defines UDWF by passing a custom constructor // Defines expression API with multiple parameters - ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr, $CTOR:path) => { - get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR); - create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC); + ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $UDWF_FN:ident, $DOC:expr, $CTOR:path) => { + get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC, $CTOR); + create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $UDWF_FN, $DOC); }; } diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 8d37cf7e604a9..12b4146cc2f6f 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -45,6 +45,7 @@ define_udwf_and_expr!( First, first_value, [arg], + first_value_udwf, "Returns the first value in the window frame", NthValue::first ); @@ -52,12 +53,14 @@ define_udwf_and_expr!( Last, last_value, [arg], + last_value_udwf, "Returns the last value in the window frame", NthValue::last ); get_or_init_udwf!( NthValue, nth_value, + nth_value_udwf, "Returns the nth value in the window frame", NthValue::nth ); diff --git a/datafusion/functions-window/src/ntile.rs b/datafusion/functions-window/src/ntile.rs index 21ce2795b4c23..1f9b2344e5c75 100644 --- a/datafusion/functions-window/src/ntile.rs +++ b/datafusion/functions-window/src/ntile.rs @@ -40,6 +40,7 @@ define_udwf_and_expr!( Ntile, ntile, [arg], + ntile_udwf, "Integer ranging from 1 to the argument value, dividing the partition as equally as possible." ); diff --git a/datafusion/functions-window/src/rank.rs b/datafusion/functions-window/src/rank.rs index 9d5af64eb9c64..ee8546703b1f9 100644 --- a/datafusion/functions-window/src/rank.rs +++ b/datafusion/functions-window/src/rank.rs @@ -44,6 +44,7 @@ use std::sync::{Arc, LazyLock}; define_udwf_and_expr!( Rank, rank, + rank_udwf, "Returns rank of the current row with gaps. Same as `row_number` of its first peer", Rank::basic ); @@ -51,6 +52,7 @@ define_udwf_and_expr!( define_udwf_and_expr!( DenseRank, dense_rank, + dense_rank_udwf, "Returns rank of the current row without gaps. This function counts peer groups", Rank::dense_rank ); @@ -58,6 +60,7 @@ define_udwf_and_expr!( define_udwf_and_expr!( PercentRank, percent_rank, + percent_rank_udwf, "Returns the relative rank of the current row: (rank - 1) / (total rows - 1)", Rank::percent_rank ); diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index d7d298cecead8..cd60e51def72e 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -40,6 +40,7 @@ use std::sync::Arc; define_udwf_and_expr!( RowNumber, row_number, + row_number_udwf, "Returns a unique row number for each row in window partition beginning at 1." ); diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 7a52441477f15..d6cb212737d5b 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -65,7 +65,6 @@ arrow = { workspace = true, features = ["test_utils"] } criterion = { workspace = true } datafusion-functions = { workspace = true } insta = { workspace = true } -paste = { workspace = true } rand = { workspace = true } rstest = { workspace = true } diff --git a/datafusion/physical-expr/src/expressions/negative.rs b/datafusion/physical-expr/src/expressions/negative.rs index c727c8fa5f77e..c78bbe999eb26 100644 --- a/datafusion/physical-expr/src/expressions/negative.rs +++ b/datafusion/physical-expr/src/expressions/negative.rs @@ -214,10 +214,9 @@ mod tests { use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_physical_expr_common::physical_expr::fmt_sql; - use paste::paste; macro_rules! test_array_negative_op { - ($DATA_TY:tt, $($VALUE:expr),* ) => { + ($DATA_TY:tt, $ARRAY_TY:ty, $($VALUE:expr),* ) => { let schema = Schema::new(vec![Field::new("a", DataType::$DATA_TY, true)]); let expr = negative(col("a", &schema)?, &schema)?; assert_eq!(expr.data_type(&schema)?, DataType::$DATA_TY); @@ -230,8 +229,8 @@ mod tests { )+ arr.push(None); arr_expected.push(None); - let input = paste!{[<$DATA_TY Array>]::from(arr)}; - let expected = &paste!{[<$DATA_TY Array>]::from(arr_expected)}; + let input = <$ARRAY_TY>::from(arr); + let expected = &<$ARRAY_TY>::from(arr_expected); let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(input)])?; let result = expr.evaluate(&batch)?.into_array(batch.num_rows()).expect("Failed to convert to array"); @@ -243,12 +242,12 @@ mod tests { #[test] fn array_negative_op() -> Result<()> { - test_array_negative_op!(Int8, 2i8, 1i8); - test_array_negative_op!(Int16, 234i16, 123i16); - test_array_negative_op!(Int32, 2345i32, 1234i32); - test_array_negative_op!(Int64, 23456i64, 12345i64); - test_array_negative_op!(Float32, 2345.0f32, 1234.0f32); - test_array_negative_op!(Float64, 23456.0f64, 12345.0f64); + test_array_negative_op!(Int8, Int8Array, 2i8, 1i8); + test_array_negative_op!(Int16, Int16Array, 234i16, 123i16); + test_array_negative_op!(Int32, Int32Array, 2345i32, 1234i32); + test_array_negative_op!(Int64, Int64Array, 23456i64, 12345i64); + test_array_negative_op!(Float32, Float32Array, 2345.0f32, 1234.0f32); + test_array_negative_op!(Float64, Float64Array, 23456.0f64, 12345.0f64); Ok(()) } diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index b7338cb764d77..cc299ce507099 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -73,5 +73,4 @@ datafusion-functions-window = { workspace = true } env_logger = { workspace = true } insta = { workspace = true } itertools = { workspace = true } -paste = { workspace = true } rstest = { workspace = true } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index cd42b68949983..79d2bd6ad847a 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -1333,46 +1333,42 @@ mod tests { } macro_rules! test_stack_overflow { - ($num_expr:expr) => { - paste::item! { - #[test] - fn []() { - let schema = DFSchema::empty(); - let mut planner_context = PlannerContext::default(); - - let expr_str = (0..$num_expr) - .map(|i| format!("column1 = 'value{:?}'", i)) - .collect::>() - .join(" OR "); - - let dialect = GenericDialect{}; - let mut parser = Parser::new(&dialect) - .try_with_sql(expr_str.as_str()) - .unwrap(); - let sql_expr = parser.parse_expr().unwrap(); - - let context_provider = TestContextProvider::new(); - let sql_to_rel = SqlToRel::new(&context_provider); - - // Should not stack overflow - sql_to_rel.sql_expr_to_logical_expr( - sql_expr, - &schema, - &mut planner_context, - ).unwrap(); - } + ($name:ident, $num_expr:expr) => { + #[test] + fn $name() { + let schema = DFSchema::empty(); + let mut planner_context = PlannerContext::default(); + + let expr_str = (0..$num_expr) + .map(|i| format!("column1 = 'value{:?}'", i)) + .collect::>() + .join(" OR "); + + let dialect = GenericDialect {}; + let mut parser = Parser::new(&dialect) + .try_with_sql(expr_str.as_str()) + .unwrap(); + let sql_expr = parser.parse_expr().unwrap(); + + let context_provider = TestContextProvider::new(); + let sql_to_rel = SqlToRel::new(&context_provider); + + // Should not stack overflow + sql_to_rel + .sql_expr_to_logical_expr(sql_expr, &schema, &mut planner_context) + .unwrap(); } }; } - test_stack_overflow!(64); - test_stack_overflow!(128); - test_stack_overflow!(256); - test_stack_overflow!(512); - test_stack_overflow!(1024); - test_stack_overflow!(2048); - test_stack_overflow!(4096); - test_stack_overflow!(8192); + test_stack_overflow!(test_stack_overflow_64, 64); + test_stack_overflow!(test_stack_overflow_128, 128); + test_stack_overflow!(test_stack_overflow_256, 256); + test_stack_overflow!(test_stack_overflow_512, 512); + test_stack_overflow!(test_stack_overflow_1024, 1024); + test_stack_overflow!(test_stack_overflow_2048, 2048); + test_stack_overflow!(test_stack_overflow_4096, 4096); + test_stack_overflow!(test_stack_overflow_8192, 8192); #[test] fn test_sql_to_expr_with_alias() { let schema = DFSchema::empty();