Skip to content
12 changes: 6 additions & 6 deletions datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
+---------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [test.b ASC NULLS LAST] |
+---------------------------------------------------------------------------+
| 10 |
| 10.0 |
+---------------------------------------------------------------------------+
");

Expand All @@ -437,7 +437,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
+----------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.1)) WITHIN GROUP [test.b DESC NULLS LAST] |
+----------------------------------------------------------------------------+
| 100 |
| 100.0 |
+----------------------------------------------------------------------------+
");

Expand All @@ -457,7 +457,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
+--------------------------------------------------------------------+
| approx_percentile_cont(arg_2) WITHIN GROUP [test.b ASC NULLS LAST] |
+--------------------------------------------------------------------+
| 10 |
| 10.0 |
+--------------------------------------------------------------------+
"
);
Expand All @@ -477,7 +477,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
+---------------------------------------------------------------------+
| approx_percentile_cont(arg_2) WITHIN GROUP [test.b DESC NULLS LAST] |
+---------------------------------------------------------------------+
| 100 |
| 100.0 |
+---------------------------------------------------------------------+
"
);
Expand All @@ -494,7 +494,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
+------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.5),Int32(2)) WITHIN GROUP [test.b ASC NULLS LAST] |
+------------------------------------------------------------------------------------+
| 30 |
| 30.25 |
+------------------------------------------------------------------------------------+
");

Expand All @@ -510,7 +510,7 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
+-------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.1),Int32(2)) WITHIN GROUP [test.b DESC NULLS LAST] |
+-------------------------------------------------------------------------------------+
| 69 |
| 69.85 |
+-------------------------------------------------------------------------------------+
");

Expand Down
15 changes: 14 additions & 1 deletion datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use std::fmt::Display;
use std::hash::Hash;
use std::sync::Arc;

use crate::type_coercion::aggregates::NUMERICS;
use arrow::datatypes::{
DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION, DataType,
Decimal128Type, DecimalType, Field, IntervalUnit, TimeUnit,
Expand Down Expand Up @@ -595,6 +594,20 @@ impl Display for ArrayFunctionArgument {
}
}

// Arrow data types treated as numeric in this module: signed and unsigned
// integers of 8 to 64 bits, followed by 16-, 32- and 64-bit floats.
//
// NOTE(review): this appears to be a module-private copy of the list in
// `type_coercion::aggregates::NUMERICS`, which is marked deprecated — confirm
// the two are meant to stay identical before changing either one.
static NUMERICS: &[DataType] = &[
// Signed integers
DataType::Int8,
DataType::Int16,
DataType::Int32,
DataType::Int64,
// Unsigned integers
DataType::UInt8,
DataType::UInt16,
DataType::UInt32,
DataType::UInt64,
// Floating point
DataType::Float16,
DataType::Float32,
DataType::Float64,
];

impl TypeSignature {
pub fn to_string_repr(&self) -> Vec<String> {
match self {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/expr-common/src/type_coercion/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use datafusion_common::{Result, internal_err, plan_err};

// TODO: remove usage of these (INTEGERS and NUMERICS) in favour of signatures
// see https://github.com/apache/datafusion/issues/18092
#[deprecated(since = "54.0.0", note = "Use functions signatures")]
pub static INTEGERS: &[DataType] = &[
DataType::Int8,
DataType::Int16,
Expand All @@ -33,6 +34,7 @@ pub static INTEGERS: &[DataType] = &[
DataType::UInt64,
];

#[deprecated(since = "54.0.0", note = "Use functions signatures")]
pub static NUMERICS: &[DataType] = &[
DataType::Int8,
DataType::Int16,
Expand Down
23 changes: 20 additions & 3 deletions datafusion/expr/src/test/function_stub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ use datafusion_common::plan_err;
use datafusion_common::{Result, exec_err, not_impl_err, utils::take_function_args};

use crate::Volatility::Immutable;
use crate::type_coercion::aggregates::NUMERICS;
use crate::{
Accumulator, AggregateUDFImpl, Expr, GroupsAccumulator, ReversedUDAF, Signature,
Accumulator, AggregateUDFImpl, Coercion, Expr, GroupsAccumulator, ReversedUDAF,
Signature, TypeSignature, TypeSignatureClass,
expr::AggregateFunction,
function::{AccumulatorArgs, StateFieldsArgs},
utils::AggregateOrderSensitivity,
};
use datafusion_common::types::{NativeType, logical_float64};

macro_rules! create_func {
($UDAF:ty, $AGGREGATE_UDF_FN:ident) => {
Expand Down Expand Up @@ -462,9 +463,25 @@ pub struct Avg {

impl Avg {
    /// Builds the stub `avg` aggregate, which is also registered under the
    /// alias `mean`.
    ///
    /// The signature takes exactly one argument and offers three alternatives,
    /// tried in this order:
    ///
    /// 1. a decimal argument, matched exactly (no implicit cast);
    /// 2. a duration argument, matched exactly (no implicit cast);
    /// 3. a `Float64` argument, where integer and float inputs are implicitly
    ///    coerced to `Float64`.
    pub fn new() -> Self {
        // Decimal and duration inputs keep their own type.
        let decimal = TypeSignature::Coercible(vec![Coercion::new_exact(
            TypeSignatureClass::Decimal,
        )]);
        let duration = TypeSignature::Coercible(vec![Coercion::new_exact(
            TypeSignatureClass::Duration,
        )]);
        // Every other supported input (integers and floats) is averaged as Float64.
        let float64 = TypeSignature::Coercible(vec![Coercion::new_implicit(
            TypeSignatureClass::Native(logical_float64()),
            vec![TypeSignatureClass::Integer, TypeSignatureClass::Float],
            NativeType::Float64,
        )]);

        Self {
            aliases: vec![String::from("mean")],
            // `signature` is initialised exactly once; a struct literal that
            // names the same field twice does not compile.
            signature: Signature::one_of(vec![decimal, duration, float64], Immutable),
        }
    }
}
Expand Down
59 changes: 41 additions & 18 deletions datafusion/functions-aggregate/src/approx_percentile_cont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ use arrow::{
},
datatypes::{DataType, Field},
};
use datafusion_common::types::{NativeType, logical_float64};
use datafusion_common::{
DataFusionError, Result, ScalarValue, downcast_value, internal_err, not_impl_err,
plan_err,
};
use datafusion_expr::expr::{AggregateFunction, Sort};
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS};
use datafusion_expr::utils::format_state_name;
use datafusion_expr::{
Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature,
Volatility,
Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature,
TypeSignature, TypeSignatureClass, Volatility,
};
use datafusion_functions_aggregate_common::tdigest::{DEFAULT_MAX_SIZE, TDigest};
use datafusion_macros::user_doc;
Expand Down Expand Up @@ -133,22 +133,45 @@ impl Default for ApproxPercentileCont {
impl ApproxPercentileCont {
/// Create a new [`ApproxPercentileCont`] aggregate function.
///
/// Two call shapes are accepted:
///
/// * `(value, percentile)`
/// * `(value, percentile, centroids)`
///
/// `value` is a float, with any numeric input implicitly coerced to
/// `Float64`. `percentile` is a `Float64`, with any numeric input implicitly
/// coerced to `Float64`. `centroids` is an integer, with any numeric input
/// implicitly coerced to `Int64`.
pub fn new() -> Self {
    // The first two arguments are the same in both call shapes, so each
    // coercion is described once and instantiated per signature.
    let value = || {
        Coercion::new_implicit(
            TypeSignatureClass::Float,
            vec![TypeSignatureClass::Numeric],
            NativeType::Float64,
        )
    };
    let percentile = || {
        Coercion::new_implicit(
            TypeSignatureClass::Native(logical_float64()),
            vec![TypeSignatureClass::Numeric],
            NativeType::Float64,
        )
    };
    // Optional third argument: number of centroids for the T-Digest.
    let centroids = Coercion::new_implicit(
        TypeSignatureClass::Integer,
        vec![TypeSignatureClass::Numeric],
        NativeType::Int64,
    );

    let signature = Signature::one_of(
        vec![
            // 2 args - numeric, percentile (float)
            TypeSignature::Coercible(vec![value(), percentile()]),
            // 3 args - numeric, percentile (float), centroid (integer)
            TypeSignature::Coercible(vec![value(), percentile(), centroids]),
        ],
        Volatility::Immutable,
    );
    Self { signature }
}

pub(crate) fn create_accumulator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ use arrow::compute::{and, filter, is_not_null};
use arrow::datatypes::FieldRef;
use arrow::{array::ArrayRef, datatypes::DataType};
use datafusion_common::ScalarValue;
use datafusion_common::types::{NativeType, logical_float64};
use datafusion_common::{Result, not_impl_err, plan_err};
use datafusion_expr::Volatility::Immutable;
use datafusion_expr::expr::{AggregateFunction, Sort};
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS};
use datafusion_expr::{
Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature,
Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature,
TypeSignature, TypeSignatureClass, Volatility,
};
use datafusion_functions_aggregate_common::tdigest::{Centroid, TDigest};
use datafusion_macros::user_doc;
Expand Down Expand Up @@ -126,26 +126,54 @@ impl Default for ApproxPercentileContWithWeight {
impl ApproxPercentileContWithWeight {
/// Create a new [`ApproxPercentileContWithWeight`] aggregate function.
pub fn new() -> Self {
let mut variants = Vec::with_capacity(NUMERICS.len() * (INTEGERS.len() + 1));
// Accept any numeric value paired with weight and float64 percentile
for num in NUMERICS {
variants.push(TypeSignature::Exact(vec![
num.clone(),
num.clone(),
DataType::Float64,
]));
// Additionally accept an integer number of centroids for T-Digest
for int in INTEGERS {
variants.push(TypeSignature::Exact(vec![
num.clone(),
num.clone(),
DataType::Float64,
int.clone(),
]));
}
}
let signature = Signature::one_of(
vec![
// 3 args - numeric, weight (float), percentile (float)
TypeSignature::Coercible(vec![
Coercion::new_implicit(
TypeSignatureClass::Float,
vec![TypeSignatureClass::Numeric],
NativeType::Float64,
),
Coercion::new_implicit(
TypeSignatureClass::Float,
vec![TypeSignatureClass::Numeric],
NativeType::Float64,
),
Coercion::new_implicit(
TypeSignatureClass::Native(logical_float64()),
vec![TypeSignatureClass::Numeric],
NativeType::Float64,
),
]),
// 4 args - numeric, weight (float), percentile (float), centroid (integer)
TypeSignature::Coercible(vec![
Coercion::new_implicit(
TypeSignatureClass::Float,
vec![TypeSignatureClass::Numeric],
NativeType::Float64,
),
Coercion::new_implicit(
TypeSignatureClass::Float,
vec![TypeSignatureClass::Numeric],
NativeType::Float64,
),
Coercion::new_implicit(
TypeSignatureClass::Native(logical_float64()),
vec![TypeSignatureClass::Numeric],
NativeType::Float64,
),
Coercion::new_implicit(
TypeSignatureClass::Integer,
vec![TypeSignatureClass::Numeric],
NativeType::Int64,
),
]),
],
Volatility::Immutable,
);
Self {
signature: Signature::one_of(variants, Immutable),
signature,
approx_percentile_cont: ApproxPercentileCont::new(),
}
}
Expand Down
5 changes: 4 additions & 1 deletion datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,10 @@ mod test {
.err()
.unwrap()
.strip_backtrace();
assert!(err.starts_with("Error during planning: Failed to coerce arguments to satisfy a call to 'avg' function: coercion from Utf8 to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float16, Float32, Float64]) failed"));
assert!(
err.contains("Function 'avg' failed to match any signature"),
"Err: {err:?}"
);
Ok(())
}

Expand Down
Loading
Loading