From db29e4feb10b28a36c6b1c2eed4ccb4e097ab3b7 Mon Sep 17 00:00:00 2001 From: noroshi <253434427+n0r0shi@users.noreply.github.com> Date: Tue, 24 Feb 2026 02:56:23 +0000 Subject: [PATCH] feat: support elt expression Register datafusion-spark's SparkElt UDF and add serde mapping in stringExpressions. --- native/core/src/execution/jni_api.rs | 2 ++ .../scala/org/apache/comet/serde/QueryPlanSerde.scala | 1 + .../org/apache/comet/CometStringExpressionSuite.scala | 8 ++++++++ 3 files changed, 11 insertions(+) diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 0193f3012c..fe639f47fc 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -55,6 +55,7 @@ use datafusion_spark::function::math::hex::SparkHex; use datafusion_spark::function::math::width_bucket::SparkWidthBucket; use datafusion_spark::function::string::char::CharFunc; use datafusion_spark::function::string::concat::SparkConcat; +use datafusion_spark::function::string::elt::SparkElt; use futures::poll; use futures::stream::StreamExt; use jni::objects::JByteBuffer; @@ -400,6 +401,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) { session_ctx.register_udf(ScalarUDF::new_from_impl(SparkWidthBucket::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(MapFromEntries::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkCrc32::default())); + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkElt::default())); } /// Prepares arrow arrays for output. diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 9d13ccd9ed..59c03877bb 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -153,6 +153,7 @@ object QueryPlanSerde extends Logging with CometExprShim { classOf[ConcatWs] -> CometConcatWs, classOf[Concat] -> CometConcat, classOf[Contains] -> CometScalarFunction("contains"), + classOf[Elt] -> CometScalarFunction("elt"), classOf[EndsWith] -> CometScalarFunction("ends_with"), classOf[InitCap] -> CometInitCap, classOf[Length] -> CometLength, diff --git a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala index 121d7f7d5a..1701069dcd 100644 --- a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala @@ -148,6 +148,14 @@ class CometStringExpressionSuite extends CometTestBase { } } + test("elt") { + withParquetTable(Seq((1, "a", "b", "c"), (2, "x", "y", "z"), (3, "p", "q", "r")), "tbl") { + checkSparkAnswerAndOperator("SELECT elt(_1, _2, _3, _4) FROM tbl") + checkSparkAnswerAndOperator("SELECT elt(1, 'hello', 'world') FROM tbl") + checkSparkAnswerAndOperator("SELECT elt(NULL, 'a', 'b') FROM tbl") + } + } + test("split string basic") { withSQLConf("spark.comet.expression.StringSplit.allowIncompatible" -> "true") { withParquetTable((0 until 5).map(i => (s"value$i,test$i", i)), "tbl") {