From 0b59dac1e02630b74f925cef2ff00d664a4f8ada Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 1 Mar 2026 12:53:50 +0800 Subject: [PATCH 1/2] Support HourOfTime expression via version-specific shims --- .../org/apache/comet/serde/strings.scala | 33 +++++++++++++++++++ .../apache/comet/shims/CometExprShim.scala | 5 +++ .../apache/comet/shims/CometExprShim.scala | 3 ++ .../apache/comet/shims/CometExprShim.scala | 20 +++++++++++ .../apache/comet/CometExpressionSuite.scala | 17 ++++++++++ 5 files changed, 78 insertions(+) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 64ba644048..9a1ea7176e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -408,4 +408,37 @@ trait CommonStringExprs { None } } + + def hoursOfTimeToProto( + expr: Expression, + inputs: Seq[Attribute], + binding: Boolean): Option[Expr] = { + if (expr.children.nonEmpty) { + exprToProtoInternal(expr.children.head, inputs, binding) match { + case Some(childExpr) => + val builder = ExprOuterClass.Hour.newBuilder() + builder.setChild(childExpr) + val timeZone = + try { + val timeZoneIdMethod = expr.getClass.getMethod("timeZoneId") + timeZoneIdMethod.invoke(expr).asInstanceOf[Option[String]].getOrElse("UTC") + } catch { + case _: NoSuchMethodException => "UTC" + case _: Exception => "UTC" + } + builder.setTimezone(timeZone) + Some( + ExprOuterClass.Expr + .newBuilder() + .setHour(builder) + .build()) + case None => + withInfo(expr, expr.children.head) + None + } + } else { + withInfo(expr, "HoursOfTime expression has no child") + None + } + } } diff --git a/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala index 600931c346..33aad5632d 100644 --- a/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala @@ -21,9 +21,11 @@ package org.apache.comet.shims import org.apache.spark.sql.catalyst.expressions._ +import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.expressions.CometEvalMode import org.apache.comet.serde.CommonStringExprs import org.apache.comet.serde.ExprOuterClass.{BinaryOutputStyle, Expr} +import org.apache.comet.serde.QueryPlanSerde.exprToProtoInternal /** * `CometExprShim` acts as a shim for parsing expressions from different Spark versions. @@ -43,6 +45,9 @@ trait CometExprShim extends CommonStringExprs { // Right child is the encoding expression. stringDecode(expr, s.charset, s.bin, inputs, binding) + case _ if expr.getClass.getSimpleName == "HoursOfTime" => + hoursOfTimeToProto(expr, inputs, binding) + case _ => None } } diff --git a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala index 8e9cb1c07b..23a56ade40 100644 --- a/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.5/org/apache/comet/shims/CometExprShim.scala @@ -91,6 +91,9 @@ trait CometExprShim extends CommonStringExprs { // val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) // optExprWithInfo(optExpr, wb, wb.children: _*) + case _ if expr.getClass.getSimpleName == "HoursOfTime" => + hoursOfTimeToProto(expr, inputs, binding) + case _ => None } } diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 2c5cebd166..e8e561af5a 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -113,6 +113,26 @@ trait CometExprShim extends CommonStringExprs { // val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) // optExprWithInfo(optExpr, wb, wb.children: _*) + case h: HoursOfTime => + exprToProtoInternal(h.child, inputs, binding) match { + case Some(childExpr) => + val builder = ExprOuterClass.Hour.newBuilder() + builder.setChild(childExpr) + val timeZone = h.timeZoneId.getOrElse("UTC") + builder.setTimezone(timeZone) + Some( + ExprOuterClass.Expr + .newBuilder() + .setHour(builder) + .build()) + case None => + withInfo(h, h.child) + None + } + + case _ if expr.getClass.getSimpleName == "HoursOfTime" => + hoursOfTimeToProto(expr, inputs, binding) + case _ => None } } diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index f0f022868f..778effc95a 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -601,6 +601,23 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("hourOfTime expression support") { + // This test verifies that hour() function works correctly with timestamp columns. + // If Spark generates HoursOfTime expression (a RuntimeReplaceable expression), + // it will be handled by the version-specific shim and converted to Hour proto. + Seq(true, false).foreach { dictionaryEnabled => + withTempDir { dir => + val path = new Path(dir.toURI.toString, "part-r-0.parquet") + makeRawTimeParquetFile(path, dictionaryEnabled = dictionaryEnabled, 10000) + readParquetFile(path.toString) { df => + val query = df.select(expr("hour(_1)")) + + checkSparkAnswerAndOperator(query) + } + } + } + } + test("cast timestamp and timestamp_ntz") { withSQLConf( SESSION_LOCAL_TIMEZONE.key -> "Asia/Kathmandu", From 45b2236b9ee7f53091f5e67b3c87ae064b53ef0f Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 2 Mar 2026 13:34:09 +0800 Subject: [PATCH 2/2] modify strings.scala --- .../org/apache/comet/serde/strings.scala | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 9a1ea7176e..bf685137d3 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -413,32 +413,45 @@ trait CommonStringExprs { expr: Expression, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { - if (expr.children.nonEmpty) { - exprToProtoInternal(expr.children.head, inputs, binding) match { - case Some(childExpr) => - val builder = ExprOuterClass.Hour.newBuilder() - builder.setChild(childExpr) - val timeZone = + val childOpt = expr.children.headOption.orElse { + withInfo(expr, "HoursOfTime has no child expression") + None + } + + childOpt.flatMap { child => + val timeZoneId = { + val exprClass = expr.getClass + try { + val timeZoneIdMethod = exprClass.getMethod("timeZoneId") + timeZoneIdMethod.invoke(expr).asInstanceOf[Option[String]] + } catch { + case _: NoSuchMethodException => try { - val timeZoneIdMethod = expr.getClass.getMethod("timeZoneId") - timeZoneIdMethod.invoke(expr).asInstanceOf[Option[String]].getOrElse("UTC") + val timeZoneIdField = exprClass.getField("timeZoneId") + timeZoneIdField.get(expr).asInstanceOf[Option[String]] } catch { - case _: NoSuchMethodException => "UTC" - case _: Exception => "UTC" + case _: NoSuchFieldException | _: SecurityException => None } + } + } + + exprToProtoInternal(child, inputs, binding) + .map { childExpr => + val builder = ExprOuterClass.Hour.newBuilder() + builder.setChild(childExpr) + + val timeZone = timeZoneId.getOrElse("UTC") builder.setTimezone(timeZone) - Some( - ExprOuterClass.Expr - .newBuilder() - .setHour(builder) - .build()) - case None => - withInfo(expr, expr.children.head) + + ExprOuterClass.Expr + .newBuilder() + .setHour(builder) + .build() + } + .orElse { + withInfo(expr, child) None - } - } else { - withInfo(expr, "HoursOfTime expression has no child") - None + } } } }