diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml index a83d70f380..84421d4ef2 100644 --- a/.github/workflows/pr_build_macos.yml +++ b/.github/workflows/pr_build_macos.yml @@ -138,7 +138,7 @@ jobs: - name: "Spark 4.1, JDK 17, Scala 2.13" java_version: "17" - # The spark-4.1 profile already pins Scala to 2.13.17 to match Spark 4.1.1's + # The spark-4.1 profile already pins Scala to 2.13.17 to match Spark 4.1's # runtime; the scala-2.13 profile would override it back to 2.13.16 and break. maven_opts: "-Pspark-4.1" diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index b4f249d62d..b199851ae4 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -146,7 +146,7 @@ jobs: - {spark-short: '3.4', spark-full: '3.4.3', java: 11} - {spark-short: '3.5', spark-full: '3.5.8', java: 11} - {spark-short: '4.0', spark-full: '4.0.2', java: 21} - - {spark-short: '4.1', spark-full: '4.1.1', java: 17} + - {spark-short: '4.1', spark-full: '4.1.2', java: 17} fail-fast: false name: spark-sql-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }} runs-on: ubuntu-24.04 diff --git a/dev/diffs/4.1.1.diff b/dev/diffs/4.1.2.diff similarity index 99% rename from dev/diffs/4.1.1.diff rename to dev/diffs/4.1.2.diff index 5cf6326dbf..c2b4b0ccc8 100644 --- a/dev/diffs/4.1.1.diff +++ b/dev/diffs/4.1.2.diff @@ -39,7 +39,7 @@ index 6df8bc85b51..dabb75e2b75 100644 withSpark(sc) { sc => TestUtils.waitUntilExecutorsUp(sc, 2, 60000) diff --git a/pom.xml b/pom.xml -index dc757d78812..10f7b202e71 100644 +index dc201151999..3e278cfb34c 100644 --- a/pom.xml +++ b/pom.xml @@ -152,6 +152,8 @@ @@ -78,7 +78,7 @@ index dc757d78812..10f7b202e71 100644 org.apache.datasketches diff --git a/sql/core/pom.xml b/sql/core/pom.xml -index d2d07a08aa9..d89f80e5b68 100644 +index c25b83c355b..5e23b863dcf 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -97,6 +97,10 @@ @@ -392,7 +392,7 @@ index 0d807aeae4d..6d7744e771b 100644 withTempView("t0", "t1", "t2") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala -index 0dfd37ebeae..66340218c7c 100644 +index bfe15b33768..55c23a38ccc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.DataTypeErrors.toSQLId @@ -695,7 +695,7 @@ index e1a2fd33c7c..632f4b695df 100644 } assert(scanOption.isDefined) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala -index b27122a8de2..3c690dbe788 100644 +index 4c62c47971a..3c690dbe788 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -267,7 +267,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite @@ -708,7 +708,7 @@ index b27122a8de2..3c690dbe788 100644 withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { withTempView("df") { val df1 = spark.range(1, 100) -@@ -470,7 +471,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite +@@ -471,7 +472,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } } @@ -718,7 +718,7 @@ index b27122a8de2..3c690dbe788 100644 withTempDir { dir => Seq("parquet", "orc", "csv", "json").foreach { fmt => val basePath = dir.getCanonicalPath + "/" + fmt -@@ -548,7 +550,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite +@@ -549,7 +551,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } } @@ -1232,7 +1232,7 @@ index d7b2511eac2..d5f5b940b94 100644 val session = classic.SparkSession.builder().sparkContext(sc).getOrCreate() import session.implicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala -index ff0ee19ae97..01958e0c45b 100644 +index 7bfc8cf4fa6..7a425b74184 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -17,6 +17,8 @@ @@ -1393,7 +1393,7 @@ index fee375db10a..8c2c24e2c5f 100644 val v = VariantBuilder.parseJson(s, false) new VariantVal(v.getValue, v.getMetadata) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala -index 6cdf681d65c..34a0e3714bd 100644 +index 8f7a68bcbe6..88dbe1793c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.{AnalysisException, Row} @@ -1549,7 +1549,7 @@ index 2a0ab21ddb0..6030e7c2b9b 100644 } finally { spark.listenerManager.unregister(listener) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala -index 7c830bf6c6e..6d9c643d83e 100644 +index 122c511bf83..9bea26c5225 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala @@ -24,6 +24,8 @@ import org.apache.spark.sql.{DataFrame, Row} @@ -1562,7 +1562,7 @@ index 7c830bf6c6e..6d9c643d83e 100644 import org.apache.spark.sql.connector.catalog.functions._ import org.apache.spark.sql.connector.distributions.Distributions @@ -32,7 +34,7 @@ import org.apache.spark.sql.connector.expressions.Expressions._ - import org.apache.spark.sql.execution.SparkPlan + import org.apache.spark.sql.execution.{RDDScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec @@ -1596,7 +1596,7 @@ index 7c830bf6c6e..6d9c643d83e 100644 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala -index 7c4852c5e22..d1a34456bdc 100644 +index ede5d285932..c9a8abb5a94 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala @@ -21,7 +21,7 @@ package org.apache.spark.sql.connector @@ -1605,10 +1605,10 @@ index 7c4852c5e22..d1a34456bdc 100644 -import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, Row} +import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, IgnoreCometSuite, Row} - import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal} + import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal, TransformExpression} import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.physical -@@ -45,7 +45,8 @@ import org.apache.spark.sql.util.QueryExecutionListener +@@ -46,7 +46,8 @@ import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.tags.SlowSQLTest @SlowSQLTest @@ -2845,7 +2845,7 @@ index 6b73cc8618d..e67aaeff9df 100644 case _ => assert(false, "Can not match ParquetTable in the query.") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala -index 3072657a095..599d169cf8a 100644 +index 6ba790deddf..34b2f424c8f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -40,6 +40,7 @@ import org.apache.parquet.schema.{MessageType, MessageTypeParser} @@ -2856,7 +2856,7 @@ index 3072657a095..599d169cf8a 100644 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, UnsafeRow} import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils} -@@ -953,7 +954,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession +@@ -971,7 +972,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } @@ -2866,7 +2866,7 @@ index 3072657a095..599d169cf8a 100644 val data = Seq( Tuple1((null, null)), Tuple1((null, null)), -@@ -1567,7 +1569,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession +@@ -1585,7 +1587,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } @@ -3230,7 +3230,7 @@ index 38e5b15465b..ca3e8fef27a 100644 testWithColumnFamilies("RocksDBStateStore", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala -index e839ccd35ec..d182aa07b44 100644 +index 232332a6575..324afe9ebb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -32,7 +32,8 @@ import org.apache.hadoop.conf.Configuration @@ -3595,7 +3595,7 @@ index 465da3cd469..92ac998929d 100644 val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala -index 22028a585e2..20c6b7c796a 100644 +index 6cdca9fb530..6542bc8dced 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession} @@ -3775,10 +3775,10 @@ index f0f3f94b811..b7d18771314 100644 spark.internalCreateDataFrame(withoutFilters.execute(), schema) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala -index 245219c1756..b566f970ccd 100644 +index 720b13b812e..71b20c79a12 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala -@@ -75,6 +75,21 @@ trait SharedSparkSessionBase +@@ -98,6 +98,21 @@ trait SharedSparkSessionBase // this rule may potentially block testing of other optimization rules such as // ConstantPropagation etc. .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName) diff --git a/docs/source/contributor-guide/benchmarking_macos.md b/docs/source/contributor-guide/benchmarking_macos.md index 4c37a32a26..5e98acaae9 100644 --- a/docs/source/contributor-guide/benchmarking_macos.md +++ b/docs/source/contributor-guide/benchmarking_macos.md @@ -55,13 +55,13 @@ export DF_BENCH=`pwd` ## Install Spark -Install Apache Spark. This example refers to 4.1.1 version. +Install Apache Spark. This example refers to 4.1.2 version. ```shell -wget https://archive.apache.org/dist/spark/spark-4.1.1/spark-4.1.1-bin-hadoop3.tgz -tar xzf spark-4.1.1-bin-hadoop3.tgz -sudo mv spark-4.1.1-bin-hadoop3 /opt -export SPARK_HOME=/opt/spark-4.1.1-bin-hadoop3/ +wget https://archive.apache.org/dist/spark/spark-4.1.2/spark-4.1.2-bin-hadoop3.tgz +tar xzf spark-4.1.2-bin-hadoop3.tgz +sudo mv spark-4.1.2-bin-hadoop3 /opt +export SPARK_HOME=/opt/spark-4.1.2-bin-hadoop3/ ``` Start Spark in standalone mode: diff --git a/docs/source/user-guide/latest/compatibility/spark-versions.md b/docs/source/user-guide/latest/compatibility/spark-versions.md index 4856cf5a1b..a6ad5bcf16 100644 --- a/docs/source/user-guide/latest/compatibility/spark-versions.md +++ b/docs/source/user-guide/latest/compatibility/spark-versions.md @@ -66,7 +66,7 @@ Spark 4.0.2 is supported with Java 17 and Scala 2.13. ## Spark 4.1 -Spark 4.1.1 is supported with Java 17/21 and Scala 2.13. +Spark 4.1.2 is supported with Java 17/21 and Scala 2.13. ### Known Limitations diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index 4b7717b688..442133facc 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -50,7 +50,7 @@ Other versions may work well enough for development and evaluation purposes. | 3.4.3 | 11/17 | 2.12/2.13 | Yes | Yes | | 3.5.8 | 11/17 | 2.12/2.13 | Yes | Yes | | 4.0.2 | 17/21 | 2.13 | Yes | Yes | -| 4.1.1 | 17/21 | 2.13 | Yes | Yes | +| 4.1.2 | 17/21 | 2.13 | Yes | Yes | Note that we do not test the full matrix of supported Java and Scala versions in CI for every Spark version. diff --git a/docs/source/user-guide/latest/kubernetes.md b/docs/source/user-guide/latest/kubernetes.md index c06c469f32..498eefb614 100644 --- a/docs/source/user-guide/latest/kubernetes.md +++ b/docs/source/user-guide/latest/kubernetes.md @@ -72,7 +72,7 @@ spec: image: apache/datafusion-comet:$COMET_VERSION-spark3.5.5-scala2.12-java11 imagePullPolicy: IfNotPresent mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.1.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.2.jar sparkConf: "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-$COMET_VERSION.jar" "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-$COMET_VERSION.jar" @@ -82,17 +82,17 @@ spec: "spark.comet.exec.shuffle.enabled": "true" "spark.comet.exec.shuffle.mode": "auto" "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager" - sparkVersion: 4.1.1 + sparkVersion: 4.1.2 driver: labels: - version: 4.1.1 + version: 4.1.2 cores: 1 coreLimit: 1200m memory: 512m serviceAccount: spark-operator-spark executor: labels: - version: 4.1.1 + version: 4.1.2 instances: 1 cores: 1 coreLimit: 1200m diff --git a/kube/Dockerfile b/kube/Dockerfile index 7e73e45241..5ee3eac35c 100644 --- a/kube/Dockerfile +++ b/kube/Dockerfile @@ -15,7 +15,7 @@ # limitations under the License. # -FROM apache/spark:4.1.1 AS builder +FROM apache/spark:4.1.2 AS builder USER root @@ -69,7 +69,7 @@ RUN mkdir -p /root/.m2 && \ RUN cd /comet \ && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION" -FROM apache/spark:4.1.1 +FROM apache/spark:4.1.2 ENV SPARK_VERSION=4.1 ENV SCALA_VERSION=2.13 USER root diff --git a/pom.xml b/pom.xml index 7419fecc92..5fdc7eb5a4 100644 --- a/pom.xml +++ b/pom.xml @@ -69,7 +69,7 @@ under the License. 4.9.6 3.2.16 2.2.0 - 4.1.1 + 4.1.2 4.1 provided 3.25.5 @@ -687,13 +687,13 @@ under the License. spark-4.1 - 2.13.17 2.13 - 4.1.1 + 4.1.2 4.1 1.16.0 4.13.6 diff --git a/spark/pom.xml b/spark/pom.xml index 6d97ea831f..d1613460ba 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -291,7 +291,7 @@ under the License. 1.10.0 test - + org.eclipse.jetty jetty-server diff --git a/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala b/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala index d540bb2ba3..a1d8e73e2c 100644 --- a/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala +++ b/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.types.DecimalType class CometDecimalArithmeticViewSuite extends CometTestBase { - // Spark 4.1.1 (SPARK-53968) stores `spark.sql.decimalOperations.allowPrecisionLoss` per + // Spark 4.1+ (SPARK-53968) stores `spark.sql.decimalOperations.allowPrecisionLoss` per // arithmetic expression so a view's analyzed plan keeps a stable result type across config // changes. Comet's DecimalPrecision rule used to recompute the result type from the current // SQLConf, producing a CheckOverflow target that disagreed with the stored Add.dataType and