
Commit 264a955

[spark] Fix column projection on log/upsert read
1 parent 17f5400 commit 264a955

4 files changed: 108 additions & 7 deletions

fluss-spark/fluss-spark-common/src/main/scala/org/apache/fluss/spark/read/FlussAppendPartitionReader.scala

Lines changed: 10 additions & 0 deletions

@@ -20,6 +20,10 @@ package org.apache.fluss.spark.read
 import org.apache.fluss.client.table.scanner.ScanRecord
 import org.apache.fluss.config.Configuration
 import org.apache.fluss.metadata.{TableBucket, TablePath}
+import org.apache.fluss.row.{InternalRow => FlussInternalRow}
+import org.apache.fluss.spark.row.DataConverter
+
+import org.apache.spark.sql.catalyst.InternalRow

 /** Partition reader that reads log data from a single Fluss table bucket. */
 class FlussAppendPartitionReader(
@@ -29,6 +33,12 @@ class FlussAppendPartitionReader(
     flussConfig: Configuration)
   extends FlussPartitionReader(tablePath, flussConfig) {

+  private lazy val projectedRowType = rowType.project(projection)
+
+  override protected def convertToSparkRow(flussRow: FlussInternalRow): InternalRow = {
+    DataConverter.toSparkInternalRow(flussRow, projectedRowType)
+  }
+
   private val tableBucket: TableBucket = flussPartition.tableBucket
   private val partitionId = tableBucket.getPartitionId
   private val bucketId = tableBucket.getBucket
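
A note on the convertToSparkRow override: the log scanner already returns rows narrowed to the projected columns, but the conversion previously interpreted those rows against the table's full schema. Positional getters are type-dependent, so once a projection reorders columns such as timestamps, arrays, or structs, the wrong getter is applied at each ordinal. A minimal, self-contained sketch of the failure mode — the names (FieldType, RowType, convert) are hypothetical stand-ins, not Fluss API:

object ProjectionSketch {
  sealed trait FieldType
  case object IntType extends FieldType
  case object StringType extends FieldType
  case object TimestampType extends FieldType

  // Stand-in for a schema whose project() keeps the selected ordinals in order.
  final case class RowType(fields: Vector[FieldType]) {
    def project(ordinals: Array[Int]): RowType = RowType(ordinals.toVector.map(fields))
  }

  // Stand-in converter: picks a type-specific accessor per ordinal from the row type.
  def convert(row: Vector[Any], rowType: RowType): Vector[Any] =
    rowType.fields.zipWithIndex.map {
      case (IntType, i) => row(i).asInstanceOf[Int]
      case (StringType, i) => row(i).asInstanceOf[String]
      case (TimestampType, i) => row(i).asInstanceOf[java.sql.Timestamp]
    }

  def main(args: Array[String]): Unit = {
    val fullSchema = RowType(Vector(IntType, TimestampType, StringType))
    val projection = Array(2, 1) // SELECT name, ts
    // The scanner already returns the projected layout: (name, ts).
    val projectedRow: Vector[Any] =
      Vector("a", java.sql.Timestamp.valueOf("2026-01-01 12:00:00"))
    // Correct: getters follow the projected row type.
    println(convert(projectedRow, fullSchema.project(projection)))
    // Buggy variant: convert(projectedRow, fullSchema) reads ordinal 0 as
    // IntType and throws ClassCastException on the String "a".
  }
}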

fluss-spark/fluss-spark-common/src/main/scala/org/apache/fluss/spark/read/FlussUpsertPartitionReader.scala

Lines changed: 15 additions & 7 deletions

@@ -24,13 +24,15 @@ import org.apache.fluss.config.Configuration
 import org.apache.fluss.memory.MemorySegment
 import org.apache.fluss.metadata.{TableBucket, TablePath}
 import org.apache.fluss.record.LogRecord
-import org.apache.fluss.row.{encode, InternalRow, KeyValueRow}
+import org.apache.fluss.row.{encode, InternalRow => FlussInternalRow, KeyValueRow}
 import org.apache.fluss.spark.SparkFlussConf
+import org.apache.fluss.spark.row.DataConverter
 import org.apache.fluss.spark.utils.LogChangesIterator
 import org.apache.fluss.types.{DataField, RowType}
 import org.apache.fluss.utils.CloseableIterator

 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.InternalRow

 import java.util.Comparator

@@ -51,6 +53,12 @@ class FlussUpsertPartitionReader(
   extends FlussPartitionReader(tablePath, flussConfig)
   with Logging {

+  private lazy val projectedRowType = rowType.project(projectionWithPks)
+
+  override protected def convertToSparkRow(flussRow: FlussInternalRow): InternalRow = {
+    DataConverter.toSparkInternalRow(flussRow, projectedRowType)
+  }
+
   private val readOptimized = flussConfig.get(SparkFlussConf.READ_OPTIMIZED_OPTION)
   private val tableBucket: TableBucket = flussPartition.tableBucket
   private val snapshotId: Long = flussPartition.snapshotId
@@ -79,7 +87,7 @@ class FlussUpsertPartitionReader(

   private var snapshotScanner: BatchScanner = _
   private var logScanner: LogScanner = _
-  private var mergedIterator: Iterator[InternalRow] = _
+  private var mergedIterator: Iterator[FlussInternalRow] = _

   // initialize scanners
   initialize()
@@ -111,8 +119,8 @@ class FlussUpsertPartitionReader(
       tableInfo.isDefaultBucketKey)

     // Create comparators based on primary key
-    val comparator = new Comparator[InternalRow] {
-      override def compare(o1: InternalRow, o2: InternalRow): Int = {
+    val comparator = new Comparator[FlussInternalRow] {
+      override def compare(o1: FlussInternalRow, o2: FlussInternalRow): Int = {
         val key1 = keyEncoder.encodeKey(o1)
         val key2 = keyEncoder.encodeKey(o2)
         MemorySegment.wrap(key1).compare(MemorySegment.wrap(key2), 0, 0, key1.length)
@@ -160,7 +168,7 @@ class FlussUpsertPartitionReader(

     // Convert snapshot iterator to LogRecord iterator for SortMergeReader
     new CloseableIterator[LogRecord] {
-      private var currentBatch: java.util.Iterator[InternalRow] = _
+      private var currentBatch: java.util.Iterator[FlussInternalRow] = _
       private var hasMoreBatches = true

       override def hasNext: Boolean = {
@@ -200,9 +208,9 @@ class FlussUpsertPartitionReader(
         createSnapshotIterator()
       }

-    // Create the SortMergeReader
+    // null: scanners already project rows; passing projectionWithPks here double-projects
     val sortMergeReader = new SortMergeReader(
-      projectionWithPks,
+      null,
       pkProjection,
       snapshotIterators,
       comparator,
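
A note on the final hunk: the null argument is the substantive fix on the upsert path. The snapshot and log scanners are constructed with projectionWithPks, so they already emit narrowed rows; handing the same ordinals to SortMergeReader re-projects rows that no longer have the original layout. A hypothetical sketch of the double-projection effect on plain Vectors (not the Fluss SortMergeReader):

object DoubleProjectionSketch {
  // Keep the selected ordinals of a row, in the requested order.
  def project(row: Vector[Any], ordinals: Array[Int]): Vector[Any] =
    ordinals.toVector.map(row)

  def main(args: Array[String]): Unit = {
    // Full layout: (pk, ts, name, arr); the query projects (arr, ts) plus the pk.
    val fullRow = Vector[Any](1, "2026-01-01 12:00:00", "a", Seq(1, 2))
    val projectionWithPks = Array(3, 1, 0)
    val once = project(fullRow, projectionWithPks)
    println(once) // Vector(List(1, 2), 2026-01-01 12:00:00, 1) -- correct
    // Applying the same ordinals again indexes a 3-column row at ordinal 3.
    try project(once, projectionWithPks)
    catch { case e: IndexOutOfBoundsException => println(s"double projection: $e") }
  }
}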

fluss-spark/fluss-spark-ut/src/test/scala/org/apache/fluss/spark/SparkLogTableReadTest.scala

Lines changed: 36 additions & 0 deletions

@@ -211,6 +211,42 @@ class SparkLogTableReadTest extends FlussSparkTestBase {
     }
   }

+  test("Spark Read: log table projection with type-dependent columns") {
+    withTable("t") {
+      sql(s"""
+             |CREATE TABLE $DEFAULT_DATABASE.t (
+             |id INT,
+             |ts TIMESTAMP,
+             |name STRING,
+             |arr ARRAY<INT>,
+             |struct_col STRUCT<col1: INT, col2: STRING>,
+             |ts_ltz TIMESTAMP_LTZ
+             |)""".stripMargin)
+
+      sql(s"""
+             |INSERT INTO $DEFAULT_DATABASE.t VALUES
+             |(1, TIMESTAMP "2026-01-01 12:00:00", "a", ARRAY(1, 2), STRUCT(10, 'x'),
+             | TIMESTAMP "2026-01-01 12:00:00"),
+             |(2, TIMESTAMP "2026-01-02 12:00:00", "b", ARRAY(3, 4), STRUCT(20, 'y'),
+             | TIMESTAMP "2026-01-02 12:00:00")
+             |""".stripMargin)
+
+      // Projection reorders type-dependent columns (array, timestamp, struct)
+      checkAnswer(
+        sql(s"SELECT arr, ts, struct_col FROM $DEFAULT_DATABASE.t ORDER BY ts"),
+        Row(Seq(1, 2), java.sql.Timestamp.valueOf("2026-01-01 12:00:00"), Row(10, "x")) ::
+          Row(Seq(3, 4), java.sql.Timestamp.valueOf("2026-01-02 12:00:00"), Row(20, "y")) :: Nil
+      )
+
+      // Projection with timestamp_ltz at shifted ordinal
+      checkAnswer(
+        sql(s"SELECT ts_ltz, name FROM $DEFAULT_DATABASE.t ORDER BY name"),
+        Row(java.sql.Timestamp.valueOf("2026-01-01 12:00:00"), "a") ::
+          Row(java.sql.Timestamp.valueOf("2026-01-02 12:00:00"), "b") :: Nil
+      )
+    }
+  }
+
   test("Spark Read: nested data types table") {
     withTable("t") {
       // TODO: support map type
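
Context for the column choice in these tests: Spark's InternalRow accessors are type-specific (getInt, getLong, getArray, getStruct, ...), so pairing an ordinal with the wrong schema breaks exactly for TIMESTAMP, ARRAY, and STRUCT columns, while a projection of same-typed columns can mask the bug. A small standalone sketch against Spark's catalyst classes — the row values and the microsecond literal are illustrative:

import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData

object TypeDependentGetters {
  def main(args: Array[String]): Unit = {
    // Projected layout for SELECT arr, ts: ARRAY<INT> at ordinal 0, timestamp micros at 1.
    val row = new GenericInternalRow(Array[Any](
      new GenericArrayData(Array(1, 2)),
      1767268800000000L)) // 2026-01-01 12:00:00 UTC in microseconds

    // Getters matching the projected schema read cleanly.
    println(row.getArray(0).toIntArray().mkString(","))
    println(row.getLong(1))

    // Getters from the full-table schema (INT at ordinal 0) miscast the array.
    try row.getInt(0)
    catch { case e: ClassCastException => println(s"wrong schema: $e") }
  }
}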

fluss-spark/fluss-spark-ut/src/test/scala/org/apache/fluss/spark/SparkPrimaryKeyTableReadTest.scala

Lines changed: 47 additions & 0 deletions

@@ -255,6 +255,53 @@ class SparkPrimaryKeyTableReadTest extends FlussSparkTestBase {
     }
   }

+  test("Spark Read: primary key table projection with type-dependent columns") {
+    withTable("t") {
+      val tablePath = createTablePath("t")
+      sql(s"""
+             |CREATE TABLE $DEFAULT_DATABASE.t (
+             |pk INT,
+             |ts TIMESTAMP,
+             |name STRING,
+             |arr ARRAY<INT>,
+             |struct_col STRUCT<col1: INT, col2: STRING>,
+             |ts_ltz TIMESTAMP_LTZ
+             |) TBLPROPERTIES("primary.key" = "pk", "bucket.num" = 1)
+             |""".stripMargin)
+
+      sql(s"""
+             |INSERT INTO $DEFAULT_DATABASE.t VALUES
+             |(1, TIMESTAMP "2026-01-01 12:00:00", "a", ARRAY(1, 2), STRUCT(10, 'x'),
+             | TIMESTAMP "2026-01-01 12:00:00"),
+             |(2, TIMESTAMP "2026-01-02 12:00:00", "b", ARRAY(3, 4), STRUCT(20, 'y'),
+             | TIMESTAMP "2026-01-02 12:00:00")
+             |""".stripMargin)
+
+      // Log-only: projection reorders type-dependent columns (PK not in projection)
+      checkAnswer(
+        sql(s"SELECT arr, ts, struct_col FROM $DEFAULT_DATABASE.t ORDER BY ts"),
+        Row(Seq(1, 2), java.sql.Timestamp.valueOf("2026-01-01 12:00:00"), Row(10, "x")) ::
+          Row(Seq(3, 4), java.sql.Timestamp.valueOf("2026-01-02 12:00:00"), Row(20, "y")) :: Nil
+      )
+
+      // Trigger snapshot, then test with snapshot + log merge
+      flussServer.triggerAndWaitSnapshot(tablePath)
+
+      sql(s"""
+             |INSERT INTO $DEFAULT_DATABASE.t VALUES
+             |(1, TIMESTAMP "2026-03-01 12:00:00", "a_updated", ARRAY(10, 20), STRUCT(100, 'xx'),
+             | TIMESTAMP "2026-03-01 12:00:00")
+             |""".stripMargin)
+
+      // Snapshot + log: projection with type-dependent columns at shifted ordinals
+      checkAnswer(
+        sql(s"SELECT ts_ltz, arr, name FROM $DEFAULT_DATABASE.t ORDER BY name"),
+        Row(java.sql.Timestamp.valueOf("2026-03-01 12:00:00"), Seq(10, 20), "a_updated") ::
+          Row(java.sql.Timestamp.valueOf("2026-01-02 12:00:00"), Seq(3, 4), "b") :: Nil
+      )
+    }
+  }
+
   private def genInputPartition(
       tablePath: TablePath,
       partitionName: String): Array[FlussUpsertInputPartition] = {
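
The second INSERT after the snapshot exercises the snapshot + log merge path: the reader combines a key-sorted snapshot with key-sorted log upserts, and on a key match the log row supersedes the snapshot row (hence "a_updated" in the expected answer). A hypothetical sketch of that sort-merge rule, not the Fluss SortMergeReader:

object SortMergeSketch {
  // Merge two key-sorted lists; on equal keys the log entry wins.
  def merge[K](snapshot: List[(K, String)], log: List[(K, String)])(
      implicit ord: Ordering[K]): List[(K, String)] =
    (snapshot, log) match {
      case (Nil, l) => l
      case (s, Nil) => s
      case ((sk, sv) :: st, (lk, lv) :: lt) =>
        val c = ord.compare(sk, lk)
        if (c < 0) (sk, sv) :: merge(st, log)
        else if (c > 0) (lk, lv) :: merge(snapshot, lt)
        else (lk, lv) :: merge(st, lt) // log overrides snapshot
    }

  def main(args: Array[String]): Unit = {
    val snapshot = List(1 -> "a", 2 -> "b")
    val log = List(1 -> "a_updated")
    println(merge(snapshot, log)) // List((1,a_updated), (2,b))
  }
}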
