apache · deniskuzZ · Jun 5, 2026
diff --git a/...erg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java b/...erg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java
@@ -89,7 +89,6 @@ public class BaseHiveIcebergMetaHook implements HiveMetaHook {
   );
   private static final Set<String> PARAMETERS_TO_REMOVE = ImmutableSet
       .of(InputFormatConfig.TABLE_SCHEMA, Catalogs.LOCATION, Catalogs.NAME, InputFormatConfig.PARTITION_SPEC);
-  static final String ORC_FILES_ONLY = "iceberg.orc.files.only";
   private static final String ZORDER_FIELDS_JSON_KEY = "zorderFields";
 
   protected final Configuration conf;
@@ -197,8 +196,6 @@ public void preCreateTable(CreateTableRequest request) {
 
     assertFileFormat(tableProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT));
 
-    // Set whether the format is ORC, to be used during vectorization.
-    setOrcOnlyFilesParam(hmsTable);
     // Remove hive primary key columns from table request, as iceberg doesn't support hive primary key.
     request.setPrimaryKeys(null);
     setSortOrder(hmsTable, schema, tableProperties);
@@ -456,14 +453,6 @@ protected static PartitionSpec spec(Configuration configuration, Schema schema,
     return HMSTablePropertyHelper.getPartitionSpec(hmsTable.getParameters(), schema);
   }
 
-  protected void setOrcOnlyFilesParam(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
-    hmsTable.getParameters().put(ORC_FILES_ONLY, String.valueOf(isOrcOnlyFiles(hmsTable)));
-  }
-
-  protected boolean isOrcOnlyFiles(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
-    return !"FALSE".equalsIgnoreCase(hmsTable.getParameters().get(ORC_FILES_ONLY)) && isOrcFileFormat(hmsTable);
-  }
-
   static boolean isOrcFileFormat(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
     return hmsTable.getSd().getInputFormat() != null && hmsTable.getSd().getInputFormat().toUpperCase()
         .contains(org.apache.iceberg.FileFormat.ORC.name()) || org.apache.iceberg.FileFormat.ORC.name()

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java
@@ -46,7 +46,6 @@
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.iceberg.FileScanTask;
-import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.expressions.Expressions;
@@ -249,13 +248,13 @@ public VectorizedSupport.Support[] getSupportedFeatures() {
 
   @Override
   public VectorizedSupport.Support[] getSupportedFeatures(HiveConf hiveConf, TableDesc tableDesc) {
-    // disabling VectorizedSupport.Support.DECIMAL_64 for Parquet as it doesn't support it
-    boolean isORCOnly =
-        Boolean.parseBoolean(tableDesc.getProperties().getProperty(HiveIcebergMetaHook.DECIMAL64_VECTORIZATION)) &&
-            Boolean.parseBoolean(tableDesc.getProperties().getProperty(HiveIcebergMetaHook.ORC_FILES_ONLY)) &&
-            org.apache.iceberg.FileFormat.ORC.name()
-                .equalsIgnoreCase(tableDesc.getProperties().getProperty(TableProperties.DEFAULT_FILE_FORMAT));
-    if (!isORCOnly) {
+    // Advertise DECIMAL_64 whenever decimal64 vectorization is enabled for the table, regardless of file format.
+    // NOTE(review): assumes the Parquet reader populates decimal64 vectors correctly - TODO confirm.
+    boolean decimal64Enabled =
+        Boolean.parseBoolean(tableDesc.getProperties().getProperty(HiveIcebergMetaHook.DECIMAL64_VECTORIZATION));
+    if (!decimal64Enabled) {
+      // Keep the LLAP ORC reader from emitting decimal64 so it stays consistent with the full-decimal
+      // operator pipeline; consumed in HiveVectorizedReader#orcRecordReader.
       final String vectorizationConfName = getVectorizationConfName(tableDesc.getTableName());
       LOG.debug("Setting {} for table: {} to true", vectorizationConfName, tableDesc.getTableName());
       hiveConf.set(vectorizationConfName, "true");

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -311,8 +311,6 @@ private void doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable
       // If so, we will create the iceberg table in commitAlterTable and go ahead with the migration
       assertTableCanBeMigrated(hmsTable);
       isTableMigration = true;
-      // Set whether the format is ORC, to be used during vectorization.
-      setOrcOnlyFilesParam(hmsTable);
 
       StorageDescriptor sd = hmsTable.getSd();
       preAlterTableProperties = new PreAlterTableProperties();
@@ -375,13 +373,6 @@ private void doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable
       assertNotCrossTableMetadataLocationChange(hmsTable.getParameters(), context);
     }
 
-    // Migration case is already handled above, in case of migration we don't have all the properties set till this
-    // point.
-    if (!isTableMigration) {
-      // Set whether the format is ORC, to be used during vectorization.
-      setOrcOnlyFilesParam(hmsTable);
-    }
-
   }
 
   /**

diff --git a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_multitable.q b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_multitable.q
@@ -11,8 +11,6 @@ insert into customer_ice values (10);
 create external table orders(o_orderkey int, o_custkey int) stored as orc;
 insert into orders values (10, 10);
 
-alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false');
-
 select sum(1 - l_discount) as revenue
 FROM customer_ice, orders, lineitem
 WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20;
@@ -21,16 +19,6 @@ create external table lineitem_ice(l_discount decimal(15,2), l_orderkey int) STO
 TBLPROPERTIES ('iceberg.decimal64.vectorization'='true');
 insert into lineitem_ice values (100.2, 10);
 
-select sum(1 - l_discount) as revenue
-FROM customer_ice, orders, lineitem_ice
-WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20;
-
-alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true');
-
-select sum(1 - l_discount) as revenue
-FROM customer_ice, orders, lineitem
-WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20;
-
 select sum(1 - l_discount) as revenue
 FROM customer_ice, orders, lineitem_ice
 WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
@@ -259,8 +259,8 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
@@ -507,7 +507,7 @@ STAGE PLANS:
             Map Operator Tree:
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2), 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string]
+                      vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2)/DECIMAL_64, 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string]
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
@@ -516,7 +516,7 @@ STAGE PLANS:
                           aggregators: VectorUDAFMaxDouble(col 0:float) -> float
                           className: VectorGroupByOperator
                           groupByMode: HASH
-                          keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, col 9:decimal(4,2)
+                          keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 15:decimal(4,2)
                           native: false
                           vectorProcessingMode: HASH
                           projectedOutputColumnNums: [0]
@@ -531,18 +531,18 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
                     dataColumnCount: 10
                     includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-                    dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)
+                    dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)/DECIMAL_64
                     partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                    scratchColumnTypeNames: [decimal(4,2)]
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -663,7 +663,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tbl_ice_mixed_all_types
 #### A masked pattern was here ####
 1.1	1.2	false	4	567890123456789	6	col7	2012-10-03 19:58:08	1234-09-02	10.01
-5.1	6.2	true	40	567890123456780	8	col07	2012-10-03 19:58:09	1234-09-03	10.02
+5.1	6.2	true	40	567890123456780	8	col07	2012-10-03 19:58:09	1234-09-03	10.02
 PREHOOK: query: create external table t1 stored as orc as select * from tbl_ice_mixed_all_types
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@tbl_ice_mixed_all_types
@@ -769,7 +769,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tbl_ice_mixed_all_types
 #### A masked pattern was here ####
 1.1	1.2	false	4	567890123456789	6	col7	2012-10-03 19:58:08	1234-09-02	10.01
-5.1	6.2	true	40	567890123456780	8	col07	2012-10-03 19:58:09	1234-09-03	10.02
+5.1	6.2	true	40	567890123456780	8	col07	2012-10-03 19:58:09	1234-09-03	10.02
 PREHOOK: query: create external table tbl_ice_mixed_parted (
     a int,
     b string
@@ -940,8 +940,8 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false

diff --git a/...g/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_multitable.q.out b/...g/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_multitable.q.out
@@ -52,14 +52,6 @@ POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@orders
 POSTHOOK: Lineage: orders.o_custkey SCRIPT []
 POSTHOOK: Lineage: orders.o_orderkey SCRIPT []
-PREHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false')
-PREHOOK: type: ALTERTABLE_PROPERTIES
-PREHOOK: Input: default@customer_ice
-PREHOOK: Output: default@customer_ice
-POSTHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false')
-POSTHOOK: type: ALTERTABLE_PROPERTIES
-POSTHOOK: Input: default@customer_ice
-POSTHOOK: Output: default@customer_ice
 PREHOOK: query: select sum(1 - l_discount) as revenue
 FROM customer_ice, orders, lineitem
 WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20
@@ -112,45 +104,3 @@ POSTHOOK: Input: default@lineitem_ice
 POSTHOOK: Input: default@orders
 #### A masked pattern was here ####
 -99.20
-PREHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true')
-PREHOOK: type: ALTERTABLE_PROPERTIES
-PREHOOK: Input: default@customer_ice
-PREHOOK: Output: default@customer_ice
-POSTHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true')
-POSTHOOK: type: ALTERTABLE_PROPERTIES
-POSTHOOK: Input: default@customer_ice
-POSTHOOK: Output: default@customer_ice
-PREHOOK: query: select sum(1 - l_discount) as revenue
-FROM customer_ice, orders, lineitem
-WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@customer_ice
-PREHOOK: Input: default@lineitem
-PREHOOK: Input: default@orders
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(1 - l_discount) as revenue
-FROM customer_ice, orders, lineitem
-WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@customer_ice
-POSTHOOK: Input: default@lineitem
-POSTHOOK: Input: default@orders
-#### A masked pattern was here ####
--99.20
-PREHOOK: query: select sum(1 - l_discount) as revenue
-FROM customer_ice, orders, lineitem_ice
-WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@customer_ice
-PREHOOK: Input: default@lineitem_ice
-PREHOOK: Input: default@orders
-#### A masked pattern was here ####
-POSTHOOK: query: select sum(1 - l_discount) as revenue
-FROM customer_ice, orders, lineitem_ice
-WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@customer_ice
-POSTHOOK: Input: default@lineitem_ice
-POSTHOOK: Input: default@orders
-#### A masked pattern was here ####
--99.20
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out
@@ -150,8 +150,8 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
@@ -348,7 +348,7 @@ STAGE PLANS:
             Map Operator Tree:
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2), 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string]
+                      vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2)/DECIMAL_64, 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string]
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
@@ -357,7 +357,7 @@ STAGE PLANS:
                           aggregators: VectorUDAFMaxDouble(col 0:float) -> float
                           className: VectorGroupByOperator
                           groupByMode: HASH
-                          keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, col 9:decimal(4,2)
+                          keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 15:decimal(4,2)
                           native: false
                           vectorProcessingMode: HASH
                           projectedOutputColumnNums: [0]
@@ -372,18 +372,18 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
                     dataColumnCount: 10
                     includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-                    dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)
+                    dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)/DECIMAL_64
                     partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                    scratchColumnTypeNames: [decimal(4,2)]
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -429,7 +429,7 @@ POSTHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_bi
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tbl_ice_parquet_all_types
 #### A masked pattern was here ####
-1.1	1.2	false	4	567890123456789	6	col7	2012-10-03 19:58:08	1234-09-02	10.01
+1.1	1.2	false	4	567890123456789	6	col7	2012-10-03 19:58:08	1234-09-02	10.01
 PREHOOK: query: create external table tbl_ice_parquet_parted (
     a int,
     b string
@@ -582,8 +582,8 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
@@ -115,6 +115,6 @@ public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> file
 
   @Override
   public VectorizedSupport.Support[] getSupportedFeatures() {
-    return null;
+    return new VectorizedSupport.Support[] { VectorizedSupport.Support.DECIMAL_64 };
   }
 }