Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2306,6 +2306,7 @@ public List<Partition> getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta
}

Set<Partition> partitions = Sets.newHashSet();
String defaultPartitionName = HiveConf.getVar(conf, ConfVars.DEFAULT_PARTITION_NAME);

try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
FluentIterable.from(tasks)
Expand All @@ -2316,8 +2317,8 @@ public List<Partition> getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta
PartitionData partitionData = IcebergTableUtil.toPartitionData(task.partition(), spec.partitionType());
String partName = spec.partitionToPath(partitionData);

Map<String, String> partSpecMap = Maps.newLinkedHashMap();
Warehouse.makeSpecFromName(partSpecMap, new Path(partName), null);
Comment thread
zabetak marked this conversation as resolved.
Map<String, String> partSpecMap =
IcebergTableUtil.makeSpecFromName(partName, spec, partitionData, defaultPartitionName);

DummyPartition partition = new DummyPartition(hmsTable, partName, partSpecMap);
partitions.add(partition);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,24 @@ public static void performMetadataDelete(Table icebergTable, String branchName,
deleteFiles.deleteFromRowFilter(exp).commit();
}

/**
* Parses an Iceberg partition path into a Hive-compatible spec map, representing null partition
* values with the Hive default partition name.
*/
Comment thread
deniskuzZ marked this conversation as resolved.
public static Map<String, String> makeSpecFromName(String partName, PartitionSpec spec, PartitionData data,
    String defaultPartitionName) {
  // Start from the key/value pairs that Hive's Warehouse helper extracts from the partition path.
  Map<String, String> hiveSpec = Maps.newLinkedHashMap();
  Warehouse.makeSpecFromName(hiveSpec, new Path(partName), null);

  // Walk the partition fields in spec order; `position` is the index of the matching value in `data`.
  // Any field whose partition value is null is (re)mapped to the supplied default partition name.
  int position = 0;
  for (PartitionField field : spec.fields()) {
    if (data.get(position) == null) {
      hiveSpec.put(field.name(), defaultPartitionName);
    }
    position++;
  }
  return hiveSpec;
}

public static PartitionData toPartitionData(StructLike key, Types.StructType keyType) {
PartitionData keyTemplate = new PartitionData(keyType);
return keyTemplate.copyFor(key);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Session settings for this test: non-strict dynamic partitioning so that the partition column (ds)
-- can be supplied entirely by the SELECT, fetch-task conversion disabled, and hive.explain.user=false
-- for the EXPLAIN statements below.
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.fetch.task.conversion=none;
set hive.explain.user=false;

-- Iceberg table partitioned by a string column.
drop table if exists ice_01;
create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg;

-- Row A: ordinary partition value.
insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08';
-- Row B: the literal string 'null' (a real, non-null value).
insert into ice_01 partition (ds) select 'B', 'V2', 'null';
-- Row C: a SQL NULL partition value.
insert into ice_01 partition (ds) select 'C', 'V3', null;

-- IS NULL must match only the SQL NULL partition (row C).
explain select key, value, ds from ice_01 where ds is null;
select key, value, ds from ice_01 where ds is null;

-- IS NOT NULL must match rows A and B, including the literal 'null' string.
explain select key, value, ds from ice_01 where ds is not null;
select key, value, ds from ice_01 where ds is not null order by key;

-- Equality against the literal string 'null' must match only row B, not the SQL NULL partition.
select key, value, ds from ice_01 where ds = 'null';
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
PREHOOK: query: drop table if exists ice_01
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists ice_01
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice_01
POSTHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_01
PREHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_01
POSTHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_01
PREHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_01
POSTHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_01
PREHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_01
POSTHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_01
PREHOOK: query: explain select key, value, ds from ice_01 where ds is null
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: explain select key, value, ds from ice_01 where ds is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ice_01
filterExpr: ds is null (type: boolean)
Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), null (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: select key, value, ds from ice_01 where ds is null
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select key, value, ds from ice_01 where ds is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
C V3 NULL
PREHOOK: query: explain select key, value, ds from ice_01 where ds is not null
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: explain select key, value, ds from ice_01 where ds is not null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ice_01
filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: select key, value, ds from ice_01 where ds is not null order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select key, value, ds from ice_01 where ds is not null order by key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
A V1 2000-04-08
B V2 null
PREHOOK: query: select key, value, ds from ice_01 where ds = 'null'
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select key, value, ds from ice_01 where ds = 'null'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
B V2 null
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.Properties;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.ddl.DDLUtils;
Expand All @@ -33,6 +34,7 @@
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
Expand Down Expand Up @@ -73,6 +75,9 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv
throw new HiveException("Internal error : Partition Spec size, " + partSpec.size() +
" doesn't match partition key definition size, " + partKeyTypes.length);
}
String defaultPartitionName = HiveConf.getVar(SessionState.getSessionConf(),
HiveConf.ConfVars.DEFAULT_PARTITION_NAME);

// Create the row object
List<String> partNames = new ArrayList<>();
List<Object> partValues = new ArrayList<>();
Expand All @@ -82,9 +87,15 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv
partNames.add(entry.getKey());
ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector
(TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i++]));
partValues.add(ObjectInspectorConverters.getConverter(
PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
.convert(entry.getValue()));

String partitionValue = entry.getValue();
if (partitionValue.equals(defaultPartitionName)) {
partValues.add(null); // Null for default partition.
} else {
partValues.add(ObjectInspectorConverters.getConverter(
PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
.convert(partitionValue));
}
Comment on lines +90 to +98
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The description of hive.exec.default.partition.name property says the following:

"The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped."

This code assumes that the value is null but according to the description it may be other things as well.

The change here will fix the IS [NOT] NULL predicate but may potentially change the behavior of some queries:

SELECT key, value, ds FROM pcr_t1 WHERE ds > 'A';
SELECT key, value, ds FROM pcr_t1 WHERE ds < 'A';

Do the queries return the same results before and after the changes in this PR, for both Iceberg and non-Iceberg tables?
Which behavior should prevail?

Copy link
Copy Markdown
Member Author

@deniskuzZ deniskuzZ Jun 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-Iceberg pcr_t1

  (NULL and '' both land in ds=__HIVE_DEFAULT_PARTITION__)
  
  ┌────────────────┬─────────────────────────────────────────┬─────────────────────────────────────────┬─────────────────┐
  │     Query      │               WITHOUT fix               │                WITH fix                 │ Changed by fix? │
  ├────────────────┼─────────────────────────────────────────┼─────────────────────────────────────────┼─────────────────┤
  │ ds is null     │ (none) ❌                                │ C, D → ds=NULL                          │ ✅  fixed        │
  ├────────────────┼─────────────────────────────────────────┼─────────────────────────────────────────┼─────────────────┤
  │ ds is not null │ A, B                                    │ A, B                                    │ no              │
  ├────────────────┼─────────────────────────────────────────┼─────────────────────────────────────────┼─────────────────┤
  │ ds = 'null'    │ B                                       │ B                                       │ no              │
  ├────────────────┼─────────────────────────────────────────┼─────────────────────────────────────────┼─────────────────┤
  │ ds > 'A'       │ C, D, B → ds=__HIVE_DEFAULT_PARTITION__ │ C, D, B → ds=__HIVE_DEFAULT_PARTITION__ │ no              │
  ├────────────────┼─────────────────────────────────────────┼─────────────────────────────────────────┼─────────────────┤
  │ ds < 'A'       │ A                                       │ A                                       │ no              │
  └────────────────┴─────────────────────────────────────────┴─────────────────────────────────────────┴─────────────────┘
  
  Iceberg ice_01

  (NULL → null partition; '' → real ds='')

  ┌────────────────┬─────────────┬──────────┬─────────────────┐
  │     Query      │ WITHOUT fix │ WITH fix │ Changed by fix? │
  ├────────────────┼─────────────┼──────────┼─────────────────┤
  │ ds is null     │ (none) ❌    │ C        │ ✅  fixed        │
  ├────────────────┼─────────────┼──────────┼─────────────────┤
  │ ds is not null │ A, B, D     │ A, B, D  │ no              │
  ├────────────────┼─────────────┼──────────┼─────────────────┤
  │ ds = 'null'    │ B           │ B        │ no              │
  ├────────────────┼─────────────┼──────────┼─────────────────┤
  │ ds > 'A'       │ B           │ B        │ no              │
  ├────────────────┼─────────────┼──────────┼─────────────────┤
  │ ds < 'A'       │ D, A        │ A, D     │ no (same set)   │
  └────────────────┴─────────────┴──────────┴─────────────────┘

  • The fix changes exactly one query on each table type: ds is null (previously returned nothing — the bug)
  • Range queries (>, <) are unchanged before/after on both table types → no regression.
  • Iceberg is fully SQL-correct throughout because it never conflates NULL with ''.

PR scope is NULL semantics only.

partObjectInspectors.add(oi);
}
StructObjectInspector partObjectInspector = ObjectInspectorFactory
Expand All @@ -104,7 +115,7 @@ public static Pair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
ExprNodeDesc expr, List<String> partColumnNames,
List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
// Create the row object
List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
List<ObjectInspector> partObjectInspectors = new ArrayList<>();
for (int i = 0; i < partColumnNames.size(); i++) {
partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
partColumnTypeInfos.get(i)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ public static boolean prunePartitionNames(List<String> partColumnNames,
Warehouse.makeValsFromName(partName, values);

List<Object> convertedValues = new ArrayList<>(values.size());
for(int i=0; i<values.size(); i++) {
for (int i = 0; i < values.size(); i++) {
String partitionValue = values.get(i);
PrimitiveTypeInfo typeInfo = partColumnTypeInfos.get(i);

Expand Down
17 changes: 17 additions & 0 deletions ql/src/test/queries/clientpositive/pcr_null_partition.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.fetch.task.conversion=none;

drop table if exists pcr_t1;
create table pcr_t1 (key string, value string) partitioned by (ds string);

insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08';
insert into pcr_t1 partition (ds) select 'B', 'V2', 'null';
insert into pcr_t1 partition (ds) select 'C', 'V3', null;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The empty string value is another edge case that may create problems:

insert into pcr_t1 partition (ds) select 'D', 'V4', '';


explain select key, value, ds from pcr_t1 where ds is null;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there something useful in the EXPLAIN output? If not, we can drop those.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, filterExpr: ds is not null (type: boolean)

select key, value, ds from pcr_t1 where ds is null;

explain select key, value, ds from pcr_t1 where ds is not null;
select key, value, ds from pcr_t1 where ds is not null order by key;

select key, value, ds from pcr_t1 where ds = 'null';
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider enriching the tests with:

SELECT key, value, ds FROM pcr_t1 WHERE ds > 'A';
SELECT key, value, ds FROM pcr_t1 WHERE ds < 'A';

40 changes: 24 additions & 16 deletions ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
Filter Operator
predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink

PREHOOK: query: explain select * from loc_orc_n4
PREHOOK: type: QUERY
Expand Down Expand Up @@ -228,10 +230,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
Filter Operator
predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink

PREHOOK: query: explain select * from loc_orc_n4
PREHOOK: type: QUERY
Expand Down Expand Up @@ -283,10 +287,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
Filter Operator
predicate: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink

PREHOOK: query: explain select * from loc_orc_n4 where year='2001' and year='__HIVE_DEFAULT_PARTITION__'
PREHOOK: type: QUERY
Expand Down Expand Up @@ -475,10 +481,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year <> '2001') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
ListSink
Filter Operator
predicate: (year <> '2001') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
ListSink

PREHOOK: query: explain select * from loc_orc_n4
PREHOOK: type: QUERY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,11 @@ STAGE PLANS:
alias: dynamic_part_table
filterExpr: ((partcol1 = '1') and (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
GatherStats: false
Select Operator
expressions: intcol (type: string)
outputColumnNames: _col0
ListSink
Filter Operator
isSamplingPred: false
predicate: (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: intcol (type: string)
outputColumnNames: _col0
ListSink

Loading
Loading