-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-29637: Incorrect Results for IS_NULL Predicate on Partition Column #6515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| set hive.exec.dynamic.partition.mode=nonstrict; | ||
| set hive.fetch.task.conversion=none; | ||
| set hive.explain.user=false; | ||
|
|
||
| drop table if exists ice_01; | ||
| create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg; | ||
|
|
||
| insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'; | ||
| insert into ice_01 partition (ds) select 'B', 'V2', 'null'; | ||
| insert into ice_01 partition (ds) select 'C', 'V3', null; | ||
|
|
||
| explain select key, value, ds from ice_01 where ds is null; | ||
| select key, value, ds from ice_01 where ds is null; | ||
|
|
||
| explain select key, value, ds from ice_01 where ds is not null; | ||
| select key, value, ds from ice_01 where ds is not null order by key; | ||
|
|
||
| select key, value, ds from ice_01 where ds = 'null'; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| PREHOOK: query: drop table if exists ice_01 | ||
| PREHOOK: type: DROPTABLE | ||
| PREHOOK: Output: database:default | ||
| POSTHOOK: query: drop table if exists ice_01 | ||
| POSTHOOK: type: DROPTABLE | ||
| POSTHOOK: Output: database:default | ||
| PREHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@ice_01 | ||
| POSTHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@ice_01 | ||
| PREHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08' | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@ice_01 | ||
| POSTHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08' | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@ice_01 | ||
| PREHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null' | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@ice_01 | ||
| POSTHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null' | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@ice_01 | ||
| PREHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@ice_01 | ||
| POSTHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@ice_01 | ||
| PREHOOK: query: explain select key, value, ds from ice_01 where ds is null | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@ice_01 | ||
| PREHOOK: Output: hdfs://### HDFS PATH ### | ||
| POSTHOOK: query: explain select key, value, ds from ice_01 where ds is null | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@ice_01 | ||
| POSTHOOK: Output: hdfs://### HDFS PATH ### | ||
| STAGE DEPENDENCIES: | ||
| Stage-1 is a root stage | ||
| Stage-0 depends on stages: Stage-1 | ||
|
|
||
| STAGE PLANS: | ||
| Stage: Stage-1 | ||
| Tez | ||
| #### A masked pattern was here #### | ||
| Vertices: | ||
| Map 1 | ||
| Map Operator Tree: | ||
| TableScan | ||
| alias: ice_01 | ||
| filterExpr: ds is null (type: boolean) | ||
| Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE | ||
| Select Operator | ||
| expressions: key (type: string), value (type: string), null (type: string) | ||
| outputColumnNames: _col0, _col1, _col2 | ||
| Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE | ||
| File Output Operator | ||
| compressed: false | ||
| Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE | ||
| table: | ||
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat | ||
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | ||
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | ||
| Execution mode: vectorized | ||
|
|
||
| Stage: Stage-0 | ||
| Fetch Operator | ||
| limit: -1 | ||
| Processor Tree: | ||
| ListSink | ||
|
|
||
| PREHOOK: query: select key, value, ds from ice_01 where ds is null | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@ice_01 | ||
| PREHOOK: Output: hdfs://### HDFS PATH ### | ||
| POSTHOOK: query: select key, value, ds from ice_01 where ds is null | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@ice_01 | ||
| POSTHOOK: Output: hdfs://### HDFS PATH ### | ||
| C V3 NULL | ||
| PREHOOK: query: explain select key, value, ds from ice_01 where ds is not null | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@ice_01 | ||
| PREHOOK: Output: hdfs://### HDFS PATH ### | ||
| POSTHOOK: query: explain select key, value, ds from ice_01 where ds is not null | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@ice_01 | ||
| POSTHOOK: Output: hdfs://### HDFS PATH ### | ||
| STAGE DEPENDENCIES: | ||
| Stage-1 is a root stage | ||
| Stage-0 depends on stages: Stage-1 | ||
|
|
||
| STAGE PLANS: | ||
| Stage: Stage-1 | ||
| Tez | ||
| #### A masked pattern was here #### | ||
| Vertices: | ||
| Map 1 | ||
| Map Operator Tree: | ||
| TableScan | ||
| alias: ice_01 | ||
| filterExpr: ds is not null (type: boolean) | ||
| Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE | ||
| Select Operator | ||
| expressions: key (type: string), value (type: string), ds (type: string) | ||
| outputColumnNames: _col0, _col1, _col2 | ||
| Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE | ||
| File Output Operator | ||
| compressed: false | ||
| Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE | ||
| table: | ||
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat | ||
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | ||
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | ||
| Execution mode: vectorized | ||
|
|
||
| Stage: Stage-0 | ||
| Fetch Operator | ||
| limit: -1 | ||
| Processor Tree: | ||
| ListSink | ||
|
|
||
| PREHOOK: query: select key, value, ds from ice_01 where ds is not null order by key | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@ice_01 | ||
| PREHOOK: Output: hdfs://### HDFS PATH ### | ||
| POSTHOOK: query: select key, value, ds from ice_01 where ds is not null order by key | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@ice_01 | ||
| POSTHOOK: Output: hdfs://### HDFS PATH ### | ||
| A V1 2000-04-08 | ||
| B V2 null | ||
| PREHOOK: query: select key, value, ds from ice_01 where ds = 'null' | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@ice_01 | ||
| PREHOOK: Output: hdfs://### HDFS PATH ### | ||
| POSTHOOK: query: select key, value, ds from ice_01 where ds = 'null' | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@ice_01 | ||
| POSTHOOK: Output: hdfs://### HDFS PATH ### | ||
| B V2 null |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,7 @@ | |
| import java.util.Properties; | ||
|
|
||
| import org.apache.commons.lang3.tuple.Pair; | ||
| import org.apache.hadoop.hive.conf.HiveConf; | ||
| import org.apache.hadoop.hive.metastore.api.FieldSchema; | ||
| import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; | ||
| import org.apache.hadoop.hive.ql.ddl.DDLUtils; | ||
|
|
@@ -33,6 +34,7 @@ | |
| import org.apache.hadoop.hive.ql.metadata.HiveException; | ||
| import org.apache.hadoop.hive.ql.metadata.Partition; | ||
| import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; | ||
| import org.apache.hadoop.hive.ql.session.SessionState; | ||
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | ||
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; | ||
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; | ||
|
|
@@ -73,6 +75,9 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv | |
| throw new HiveException("Internal error : Partition Spec size, " + partSpec.size() + | ||
| " doesn't match partition key definition size, " + partKeyTypes.length); | ||
| } | ||
| String defaultPartitionName = HiveConf.getVar(SessionState.getSessionConf(), | ||
| HiveConf.ConfVars.DEFAULT_PARTITION_NAME); | ||
|
|
||
| // Create the row object | ||
| List<String> partNames = new ArrayList<>(); | ||
| List<Object> partValues = new ArrayList<>(); | ||
|
|
@@ -82,9 +87,15 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv | |
| partNames.add(entry.getKey()); | ||
| ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector | ||
| (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i++])); | ||
| partValues.add(ObjectInspectorConverters.getConverter( | ||
| PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi) | ||
| .convert(entry.getValue())); | ||
|
|
||
| String partitionValue = entry.getValue(); | ||
| if (partitionValue.equals(defaultPartitionName)) { | ||
| partValues.add(null); // Null for default partition. | ||
| } else { | ||
| partValues.add(ObjectInspectorConverters.getConverter( | ||
| PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi) | ||
| .convert(partitionValue)); | ||
| } | ||
|
Comment on lines
+90
to
+98
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The description of This code assumes that the value is null but according to the description it may be other things as well. The change here will fix the SELECT key, value, ds FROM pcr_t1 WHERE ds > 'A';
SELECT key, value, ds FROM pcr_t1 WHERE ds < 'A';Do the queries return the same results before/after the changes in the PR for Iceberg and non-Iceberg tables ?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
PR scope is NULL semantics only. |
||
| partObjectInspectors.add(oi); | ||
| } | ||
| StructObjectInspector partObjectInspector = ObjectInspectorFactory | ||
|
|
@@ -104,7 +115,7 @@ public static Pair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr( | |
| ExprNodeDesc expr, List<String> partColumnNames, | ||
| List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException { | ||
| // Create the row object | ||
| List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(); | ||
| List<ObjectInspector> partObjectInspectors = new ArrayList<>(); | ||
| for (int i = 0; i < partColumnNames.size(); i++) { | ||
| partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( | ||
| partColumnTypeInfos.get(i))); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| set hive.exec.dynamic.partition.mode=nonstrict; | ||
| set hive.fetch.task.conversion=none; | ||
|
|
||
| drop table if exists pcr_t1; | ||
| create table pcr_t1 (key string, value string) partitioned by (ds string); | ||
|
|
||
| insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08'; | ||
| insert into pcr_t1 partition (ds) select 'B', 'V2', 'null'; | ||
| insert into pcr_t1 partition (ds) select 'C', 'V3', null; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The empty string value is another edge case that may create problems insert into pcr_t1 partition (ds) select 'D', 'V4', ''; |
||
|
|
||
| explain select key, value, ds from pcr_t1 where ds is null; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there something useful in the EXPLAIN output? If not we can drop those.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, |
||
| select key, value, ds from pcr_t1 where ds is null; | ||
|
|
||
| explain select key, value, ds from pcr_t1 where ds is not null; | ||
| select key, value, ds from pcr_t1 where ds is not null order by key; | ||
|
|
||
| select key, value, ds from pcr_t1 where ds = 'null'; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider enriching the tests with: SELECT key, value, ds FROM pcr_t1 WHERE ds > 'A';
SELECT key, value, ds FROM pcr_t1 WHERE ds < 'A'; |
||
Uh oh!
There was an error while loading. Please reload this page.