diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out index 6e6d2da48e8c..ba3ea51df07e 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out @@ -1679,7 +1679,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out index 707be189e497..7820af959f45 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out @@ -1388,7 +1388,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[215][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1410,16 +1410,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 12 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 3 <- Reducer 12 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 10 <- Reducer 8 (SIMPLE_EDGE) + Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1501,6 +1501,25 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -1516,7 +1535,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1562,19 +1581,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reducer 12 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -1708,6 +1714,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: _col1 (type: int) minReductionHashAggr: 0.99 @@ -1736,17 +1753,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -1767,21 +1773,15 @@ STAGE PLANS: Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Union 5 Vertex: Union 5 @@ -1802,7 +1802,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[215][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out index 5d4e328faf21..1ea6391e5a7c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out @@ -1615,7 +1615,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1637,41 +1637,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE) - Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) - Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 2 (SIMPLE_EDGE) - Reducer 7 <- Map 15 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 10 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 14 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) + Reducer 3 <- Reducer 14 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) + Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - filterExpr: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized - Map 15 Map Operator Tree: TableScan alias: tbl_ice @@ -1740,20 +1720,97 @@ STAGE PLANS: Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map 15 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 11 + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reducer 12 Reduce Operator Tree: Merge Join Operator condition map: @@ -1769,7 +1826,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1815,182 +1872,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reducer 13 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: string) Reducer 14 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col5 - raw input shape: - window functions: - window function definition - alias: row_number_window_0 - name: row_number - window function: GenericUDAFRowNumberEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), -1L (type: bigint), _col6 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Reducer 2 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'The last one' (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Reducer 5 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: string), VALUE._col7 (type: int), KEY.iceberg_bucket(_col5, 16) (type: int), KEY.iceberg_truncate(_col6, 3) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, iceberg_bucket(_col5, 16), iceberg_truncate(_col6, 3) - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.tbl_ice - Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 7 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 2 Reduce Operator Tree: Merge Join Operator condition map: @@ -2005,7 +1900,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col9 (type: boolean) - Reducer 8 + Reducer 3 Reduce Operator Tree: Merge Join Operator condition map: @@ -2033,7 +1928,7 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: string) Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: bigint), _col7 (type: string) - Reducer 9 + Reducer 4 Reduce Operator Tree: Merge Join Operator condition map: @@ -2054,8 +1949,101 @@ STAGE PLANS: Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Union 4 - Vertex: Union 4 + Reducer 6 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: string), VALUE._col7 (type: int), KEY.iceberg_bucket(_col5, 16) (type: int), KEY.iceberg_truncate(_col6, 3) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, iceberg_bucket(_col5, 16), iceberg_truncate(_col6, 3) + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.tbl_ice + Reducer 7 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'The last one' (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) + Reducer 8 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: string) + Reducer 9 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (row_number_window_0 = 1) (type: boolean) + Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), -1L (type: bigint), _col6 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) + Union 5 + Vertex: Union 5 Stage: Stage-2 Dependency Collection @@ -2074,7 +2062,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out index 6a149603f73a..eca2961603bc 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out @@ -1602,7 +1602,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[222][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[267][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1624,40 +1624,20 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) - Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) - Reducer 6 <- Map 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) - Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 9 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: t2 - filterExpr: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized - Map 14 Map Operator Tree: TableScan alias: tbl_ice @@ -1726,7 +1706,46 @@ STAGE PLANS: Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map 14 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -1742,7 +1761,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1788,7 +1807,107 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 13 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col9 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 + Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col10 (type: bigint), _col11 (type: bigint), _col9 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 + Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col8 = 0L) or ((_col11 is not null and (_col8 <> 0L)) or ((_col0 is null or (_col9 < _col8)) and null and (_col8 <> 0L) and _col11 is null)) is null or (_col11 is null and (_col9 >= _col8) and _col0 is not null)) (type: boolean) + Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: bigint), _col7 (type: string) + Reducer 4 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 2258 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 2258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.tbl_ice + Reducer 6 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'The last one' (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.tbl_ice + Reducer 7 Reduce Operator Tree: Merge Join Operator condition map: @@ -1805,7 +1924,7 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: string) Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: string) - Reducer 13 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1846,7 +1965,7 @@ STAGE PLANS: output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.tbl_ice - Reducer 2 + Reducer 9 Reduce Operator Tree: Merge Join Operator condition map: @@ -1874,162 +1993,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'The last one' (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.tbl_ice - Reducer 5 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) + Group By Operator + aggregations: count(), count(_col0) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 6 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col9 (type: boolean) - Reducer 7 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 - Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col10 (type: bigint), _col11 (type: bigint), _col9 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 - Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col8 = 0L) or ((_col11 is not null and (_col8 <> 0L)) or ((_col0 is null or (_col9 < _col8)) and null and (_col8 <> 0L) and _col11 is null)) is null or (_col11 is null and (_col9 >= _col8) and _col0 is not null)) (type: boolean) - Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: bigint), _col7 (type: string) - Reducer 8 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col5 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 2258 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 2258 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.tbl_ice - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Union 4 - Vertex: Union 4 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Union 5 + Vertex: Union 5 Stage: Stage-2 Dependency Collection @@ -2048,7 +2036,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[222][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[267][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 0d94dff357c0..4daa2b068111 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -1777,7 +1777,7 @@ private static boolean canTableUseStats(Operator op, StringBuilder sb) { Operator ancestor = OperatorUtils.findSingleOperatorUpstream(op, TableScanOperator.class); if (ancestor != null) { TableScanOperator ts = (TableScanOperator) ancestor; - Boolean canUseStats = StatsUtils.checkCanProvideStats(new Table(ts.getConf().getTableMetadata().getTTable())); + Boolean canUseStats = StatsUtils.checkCanProvideStatsForOpt(new Table(ts.getConf().getTableMetadata().getTTable())); if (!canUseStats) { sb.append(ts.getConf().getTableMetadata().getFullyQualifiedName()); return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index dba737e382c0..1fe69df47e58 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -302,7 +302,9 @@ private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOpe boolean disableSemiJoin = false; if (conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS)) { // We already have the TableScan for one side of the join. Check this now. - if (!StatsUtils.checkCanProvideStats(new Table(ts.getConf().getTableMetadata().getTTable()))) { + // Semijoin reduction is a runtime filter, so relying on (possibly stale) stats only costs + // performance, not correctness - the relaxed check also accepts analyzed external tables. + if (!StatsUtils.checkCanProvideStatsForOpt(new Table(ts.getConf().getTableMetadata().getTTable()))) { LOG.debug("Disabling semijoin optimzation on {} since it is an external table and also could not provide statistics.", ts.getConf().getTableMetadata().getFullyQualifiedName()); disableSemiJoin = true; @@ -318,7 +320,7 @@ private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOpe if (columnOrigin != null && columnOrigin.op instanceof TableScanOperator) { // Join key origin has been traced to a table column. Check if the table is external. TableScanOperator joinKeyTs = (TableScanOperator) columnOrigin.op; - if (!StatsUtils.checkCanProvideStats(new Table(joinKeyTs.getConf().getTableMetadata().getTTable()))) { + if (!StatsUtils.checkCanProvideStatsForOpt(new Table(joinKeyTs.getConf().getTableMetadata().getTTable()))) { LOG.debug("Join key {} is from {} which is an external table and also could not provide statistics. " + "Disabling semijoin optimization.", columnOrigin.col, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 55f9d0c1e158..cfafb69d99eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1982,6 +1982,15 @@ public static boolean checkCanProvideColumnStats(Table table) { return !table.isNonNative() || table.getStorageHandler().canProvideColStatistics(table); } + /** + * Whether a table's statistics may be used for plan-shape optimizations such as semijoin + * reduction or map-join conversion, where relying on stale stats only affects performance, + * never correctness. + */ + public static boolean checkCanProvideStatsForOpt(Table table) { + return checkCanProvideStats(table) || StatsSetupConst.areBasicStatsUptoDate(table.getParameters()); + } + /** * Are the basic stats for the table up-to-date for query planning. * Can run additional checks compared to the version in StatsSetupConst. diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out index 9ea03dbdcca3..6fb59f4dd2aa 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@lday POSTHOOK: Output: default@lday #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[129][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select * from (select item1.S_ID S_ID, @@ -348,6 +348,7 @@ STAGE PLANS: TableScan alias: lday2 filterExpr: (ly_date is not null and d_date is not null) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_126_container, bigKeyColName:d_date, smallTablePos:1, keyRatio:1.0 Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -800,7 +801,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[129][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: select * from (select item1.S_ID S_ID, ytday1.D_DATE D_DATE