apache · rubenada · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 23, 2026
diff --git a/...ceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out b/...ceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out
@@ -48,10 +48,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: tbl_ice
-                  filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean)
+                  filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean)
                   Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
+                    predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean)
                     Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)

diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out
@@ -150,27 +150,27 @@ Stage-0
       File Output Operator [FS_61]
         Limit [LIM_60] (rows=20 width=447)
           Number of rows:20
-          Select Operator [SEL_59] (rows=473 width=447)
+          Select Operator [SEL_59] (rows=791 width=447)
             Output:["_col0","_col1","_col2","_col3","_col4"]
           <-Map 1 [SIMPLE_EDGE] vectorized, llap
             SHUFFLE [RS_58]
-              Top N Key Operator [TNK_57] (rows=473 width=447)
+              Top N Key Operator [TNK_57] (rows=791 width=447)
                 keys:_col0,top n:20
-                Map Join Operator [MAPJOIN_56] (rows=473 width=447)
+                Map Join Operator [MAPJOIN_56] (rows=791 width=447)
                   BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
                 <-Map 3 [CUSTOM_EDGE] vectorized, llap
                   MULTICAST [RS_53]
                     PartitionCols:_col0, _col1
-                    Select Operator [SEL_52] (rows=387 width=178)
+                    Select Operator [SEL_52] (rows=500 width=178)
                       Output:["_col0","_col1"]
-                      Filter Operator [FIL_51] (rows=387 width=178)
-                        predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null)
+                      Filter Operator [FIL_51] (rows=500 width=178)
+                        predicate:((key <> '0') and (key <> '100') and value is not null)
                         TableScan [TS_3] (rows=500 width=178)
                           default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-                <-Select Operator [SEL_55] (rows=387 width=269)
+                <-Select Operator [SEL_55] (rows=500 width=269)
                     Output:["_col0","_col1","_col2"]
-                    Filter Operator [FIL_54] (rows=387 width=269)
-                      predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null)
+                    Filter Operator [FIL_54] (rows=500 width=269)
+                      predicate:((key1 <> '0') and (key1 <> '100') and key2 is not null)
                       TableScan [TS_0] (rows=500 width=269)
                         default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"]
 
@@ -346,27 +346,27 @@ Stage-0
       File Output Operator [FS_41]
         Limit [LIM_40] (rows=20 width=447)
           Number of rows:20
-          Select Operator [SEL_39] (rows=473 width=447)
+          Select Operator [SEL_39] (rows=791 width=447)
             Output:["_col0","_col1","_col2","_col3","_col4"]
           <-Map 1 [SIMPLE_EDGE] vectorized, llap
             SHUFFLE [RS_38]
-              Top N Key Operator [TNK_37] (rows=473 width=447)
+              Top N Key Operator [TNK_37] (rows=791 width=447)
                 keys:_col0,top n:20
-                Map Join Operator [MAPJOIN_36] (rows=473 width=447)
+                Map Join Operator [MAPJOIN_36] (rows=791 width=447)
                   BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
                 <-Map 3 [CUSTOM_EDGE] vectorized, llap
                   MULTICAST [RS_33]
                     PartitionCols:_col0
-                    Select Operator [SEL_32] (rows=387 width=178)
+                    Select Operator [SEL_32] (rows=500 width=178)
                       Output:["_col0","_col1"]
-                      Filter Operator [FIL_31] (rows=387 width=178)
-                        predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100')))
+                      Filter Operator [FIL_31] (rows=500 width=178)
+                        predicate:((key <> '0') and (key <> '100'))
                         TableScan [TS_3] (rows=500 width=178)
                           default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-                <-Select Operator [SEL_35] (rows=387 width=269)
+                <-Select Operator [SEL_35] (rows=500 width=269)
                     Output:["_col0","_col1","_col2"]
-                    Filter Operator [FIL_34] (rows=387 width=269)
-                      predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100')))
+                    Filter Operator [FIL_34] (rows=500 width=269)
+                      predicate:((key1 <> '0') and (key1 <> '100'))
                       TableScan [TS_0] (rows=500 width=269)
                         default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"]
 
@@ -435,40 +435,40 @@ POSTHOOK: Input: default@srcbucket_big
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 2 <- Map 1 (BROADCAST_EDGE)
-Reducer 3 <- Map 2 (SIMPLE_EDGE)
+Map 1 <- Map 3 (CUSTOM_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:20
     Stage-1
-      Reducer 3 vectorized, llap
+      Reducer 2 vectorized, llap
       File Output Operator [FS_41]
         Limit [LIM_40] (rows=20 width=447)
           Number of rows:20
-          Select Operator [SEL_39] (rows=612 width=447)
+          Select Operator [SEL_39] (rows=791 width=447)
             Output:["_col0","_col1","_col2","_col3","_col4"]
-          <-Map 2 [SIMPLE_EDGE] vectorized, llap
+          <-Map 1 [SIMPLE_EDGE] vectorized, llap
             SHUFFLE [RS_38]
-              Top N Key Operator [TNK_37] (rows=612 width=447)
+              Top N Key Operator [TNK_37] (rows=791 width=447)
                 keys:_col0,top n:20
-                Map Join Operator [MAPJOIN_36] (rows=612 width=447)
-                  Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
-                <-Map 1 [BROADCAST_EDGE] vectorized, llap
-                  BROADCAST [RS_33]
+                Map Join Operator [MAPJOIN_36] (rows=791 width=447)
+                  BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
+                <-Map 3 [CUSTOM_EDGE] vectorized, llap
+                  MULTICAST [RS_33]
                     PartitionCols:_col0
-                    Select Operator [SEL_32] (rows=387 width=269)
-                      Output:["_col0","_col1","_col2"]
-                      Filter Operator [FIL_31] (rows=387 width=269)
-                        predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null)
-                        TableScan [TS_0] (rows=500 width=269)
-                          default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"]
-                <-Select Operator [SEL_35] (rows=500 width=178)
-                    Output:["_col0","_col1"]
-                    Filter Operator [FIL_34] (rows=500 width=178)
-                      predicate:key is not null
-                      TableScan [TS_3] (rows=500 width=178)
-                        default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                    Select Operator [SEL_32] (rows=500 width=178)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_31] (rows=500 width=178)
+                        predicate:key is not null
+                        TableScan [TS_3] (rows=500 width=178)
+                          default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                <-Select Operator [SEL_35] (rows=500 width=269)
+                    Output:["_col0","_col1","_col2"]
+                    Filter Operator [FIL_34] (rows=500 width=269)
+                      predicate:((key2 <> 'val_0') and (key2 <> 'val_100') and key1 is not null)
+                      TableScan [TS_0] (rows=500 width=269)
+                        default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"]
 
 PREHOOK: query: SELECT *
 FROM srcbucket_big a

diff --git a/.../iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out b/.../iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out
@@ -71,10 +71,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: tbl_ice
-                  filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean)
+                  filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean)
                   Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
-                    predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
+                    predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)

diff --git a/...ceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out b/...ceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out
@@ -71,7 +71,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: tbl_ice
-                  filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean)
+                  filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean)
                   Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((a = 22) or (b) IN ('four', 'one')) (type: boolean)
@@ -93,7 +93,7 @@ STAGE PLANS:
                       Map-reduce partition columns: FILE__PATH (type: string)
                       Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
+                    predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean)
                     Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java
@@ -72,30 +72,44 @@ public SearchTransformer(RexBuilder rexBuilder, RexCall search, final RexUnknown
     this.unknownContext = unknownContext;
   }
 
+  /**
+   * Transforms the SEARCH expression into an equivalent RexNode expression.
+   * Warning: when called from a shuttle, callers of this method should consider flattening AND/OR expressions
+   * afterward, to get the same result as applying {@link SearchTransformer.Shuttle}.
+   */
   public RexNode transform() {
     PerfLogger perfLogger = SessionState.getPerfLogger();
     perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.SEARCH_TRANSFORMER);
 
-    RangeConverter<C> consumer = new RangeConverter<>(rexBuilder, operandType, ref);
-    RangeSets.forEach(sarg.rangeSet, consumer);
-
     List<RexNode> orList = new ArrayList<>();
     if (sarg.nullAs == RexUnknownAs.TRUE && unknownContext != RexUnknownAs.TRUE) {
       orList.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref));
     }
-    switch (consumer.inLiterals.size()) {
-    case 0:
-      break;
-    case 1:
-      orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0)));
-      break;
-    default:
-      List<RexNode> operands = new ArrayList<>(consumer.inLiterals.size() + 1);
-      operands.add(ref);
-      operands.addAll(consumer.inLiterals);
-      orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands));
+
+    if (sarg.isComplementedPoints()) {
+      // Generate 'ref <> value1 AND ... AND ref <> valueN'
+      List<RexNode> list = sarg.rangeSet.complement().asRanges().stream().map(
+          range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref,
+              rexBuilder.makeLiteral(range.lowerEndpoint(), operandType, true, true))).toList();
+      orList.add(RexUtil.composeConjunction(rexBuilder, list));
+    } else {
+      RangeConverter<C> consumer = new RangeConverter<>(rexBuilder, operandType, ref);
+      RangeSets.forEach(sarg.rangeSet, consumer);
+
+      switch (consumer.inLiterals.size()) {
+      case 0:
+        break;
+      case 1:
+        orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0)));
+        break;
+      default:
+        List<RexNode> operands = new ArrayList<>(consumer.inLiterals.size() + 1);
+        operands.add(ref);
+        operands.addAll(consumer.inLiterals);
+        orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands));
+      }
+      orList.addAll(consumer.nodes);
     }
-    orList.addAll(consumer.nodes);
     RexNode x = RexUtil.composeDisjunction(rexBuilder, orList);
 
     if (sarg.nullAs == RexUnknownAs.FALSE && unknownContext != RexUnknownAs.FALSE) {

diff --git a/...rc/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/...rc/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -628,14 +628,23 @@ private RexNode makeLiteral(C value) {
     private double compute() {
       final List<RexNode> inLiterals = new ArrayList<>();
       final List<Double> rangeSelectivities = new ArrayList<>();
-      for (Range<C> range : sarg.rangeSet.asRanges()) {
-        if (!range.hasLowerBound() && !range.hasUpperBound()) {
-          return 1.0; // "all" range
+      final List<Double> searchSelectivities = new ArrayList<>();
+
+      if (sarg.isComplementedPoints()) {
+        // Generate 'ref <> value1 AND ... AND ref <> valueN'
+        List<RexNode> notEq = sarg.rangeSet.complement().asRanges().stream()
+            .map(range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref, makeLiteral(range.lowerEndpoint())))
+            .toList();
+        searchSelectivities.add(RexUtil.composeConjunction(rexBuilder, notEq).accept(FilterSelectivityEstimator.this));
+      } else {
+        for (Range<C> range : sarg.rangeSet.asRanges()) {
+          if (!range.hasLowerBound() && !range.hasUpperBound()) {
+            return 1.0; // "all" range
+          }
+          processRangeSelectivity(range, rangeSelectivities, inLiterals);
         }
-        processRangeSelectivity(range, rangeSelectivities, inLiterals);
       }
 
-      final List<Double> searchSelectivities = new ArrayList<>();
       if (!rangeSelectivities.isEmpty() && rangeSelectivities.stream().noneMatch(Objects::isNull)) {
         // Aggregate all ranges selectivity, respecting the max value of 1
         double total = Math.min(1.0, rangeSelectivities.stream().mapToDouble(Double::doubleValue).sum());
@@ -655,7 +664,8 @@ private double compute() {
           List<RexNode> operands = new ArrayList<>(inLiterals.size() + 1);
           operands.add(ref);
           operands.addAll(inLiterals);
-          searchSelectivities.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands).accept(FilterSelectivityEstimator.this));
+          searchSelectivities.add(
+              rexBuilder.makeCall(HiveIn.INSTANCE, operands).accept(FilterSelectivityEstimator.this));
         }
       }
 
@@ -664,7 +674,9 @@ private double compute() {
             rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref).accept(FilterSelectivityEstimator.this));
       }
 
-      return searchSelectivities.size() == 1 ? searchSelectivities.get(0) : computeDisjunctionSelectivity(searchSelectivities);
+      return searchSelectivities.size() == 1
+          ? searchSelectivities.get(0)
+          : computeDisjunctionSelectivity(searchSelectivities);
     }
 
     private void processRangeSelectivity(Range<C> range, List<Double> rangeSelectivities, List<RexNode> inLiterals) {

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -82,6 +82,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -214,6 +215,12 @@ public ExprNodeDesc visitCall(RexCall call) {
         && SqlTypeUtil.equalSansNullability(dTFactory, call.getType(),
             call.operands.get(0).getType())) {
       return args.get(0);
+    } else if (call.isA(SqlKind.AND)) {
+      // Make sure AND is flattened (we may have nested ANDs due to SearchTransformer conversion above)
+      return ExprNodeDescUtils.and(args);
+    }  else if (call.isA(SqlKind.OR)) {
+      // Make sure OR is flattened (we may have nested ORs due to SearchTransformer conversion above)
+      return ExprNodeDescUtils.or(args);
     } else {
       GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(),
           args.size());