From 34c5a442a19eadf6e290fe81d61dc28522e3ce5b Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 5 Jun 2026 13:46:28 -0700 Subject: [PATCH] Improve optimization of select between same values We previously optimized selects between the same value only when the ifTrue and ifFalse values had identical structure and no side effects and the condition could be dropped or reordered with the value. Make this more robust by using the `areConsecutiveInputs...` utilities to check whether the arms have equal values and whether they are foldable. The old code avoided introducing blocks to keep code size down, but the blocks it would introduce are not named and will be elided in the output, so introducing blocks is not a problem. The old code also avoided introducing a scratch local, but if a scratch local is necessary, it is because the arms have some side effects that would prevent reordering. In that case it is more likely to be beneficial to execute only one arm, so go ahead and use scratch locals in the case where the second arm can be folded away. --- src/passes/OptimizeInstructions.cpp | 43 ++++++----- .../lit/passes/optimize-instructions-mvp.wast | 77 ++++++++++++++++++- ...timize-instructions_branch-hints-fold.wast | 26 +++++-- 3 files changed, 114 insertions(+), 32 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 6cce997c2bf..ecdc3594083 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3399,28 +3399,29 @@ struct OptimizeInstructions // execute anyhow, and things like branch hints were already being run. // After optimization, we will only run fewer things, and run no risk of // running new bad things. - if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) && - ExpressionAnalyzer::equal(ifTrue, ifFalse)) { - auto value = effects(ifTrue); - if (value.hasSideEffects()) { - // At best we don't need the condition, but need to execute the - // value twice. a block is larger than a select by 2 bytes, and we - // must drop one value, so 3, while we save the condition, so it's - // not clear this is worth it, TODO - } else { - // The value has no side effects, so we can replace ourselves with one - // of the two identical values in the arms. - auto condition = effects(c); - if (!condition.hasSideEffects()) { - return ifTrue; - } else { - // The condition is last, so we need a new local, and it may be a - // bad idea to use a block like we do for an if. Do it only if we - // can reorder - if (!condition.invalidates(value)) { - return builder.makeSequence(builder.makeDrop(c), ifTrue); - } + if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c)))) { + // Check if the values are identical and we can keep just the first one. + if (areConsecutiveInputsEqualAndFoldable(ifTrue, ifFalse)) { + // We can keep just the ifTrue branch, but we need to move its value + // past the condition. If there are side effects that force us to use + // a scratch local, then it is probably worth the extra local to + // execute the value expression once instead of twice. + if (canReorder(ifTrue, c)) { + return builder.makeSequence(builder.makeDrop(c), ifTrue); } + auto scratch = builder.addVar(getFunction(), ifTrue->type); + return builder.makeBlock( + {builder.makeLocalSet(scratch, ifTrue), + builder.makeDrop(c), + builder.makeLocalGet(scratch, ifTrue->type)}); + } + // Otherwise, check if the values are identical, but we would have to + // keep both. In this case the benefit is less, so we only optimize if + // we can avoid the scratch local. + if (areConsecutiveInputsEqual(ifTrue, ifFalse) && + canReorder(ifFalse, c)) { + return builder.makeBlock( + {builder.makeDrop(ifTrue), builder.makeDrop(c), ifFalse}); } } } diff --git a/test/lit/passes/optimize-instructions-mvp.wast b/test/lit/passes/optimize-instructions-mvp.wast index 021b868bcc1..4ed3b3a5a9a 100644 --- a/test/lit/passes/optimize-instructions-mvp.wast +++ b/test/lit/passes/optimize-instructions-mvp.wast @@ -6190,9 +6190,14 @@ ) ;; CHECK: (func $select-parallel (param $0 i32) (param $1 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.add - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -6281,6 +6286,72 @@ ) ) ) + + ;; CHECK: (func $select-no-fold (param $x i32) (param $y i32) (param $c i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $select-no-fold (param $x i32) (param $y i32) (param $c i32) + (drop + ;; We cannot fold away the ifFalse branch because it sets $y. But both + ;; branches produce the same value, so we can still optimize. + (select + (local.get $x) + (block (result i32) + (local.set $y (i32.const 0)) + (local.get $x) + ) + (local.get $c) + ) + ) + (drop + ;; Same, but now we cannot reorder the ifFalse and the condition, so we + ;; would need a scratch local. We do not bother optimizing. + (select + (local.get $x) + (block (result i32) + (local.set $y (i32.const 0)) + (local.get $x) + ) + (block (result i32) + (drop (local.get $y)) + (local.get $c) + ) + ) + ) + ) + ;; CHECK: (func $zero-shifts-is-not-sign-ext ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.eq diff --git a/test/lit/passes/optimize-instructions_branch-hints-fold.wast b/test/lit/passes/optimize-instructions_branch-hints-fold.wast index 6223817e59e..5d6904f457b 100644 --- a/test/lit/passes/optimize-instructions_branch-hints-fold.wast +++ b/test/lit/passes/optimize-instructions_branch-hints-fold.wast @@ -168,6 +168,9 @@ ) ;; CHECK: (func $always-fold-select (type $2) (param $x i32) (param $y i32) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if (result i32) ;; CHECK-NEXT: (local.get $x) @@ -180,16 +183,23 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; NO_FO: (func $always-fold-select (type $2) (param $x i32) (param $y i32) (result i32) - ;; NO_FO-NEXT: (@metadata.code.branch_hint "\00") - ;; NO_FO-NEXT: (if (result i32) - ;; NO_FO-NEXT: (local.get $x) - ;; NO_FO-NEXT: (then - ;; NO_FO-NEXT: (i32.const 10) - ;; NO_FO-NEXT: ) - ;; NO_FO-NEXT: (else - ;; NO_FO-NEXT: (i32.const 20) + ;; NO_FO-NEXT: (local $2 i32) + ;; NO_FO-NEXT: (local.set $2 + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\00") + ;; NO_FO-NEXT: (if (result i32) + ;; NO_FO-NEXT: (local.get $x) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (i32.const 10) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (else + ;; NO_FO-NEXT: (i32.const 20) + ;; NO_FO-NEXT: ) ;; NO_FO-NEXT: ) ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (drop + ;; NO_FO-NEXT: (local.get $y) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (local.get $2) ;; NO_FO-NEXT: ) (func $always-fold-select (param $x i32) (param $y i32) (result i32) ;; A select with different metadata is still foldable: the code was executed