diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 6cce997c2bf..ecdc3594083 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3399,28 +3399,29 @@ struct OptimizeInstructions // execute anyhow, and things like branch hints were already being run. // After optimization, we will only run fewer things, and run no risk of // running new bad things. - if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) && - ExpressionAnalyzer::equal(ifTrue, ifFalse)) { - auto value = effects(ifTrue); - if (value.hasSideEffects()) { - // At best we don't need the condition, but need to execute the - // value twice. a block is larger than a select by 2 bytes, and we - // must drop one value, so 3, while we save the condition, so it's - // not clear this is worth it, TODO - } else { - // The value has no side effects, so we can replace ourselves with one - // of the two identical values in the arms. - auto condition = effects(c); - if (!condition.hasSideEffects()) { - return ifTrue; - } else { - // The condition is last, so we need a new local, and it may be a - // bad idea to use a block like we do for an if. Do it only if we - // can reorder - if (!condition.invalidates(value)) { - return builder.makeSequence(builder.makeDrop(c), ifTrue); - } + if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c)))) { + // Check if the values are identical and we can keep just the first one. + if (areConsecutiveInputsEqualAndFoldable(ifTrue, ifFalse)) { + // We can keep just the ifTrue branch, but we need to move its value + // past the condition. If there are side effects that force us to use + // a scratch local, then it is probably worth the extra local to + // execute the value expression once instead of twice. + if (canReorder(ifTrue, c)) { + return builder.makeSequence(builder.makeDrop(c), ifTrue); } + auto scratch = builder.addVar(getFunction(), ifTrue->type); + return builder.makeBlock( + {builder.makeLocalSet(scratch, ifTrue), + builder.makeDrop(c), + builder.makeLocalGet(scratch, ifTrue->type)}); + } + // Otherwise, check if the values are identical, but we would have to + // keep both. In this case the benefit is less, so we only optimize if + // we can avoid the scratch local. + if (areConsecutiveInputsEqual(ifTrue, ifFalse) && + canReorder(ifFalse, c)) { + return builder.makeBlock( + {builder.makeDrop(ifTrue), builder.makeDrop(c), ifFalse}); } } } diff --git a/test/lit/passes/optimize-instructions-mvp.wast b/test/lit/passes/optimize-instructions-mvp.wast index 021b868bcc1..4ed3b3a5a9a 100644 --- a/test/lit/passes/optimize-instructions-mvp.wast +++ b/test/lit/passes/optimize-instructions-mvp.wast @@ -6190,9 +6190,14 @@ ) ;; CHECK: (func $select-parallel (param $0 i32) (param $1 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.add - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -6281,6 +6286,72 @@ ) ) ) + + ;; CHECK: (func $select-no-fold (param $x i32) (param $y i32) (param $c i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $c) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $select-no-fold (param $x i32) (param $y i32) (param $c i32) + (drop + ;; We cannot fold away the ifFalse branch because it sets $y. But both + ;; branches produce the same value, so we can still optimize. + (select + (local.get $x) + (block (result i32) + (local.set $y (i32.const 0)) + (local.get $x) + ) + (local.get $c) + ) + ) + (drop + ;; Same, but now we cannot reorder the ifFalse and the condition, so we + ;; would need a scratch local. We do not bother optimizing. + (select + (local.get $x) + (block (result i32) + (local.set $y (i32.const 0)) + (local.get $x) + ) + (block (result i32) + (drop (local.get $y)) + (local.get $c) + ) + ) + ) + ) + ;; CHECK: (func $zero-shifts-is-not-sign-ext ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.eq diff --git a/test/lit/passes/optimize-instructions_branch-hints-fold.wast b/test/lit/passes/optimize-instructions_branch-hints-fold.wast index 6223817e59e..5d6904f457b 100644 --- a/test/lit/passes/optimize-instructions_branch-hints-fold.wast +++ b/test/lit/passes/optimize-instructions_branch-hints-fold.wast @@ -168,6 +168,9 @@ ) ;; CHECK: (func $always-fold-select (type $2) (param $x i32) (param $y i32) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") ;; CHECK-NEXT: (if (result i32) ;; CHECK-NEXT: (local.get $x) @@ -180,16 +183,23 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; NO_FO: (func $always-fold-select (type $2) (param $x i32) (param $y i32) (result i32) - ;; NO_FO-NEXT: (@metadata.code.branch_hint "\00") - ;; NO_FO-NEXT: (if (result i32) - ;; NO_FO-NEXT: (local.get $x) - ;; NO_FO-NEXT: (then - ;; NO_FO-NEXT: (i32.const 10) - ;; NO_FO-NEXT: ) - ;; NO_FO-NEXT: (else - ;; NO_FO-NEXT: (i32.const 20) + ;; NO_FO-NEXT: (local $2 i32) + ;; NO_FO-NEXT: (local.set $2 + ;; NO_FO-NEXT: (@metadata.code.branch_hint "\00") + ;; NO_FO-NEXT: (if (result i32) + ;; NO_FO-NEXT: (local.get $x) + ;; NO_FO-NEXT: (then + ;; NO_FO-NEXT: (i32.const 10) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (else + ;; NO_FO-NEXT: (i32.const 20) + ;; NO_FO-NEXT: ) ;; NO_FO-NEXT: ) ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (drop + ;; NO_FO-NEXT: (local.get $y) + ;; NO_FO-NEXT: ) + ;; NO_FO-NEXT: (local.get $2) ;; NO_FO-NEXT: ) (func $always-fold-select (param $x i32) (param $y i32) (result i32) ;; A select with different metadata is still foldable: the code was executed