From 18f188d257f4c2b70796e636d91b94d609d7d9ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Thu, 19 Jun 2025 12:32:39 +0100 Subject: [PATCH 01/20] Always inline trivial calls that always shrinks A "trivial call" is a function that just calls another function. (func $foo ... (call $target ...)) Currently we inline these functions always only when not optimizing for code size. When optimizing for code size, these functions can always be inlined when 1. The arguments to `$target` are all function argument locals. 2. The locals are not used more than once 3. The locals are used in the same order they appear in the function arguments. When these hold, inlining `$foo` never increases code size as it doesn't cause introducing more locals at call sites. Improve `FunctionInfo` type and `FunctionInfoScanner` to annotate functions with "trivial call" information that also contains whether inlining shrinks code size. If a function shrinks when inlined always inline it even with `-Os`. Otherwise inline it as before, i.e. when not optimizing for code size. --- src/passes/Inlining.cpp | 82 ++++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 876c186d471..cfe4e455398 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -67,7 +67,25 @@ enum class InliningMode { SplitPatternB }; -// Useful into on a function, helping us decide if we can inline it +// Whether a function just calls another function with only `local.get`s as +// arguments. +enum class TrivialCall { + // Function does not just call another function, with only `local.get`s as + // arguments. + NotTrivial, + + // Function just calls another function, and all arguments are `local.get`s + // with strictly increasing local index. This means code size always shrinks + // when this function is inlined. 
+ Shrinks, + + // Function just calls another function, but maybe with arguments other than + // `local.get`s, or maybe some locals are used more than once. In this case + // code size does not always shrink. + MayNotShrink, +}; + +// Useful info on a function, helping us decide if we can inline it. struct FunctionInfo { std::atomic refs; Index size; @@ -77,16 +95,7 @@ struct FunctionInfo { // Something is used globally if there is a reference to it in a table or // export etc. bool usedGlobally; - // We consider a function to be a trivial call if the body is just a call with - // trivial arguments, like this: - // - // (func $forward (param $x) (param $y) - // (call $target (local.get $x) (local.get $y)) - // ) - // - // Specifically the body must be a call, and the operands to the call must be - // of size 1 (generally, LocalGet or Const). - bool isTrivialCall; + TrivialCall isTrivialCall; InliningMode inliningMode; FunctionInfo() { clear(); } @@ -98,7 +107,7 @@ struct FunctionInfo { hasLoops = false; hasTryDelegate = false; usedGlobally = false; - isTrivialCall = false; + isTrivialCall = TrivialCall::NotTrivial; inliningMode = InliningMode::Unknown; } @@ -132,6 +141,13 @@ struct FunctionInfo { size <= options.inlining.oneCallerInlineMaxSize) { return true; } + // If the function just calls another function using its locals as + // arguments, and arguments are used in strictly increasing order, and each + // argument is used at most once, then inlining it shrinks the code size and + // it's also good for runtime. So we always inline it. + if (isTrivialCall == TrivialCall::Shrinks) { + return true; + } // If it's so big that we have no flexible options that could allow it, // do not inline. if (size > options.inlining.flexibleInlineMaxSize) { @@ -143,18 +159,13 @@ struct FunctionInfo { if (options.shrinkLevel > 0 || options.optimizeLevel < 3) { return false; } - if (hasCalls) { - // This has calls. 
If it is just a trivial call itself then inline, as we - // will save a call that way - basically we skip a trampoline in the - // middle - but if it is something more complex, leave it alone, as we may - // not help much (and with recursion we may end up with a wasteful - // increase in code size). - // - // Note that inlining trivial calls may increase code size, e.g. if they - // use a parameter more than once (forcing us after inlining to save that - // value to a local, etc.), but here we are optimizing for speed and not - // size, so we risk it. - return isTrivialCall; + // The function just calls another function, but it's using locals in + // different order than the argument order, and/or using some locals more + // than once. In this case we inline if we're not optimizing for code size, + // as inlining it to more than one call site may increase code size by + // introducing locals. + if (isTrivialCall == TrivialCall::MayNotShrink) { + return true; } // This doesn't have calls. Inline if loops do not prevent us (normally, a // loop suggests a lot of work and so inlining is less useful). @@ -227,10 +238,31 @@ struct FunctionInfoScanner info.size = Measurer::measure(curr->body); if (auto* call = curr->body->dynCast()) { + // If call arguments are function locals read in order (maybe by skipping + // some of the arguments), then the code size always shrinks when the call + // is inlined. + bool shrinks = true; + Index lastLocalGetIndex = -1; + for (auto* operand : call->operands) { + if (auto* localGet = operand->dynCast()) { + if (localGet->index > lastLocalGetIndex) { + lastLocalGetIndex = localGet->index; + } else { + shrinks = false; + break; + } + } + } + + if (shrinks) { + info.isTrivialCall = TrivialCall::Shrinks; + return; + } + if (info.size == call->operands.size() + 1) { // This function body is a call with some trivial (size 1) operands like // LocalGet or Const, so it is a trivial call. 
- info.isTrivialCall = true; + info.isTrivialCall = TrivialCall::MayNotShrink; } } } From d5afc8e38e3f7fb909b7058934e7fe381edb23b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Thu, 19 Jun 2025 13:52:52 +0100 Subject: [PATCH 02/20] Generalize "simple call" check when inlining to cover binary and unary instructions --- src/passes/Inlining.cpp | 44 ++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index cfe4e455398..3d4ed344b82 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -67,19 +67,19 @@ enum class InliningMode { SplitPatternB }; -// Whether a function just calls another function with only `local.get`s as +// Whether a function is just a single instruction with only `local.get`s as // arguments. -enum class TrivialCall { - // Function does not just call another function, with only `local.get`s as +enum class TrivialInstruction { + // Function is not just a single instruction, with only `local.get`s as // arguments. NotTrivial, - // Function just calls another function, and all arguments are `local.get`s - // with strictly increasing local index. This means code size always shrinks - // when this function is inlined. + // Function is a single instruction, and all arguments to the instruction are + // `local.get`s with strictly increasing local index. This means code size + // always shrinks when this function is inlined. Shrinks, - // Function just calls another function, but maybe with arguments other than + // Function is a single instruction, but maybe with arguments other than // `local.get`s, or maybe some locals are used more than once. In this case // code size does not always shrink. MayNotShrink, @@ -95,7 +95,7 @@ struct FunctionInfo { // Something is used globally if there is a reference to it in a table or // export etc.
bool usedGlobally; - TrivialCall isTrivialCall; + TrivialInstruction isTrivialInstruction; InliningMode inliningMode; FunctionInfo() { clear(); } @@ -107,7 +107,7 @@ struct FunctionInfo { hasLoops = false; hasTryDelegate = false; usedGlobally = false; - isTrivialCall = TrivialCall::NotTrivial; + isTrivialInstruction = TrivialInstruction::NotTrivial; inliningMode = InliningMode::Unknown; } @@ -119,7 +119,7 @@ struct FunctionInfo { hasLoops = other.hasLoops; hasTryDelegate = other.hasTryDelegate; usedGlobally = other.usedGlobally; - isTrivialCall = other.isTrivialCall; + isTrivialInstruction = other.isTrivialInstruction; inliningMode = other.inliningMode; return *this; } @@ -145,7 +145,7 @@ struct FunctionInfo { // arguments, and arguments are used in strictly increasing order, and each // argument is used at most once, then inlining it shrinks the code size and // it's also good for runtime. So we always inline it. - if (isTrivialCall == TrivialCall::Shrinks) { + if (isTrivialInstruction == TrivialInstruction::Shrinks) { return true; } // If it's so big that we have no flexible options that could allow it, @@ -164,7 +164,7 @@ struct FunctionInfo { // than once. In this case we inline if we're not optimizing for code size, // as inlining it to more than one call site may increase code size by // introducing locals. - if (isTrivialCall == TrivialCall::MayNotShrink) { + if (isTrivialInstruction == TrivialInstruction::MayNotShrink) { return true; } // This doesn't have calls. Inline if loops do not prevent us (normally, a @@ -255,14 +255,30 @@ struct FunctionInfoScanner } if (shrinks) { - info.isTrivialCall = TrivialCall::Shrinks; + info.isTrivialInstruction = TrivialInstruction::Shrinks; return; } if (info.size == call->operands.size() + 1) { // This function body is a call with some trivial (size 1) operands like // LocalGet or Const, so it is a trivial call. 
- info.isTrivialCall = TrivialCall::MayNotShrink; + info.isTrivialInstruction = TrivialInstruction::MayNotShrink; + } + + } else if (auto* binary = curr->body->dynCast()) { + info.isTrivialInstruction = TrivialInstruction::MayNotShrink; + if (auto* left = binary->left->dynCast()) { + if (auto* right = binary->right->dynCast()) { + if (right->index > left->index) { + info.isTrivialInstruction = TrivialInstruction::Shrinks; + } + } + } + + } else if (auto* unary = curr->body->dynCast()) { + info.isTrivialInstruction = TrivialInstruction::MayNotShrink; + if (unary->value->dynCast()) { + info.isTrivialInstruction = TrivialInstruction::Shrinks; } } } From 78960aa636ca09c9a2a9fa5a5fe4a5cb66f6c433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Thu, 19 Jun 2025 14:13:26 +0100 Subject: [PATCH 03/20] Fix shrink flag --- src/passes/Inlining.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index cfe4e455398..49ce384e864 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -251,6 +251,9 @@ struct FunctionInfoScanner shrinks = false; break; } + } else { + shrinks = false; + break; } } From 6486c91798162cdb4b19815171031f36040b8f52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Thu, 19 Jun 2025 14:15:27 +0100 Subject: [PATCH 04/20] Rename isTrivialCall -> trivialCall --- src/passes/Inlining.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 49ce384e864..1668ba13d1a 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -95,7 +95,7 @@ struct FunctionInfo { // Something is used globally if there is a reference to it in a table or // export etc. 
bool usedGlobally; - TrivialCall isTrivialCall; + TrivialCall trivialCall; InliningMode inliningMode; FunctionInfo() { clear(); } @@ -107,7 +107,7 @@ struct FunctionInfo { hasLoops = false; hasTryDelegate = false; usedGlobally = false; - isTrivialCall = TrivialCall::NotTrivial; + trivialCall = TrivialCall::NotTrivial; inliningMode = InliningMode::Unknown; } @@ -119,7 +119,7 @@ struct FunctionInfo { hasLoops = other.hasLoops; hasTryDelegate = other.hasTryDelegate; usedGlobally = other.usedGlobally; - isTrivialCall = other.isTrivialCall; + trivialCall = other.trivialCall; inliningMode = other.inliningMode; return *this; } @@ -145,7 +145,7 @@ struct FunctionInfo { // arguments, and arguments are used in strictly increasing order, and each // argument is used at most once, then inlining it shrinks the code size and // it's also good for runtime. So we always inline it. - if (isTrivialCall == TrivialCall::Shrinks) { + if (trivialCall == TrivialCall::Shrinks) { return true; } // If it's so big that we have no flexible options that could allow it, @@ -164,7 +164,7 @@ struct FunctionInfo { // than once. In this case we inline if we're not optimizing for code size, // as inlining it to more than one call site may increase code size by // introducing locals. - if (isTrivialCall == TrivialCall::MayNotShrink) { + if (trivialCall == TrivialCall::MayNotShrink) { return true; } // This doesn't have calls. Inline if loops do not prevent us (normally, a @@ -252,20 +252,20 @@ struct FunctionInfoScanner break; } } else { - shrinks = false; - break; + shrinks = false; + break; } } if (shrinks) { - info.isTrivialCall = TrivialCall::Shrinks; + info.trivialCall = TrivialCall::Shrinks; return; } if (info.size == call->operands.size() + 1) { // This function body is a call with some trivial (size 1) operands like // LocalGet or Const, so it is a trivial call. 
- info.isTrivialCall = TrivialCall::MayNotShrink; + info.trivialCall = TrivialCall::MayNotShrink; } } } From aff857dbaa0233a75097f0ac63991a07400258cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Thu, 19 Jun 2025 15:31:29 +0100 Subject: [PATCH 05/20] Bring back calls check --- src/passes/Inlining.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 1668ba13d1a..b98e39e27be 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -167,9 +167,9 @@ struct FunctionInfo { if (trivialCall == TrivialCall::MayNotShrink) { return true; } - // This doesn't have calls. Inline if loops do not prevent us (normally, a + // Inline if it doesn't have calls and loops do not prevent us (normally, a // loop suggests a lot of work and so inlining is less useful). - return !hasLoops || options.inlining.allowFunctionsWithLoops; + return !hasCalls && (!hasLoops || options.inlining.allowFunctionsWithLoops); } }; From 48b6914fb4e9b11799dca175cd73fbfb5dd10615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Thu, 19 Jun 2025 15:39:26 +0100 Subject: [PATCH 06/20] Don't allow skipping args --- src/passes/Inlining.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index b98e39e27be..8eecf1a2a92 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -238,15 +238,16 @@ struct FunctionInfoScanner info.size = Measurer::measure(curr->body); if (auto* call = curr->body->dynCast()) { - // If call arguments are function locals read in order (maybe by skipping - // some of the arguments), then the code size always shrinks when the call - // is inlined. + // If call arguments are function locals read in order, then the code size + // always shrinks when the call is inlined. 
Note that we don't allow + // skipping function arguments here, as that can create `drop` + // instructions at the call sites, increasing code size. bool shrinks = true; Index lastLocalGetIndex = -1; for (auto* operand : call->operands) { if (auto* localGet = operand->dynCast()) { - if (localGet->index > lastLocalGetIndex) { - lastLocalGetIndex = localGet->index; + if (localGet->index > lastLocalGetIndex + 1) { + lastLocalGetIndex += 1; } else { shrinks = false; break; From c607e1ecdcb7b6429d029cb2e74e2cf9b0b8dbbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Fri, 20 Jun 2025 11:53:25 +0100 Subject: [PATCH 07/20] Add test --- test/lit/passes/inlining-trivial-calls.wast | 164 ++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 test/lit/passes/inlining-trivial-calls.wast diff --git a/test/lit/passes/inlining-trivial-calls.wast b/test/lit/passes/inlining-trivial-calls.wast new file mode 100644 index 00000000000..c4962af20fc --- /dev/null +++ b/test/lit/passes/inlining-trivial-calls.wast @@ -0,0 +1,164 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; Check that "trivial calls" are always inlined, even when optimizing for +;; size. +;; +;; A trivial call is a function that calls another, using its locals in +;; order, without skipping any locals. +;; +;; These functions can always be inlined because they can't cause binary size +;; increase at the call sites.
+ +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=0 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=1 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=2 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=3 -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func (param i32 i32 i32 i32 i32 i32))) + (type $0 (func (param i32 i32 i32 i32 i32 i32))) + ;; CHECK: (type $1 (func)) + (type $1 (func)) + ;; CHECK: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32 i32 i32 i32))) + (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32 i32 i32 i32))) + (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) (param $p3 i32) (param $p4 i32) (param $p5 i32) (param $p6 i32) + (call $imported-foo + (local.get $p1) + (local.get $p2) + (local.get $p3) + (local.get $p4) + (local.get $p5) + (local.get $p6) + ) + ) + ;; CHECK: (func $main (type $1) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (local $6 i32) + ;; CHECK-NEXT: (local $7 i32) + ;; CHECK-NEXT: (local $8 i32) + ;; CHECK-NEXT: (local $9 i32) + ;; CHECK-NEXT: (local $10 i32) + ;; CHECK-NEXT: (local $11 i32) + ;; CHECK-NEXT: (local $12 i32) + ;; CHECK-NEXT: (local $13 i32) + ;; CHECK-NEXT: (local $14 i32) + ;; CHECK-NEXT: (local $15 i32) + ;; CHECK-NEXT: (local $16 i32) + ;; CHECK-NEXT: (local $17 i32) + ;; CHECK-NEXT: (block $__inlined_func$call-foo + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; 
CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $__inlined_func$call-foo$1 + ;; CHECK-NEXT: (local.set $6 + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $8 + ;; CHECK-NEXT: (i32.const 9) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $9 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $10 + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $11 + ;; CHECK-NEXT: (i32.const 12) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: (local.get $7) + ;; CHECK-NEXT: (local.get $8) + ;; CHECK-NEXT: (local.get $9) + ;; CHECK-NEXT: (local.get $10) + ;; CHECK-NEXT: (local.get $11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $__inlined_func$call-foo$2 + ;; CHECK-NEXT: (local.set $12 + ;; CHECK-NEXT: (i32.const 13) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $13 + ;; CHECK-NEXT: (i32.const 14) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $14 + ;; CHECK-NEXT: (i32.const 15) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $15 + ;; CHECK-NEXT: (i32.const 16) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $16 + ;; CHECK-NEXT: (i32.const 17) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $17 + ;; CHECK-NEXT: (i32.const 18) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $12) + ;; CHECK-NEXT: (local.get $13) + ;; CHECK-NEXT: (local.get $14) + ;; CHECK-NEXT: (local.get $15) + ;; CHECK-NEXT: (local.get $16) + ;; CHECK-NEXT: (local.get $17) + ;; 
CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $main (type $1) + (call $call-foo + (i32.const 1) + (i32.const 2) + (i32.const 3) + (i32.const 4) + (i32.const 5) + (i32.const 6) + ) + (call $call-foo + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + ) + (call $call-foo + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const 16) + (i32.const 17) + (i32.const 18) + ) + ) +) From 3d86e662a74d4e6fe26df4d2e5b20d2b50234cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Fri, 20 Jun 2025 11:53:56 +0100 Subject: [PATCH 08/20] Refactor --- src/passes/Inlining.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 8eecf1a2a92..ab2f27c2b0f 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -243,11 +243,11 @@ struct FunctionInfoScanner // skipping function arguments here, as that can create `drop` // instructions at the call sites, increasing code size. 
bool shrinks = true; - Index lastLocalGetIndex = -1; + Index nextLocalGetIndex = 0; for (auto* operand : call->operands) { if (auto* localGet = operand->dynCast()) { - if (localGet->index > lastLocalGetIndex + 1) { - lastLocalGetIndex += 1; + if (localGet->index == nextLocalGetIndex) { + nextLocalGetIndex += 1; } else { shrinks = false; break; From 703451bbc356e54e19dec364e1c4d7d6bb5d5f35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Fri, 20 Jun 2025 12:57:42 +0100 Subject: [PATCH 09/20] More tests --- ...lls.wast => inlining-trivial-calls-1.wast} | 0 test/lit/passes/inlining-trivial-calls-2.wast | 61 ++++++++++++++++ test/lit/passes/inlining-trivial-calls-3.wast | 70 +++++++++++++++++++ 3 files changed, 131 insertions(+) rename test/lit/passes/{inlining-trivial-calls.wast => inlining-trivial-calls-1.wast} (100%) create mode 100644 test/lit/passes/inlining-trivial-calls-2.wast create mode 100644 test/lit/passes/inlining-trivial-calls-3.wast diff --git a/test/lit/passes/inlining-trivial-calls.wast b/test/lit/passes/inlining-trivial-calls-1.wast similarity index 100% rename from test/lit/passes/inlining-trivial-calls.wast rename to test/lit/passes/inlining-trivial-calls-1.wast diff --git a/test/lit/passes/inlining-trivial-calls-2.wast b/test/lit/passes/inlining-trivial-calls-2.wast new file mode 100644 index 00000000000..dfd908c5557 --- /dev/null +++ b/test/lit/passes/inlining-trivial-calls-2.wast @@ -0,0 +1,61 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; Same as inlining-trivial-calls-1, but arguments to the "trivial call" are +;; different than the caller's arguments. +;; +;; This can result in adding locals at the call sites and increase binary sizes. +;; So we don't inline these calls when optimizing for binary sizes. 
+ +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=0 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=1 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=2 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=3 -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func (param i32 i32))) + (type $0 (func (param i32 i32))) + ;; CHECK: (type $1 (func)) + (type $1 (func)) + ;; CHECK: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32))) + (import "env" "foo" (func $imported-foo (type $0) (param i32 i32))) + ;; CHECK: (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $p2) + ;; CHECK-NEXT: (local.get $p1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) + (call $imported-foo + (local.get $p2) + (local.get $p1) + ) + ) + ;; CHECK: (func $main (type $1) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $main (type $1) + (call $call-foo + (i32.const 1) + (i32.const 2) + ) + (call $call-foo + (i32.const 3) + (i32.const 4) + ) + (call $call-foo + (i32.const 5) + (i32.const 6) + ) + ) +) diff --git a/test/lit/passes/inlining-trivial-calls-3.wast b/test/lit/passes/inlining-trivial-calls-3.wast new file mode 100644 index 00000000000..b4a71e15571 --- /dev/null +++ b/test/lit/passes/inlining-trivial-calls-3.wast @@ -0,0 +1,70 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. 
+ +;; Same as inlining-trivial-calls-2, but the "trivial call" omits an +;; argument from its own arguments. +;; +;; This can result in `drop` instructions at the call sites and increase binary +;; sizes. So we don't inline these calls when optimizing for binary sizes. + +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=0 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=1 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=2 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=3 -S -o - | filecheck %s + +(module + ;; CHECK: (type $1 (func (param i32 i32))) + + ;; CHECK: (type $0 (func (param i32 i32 i32))) + (type $0 (func (param i32 i32 i32))) + (type $1 (func (param i32 i32))) + ;; CHECK: (type $2 (func)) + (type $2 (func)) + ;; CHECK: (import "env" "foo" (func $imported-foo (type $1) (param i32 i32))) + (import "env" "foo" (func $imported-foo (type $1) (param i32 i32))) + ;; CHECK: (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) (param $p3 i32) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $p1) + ;; CHECK-NEXT: (local.get $p3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) (param $p3 i32) + (call $imported-foo + (local.get $p1) + (local.get $p3) + ) + ) + ;; CHECK: (func $main (type $2) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: (i32.const 9) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $main (type $2) + (call $call-foo + (i32.const 1) + (i32.const 2) + (i32.const 3) + ) + (call $call-foo + (i32.const 4) +
(i32.const 5) + (i32.const 6) + ) + (call $call-foo + (i32.const 7) + (i32.const 8) + (i32.const 9) + ) + ) +) From ae77f8b75a06412de6bfe41e7602777dfb24a4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Mon, 23 Jun 2025 10:17:37 +0100 Subject: [PATCH 10/20] Rewording --- src/passes/Inlining.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index ab2f27c2b0f..9680616d80a 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -67,16 +67,19 @@ enum class InliningMode { SplitPatternB }; -// Whether a function just calls another function with only `local.get`s as -// arguments. +// Whether a function just calls another function in a way that always shrinks +// when the calling function is inlined. enum class TrivialCall { - // Function does not just call another function, with only `local.get`s as - // arguments. + // Function does not just call another function, or it may not shrink when + // inlined. NotTrivial, - // Function just calls another function, and all arguments are `local.get`s - // with strictly increasing local index. This means code size always shrinks - // when this function is inlined. + // Function just calls another function, with `local.get`s as arguments, and + // with each `local` used exactly once, and in the order they appear in the + // argument list. + // + // In this case, inlining the function generates smaller code, and it is also + // good for runtime.
Shrinks, // Function just calls another function, but maybe with arguments other than @@ -141,10 +144,8 @@ struct FunctionInfo { size <= options.inlining.oneCallerInlineMaxSize) { return true; } - // If the function just calls another function using its locals as - // arguments, and arguments are used in strictly increasing order, and each - // argument is used at most once, then inlining it shrinks the code size and - // it's also good for runtime. So we always inline it. + // If the function calls another one in a way that always shrinks when + // inlined, inline it in all optimization and shrink modes. if (trivialCall == TrivialCall::Shrinks) { return true; } @@ -167,8 +168,9 @@ struct FunctionInfo { if (trivialCall == TrivialCall::MayNotShrink) { return true; } - // Inline if it doesn't have calls and loops do not prevent us (normally, a - // loop suggests a lot of work and so inlining is less useful). + // Trivial calls are already handled. Inline if + // 1. The function doesn't have calls, and + // 2. The function doesn't have loops, or we allow inlining with loops. return !hasCalls && (!hasLoops || options.inlining.allowFunctionsWithLoops); } }; From c64e72bddcdc0bd701e824e847ab74e0351b385a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Mon, 23 Jun 2025 11:14:21 +0100 Subject: [PATCH 11/20] Add test for const args --- test/lit/passes/inlining-const-args.wat | 88 +++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 test/lit/passes/inlining-const-args.wat diff --git a/test/lit/passes/inlining-const-args.wat b/test/lit/passes/inlining-const-args.wat new file mode 100644 index 00000000000..80caa137363 --- /dev/null +++ b/test/lit/passes/inlining-const-args.wat @@ -0,0 +1,88 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; With `-O3`, when not shrinking, we always inline calls to functions that just +;; call other functions with "trivial" arguments. 
+;; +;; A trivial argument for now is just an instruction with size 1. E.g. +;; `local.get`, constants. + +;; RUN: foreach %s %t wasm-opt -all -O3 -S -o - | filecheck %s --check-prefix=O3 +;; RUN: foreach %s %t wasm-opt -all -O2 -S -o - | filecheck %s --check-prefix=O2 +;; RUN: foreach %s %t wasm-opt -all -Os -S -o - | filecheck %s --check-prefix=Os + +(module + ;; O3: (type $0 (func (param i32 i32 i32))) + ;; O2: (type $1 (func)) + + ;; O2: (type $0 (func (param i32 i32 i32))) + ;; Os: (type $1 (func)) + + ;; Os: (type $0 (func (param i32 i32 i32))) + (type $0 (func (param i32 i32 i32))) + + ;; O3: (type $1 (func)) + (type $1 (func)) + + (type $2 (func)) + + ;; O3: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + ;; O2: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + ;; Os: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + + ;; O3: (export "main" (func $main)) + ;; O2: (export "main" (func $main)) + ;; Os: (export "main" (func $main)) + (export "main" (func $main)) + + ;; O2: (func $call-foo (type $1) + ;; O2-NEXT: (call $imported-foo + ;; O2-NEXT: (i32.const 1) + ;; O2-NEXT: (i32.const 2) + ;; O2-NEXT: (i32.const 3) + ;; O2-NEXT: ) + ;; O2-NEXT: ) + ;; Os: (func $call-foo (type $1) + ;; Os-NEXT: (call $imported-foo + ;; Os-NEXT: (i32.const 1) + ;; Os-NEXT: (i32.const 2) + ;; Os-NEXT: (i32.const 3) + ;; Os-NEXT: ) + ;; Os-NEXT: ) + (func $call-foo (type $1) + (call $imported-foo + (i32.const 1) + (i32.const 2) + (i32.const 3))) + + ;; O3: (func $main (type $1) + ;; O3-NEXT: (call $imported-foo + ;; O3-NEXT: (i32.const 1) + ;; O3-NEXT: (i32.const 2) + ;; O3-NEXT: (i32.const 3) + ;; O3-NEXT: ) + ;; O3-NEXT: (call $imported-foo + ;; O3-NEXT: (i32.const 1) + ;; O3-NEXT: (i32.const 2) + ;; O3-NEXT: (i32.const 3) + ;; O3-NEXT: ) + ;; O3-NEXT: (call $imported-foo + ;; O3-NEXT: (i32.const 1) + ;; O3-NEXT: (i32.const 2) + ;; 
O3-NEXT: (i32.const 3) + ;; O3-NEXT: ) + ;; O3-NEXT: ) + ;; O2: (func $main (type $1) + ;; O2-NEXT: (call $call-foo) + ;; O2-NEXT: (call $call-foo) + ;; O2-NEXT: (call $call-foo) + ;; O2-NEXT: ) + ;; Os: (func $main (type $1) + ;; Os-NEXT: (call $call-foo) + ;; Os-NEXT: (call $call-foo) + ;; Os-NEXT: (call $call-foo) + ;; Os-NEXT: ) + (func $main (type $2) + (call $call-foo) + (call $call-foo) + (call $call-foo))) From 85c29e55a449c75e746c5334492918b619867085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Mon, 23 Jun 2025 11:15:02 +0100 Subject: [PATCH 12/20] Update comments --- test/lit/passes/inlining-const-args.wat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/inlining-const-args.wat b/test/lit/passes/inlining-const-args.wat index 80caa137363..023c62eaac6 100644 --- a/test/lit/passes/inlining-const-args.wat +++ b/test/lit/passes/inlining-const-args.wat @@ -1,7 +1,7 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; With `-O3`, when not shrinking, we always inline calls to functions that just -;; call other functions with "trivial" arguments. +;; With `-O3`, we always inline calls to functions that just call other +;; functions with "trivial" arguments. ;; ;; A trivial argument for now is just an instruction with size 1. E.g. ;; `local.get`, constants. From 5b0b49e1c8c968aac383a4a7ae026fa752b0dee4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Tue, 24 Jun 2025 13:52:06 +0100 Subject: [PATCH 13/20] Update comments --- src/passes/Inlining.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 9680616d80a..f488b42c2c5 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -82,9 +82,13 @@ enum class TrivialCall { // good for runtime. 
Shrinks, - // Function just calls another function, but maybe with arguments other than - // `local.get`s, or maybe some locals are used more than once. In this case - // code size does not always shrink. + // Function just calls another function, but maybe with constant arguments, or + // maybe some locals are used more than once. In these cases code size does + // not always shrink: at the call sites, omitted locals can create `drop` + // instructions, a local used multiple times can create new locals, and + // encoding of constants may be larger than just a `local.get` with a small + // index. In these cases we still want to inline with `-O3`, but the code size + // may increase when inlined. MayNotShrink, }; From c7dbd5443d990be653184d699cbfa749a0968de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Tue, 24 Jun 2025 14:30:02 +0100 Subject: [PATCH 14/20] Update another comment --- src/passes/Inlining.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index f488b42c2c5..20cbc88baae 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -164,11 +164,8 @@ struct FunctionInfo { if (options.shrinkLevel > 0 || options.optimizeLevel < 3) { return false; } - // The function just calls another function, but it's using locals in - // different order than the argument order, and/or using some locals more - // than once. In this case we inline if we're not optimizing for code size, - // as inlining it to more than one call site may increase code size by - // introducing locals. + // The function just calls another function, but the code size may increase + // when inlined. We only inline it fully with `-O3`. 
if (trivialCall == TrivialCall::MayNotShrink) { return true; } From d5f680e2a06956f9f2b1ef259423468da19c3dcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Fri, 27 Jun 2025 20:38:42 +0200 Subject: [PATCH 15/20] Update src/passes/Inlining.cpp Co-authored-by: Alon Zakai --- src/passes/Inlining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 20cbc88baae..1f93493f7bc 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -250,7 +250,7 @@ struct FunctionInfoScanner for (auto* operand : call->operands) { if (auto* localGet = operand->dynCast<LocalGet>()) { if (localGet->index == nextLocalGetIndex) { - nextLocalGetIndex += 1; + nextLocalGetIndex++; } else { shrinks = false; break; From 1ea9adfce331f2a4ac024bf52605620fcffa64cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Fri, 27 Jun 2025 19:54:45 +0100 Subject: [PATCH 16/20] Update comments --- test/lit/passes/inlining-const-args.wat | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/lit/passes/inlining-const-args.wat b/test/lit/passes/inlining-const-args.wat index 023c62eaac6..267715378ff 100644 --- a/test/lit/passes/inlining-const-args.wat +++ b/test/lit/passes/inlining-const-args.wat @@ -5,6 +5,9 @@ ;; ;; A trivial argument for now is just an instruction with size 1. E.g. ;; `local.get`, constants. +;; +;; In this test we check inlining when the trivial call arguments are constant. +;; In tests inlining-trivial-calls-{1,2,3}.wast we check locals. ;; RUN: foreach %s %t wasm-opt -all -O3 -S -o - | filecheck %s --check-prefix=O3 ;; RUN: foreach %s %t wasm-opt -all -O2 -S -o - | filecheck %s --check-prefix=O2 @@ -83,6 +86,10 @@ ;; Os-NEXT: (call $call-foo) ;; Os-NEXT: ) (func $main (type $2) + ;; All calls below should be inlined in -O3, but not in -O2 or -Os.
We call + ;; it multiple times to make sure it won't be inlined because there's only + ;; one call, instead it will be inlined based on optimization settings and + ;; whether the call is trivial. (call $call-foo) (call $call-foo) (call $call-foo))) From 602e238beb1c9706be500f4d85867afa1b008ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Fri, 27 Jun 2025 19:55:47 +0100 Subject: [PATCH 17/20] Formatting --- test/lit/passes/inlining-const-args.wat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/inlining-const-args.wat b/test/lit/passes/inlining-const-args.wat index 267715378ff..73fedcbc853 100644 --- a/test/lit/passes/inlining-const-args.wat +++ b/test/lit/passes/inlining-const-args.wat @@ -6,8 +6,8 @@ ;; A trivial argument for now is just an instruction with size 1. E.g. ;; `local.get`, constants. ;; -;; In this test we check inlining when the trivial call arguments are constant. -;; In tests inlining-trivial-calls-{1,2,3}.wast we check locals. +;; In this test we check inlining when the trivial call arguments are +;; constants. In tests inlining-trivial-calls-{1,2,3}.wast we check locals. 
;; RUN: foreach %s %t wasm-opt -all -O3 -S -o - | filecheck %s --check-prefix=O3 ;; RUN: foreach %s %t wasm-opt -all -O2 -S -o - | filecheck %s --check-prefix=O2 From aec0fd5d83a24a3f688216eb9224bae5df80cfae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Fri, 27 Jun 2025 23:30:17 +0200 Subject: [PATCH 18/20] Update test/lit/passes/inlining-trivial-calls-3.wast Co-authored-by: Alon Zakai --- test/lit/passes/inlining-trivial-calls-3.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/inlining-trivial-calls-3.wast b/test/lit/passes/inlining-trivial-calls-3.wast index b4a71e15571..8628cf7c547 100644 --- a/test/lit/passes/inlining-trivial-calls-3.wast +++ b/test/lit/passes/inlining-trivial-calls-3.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; Same as inlining-trivial-calls-2, but arguments the "trivial call" omits an +;; Same as inlining-trivial-calls-2, but the "trivial call" omits an ;; argument from its own arguments. ;; ;; This can result in `drop` instructions at the call sites and increase binary From 00db7d99ab1931154b0bc272ac7dcbe645f5c5fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Mon, 30 Jun 2025 11:19:19 +0100 Subject: [PATCH 19/20] Generate MayNotShrinks for unops and binops too --- src/passes/Inlining.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index cc8c9a776ff..3fb721c1827 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -167,7 +167,7 @@ struct FunctionInfo { if (trivialInstruction == TrivialInstruction::MayNotShrink) { return true; } - // Trivial calls are already handled. Inline if + // Trivial instructions are already handled. Inline if // 1. The function doesn't have calls, and // 2. The function doesn't have loops, or we allow inlining with loops. 
return !hasCalls && (!hasLoops || options.inlining.allowFunctionsWithLoops); @@ -265,8 +265,8 @@ struct FunctionInfoScanner } if (info.size == call->operands.size() + 1) { - // This function body is a call with some trivial (size 1) operands like - // LocalGet or Const, so it is a trivial call. + // This function body is an instruction with some trivial (size 1) + // operands like LocalGet or Const, so it is a trivial instruction. info.trivialInstruction = TrivialInstruction::MayNotShrink; } @@ -276,14 +276,25 @@ struct FunctionInfoScanner if (auto* right = binary->right->dynCast<LocalGet>()) { if (right->index > left->index) { info.trivialInstruction = TrivialInstruction::Shrinks; + return; } } } + if (info.size == 3) { + // Same as above: if arguments have size 1 we consider it a trivial + // instruction. + info.trivialInstruction = TrivialInstruction::MayNotShrink; + } + } else if (auto* unary = curr->body->dynCast<Unary>()) { info.trivialInstruction = TrivialInstruction::MayNotShrink; if (unary->value->dynCast<LocalGet>()) { info.trivialInstruction = TrivialInstruction::Shrinks; + } else if (info.size == 2) { + // Same as above: if the argument has size 1 we consider it a trivial + // instruction. + info.trivialInstruction = TrivialInstruction::MayNotShrink; } } } From e0d1c2876712b142c13075fa8e93a9d626ddea3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20A=C4=9Facan?= Date: Tue, 1 Jul 2025 11:46:50 +0100 Subject: [PATCH 20/20] Add test --- .../passes/inlining-trivial-instructions.wast | 401 ++++++++++++++++++ 1 file changed, 401 insertions(+) create mode 100644 test/lit/passes/inlining-trivial-instructions.wast diff --git a/test/lit/passes/inlining-trivial-instructions.wast b/test/lit/passes/inlining-trivial-instructions.wast new file mode 100644 index 00000000000..339bede9219 --- /dev/null +++ b/test/lit/passes/inlining-trivial-instructions.wast @@ -0,0 +1,401 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+ +;; RUN: wasm-opt -all --inlining %s -S -o - | filecheck %s --check-prefix=shrink0 +;; RUN: wasm-opt -all --inlining --shrink-level=1 %s -S -o - | filecheck %s --check-prefix=shrink1 +;; RUN: wasm-opt -all --inlining --optimize-level=3 %s -S -o - | filecheck %s --check-prefix=optimize3 + +(module + ;; shrink0: (type $2 (func (param i32))) + + ;; shrink0: (type $1 (func (param i32) (result i32))) + + ;; shrink0: (type $0 (func (param i32 i32) (result i32))) + ;; shrink1: (type $2 (func (param i32))) + + ;; shrink1: (type $1 (func (param i32) (result i32))) + + ;; shrink1: (type $0 (func (param i32 i32) (result i32))) + (type $0 (func (param i32 i32) (result i32))) + (type $1 (func (param i32) (result i32))) + ;; optimize3: (type $2 (func (param i32))) + (type $2 (func (param i32))) + ;; shrink0: (type $3 (func)) + ;; shrink1: (type $3 (func)) + ;; optimize3: (type $3 (func)) + (type $3 (func)) + ;; shrink0: (import "env" "foo" (func $drop-import (type $2) (param i32))) + ;; shrink1: (import "env" "foo" (func $drop-import (type $2) (param i32))) + ;; optimize3: (import "env" "foo" (func $drop-import (type $2) (param i32))) + (import "env" "foo" (func $drop-import (type $2) (param i32))) + ;; shrink0: (export "main" (func $main)) + ;; shrink1: (export "main" (func $main)) + ;; optimize3: (export "main" (func $main)) + (export "main" (func $main)) + ;; This will be inlined in all shrink and optimization modes. + (func $trivial-binary-instruction-small (type $0) (param $p1 i32) (param $p2 i32) (result i32) + (i32.add + (local.get $p1) + (local.get $p2) + ) + ) + ;; This will always be inlined when not shrinking and optimize level is 3. 
+ ;; shrink0: (func $trivial-binary-instruction-large (type $1) (param $p1 i32) (result i32) + ;; shrink0-NEXT: (i32.add + ;; shrink0-NEXT: (local.get $p1) + ;; shrink0-NEXT: (i32.const 2147483647) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink1: (func $trivial-binary-instruction-large (type $1) (param $p1 i32) (result i32) + ;; shrink1-NEXT: (i32.add + ;; shrink1-NEXT: (local.get $p1) + ;; shrink1-NEXT: (i32.const 2147483647) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + (func $trivial-binary-instruction-large (type $1) (param $p1 i32) (result i32) + (i32.add + (local.get $p1) + (i32.const 2147483647) + ) + ) + ;; This will always be inlined when not shrinking and optimize level is 3. + ;; shrink0: (func $non-trivial-binary-instruction (type $0) (param $p1 i32) (param $p2 i32) (result i32) + ;; shrink0-NEXT: (i32.add + ;; shrink0-NEXT: (local.get $p2) + ;; shrink0-NEXT: (local.get $p1) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink1: (func $non-trivial-binary-instruction (type $0) (param $p1 i32) (param $p2 i32) (result i32) + ;; shrink1-NEXT: (i32.add + ;; shrink1-NEXT: (local.get $p2) + ;; shrink1-NEXT: (local.get $p1) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + (func $non-trivial-binary-instruction (type $0) (param $p1 i32) (param $p2 i32) (result i32) + (i32.add + (local.get $p2) + (local.get $p1) + ) + ) + ;; This will be inlined in all shrink and optimization modes. + (func $trivial-unary-instruction (type $1) (param $p1 i32) (result i32) + (i32.eqz + (local.get $p1) + ) + ) + ;; Note: we need more than one caller for each function to test inlining based + ;; on trivial-ness of the function bodies. With one call we always inline. 
+ ;; shrink0: (func $main (type $3) + ;; shrink0-NEXT: (local $0 i32) + ;; shrink0-NEXT: (local $1 i32) + ;; shrink0-NEXT: (local $2 i32) + ;; shrink0-NEXT: (local $3 i32) + ;; shrink0-NEXT: (local $4 i32) + ;; shrink0-NEXT: (local $5 i32) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (block $__inlined_func$trivial-binary-instruction-small (result i32) + ;; shrink0-NEXT: (local.set $0 + ;; shrink0-NEXT: (i32.const 1) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (local.set $1 + ;; shrink0-NEXT: (i32.const 2) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (i32.add + ;; shrink0-NEXT: (local.get $0) + ;; shrink0-NEXT: (local.get $1) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (block $__inlined_func$trivial-binary-instruction-small$1 (result i32) + ;; shrink0-NEXT: (local.set $2 + ;; shrink0-NEXT: (i32.const 3) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (local.set $3 + ;; shrink0-NEXT: (i32.const 4) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (i32.add + ;; shrink0-NEXT: (local.get $2) + ;; shrink0-NEXT: (local.get $3) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (call $trivial-binary-instruction-large + ;; shrink0-NEXT: (i32.const 5) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (call $trivial-binary-instruction-large + ;; shrink0-NEXT: (i32.const 6) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (call $non-trivial-binary-instruction + ;; shrink0-NEXT: (i32.const 7) + ;; shrink0-NEXT: (i32.const 8) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (call $non-trivial-binary-instruction + ;; shrink0-NEXT: (i32.const 9) + ;; shrink0-NEXT: (i32.const 10) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (block 
$__inlined_func$trivial-unary-instruction$2 (result i32) + ;; shrink0-NEXT: (local.set $4 + ;; shrink0-NEXT: (i32.const 11) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (i32.eqz + ;; shrink0-NEXT: (local.get $4) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (call $drop-import + ;; shrink0-NEXT: (block $__inlined_func$trivial-unary-instruction$3 (result i32) + ;; shrink0-NEXT: (local.set $5 + ;; shrink0-NEXT: (i32.const 12) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: (i32.eqz + ;; shrink0-NEXT: (local.get $5) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink0-NEXT: ) + ;; shrink1: (func $main (type $3) + ;; shrink1-NEXT: (local $0 i32) + ;; shrink1-NEXT: (local $1 i32) + ;; shrink1-NEXT: (local $2 i32) + ;; shrink1-NEXT: (local $3 i32) + ;; shrink1-NEXT: (local $4 i32) + ;; shrink1-NEXT: (local $5 i32) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (block $__inlined_func$trivial-binary-instruction-small (result i32) + ;; shrink1-NEXT: (local.set $0 + ;; shrink1-NEXT: (i32.const 1) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (local.set $1 + ;; shrink1-NEXT: (i32.const 2) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (i32.add + ;; shrink1-NEXT: (local.get $0) + ;; shrink1-NEXT: (local.get $1) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (block $__inlined_func$trivial-binary-instruction-small$1 (result i32) + ;; shrink1-NEXT: (local.set $2 + ;; shrink1-NEXT: (i32.const 3) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (local.set $3 + ;; shrink1-NEXT: (i32.const 4) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (i32.add + ;; shrink1-NEXT: (local.get $2) + ;; shrink1-NEXT: (local.get $3) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (call $trivial-binary-instruction-large + ;; shrink1-NEXT: (i32.const 5) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; 
shrink1-NEXT: (call $trivial-binary-instruction-large + ;; shrink1-NEXT: (i32.const 6) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (call $non-trivial-binary-instruction + ;; shrink1-NEXT: (i32.const 7) + ;; shrink1-NEXT: (i32.const 8) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (call $non-trivial-binary-instruction + ;; shrink1-NEXT: (i32.const 9) + ;; shrink1-NEXT: (i32.const 10) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (block $__inlined_func$trivial-unary-instruction$2 (result i32) + ;; shrink1-NEXT: (local.set $4 + ;; shrink1-NEXT: (i32.const 11) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (i32.eqz + ;; shrink1-NEXT: (local.get $4) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (call $drop-import + ;; shrink1-NEXT: (block $__inlined_func$trivial-unary-instruction$3 (result i32) + ;; shrink1-NEXT: (local.set $5 + ;; shrink1-NEXT: (i32.const 12) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: (i32.eqz + ;; shrink1-NEXT: (local.get $5) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; shrink1-NEXT: ) + ;; optimize3: (func $main (type $3) + ;; optimize3-NEXT: (local $0 i32) + ;; optimize3-NEXT: (local $1 i32) + ;; optimize3-NEXT: (local $2 i32) + ;; optimize3-NEXT: (local $3 i32) + ;; optimize3-NEXT: (local $4 i32) + ;; optimize3-NEXT: (local $5 i32) + ;; optimize3-NEXT: (local $6 i32) + ;; optimize3-NEXT: (local $7 i32) + ;; optimize3-NEXT: (local $8 i32) + ;; optimize3-NEXT: (local $9 i32) + ;; optimize3-NEXT: (local $10 i32) + ;; optimize3-NEXT: (local $11 i32) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$trivial-binary-instruction-small (result i32) + ;; optimize3-NEXT: (local.set $0 + ;; optimize3-NEXT: (i32.const 1) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (local.set $1 + ;; optimize3-NEXT: (i32.const 2) + ;; 
optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.add + ;; optimize3-NEXT: (local.get $0) + ;; optimize3-NEXT: (local.get $1) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$trivial-binary-instruction-small$1 (result i32) + ;; optimize3-NEXT: (local.set $2 + ;; optimize3-NEXT: (i32.const 3) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (local.set $3 + ;; optimize3-NEXT: (i32.const 4) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.add + ;; optimize3-NEXT: (local.get $2) + ;; optimize3-NEXT: (local.get $3) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$trivial-binary-instruction-large$2 (result i32) + ;; optimize3-NEXT: (local.set $4 + ;; optimize3-NEXT: (i32.const 5) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.add + ;; optimize3-NEXT: (local.get $4) + ;; optimize3-NEXT: (i32.const 2147483647) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$trivial-binary-instruction-large$3 (result i32) + ;; optimize3-NEXT: (local.set $5 + ;; optimize3-NEXT: (i32.const 6) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.add + ;; optimize3-NEXT: (local.get $5) + ;; optimize3-NEXT: (i32.const 2147483647) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$non-trivial-binary-instruction$4 (result i32) + ;; optimize3-NEXT: (local.set $6 + ;; optimize3-NEXT: (i32.const 7) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (local.set $7 + ;; optimize3-NEXT: (i32.const 8) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.add + ;; optimize3-NEXT: (local.get $7) + ;; optimize3-NEXT: (local.get $6) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call 
$drop-import + ;; optimize3-NEXT: (block $__inlined_func$non-trivial-binary-instruction$5 (result i32) + ;; optimize3-NEXT: (local.set $8 + ;; optimize3-NEXT: (i32.const 9) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (local.set $9 + ;; optimize3-NEXT: (i32.const 10) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.add + ;; optimize3-NEXT: (local.get $9) + ;; optimize3-NEXT: (local.get $8) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$trivial-unary-instruction$6 (result i32) + ;; optimize3-NEXT: (local.set $10 + ;; optimize3-NEXT: (i32.const 11) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.eqz + ;; optimize3-NEXT: (local.get $10) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (call $drop-import + ;; optimize3-NEXT: (block $__inlined_func$trivial-unary-instruction$7 (result i32) + ;; optimize3-NEXT: (local.set $11 + ;; optimize3-NEXT: (i32.const 12) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: (i32.eqz + ;; optimize3-NEXT: (local.get $11) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + ;; optimize3-NEXT: ) + (func $main (type $3) + (call $drop-import + (call $trivial-binary-instruction-small + (i32.const 1) + (i32.const 2) + ) + ) + (call $drop-import + (call $trivial-binary-instruction-small + (i32.const 3) + (i32.const 4) + ) + ) + (call $drop-import + (call $trivial-binary-instruction-large + (i32.const 5) + ) + ) + (call $drop-import + (call $trivial-binary-instruction-large + (i32.const 6) + ) + ) + (call $drop-import + (call $non-trivial-binary-instruction + (i32.const 7) + (i32.const 8) + ) + ) + (call $drop-import + (call $non-trivial-binary-instruction + (i32.const 9) + (i32.const 10) + ) + ) + (call $drop-import + (call $trivial-unary-instruction + (i32.const 11) + ) + ) + (call $drop-import + (call $trivial-unary-instruction + (i32.const 12) + ) + ) + ) +)