Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pulley: Add some disas tests of suboptimal codegen patterns #10101

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions tests/disas/pulley/coremark-1.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
;;! target = "pulley64"
;;! test = "compile"

;; Small test of a loop extracted from "coremark-minimal.wasm" here:
;; https://github.com/wasmi-labs/wasmi-benchmarks/blob/d045a88246d3ac9b0b80b188feda54b89ca126b5/benches/res/wasm/coremark-minimal.wasm
;;
;; This doesn't reproduce the exact regalloc decisions, but it does currently
;; show something suboptimal for Pulley: the end of the loop is currently
;; compiled to:
;;
;; * `br_if_not32` to exit the loop
;; * `xmov` to move some registers in place
;; * `jump` to resume the loop
;;
;; Ideally to minimize Pulley opcodes this would skip the `xmov` and `jump`
;; with different register allocation and the back-edge would be a single
;; conditional branch.

(module
;; Linear memory with a minimum size of 10 pages; the 16-bit loads below read
;; from it.
(memory 10)
;; Loop kernel under test. Each iteration:
;;   * loads an unsigned 16-bit value at address `$p1` and at address `$p2`
;;     and multiplies them,
;;   * adds `((prod >> 2) & 15) * ((prod >> 5) & 127)` to the previous value
;;     of `$accum`,
;;   * advances `$p2` by 2 and `$p1` by `$stride`,
;;   * decrements `$cnt` and branches back to the loop head while it is
;;     non-zero.
;; After the loop it calls `$other` and returns `$accum`.
(func (param $p1 i32) (param $p2 i32) (param $cnt i32)
(param $stride i32)
(result i32)
(local $accum i32)
loop
;; Push the running total first; it is the left operand of the final
;; `i32.add` below.
local.get $accum

;; prod = load16_u($p1) * load16_u($p2)
local.get $p1
i32.load16_u
local.get $p2
i32.load16_u
i32.mul
;; `$accum` is reused as scratch storage for `prod`; the previous total is
;; already on the operand stack.
local.tee $accum
;; (prod >> 2) & 15
i32.const 2
i32.shr_u
i32.const 15
i32.and
;; (prod >> 5) & 127
local.get $accum
i32.const 5
i32.shr_u
i32.const 127
i32.and
;; Multiply the two bit-fields and add the result to the previous total.
i32.mul
i32.add
local.set $accum

;; $p2 += 2
local.get $p2
i32.const 2
i32.add
local.set $p2

;; $p1 += $stride
local.get $p1
local.get $stride
i32.add
local.set $p1

;; $cnt -= 1, leaving the new value on the stack as the branch condition.
local.get $cnt
i32.const -1
i32.add
local.tee $cnt

;; Back-edge: continue the loop while `$cnt` is non-zero.
br_if 0
end

;; Call the empty function `$other` after the loop.
;; NOTE(review): presumably here so that `$accum` must stay live across a
;; call — confirm intent.
call $other

(local.get $accum)
)

;; Empty callee used by the function above.
(func $other)
)
;; wasm[0]::function[0]:
;; push_frame_save 16, x16
;; xzero x6
;; xload64le_offset8 x11, x0, 80
;; xload64le_offset8 x12, x0, 88
;; xbc32_bound_trap x2, x12, 2
;; xload16le_u32_g32 x13, x11, x2, 0
;; xbc32_bound_trap x3, x12, 2
;; xload16le_u32_g32 x14, x11, x3, 0
;; xsub32_u8 x4, x4, 1
;; xmul32 x14, x13, x14
;; xshr32_u_u6 x15, x14, 2
;; xband32_s8 x15, x15, 15
;; xshr32_u_u6 x14, x14, 5
;; xband32_s8 x14, x14, 127
;; xmadd32 x6, x15, x14, x6
;; xmov x16, x6
;; xadd32 x2, x2, x5
;; xadd32_u8 x3, x3, 2
;; br_if_not32 x4, 0xe // target = 0x53
;; 4b: xmov x6, x16
;; jump -0x43 // target = 0xb
;; 53: call2 x0, x0, 0x10 // target = 0x63
;; xmov x0, x16
;; pop_frame_restore 16, x16
;; ret
;;
;; wasm[0]::function[1]::other:
;; push_frame
;; pop_frame
;; ret
71 changes: 71 additions & 0 deletions tests/disas/pulley/fib.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
;;! target = "pulley64"
;;! test = "compile"

;; Test of a recursive fibonacci routine and its codegen
;;
;; FIXME(#9942) this test currently has an extraneous `xmov` after the second
;; call instruction.

(module
;; Recursive Fibonacci, exported as "fib":
;;   fib(0) = 1, fib(1) = 1, otherwise fib(n) = fib(n - 1) + fib(n - 2).
;; The two recursive calls are what produce the `call3` instructions in the
;; expected output below.
(func $fib (export "fib") (param $n i32) (result i32)
;; Base case: n == 0 yields 1.
(if (result i32)
(i32.eq
(i32.const 0)
(local.get $n)
)
(then
(i32.const 1)
)
(else
;; Base case: n == 1 yields 1.
(if (result i32)
(i32.eq
(i32.const 1)
(local.get $n)
)
(then
(i32.const 1)
)
(else
;; Recursive case: sum of the two preceding values.
(i32.add
;; fib(n - 1)
(call $fib
(i32.add
(local.get $n)
(i32.const -1)
)
)
;; fib(n - 2)
(call $fib
(i32.add
(local.get $n)
(i32.const -2)
)
)
)
)
)
)
)
)
)
;; wasm[0]::function[0]::fib:
;; push_frame_save 32, x17, x24, x29
;; br_if_xeq32_i8 x2, 0, 0x47 // target = 0x4c
;; br_if_xeq32_i8 x2, 1, 0x39 // target = 0x45
;; 13: xsub32_u8 x14, x2, 1
;; xmov x24, x0
;; xmov x29, x2
;; call3 x24, x24, x14, -0x1d // target = 0x0
;; xmov x17, x0
;; xmov x2, x29
;; xmov x0, x24
;; xsub32_u8 x14, x2, 2
;; call3 x0, x0, x14, -0x32 // target = 0x0
;; xmov x5, x17
;; xadd32 x0, x5, x0
;; jump 0xe // target = 0x4e
;; 45: xone x0
;; jump 0x7 // target = 0x4e
;; 4c: xone x0
;; pop_frame_restore 32, x17, x24, x29
;; ret
Loading