Skip to content

Commit 973b95e

Browse files
adibiagiotstellar
authored and committed
[MCA][LSUnit] Correctly update the internal group flags on store barrier execution. Fixes PR48024.
This is likely to be a regression introduced by my last refactoring of the LSUnit (commit 5578ec3). Before this patch, the "CurrentStoreBarrierGroupID" index was not correctly reset on store barrier executions. This was leading to unexpected crashes like the one reported as PR48024. (cherry picked from commit 0e20666)
1 parent 06f479c commit 973b95e

File tree

3 files changed

+206
-0
lines changed

3 files changed

+206
-0
lines changed

llvm/lib/MCA/HardwareUnits/LSUnit.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) {
243243
CurrentStoreGroupID = 0;
244244
if (GroupID == CurrentLoadBarrierGroupID)
245245
CurrentLoadBarrierGroupID = 0;
246+
if (GroupID == CurrentStoreBarrierGroupID)
247+
CurrentStoreBarrierGroupID = 0;
246248
}
247249
}
248250

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
3+
4+
# Code snippet taken from PR48024.
5+
6+
stmxcsr -4(%rsp)
7+
movl $-24577, %eax # imm = 0x9FFF
8+
andl -4(%rsp), %eax
9+
movl %eax, -8(%rsp)
10+
ldmxcsr -8(%rsp)
11+
retq
12+
13+
# CHECK: Iterations: 100
14+
# CHECK-NEXT: Instructions: 600
15+
# CHECK-NEXT: Total Cycles: 704
16+
# CHECK-NEXT: Total uOps: 600
17+
18+
# CHECK: Dispatch Width: 2
19+
# CHECK-NEXT: uOps Per Cycle: 0.85
20+
# CHECK-NEXT: IPC: 0.85
21+
# CHECK-NEXT: Block RThroughput: 3.0
22+
23+
# CHECK: Instruction Info:
24+
# CHECK-NEXT: [1]: #uOps
25+
# CHECK-NEXT: [2]: Latency
26+
# CHECK-NEXT: [3]: RThroughput
27+
# CHECK-NEXT: [4]: MayLoad
28+
# CHECK-NEXT: [5]: MayStore
29+
# CHECK-NEXT: [6]: HasSideEffects (U)
30+
31+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
32+
# CHECK-NEXT: 1 1 1.00 * U stmxcsr -4(%rsp)
33+
# CHECK-NEXT: 1 1 0.50 movl $-24577, %eax
34+
# CHECK-NEXT: 1 4 1.00 * andl -4(%rsp), %eax
35+
# CHECK-NEXT: 1 1 1.00 * movl %eax, -8(%rsp)
36+
# CHECK-NEXT: 1 3 1.00 * U ldmxcsr -8(%rsp)
37+
# CHECK-NEXT: 1 4 1.00 U retq
38+
39+
# CHECK: Resources:
40+
# CHECK-NEXT: [0] - JALU0
41+
# CHECK-NEXT: [1] - JALU1
42+
# CHECK-NEXT: [2] - JDiv
43+
# CHECK-NEXT: [3] - JFPA
44+
# CHECK-NEXT: [4] - JFPM
45+
# CHECK-NEXT: [5] - JFPU0
46+
# CHECK-NEXT: [6] - JFPU1
47+
# CHECK-NEXT: [7] - JLAGU
48+
# CHECK-NEXT: [8] - JMul
49+
# CHECK-NEXT: [9] - JSAGU
50+
# CHECK-NEXT: [10] - JSTC
51+
# CHECK-NEXT: [11] - JVALU0
52+
# CHECK-NEXT: [12] - JVALU1
53+
# CHECK-NEXT: [13] - JVIMUL
54+
55+
# CHECK: Resource pressure per iteration:
56+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
57+
# CHECK-NEXT: 1.50 1.50 - - - - - 3.00 - 2.00 - - - -
58+
59+
# CHECK: Resource pressure by instruction:
60+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
61+
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - stmxcsr -4(%rsp)
62+
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movl $-24577, %eax
63+
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - andl -4(%rsp), %eax
64+
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - movl %eax, -8(%rsp)
65+
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - ldmxcsr -8(%rsp)
66+
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - retq
67+
68+
# CHECK: Timeline view:
69+
# CHECK-NEXT: 0123456789
70+
# CHECK-NEXT: Index 0123456789 01234
71+
72+
# CHECK: [0,0] DeER . . . . . stmxcsr -4(%rsp)
73+
# CHECK-NEXT: [0,1] DeER . . . . . movl $-24577, %eax
74+
# CHECK-NEXT: [0,2] .DeeeeER . . . . andl -4(%rsp), %eax
75+
# CHECK-NEXT: [0,3] .D====eER . . . . movl %eax, -8(%rsp)
76+
# CHECK-NEXT: [0,4] . D===eeeER . . . ldmxcsr -8(%rsp)
77+
# CHECK-NEXT: [0,5] . DeeeeE--R . . . retq
78+
# CHECK-NEXT: [1,0] . D===eE--R . . . stmxcsr -4(%rsp)
79+
# CHECK-NEXT: [1,1] . DeE-----R . . . movl $-24577, %eax
80+
# CHECK-NEXT: [1,2] . D====eeeeER. . . andl -4(%rsp), %eax
81+
# CHECK-NEXT: [1,3] . D========eER . . movl %eax, -8(%rsp)
82+
# CHECK-NEXT: [1,4] . D=======eeeER . . ldmxcsr -8(%rsp)
83+
# CHECK-NEXT: [1,5] . D=eeeeE-----R . . retq
84+
# CHECK-NEXT: [2,0] . .D=======eE--R . . stmxcsr -4(%rsp)
85+
# CHECK-NEXT: [2,1] . .DeE---------R . . movl $-24577, %eax
86+
# CHECK-NEXT: [2,2] . . D========eeeeER . andl -4(%rsp), %eax
87+
# CHECK-NEXT: [2,3] . . D============eER . movl %eax, -8(%rsp)
88+
# CHECK-NEXT: [2,4] . . D===========eeeER ldmxcsr -8(%rsp)
89+
# CHECK-NEXT: [2,5] . . D=eeeeE---------R retq
90+
91+
# CHECK: Average Wait times (based on the timeline view):
92+
# CHECK-NEXT: [0]: Executions
93+
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
94+
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
95+
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
96+
97+
# CHECK: [0] [1] [2] [3]
98+
# CHECK-NEXT: 0. 3 4.3 1.0 1.3 stmxcsr -4(%rsp)
99+
# CHECK-NEXT: 1. 3 1.0 1.0 4.7 movl $-24577, %eax
100+
# CHECK-NEXT: 2. 3 5.0 0.3 0.0 andl -4(%rsp), %eax
101+
# CHECK-NEXT: 3. 3 9.0 0.0 0.0 movl %eax, -8(%rsp)
102+
# CHECK-NEXT: 4. 3 8.0 0.0 0.0 ldmxcsr -8(%rsp)
103+
# CHECK-NEXT: 5. 3 1.7 1.7 5.3 retq
104+
# CHECK-NEXT: 3 4.8 0.7 1.9 <total>
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -timeline-max-iterations=3 < %s | FileCheck %s
3+
4+
# Code snippet taken from PR48024.
5+
6+
stmxcsr -4(%rsp)
7+
movl $-24577, %eax # imm = 0x9FFF
8+
andl -4(%rsp), %eax
9+
movl %eax, -8(%rsp)
10+
ldmxcsr -8(%rsp)
11+
retq
12+
13+
# CHECK: Iterations: 100
14+
# CHECK-NEXT: Instructions: 600
15+
# CHECK-NEXT: Total Cycles: 1304
16+
# CHECK-NEXT: Total uOps: 1300
17+
18+
# CHECK: Dispatch Width: 4
19+
# CHECK-NEXT: uOps Per Cycle: 1.00
20+
# CHECK-NEXT: IPC: 0.46
21+
# CHECK-NEXT: Block RThroughput: 3.3
22+
23+
# CHECK: Instruction Info:
24+
# CHECK-NEXT: [1]: #uOps
25+
# CHECK-NEXT: [2]: Latency
26+
# CHECK-NEXT: [3]: RThroughput
27+
# CHECK-NEXT: [4]: MayLoad
28+
# CHECK-NEXT: [5]: MayStore
29+
# CHECK-NEXT: [6]: HasSideEffects (U)
30+
31+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
32+
# CHECK-NEXT: 3 2 1.00 * U stmxcsr -4(%rsp)
33+
# CHECK-NEXT: 1 1 0.25 movl $-24577, %eax
34+
# CHECK-NEXT: 2 6 0.50 * andl -4(%rsp), %eax
35+
# CHECK-NEXT: 1 1 1.00 * movl %eax, -8(%rsp)
36+
# CHECK-NEXT: 3 7 1.00 * U ldmxcsr -8(%rsp)
37+
# CHECK-NEXT: 3 7 1.00 U retq
38+
39+
# CHECK: Resources:
40+
# CHECK-NEXT: [0] - HWDivider
41+
# CHECK-NEXT: [1] - HWFPDivider
42+
# CHECK-NEXT: [2] - HWPort0
43+
# CHECK-NEXT: [3] - HWPort1
44+
# CHECK-NEXT: [4] - HWPort2
45+
# CHECK-NEXT: [5] - HWPort3
46+
# CHECK-NEXT: [6] - HWPort4
47+
# CHECK-NEXT: [7] - HWPort5
48+
# CHECK-NEXT: [8] - HWPort6
49+
# CHECK-NEXT: [9] - HWPort7
50+
51+
# CHECK: Resource pressure per iteration:
52+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
53+
# CHECK-NEXT: - - 1.75 1.74 1.67 1.68 2.00 1.75 1.76 1.65
54+
55+
# CHECK: Resource pressure by instruction:
56+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
57+
# CHECK-NEXT: - - - - 0.30 - 1.00 1.00 - 0.70 stmxcsr -4(%rsp)
58+
# CHECK-NEXT: - - 0.08 0.67 - - - 0.04 0.21 - movl $-24577, %eax
59+
# CHECK-NEXT: - - 0.42 0.37 0.35 0.65 - 0.01 0.20 - andl -4(%rsp), %eax
60+
# CHECK-NEXT: - - - - 0.05 - 1.00 - - 0.95 movl %eax, -8(%rsp)
61+
# CHECK-NEXT: - - 1.00 0.23 0.34 0.66 - 0.42 0.35 - ldmxcsr -8(%rsp)
62+
# CHECK-NEXT: - - 0.25 0.47 0.63 0.37 - 0.28 1.00 - retq
63+
64+
# CHECK: Timeline view:
65+
# CHECK-NEXT: 0123456789 0123456789
66+
# CHECK-NEXT: Index 0123456789 0123456789 012
67+
68+
# CHECK: [0,0] DeeER. . . . . . . . . stmxcsr -4(%rsp)
69+
# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . movl $-24577, %eax
70+
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . . . . . andl -4(%rsp), %eax
71+
# CHECK-NEXT: [0,3] .D======eER . . . . . . . movl %eax, -8(%rsp)
72+
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . . . . . ldmxcsr -8(%rsp)
73+
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . . . . . retq
74+
# CHECK-NEXT: [1,0] . D====eeE----R . . . . . . stmxcsr -4(%rsp)
75+
# CHECK-NEXT: [1,1] . DeE---------R . . . . . . movl $-24577, %eax
76+
# CHECK-NEXT: [1,2] . D=========eeeeeeER . . . . . andl -4(%rsp), %eax
77+
# CHECK-NEXT: [1,3] . D===============eER . . . . . movl %eax, -8(%rsp)
78+
# CHECK-NEXT: [1,4] . .D==============eeeeeeeER. . . . ldmxcsr -8(%rsp)
79+
# CHECK-NEXT: [1,5] . . DeeeeeeeE-------------R. . . . retq
80+
# CHECK-NEXT: [2,0] . . D=============eeE----R. . . . stmxcsr -4(%rsp)
81+
# CHECK-NEXT: [2,1] . . DeE------------------R. . . . movl $-24577, %eax
82+
# CHECK-NEXT: [2,2] . . D==================eeeeeeER . . andl -4(%rsp), %eax
83+
# CHECK-NEXT: [2,3] . . D========================eER . . movl %eax, -8(%rsp)
84+
# CHECK-NEXT: [2,4] . . D=======================eeeeeeeER ldmxcsr -8(%rsp)
85+
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----------------------R retq
86+
87+
# CHECK: Average Wait times (based on the timeline view):
88+
# CHECK-NEXT: [0]: Executions
89+
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
90+
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
91+
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
92+
93+
# CHECK: [0] [1] [2] [3]
94+
# CHECK-NEXT: 0. 3 6.7 1.0 2.7 stmxcsr -4(%rsp)
95+
# CHECK-NEXT: 1. 3 1.0 1.0 9.3 movl $-24577, %eax
96+
# CHECK-NEXT: 2. 3 10.0 0.3 0.0 andl -4(%rsp), %eax
97+
# CHECK-NEXT: 3. 3 16.0 0.0 0.0 movl %eax, -8(%rsp)
98+
# CHECK-NEXT: 4. 3 15.0 0.0 0.0 ldmxcsr -8(%rsp)
99+
# CHECK-NEXT: 5. 3 1.0 1.0 13.0 retq
100+
# CHECK-NEXT: 3 8.3 0.6 4.2 <total>

0 commit comments

Comments
 (0)