 // Lower bounded shifts first. No need to check shift value.
 (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y)
 (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y)
-(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y)
-(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y)
+(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y)
+(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y)
 (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD x y)
 (Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW x y)
-(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW (MOVHZreg x) y)
-(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW (MOVBZreg x) y)
+(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD (MOVHZreg x) y)
+(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD (MOVBZreg x) y)
 (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD x y)
 (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW x y)
-(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW (MOVHreg x) y)
-(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW (MOVBreg x) y)
+(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD (MOVHreg x) y)
+(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD (MOVBreg x) y)
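The bounded rules above fire only when the compiler has already proven that the shift amount is smaller than the operand width, so the lowering can emit a single shift with no range check. As a purely illustrative sketch (not the generated code), this small Go function spells out the guard that shiftIsBounded lets the backend omit:

```go
// lsh64 spells out the semantics a 64-bit Go left shift must preserve:
// once the shift amount reaches the operand width, the result is zero.
// When the shift amount is provably < 64, the guard below is dead and a
// single hardware shift suffices; otherwise the unbounded rules that
// follow must select between the two outcomes.
func lsh64(x, s uint64) uint64 {
	if s >= 64 {
		return 0
	}
	return x << s
}
```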
+
+// Unbounded shifts. Go shifts saturate to 0 or -1 when shifting beyond the number of
+// bits in a type, PPC64 shifts do not (see the ISA for details).
+//
+// Note, y is always non-negative.
+//
+// Note, ISELZ is intentionally not used in lower. Where possible, ISEL is converted to ISELZ in late lower
+// after all the ISEL folding rules have been exercised.
+
+((Rsh64U|Lsh64)x64 <t> x y) => (ISEL [0] (S(R|L)D <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
+((Rsh64U|Lsh64)x32 <t> x y) => (ISEL [0] (S(R|L)D <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
+((Rsh64U|Lsh64)x16 <t> x y) => (ISEL [2] (S(R|L)D <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFC0] y)))
+((Rsh64U|Lsh64)x8 <t> x y) => (ISEL [2] (S(R|L)D <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00C0] y)))
+(Rsh64x(64|32) <t> x y) => (ISEL [0] (SRAD <t> x y) (SRADconst <t> x [63]) (CMP(U|WU)const y [64]))
+(Rsh64x16 <t> x y) => (ISEL [2] (SRAD <t> x y) (SRADconst <t> x [63]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFC0] y)))
+(Rsh64x8 <t> x y) => (ISEL [2] (SRAD <t> x y) (SRADconst <t> x [63]) (Select1 <types.TypeFlags> (ANDCCconst [0x00C0] y)))
+
+((Rsh32U|Lsh32)x64 <t> x y) => (ISEL [0] (S(R|L)W <t> x y) (MOVDconst [0]) (CMPUconst y [32]))
+((Rsh32U|Lsh32)x32 <t> x y) => (ISEL [0] (S(R|L)W <t> x y) (MOVDconst [0]) (CMPWUconst y [32]))
+((Rsh32U|Lsh32)x16 <t> x y) => (ISEL [2] (S(R|L)W <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFE0] y)))
+((Rsh32U|Lsh32)x8 <t> x y) => (ISEL [2] (S(R|L)W <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00E0] y)))
+(Rsh32x(64|32) <t> x y) => (ISEL [0] (SRAW <t> x y) (SRAWconst <t> x [31]) (CMP(U|WU)const y [32]))
+(Rsh32x16 <t> x y) => (ISEL [2] (SRAW <t> x y) (SRAWconst <t> x [31]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFE0] y)))
+(Rsh32x8 <t> x y) => (ISEL [2] (SRAW <t> x y) (SRAWconst <t> x [31]) (Select1 <types.TypeFlags> (ANDCCconst [0x00E0] y)))
+
+((Rsh16U|Lsh16)x64 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (CMPUconst y [16]))
+((Rsh16U|Lsh16)x32 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (CMPWUconst y [16]))
+((Rsh16U|Lsh16)x16 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF0] y)))
+((Rsh16U|Lsh16)x8 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F0] y)))
+(Rsh16x(64|32) <t> x y) => (ISEL [0] (SRAD <t> (MOVHreg x) y) (SRADconst <t> (MOVHreg x) [15]) (CMP(U|WU)const y [16]))
+(Rsh16x16 <t> x y) => (ISEL [2] (SRAD <t> (MOVHreg x) y) (SRADconst <t> (MOVHreg x) [15]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF0] y)))
+(Rsh16x8 <t> x y) => (ISEL [2] (SRAD <t> (MOVHreg x) y) (SRADconst <t> (MOVHreg x) [15]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F0] y)))
+
+((Rsh8U|Lsh8)x64 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (CMPUconst y [8]))
+((Rsh8U|Lsh8)x32 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (CMPWUconst y [8]))
+((Rsh8U|Lsh8)x16 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF8] y)))
+((Rsh8U|Lsh8)x8 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F8] y)))
+(Rsh8x(64|32) <t> x y) => (ISEL [0] (SRAD <t> (MOVBreg x) y) (SRADconst <t> (MOVBreg x) [7]) (CMP(U|WU)const y [8]))
+(Rsh8x16 <t> x y) => (ISEL [2] (SRAD <t> (MOVBreg x) y) (SRADconst <t> (MOVBreg x) [7]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF8] y)))
+(Rsh8x8 <t> x y) => (ISEL [2] (SRAD <t> (MOVBreg x) y) (SRADconst <t> (MOVBreg x) [7]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F8] y)))
+
+// Catch bounded shifts in situations like foo<<uint(shift&63) which might not be caught by the prove pass.
+(CMP(U|WU)const [d] (Select0 (ANDCCconst z [c]))) && uint64(d) > uint64(c) => (FlagLT)
 
-// non-constant rotates
-// If shift > 64 then use -1 as shift count to shift all bits.
-((Lsh64|Rsh64|Rsh64U)x64 x y) => (S(L|RA|R)D x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
-((Rsh32|Rsh32U|Lsh32)x64 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
-
-(Rsh(16|16U)x64 x y) => (SR(AW|W) ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
-(Lsh16x64 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
-
-(Rsh(8|8U)x64 x y) => (SR(AW|W) ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
-(Lsh8x64 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
-
-((Rsh64|Rsh64U|Lsh64)x32 x y) => (S(RA|R|L)D x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
-((Rsh32|Rsh32U|Lsh32)x32 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
-
-(Rsh(16|16U)x32 x y) => (SR(AW|W) ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
-(Lsh16x32 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
-
-(Rsh(8|8U)x32 x y) => (SR(AW|W) ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
-(Lsh8x32 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
-
-((Rsh64|Rsh64U|Lsh64)x16 x y) => (S(RA|R|L)D x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64]))))
-
-((Rsh32|Rsh32U|Lsh32)x16 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32]))))
-
-(Rsh(16|16U)x16 x y) => (S(RA|R)W ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16]))))
-(Lsh16x16 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16]))))
-
-(Rsh(8|8U)x16 x y) => (SR(AW|W) ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
-(Lsh8x16 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
-
-
-((Rsh64|Rsh64U|Lsh64)x8 x y) => (S(RA|R|L)D x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64]))))
-
-((Rsh32|Rsh32U|Lsh32)x8 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32]))))
-
-(Rsh(16|16U)x8 x y) => (S(RA|R)W ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16]))))
-(Lsh16x8 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16]))))
-
-(Rsh(8|8U)x8 x y) => (S(RA|R)W ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
-(Lsh8x8 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
-
-// Cleaning up shift ops
-(ISEL [0] (Select0 (ANDCCconst [d] y)) (MOVDconst [-1]) (CMPU (Select0 (ANDCCconst [d] y)) (MOVDconst [c]))) && c >= d => (Select0 (ANDCCconst [d] y))
-(ISEL [0] (Select0 (ANDCCconst [d] y)) (MOVDconst [-1]) (CMPUconst [c] (Select0 (ANDCCconst [d] y)))) && c >= d => (Select0 (ANDCCconst [d] y))
 (ORN x (MOVDconst [-1])) => x
 
 (S(RAD|RD|LD) x (MOVDconst [c])) => (S(RAD|RD|LD)const [c&63 | (c>>6&1*63)] x)
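The new comment above summarizes the semantics the ISEL-based rules must reproduce: Go defines shifts by amounts at or beyond the operand width to saturate rather than wrap. A small standalone Go program (illustrative only, not part of the change) showing those results:

```go
package main

import "fmt"

func main() {
	var s uint = 100 // any shift amount >= the operand width

	var u uint64 = 0xFF
	fmt.Println(u<<s, u>>s) // 0 0: left and unsigned right shifts saturate to 0

	var n int64 = -0xFF
	fmt.Println(n>>s, int64(0xFF)>>s) // -1 0: signed right shifts fill with the sign bit
}
```

In each rule the ISEL selects between the real shift result and the saturated value (a zero constant, or the sign-filled SRADconst/SRAWconst result), based on a comparison of y against the operand width.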

...

 (OR x (MOVDconst [c])) && isU32Bit(c) => (ORconst [c] x)
 
 // Simplify consts
-(Select0 (ANDCCconst [c] (Select0 (ANDCCconst [d] x)))) => (Select0 (ANDCCconst [c&d] x))
+(ANDCCconst [c] (Select0 (ANDCCconst [d] x))) => (ANDCCconst [c&d] x)
 (ORconst [c] (ORconst [d] x)) => (ORconst [c|d] x)
 (XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x)
 (Select0 (ANDCCconst [-1] x)) => x
 (Select0 (ANDCCconst [0] _)) => (MOVDconst [0])
+(Select1 (ANDCCconst [0] _)) => (FlagEQ)
 (XORconst [0] x) => x
 (ORconst [-1] _) => (MOVDconst [-1])
 (ORconst [0] x) => x
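For the new CMP(U|WU)const/ANDCCconst rule earlier in the diff: when the shift amount is masked in the source, the comparison against the operand width is decidable at rewrite time, which lets the saturation guard fold away. A hypothetical Go function (name and signature are illustrative) showing the foo<<uint(shift&63) pattern that the comment names:

```go
// shlMasked mirrors the foo<<uint(shift&63) pattern from the comment.
// The mask bounds the shift amount to 0..63, so the y < 64 check in the
// lowered form is always true and later rewrites can drop the guard,
// leaving a plain 64-bit shift.
func shlMasked(foo uint64, shift uint) uint64 {
	return foo << (shift & 63)
}
```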
|