Commit fc95de3

[RISCV] Require alignment when forming gather with larger element type
This fixes a bug in my 928564c that didn't get noticed in review. I found it when looking at the strided load case (upcoming patch), and realized the previous commit was buggy too.

p.s. Sorry for the slightly confusing test diff. I'd apparently used the wrong mask for the aligned positive test; it was actually unaligned. Didn't seem worthy of a separate precommit.
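The combine here pairs up adjacent gather indices and forms a gather whose element type is twice as wide (e.g. two i16 elements loaded as one i32). Before this change, matchIndexAsWiderOp only required each byte offset to be a multiple of ElementSize, so a pair could start at an offset like 18 that is not a multiple of the wider element size, producing a misaligned wider access. The check now requires the first offset of each pair to be a multiple of WiderElementSize. Below is a minimal standalone sketch of that condition applied to the index vectors from the updated test; the helper name and driver are invented here for illustration and are not the in-tree LLVM code.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative re-statement of the tightened check (hypothetical helper, not
// the in-tree LLVM code): adjacent index pairs may be merged into a single
// element of twice the width only if each pair is contiguous and starts at a
// byte offset that is a multiple of the wider element size.
static bool pairsFormWiderGather(const std::vector<uint64_t> &Index,
                                 uint64_t ElementSize) {
  const uint64_t WiderElementSize = ElementSize * 2;
  for (size_t i = 0; i != Index.size(); ++i) {
    uint64_t C = Index[i] * ElementSize; // byte offset of element i
    if (i % 2 == 0) {
      if (C % WiderElementSize != 0)     // pair must start on a wider-element
        return false;                    // boundary (the new requirement)
      continue;
    }
    uint64_t Last = Index[i - 1] * ElementSize;
    if (C != Last + ElementSize)         // pair must be contiguous
      return false;
  }
  return true;
}

int main() {
  // i16 elements (2 bytes) widened to i32 (4 bytes).
  std::vector<uint64_t> Aligned = {0, 1, 8, 9, 4, 5, 2, 3};     // pair offsets 0, 16, 8, 4
  std::vector<uint64_t> Misaligned = {0, 1, 9, 10, 4, 5, 2, 3}; // pair offset 18 breaks it
  std::printf("aligned:    %d\n", pairsFormWiderGather(Aligned, 2));    // prints 1
  std::printf("misaligned: %d\n", pairsFormWiderGather(Misaligned, 2)); // prints 0
}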
1 parent a9a1f84 commit fc95de3

2 files changed, +15 -16 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+4 -3
@@ -13623,10 +13623,11 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
     // TODO: This offset check is too strict if we support fully
     // misaligned memory operations.
     uint64_t C = Index->getConstantOperandVal(i);
-    if (C % ElementSize != 0)
-      return false;
-    if (i % 2 == 0)
+    if (i % 2 == 0) {
+      if (C % WiderElementSize != 0)
+        return false;
       continue;
+    }
     uint64_t Last = Index->getConstantOperandVal(i-1);
     if (C != Last + ElementSize)
       return false;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

+11 -13
@@ -14040,7 +14040,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    lui a1, 16513
-; RV32-NEXT:    addi a1, a1, 512
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.s.x v9, a1
 ; RV32-NEXT:    vluxei8.v v8, (a0), v9
@@ -14049,7 +14048,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64V-LABEL: mgather_gather_2xSEW:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    lui a1, 16513
-; RV64V-NEXT:    addiw a1, a1, 512
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64V-NEXT:    vmv.s.x v9, a1
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
@@ -14099,15 +14097,15 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    andi a2, a1, 4
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB112_3
 ; RV64ZVE32F-NEXT:  .LBB112_11: # %cond.load4
-; RV64ZVE32F-NEXT:    addi a2, a0, 18
+; RV64ZVE32F-NEXT:    addi a2, a0, 16
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 8
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB112_4
 ; RV64ZVE32F-NEXT:  .LBB112_12: # %cond.load7
-; RV64ZVE32F-NEXT:    addi a2, a0, 20
+; RV64ZVE32F-NEXT:    addi a2, a0, 18
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
@@ -14147,7 +14145,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
+  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
   %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
   ret <8 x i16> %v
 }
@@ -14274,19 +14272,19 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
 define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 16513
-; RV32-NEXT:    addi a1, a1, 514
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a1
+; RV32-NEXT:    lui a1, %hi(.LCPI114_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI114_0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vle8.v v9, (a1)
 ; RV32-NEXT:    vluxei8.v v8, (a0), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, 16513
-; RV64V-NEXT:    addiw a1, a1, 514
-; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vmv.s.x v9, a1
+; RV64V-NEXT:    lui a1, %hi(.LCPI114_0)
+; RV64V-NEXT:    addi a1, a1, %lo(.LCPI114_0)
+; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT:    vle8.v v9, (a1)
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
 ; RV64V-NEXT:    ret
 ;
