Commit fc95de3

[RISCV] Require alignment when forming gather with larger element type
This fixes a bug in my 928564c that didn't get noticed in review. I found it when looking at the strided load case (upcoming patch), and realized the previous commit was buggy too.

p.s. Sorry for the slightly confusing test diff. I'd apparently used the wrong mask for the aligned positive test; it was actually unaligned. Didn't seem worthy of a separate precommit.
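The combine here pairs up adjacent gather indices and forms a gather whose element type is twice as wide (e.g. two i16 elements loaded as one i32). Before this change, matchIndexAsWiderOp only required each byte offset to be a multiple of ElementSize, so a pair could start at an offset like 18 that is not a multiple of the wider element size, producing a misaligned wider access. The check now requires the first offset of each pair to be a multiple of WiderElementSize. Below is a minimal standalone sketch of that condition applied to the index vectors from the updated test; the helper name and driver are invented here for illustration and are not the in-tree LLVM code.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative re-statement of the tightened check (hypothetical helper, not
// the in-tree LLVM code): adjacent index pairs may be merged into a single
// element of twice the width only if each pair is contiguous and starts at a
// byte offset that is a multiple of the wider element size.
static bool pairsFormWiderGather(const std::vector<uint64_t> &Index,
                                 uint64_t ElementSize) {
  const uint64_t WiderElementSize = ElementSize * 2;
  for (size_t i = 0; i != Index.size(); ++i) {
    uint64_t C = Index[i] * ElementSize; // byte offset of element i
    if (i % 2 == 0) {
      if (C % WiderElementSize != 0)     // pair must start on a wider-element
        return false;                    // boundary (the new requirement)
      continue;
    }
    uint64_t Last = Index[i - 1] * ElementSize;
    if (C != Last + ElementSize)         // pair must be contiguous
      return false;
  }
  return true;
}

int main() {
  // i16 elements (2 bytes) widened to i32 (4 bytes).
  std::vector<uint64_t> Aligned = {0, 1, 8, 9, 4, 5, 2, 3};     // pair offsets 0, 16, 8, 4
  std::vector<uint64_t> Misaligned = {0, 1, 9, 10, 4, 5, 2, 3}; // pair offset 18 breaks it
  std::printf("aligned:    %d\n", pairsFormWiderGather(Aligned, 2));    // prints 1
  std::printf("misaligned: %d\n", pairsFormWiderGather(Misaligned, 2)); // prints 0
}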
1 parent a9a1f84 commit fc95de3

2 files changed, +15 -16 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+4 -3
@@ -13623,10 +13623,11 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
     // TODO: This offset check is too strict if we support fully
     // misaligned memory operations.
     uint64_t C = Index->getConstantOperandVal(i);
-    if (C % ElementSize != 0)
-      return false;
-    if (i % 2 == 0)
+    if (i % 2 == 0) {
+      if (C % WiderElementSize != 0)
+        return false;
       continue;
+    }
     uint64_t Last = Index->getConstantOperandVal(i-1);
     if (C != Last + ElementSize)
       return false;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

+11 -13
@@ -14040,7 +14040,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    lui a1, 16513
-; RV32-NEXT:    addi a1, a1, 512
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.s.x v9, a1
 ; RV32-NEXT:    vluxei8.v v8, (a0), v9
@@ -14049,7 +14048,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64V-LABEL: mgather_gather_2xSEW:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    lui a1, 16513
-; RV64V-NEXT:    addiw a1, a1, 512
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64V-NEXT:    vmv.s.x v9, a1
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
@@ -14099,15 +14097,15 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    andi a2, a1, 4
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB112_3
 ; RV64ZVE32F-NEXT:  .LBB112_11: # %cond.load4
-; RV64ZVE32F-NEXT:    addi a2, a0, 18
+; RV64ZVE32F-NEXT:    addi a2, a0, 16
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
 ; RV64ZVE32F-NEXT:    andi a2, a1, 8
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB112_4
 ; RV64ZVE32F-NEXT:  .LBB112_12: # %cond.load7
-; RV64ZVE32F-NEXT:    addi a2, a0, 20
+; RV64ZVE32F-NEXT:    addi a2, a0, 18
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
@@ -14147,7 +14145,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
+  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
   %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
   ret <8 x i16> %v
 }
@@ -14274,19 +14272,19 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
 define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 16513
-; RV32-NEXT:    addi a1, a1, 514
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a1
+; RV32-NEXT:    lui a1, %hi(.LCPI114_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI114_0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vle8.v v9, (a1)
 ; RV32-NEXT:    vluxei8.v v8, (a0), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, 16513
-; RV64V-NEXT:    addiw a1, a1, 514
-; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vmv.s.x v9, a1
+; RV64V-NEXT:    lui a1, %hi(.LCPI114_0)
+; RV64V-NEXT:    addi a1, a1, %lo(.LCPI114_0)
+; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT:    vle8.v v9, (a1)
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
 ; RV64V-NEXT:    ret
 ;
