Skip to content

Commit aab3fee

Browse files
committed
use vextrins instruction
1 parent 6664ef0 commit aab3fee

File tree

2 files changed

+53
-16
lines changed

2 files changed

+53
-16
lines changed

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,28 @@ multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
14821482
(Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
14831483
}
14841484

1485+
// Emits one anonymous selection pattern for every (imm1, imm2) pair with both
// indices in 0...3. Each pattern matches "extract element imm1 of $vj, then
// insert it at index imm2 of $vd" and selects a single VEXTRINS_W.
//   vecty  - vector value type used for both $vd and $vj.
//   elemty - type given to the extracted element inside the pattern.
// The instruction immediate is built as (imm2 << 4) | imm1, i.e. the insert
// index is shifted left by four bits and OR'ed with the extract index.
multiclass InsertExtractPatV4<ValueType vecty, ValueType elemty> {
1486+
foreach imm1 = 0...3 in {
1487+
foreach imm2 = 0...3 in {
1488+
defvar Imm = !or(!shl(imm2, 4), imm1);
1489+
def : Pat<(vector_insert vecty:$vd,
1490+
(elemty (vector_extract vecty:$vj, imm1)), imm2),
1491+
(VEXTRINS_W $vd, $vj, Imm)>;
1492+
}
1493+
}
1494+
}
1495+
1496+
// Emits one anonymous selection pattern for every (imm1, imm2) pair with both
// indices in 0...1. Each pattern matches "extract element imm1 of $vj, then
// insert it at index imm2 of $vd" and selects a single VEXTRINS_D.
//   vecty  - vector value type used for both $vd and $vj.
//   elemty - type given to the extracted element inside the pattern.
// The instruction immediate is built as (imm2 << 4) | imm1, i.e. the insert
// index is shifted left by four bits and OR'ed with the extract index.
multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
1497+
foreach imm1 = 0...1 in {
1498+
foreach imm2 = 0...1 in {
1499+
defvar Imm = !or(!shl(imm2, 4), imm1);
1500+
def : Pat<(vector_insert vecty:$vd,
1501+
(elemty (vector_extract vecty:$vj, imm1)), imm2),
1502+
(VEXTRINS_D $vd, $vj, Imm)>;
1503+
}
1504+
}
1505+
}
1506+
14851507
let Predicates = [HasExtLSX] in {
14861508

14871509
// VADD_{B/H/W/D}
@@ -1782,6 +1804,31 @@ defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
17821804
defm : PatCCVrVrF<SETO, "VFCMP_COR">;
17831805
defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;
17841806

1807+
// Insert an element extracted from one vector into a vector.
1808+
// VPICKVE2GR_{B/H/W/D} + VINSGR2VR_{B/H/W/D} -> VEXTRINS_{B/H/W/D}
1809+
// v16i8: one pattern per (extract index imm1, insert index imm2) pair, both in
// 0...15. The extracted element is typed GRLenVT in the pattern, and the
// result is a single VEXTRINS_B whose immediate is (imm2 << 4) | imm1
// (for example imm1 = 15, imm2 = 1 gives 31).
foreach imm1 = 0...15 in {
1810+
foreach imm2 = 0...15 in {
1811+
defvar Imm = !or(!shl(imm2, 4), imm1);
1812+
def : Pat<(vector_insert v16i8:$vd,
1813+
(GRLenVT (vector_extract v16i8:$vj, imm1)), imm2),
1814+
(VEXTRINS_B $vd, $vj, Imm)>;
1815+
}
1816+
}
1817+
1818+
// v8i16: one pattern per (extract index imm1, insert index imm2) pair, both in
// 0...7. The extracted element is typed GRLenVT in the pattern, and the
// result is a single VEXTRINS_H whose immediate is (imm2 << 4) | imm1
// (for example imm1 = 7, imm2 = 1 gives 23).
foreach imm1 = 0...7 in {
1819+
foreach imm2 = 0...7 in {
1820+
defvar Imm = !or(!shl(imm2, 4), imm1);
1821+
def : Pat<(vector_insert v8i16:$vd,
1822+
(GRLenVT (vector_extract v8i16:$vj, imm1)), imm2),
1823+
(VEXTRINS_H $vd, $vj, Imm)>;
1824+
}
1825+
}
1826+
1827+
// Instantiate the insert-of-extract patterns for the four- and two-element
// vector types. Integer vectors pass GRLenVT as the extracted element type;
// floating-point vectors pass f32 / f64.
defm : InsertExtractPatV4<v4i32, GRLenVT>;
1828+
defm : InsertExtractPatV4<v4f32, f32>;
1829+
defm : InsertExtractPatV2<v2i64, GRLenVT>;
1830+
defm : InsertExtractPatV2<v2f64, f64>;
1831+
17851832
// VINSGR2VR_{B/H/W/D}
17861833
def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
17871834
(VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
@@ -1791,10 +1838,6 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
17911838
(VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
17921839
def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
17931840
(VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
1794-
def : Pat<(vector_insert v4f32:$vd, (f32 (vector_extract v4f32:$vj, uimm2:$imm1)), uimm2:$imm2),
1795-
(VINSGR2VR_W $vd, (VPICKVE2GR_W v4f32:$vj, uimm2:$imm1), uimm2:$imm2)>;
1796-
def : Pat<(vector_insert v2f64:$vd, (f64 (vector_extract v2f64:$vj, uimm1:$imm1)), uimm1:$imm2),
1797-
(VINSGR2VR_D $vd, (VPICKVE2GR_D v2f64:$vj, uimm1:$imm1), uimm1:$imm2)>;
17981841
def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
17991842
(VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
18001843
def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
define <16 x i8> @insert_extract_v16i8(<16 x i8> %a) nounwind {
55
; CHECK-LABEL: insert_extract_v16i8:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
8-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
7+
; CHECK-NEXT: vextrins.b $vr0, $vr0, 31
98
; CHECK-NEXT: ret
109
entry:
1110
%b = extractelement <16 x i8> %a, i32 15
@@ -16,8 +15,7 @@ entry:
1615
define <8 x i16> @insert_extract_v8i16(<8 x i16> %a) nounwind {
1716
; CHECK-LABEL: insert_extract_v8i16:
1817
; CHECK: # %bb.0: # %entry
19-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
20-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
18+
; CHECK-NEXT: vextrins.h $vr0, $vr0, 23
2119
; CHECK-NEXT: ret
2220
entry:
2321
%b = extractelement <8 x i16> %a, i32 7
@@ -28,8 +26,7 @@ entry:
2826
define <4 x i32> @insert_extract_v4i32(<4 x i32> %a) nounwind {
2927
; CHECK-LABEL: insert_extract_v4i32:
3028
; CHECK: # %bb.0: # %entry
31-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
32-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1
29+
; CHECK-NEXT: vextrins.w $vr0, $vr0, 19
3330
; CHECK-NEXT: ret
3431
entry:
3532
%b = extractelement <4 x i32> %a, i32 3
@@ -40,8 +37,7 @@ entry:
4037
define <4 x float> @insert_extract_v4f32(<4 x float> %a) nounwind {
4138
; CHECK-LABEL: insert_extract_v4f32:
4239
; CHECK: # %bb.0: # %entry
43-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
44-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
40+
; CHECK-NEXT: vextrins.w $vr0, $vr0, 3
4541
; CHECK-NEXT: ret
4642
entry:
4743
%b = extractelement <4 x float> %a, i32 3
@@ -52,8 +48,7 @@ entry:
5248
define <2 x i64> @insert_extract_v2i64(<2 x i64> %a) nounwind {
5349
; CHECK-LABEL: insert_extract_v2i64:
5450
; CHECK: # %bb.0: # %entry
55-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
56-
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
51+
; CHECK-NEXT: vextrins.d $vr0, $vr0, 1
5752
; CHECK-NEXT: ret
5853
entry:
5954
%b = extractelement <2 x i64> %a, i32 1
@@ -64,8 +59,7 @@ entry:
6459
define <2 x double> @insert_extract_v2f64(<2 x double> %a) nounwind {
6560
; CHECK-LABEL: insert_extract_v2f64:
6661
; CHECK: # %bb.0: # %entry
67-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
68-
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
62+
; CHECK-NEXT: vextrins.d $vr0, $vr0, 1
6963
; CHECK-NEXT: ret
7064
entry:
7165
%b = extractelement <2 x double> %a, i32 1

0 commit comments

Comments
 (0)