Skip to content

[LoongArch] Optimize inserting extracted elements #146018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2525,12 +2525,9 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
EVT VecTy = Op->getOperand(0)->getValueType(0);
SDValue Idx = Op->getOperand(1);
EVT EltTy = VecTy.getVectorElementType();
unsigned NumElts = VecTy.getVectorNumElements();

if (isa<ConstantSDNode>(Idx) &&
(EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
return Op;

return SDValue();
Expand Down
83 changes: 78 additions & 5 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,32 @@ multiclass PatCCXrXrF<CondCode CC, string Inst> {
(!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
}

// Selects one XVEXTRINS_W for a pair of element moves from $xj into $xd.
// For every source position SrcLane and destination position DstLane in
// 0...3, the matched DAG inserts $xj[SrcLane] at DstLane and then
// $xj[SrcLane + 4] at DstLane + 4.
//   vecty  - vector type being matched.
//   elemty - type the extracted element is produced as.
multiclass PairInsertExtractPatV8<ValueType vecty, ValueType elemty> {
  foreach SrcLane = 0...3 in {
    foreach DstLane = 0...3 in {
      // Partner positions four elements further along the vector.
      defvar SrcLaneHi = !add(SrcLane, 4);
      defvar DstLaneHi = !add(DstLane, 4);
      // Instruction immediate: destination position shifted left by 4,
      // OR-ed with the source position.
      defvar Selector = !or(SrcLane, !shl(DstLane, 4));
      def : Pat<(vector_insert
                    (vector_insert vecty:$xd,
                        (elemty (vector_extract vecty:$xj, SrcLane)),
                        DstLane),
                    (elemty (vector_extract vecty:$xj, SrcLaneHi)),
                    DstLaneHi),
                (XVEXTRINS_W $xd, $xj, Selector)>;
    }
  }
}

// Selects one XVEXTRINS_D for a pair of element moves from $xj into $xd.
// For every source position SrcLane and destination position DstLane in
// 0...1, the matched DAG inserts $xj[SrcLane] at DstLane and then
// $xj[SrcLane + 2] at DstLane + 2.
//   vecty  - vector type being matched.
//   elemty - type the extracted element is produced as.
multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
  foreach SrcLane = 0...1 in {
    foreach DstLane = 0...1 in {
      // Partner positions two elements further along the vector.
      defvar SrcLaneHi = !add(SrcLane, 2);
      defvar DstLaneHi = !add(DstLane, 2);
      // Instruction immediate: destination position shifted left by 4,
      // OR-ed with the source position.
      defvar Selector = !or(SrcLane, !shl(DstLane, 4));
      def : Pat<(vector_insert
                    (vector_insert vecty:$xd,
                        (elemty (vector_extract vecty:$xj, SrcLane)),
                        DstLane),
                    (elemty (vector_extract vecty:$xj, SrcLaneHi)),
                    DstLaneHi),
                (XVEXTRINS_D $xd, $xj, Selector)>;
    }
  }
}

let Predicates = [HasExtLASX] in {

// XVADD_{B/H/W/D}
Expand Down Expand Up @@ -1582,6 +1608,38 @@ defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;

// Insert two elements extracted from a vector into a vector. (The two
// elements must occupy the same position within the lower and the upper
// 128 bits, both in the source vector and in the destination vector.)
// 2*XVPICKVE2GR_{W/D} + 2*XVINSGR2VR_{W/D} -> XVEXTRINS_{W/D}
// XVPERMI_D + 2*XVPICKVE2GR_{B/H} + 2*PseudoXVINSGR2VR_{B/H} -> XVEXTRINS_{B/H}
// v32i8: for every source position SrcLane and destination position DstLane
// in 0...15, match inserting $xj[SrcLane] at DstLane and $xj[SrcLane + 16] at
// DstLane + 16 of $xd, and select a single XVEXTRINS_B.
foreach SrcLane = 0...15 in {
  foreach DstLane = 0...15 in {
    // Partner positions sixteen elements further along the vector.
    defvar SrcLaneHi = !add(SrcLane, 16);
    defvar DstLaneHi = !add(DstLane, 16);
    // Instruction immediate: destination position shifted left by 4,
    // OR-ed with the source position.
    defvar Selector = !or(SrcLane, !shl(DstLane, 4));
    def : Pat<(vector_insert
                  (vector_insert v32i8:$xd,
                      (GRLenVT (vector_extract v32i8:$xj, SrcLane)),
                      DstLane),
                  (GRLenVT (vector_extract v32i8:$xj, SrcLaneHi)),
                  DstLaneHi),
              (XVEXTRINS_B $xd, $xj, Selector)>;
  }
}

// v16i16: for every source position SrcLane and destination position DstLane
// in 0...7, match inserting $xj[SrcLane] at DstLane and $xj[SrcLane + 8] at
// DstLane + 8 of $xd, and select a single XVEXTRINS_H.
foreach SrcLane = 0...7 in {
  foreach DstLane = 0...7 in {
    // Partner positions eight elements further along the vector.
    defvar SrcLaneHi = !add(SrcLane, 8);
    defvar DstLaneHi = !add(DstLane, 8);
    // Instruction immediate: destination position shifted left by 4,
    // OR-ed with the source position.
    defvar Selector = !or(SrcLane, !shl(DstLane, 4));
    def : Pat<(vector_insert
                  (vector_insert v16i16:$xd,
                      (GRLenVT (vector_extract v16i16:$xj, SrcLane)),
                      DstLane),
                  (GRLenVT (vector_extract v16i16:$xj, SrcLaneHi)),
                  DstLaneHi),
              (XVEXTRINS_H $xd, $xj, Selector)>;
  }
}

// Instantiate the paired insert/extract patterns for the 8-element and
// 4-element vector types. Integer vectors extract their elements as GRLenVT;
// floating-point vectors extract them as f32 / f64.
defm : PairInsertExtractPatV8<v8i32, GRLenVT>;
defm : PairInsertExtractPatV8<v8f32, f32>;
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
defm : PairInsertExtractPatV4<v4f64, f64>;

// PseudoXVINSGR2VR_{B/H}
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
Expand All @@ -1593,11 +1651,14 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
(XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
(XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;

def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
(XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
(XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
def : Pat<(vector_insert v8f32:$xd, (f32 (vector_extract v8f32:$xj, uimm3:$imm1)), uimm3:$imm2),
(XVINSGR2VR_W $xd, (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm1), uimm3:$imm2)>;
def : Pat<(vector_insert v4f64:$xd, (f64 (vector_extract v4f64:$xj, uimm2:$imm1)), uimm2:$imm2),
(XVINSGR2VR_D $xd, (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm1), uimm2:$imm2)>;
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
(XVINSGR2VR_W $xd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
def : Pat<(vector_insert v4f64:$xd, FPR64:$fj, uimm2:$imm),
(XVINSGR2VR_D $xd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;

// scalar_to_vector
def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
Expand Down Expand Up @@ -1791,6 +1852,18 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
}

// Vector extraction with constant index.
// v32i8, constant indices 16...31: run XVPERMI_D with immediate 14 on the
// source, take its sub_128 subregister and pick the element with
// VPICKVE2GR_B.
// NOTE(review): XVPERMI_D 14 presumably moves the upper 128 bits into the
// lower half - confirm against the ISA manual.
foreach ElemIdx = 16...31 in {
  // Index passed to VPICKVE2GR_B: the low four bits of the element index.
  defvar HalfIdx = !and(ElemIdx, 15);
  def : Pat<(i64 (vector_extract v32i8:$xj, ElemIdx)),
            (VPICKVE2GR_B
                (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128),
                HalfIdx)>;
}
// v16i16, constant indices 8...15: run XVPERMI_D with immediate 14 on the
// source, take its sub_128 subregister and pick the element with
// VPICKVE2GR_H.
// NOTE(review): XVPERMI_D 14 presumably moves the upper 128 bits into the
// lower half - confirm against the ISA manual.
foreach ElemIdx = 8...15 in {
  // Index passed to VPICKVE2GR_H: the low three bits of the element index.
  defvar HalfIdx = !and(ElemIdx, 7);
  def : Pat<(i64 (vector_extract v16i16:$xj, ElemIdx)),
            (VPICKVE2GR_H
                (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128),
                HalfIdx)>;
}
def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
Expand Down
48 changes: 47 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,28 @@ multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
(Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
}

// Selects VEXTRINS_W for inserting into $vd an element extracted from $vj.
// One pattern is generated for every source position SrcLane and destination
// position DstLane in 0...3.
//   vecty  - vector type being matched.
//   elemty - type the extracted element is produced as.
multiclass InsertExtractPatV4<ValueType vecty, ValueType elemty> {
  foreach SrcLane = 0...3 in {
    foreach DstLane = 0...3 in {
      // Instruction immediate: destination position shifted left by 4,
      // OR-ed with the source position.
      defvar Selector = !or(SrcLane, !shl(DstLane, 4));
      def : Pat<(vector_insert vecty:$vd,
                    (elemty (vector_extract vecty:$vj, SrcLane)),
                    DstLane),
                (VEXTRINS_W $vd, $vj, Selector)>;
    }
  }
}

// Selects VEXTRINS_D for inserting into $vd an element extracted from $vj.
// One pattern is generated for every source position SrcLane and destination
// position DstLane in 0...1.
//   vecty  - vector type being matched.
//   elemty - type the extracted element is produced as.
multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
  foreach SrcLane = 0...1 in {
    foreach DstLane = 0...1 in {
      // Instruction immediate: destination position shifted left by 4,
      // OR-ed with the source position.
      defvar Selector = !or(SrcLane, !shl(DstLane, 4));
      def : Pat<(vector_insert vecty:$vd,
                    (elemty (vector_extract vecty:$vj, SrcLane)),
                    DstLane),
                (VEXTRINS_D $vd, $vj, Selector)>;
    }
  }
}

let Predicates = [HasExtLSX] in {

// VADD_{B/H/W/D}
Expand Down Expand Up @@ -1782,6 +1804,31 @@ defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
defm : PatCCVrVrF<SETO, "VFCMP_COR">;
defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;

// Insert element extracted from vector into vector.
// VPICKVE2GR_{B/H/W/D} + VINSGR2VR_{B/H/W/D} -> VEXTRINS_{B/H/W/D}
// v16i8: for every source position SrcLane and destination position DstLane
// in 0...15, select VEXTRINS_B for inserting $vj[SrcLane] into $vd at
// DstLane.
foreach SrcLane = 0...15 in {
  foreach DstLane = 0...15 in {
    // Instruction immediate: destination position shifted left by 4,
    // OR-ed with the source position.
    defvar Selector = !or(SrcLane, !shl(DstLane, 4));
    def : Pat<(vector_insert v16i8:$vd,
                  (GRLenVT (vector_extract v16i8:$vj, SrcLane)),
                  DstLane),
              (VEXTRINS_B $vd, $vj, Selector)>;
  }
}

// v8i16: for every source position SrcLane and destination position DstLane
// in 0...7, select VEXTRINS_H for inserting $vj[SrcLane] into $vd at DstLane.
foreach SrcLane = 0...7 in {
  foreach DstLane = 0...7 in {
    // Instruction immediate: destination position shifted left by 4,
    // OR-ed with the source position.
    defvar Selector = !or(SrcLane, !shl(DstLane, 4));
    def : Pat<(vector_insert v8i16:$vd,
                  (GRLenVT (vector_extract v8i16:$vj, SrcLane)),
                  DstLane),
              (VEXTRINS_H $vd, $vj, Selector)>;
  }
}

// Instantiate the insert/extract patterns for the 4-element and 2-element
// vector types. Integer vectors extract their elements as GRLenVT;
// floating-point vectors extract them as f32 / f64.
defm : InsertExtractPatV4<v4i32, GRLenVT>;
defm : InsertExtractPatV4<v4f32, f32>;
defm : InsertExtractPatV2<v2i64, GRLenVT>;
defm : InsertExtractPatV2<v2f64, f64>;

// VINSGR2VR_{B/H/W/D}
def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
(VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
Expand All @@ -1791,7 +1838,6 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
(VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
(VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;

def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
(VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
Expand Down
12 changes: 2 additions & 10 deletions llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,12 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: shufflevector_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.d $fa2, $a0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2
; CHECK-NEXT: movgr2fr.d $fa3, $a0
; CHECK-NEXT: movfr2gr.d $a0, $fa2
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 0
; CHECK-NEXT: movfr2gr.d $a0, $fa3
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3
; CHECK-NEXT: movgr2fr.d $fa1, $a0
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 2
; CHECK-NEXT: movfr2gr.d $a0, $fa1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 3
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
; CHECK-NEXT: ret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,9 @@
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: insert_extract_v32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: ld.b $a0, $sp, 31
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 15
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
entry:
%b = extractelement <32 x i8> %a, i32 31
Expand All @@ -26,18 +17,9 @@ entry:
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: insert_extract_v16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: ld.h $a0, $sp, 30
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
entry:
%b = extractelement <16 x i16> %a, i32 15
Expand All @@ -61,8 +43,6 @@ define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
; CHECK-LABEL: insert_extract_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
; CHECK-NEXT: movgr2fr.w $fa1, $a0
; CHECK-NEXT: movfr2gr.s $a0, $fa1
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
; CHECK-NEXT: ret
entry:
Expand All @@ -87,8 +67,6 @@ define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
; CHECK-LABEL: insert_extract_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa1, $a0
; CHECK-NEXT: movfr2gr.d $a0, $fa1
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
; CHECK-NEXT: ret
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,7 @@
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: insert_extract_v32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: ld.b $a1, $sp, 31
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: xvextrins.b $xr0, $xr0, 31
; CHECK-NEXT: ret
entry:
%b_lo = extractelement <32 x i8> %a, i32 15
Expand All @@ -33,23 +17,7 @@ entry:
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: insert_extract_v16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: ld.h $a1, $sp, 30
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: xvextrins.h $xr0, $xr0, 23
; CHECK-NEXT: ret
entry:
%b_lo = extractelement <16 x i16> %a, i32 7
Expand All @@ -62,10 +30,7 @@ entry:
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: insert_extract_v8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
; CHECK-NEXT: xvextrins.w $xr0, $xr0, 19
; CHECK-NEXT: ret
entry:
%b_lo = extractelement <8 x i32> %a, i32 3
Expand All @@ -78,14 +43,7 @@ entry:
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
; CHECK-LABEL: insert_extract_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.w $fa1, $a0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
; CHECK-NEXT: movgr2fr.w $fa2, $a0
; CHECK-NEXT: movfr2gr.s $a0, $fa1
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
; CHECK-NEXT: movfr2gr.s $a0, $fa2
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
; CHECK-NEXT: xvextrins.w $xr0, $xr0, 19
; CHECK-NEXT: ret
entry:
%b_lo = extractelement <8 x float> %a, i32 3
Expand All @@ -98,10 +56,7 @@ entry:
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: insert_extract_v4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%b_lo = extractelement <4 x i64> %a, i32 1
Expand All @@ -114,14 +69,7 @@ entry:
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
; CHECK-LABEL: insert_extract_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.d $fa1, $a0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa2, $a0
; CHECK-NEXT: movfr2gr.d $a0, $fa1
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
; CHECK-NEXT: movfr2gr.d $a0, $fa2
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%b_lo = extractelement <4 x double> %a, i32 1
Expand Down
Loading