4 changes: 4 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
@@ -349,6 +349,10 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_MultiCoreJitNoProfileGather, W("MultiCoreJitNo
 
 #endif
 
+#ifdef TARGET_ARM64
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitUseScalableVectorT, W("JitUseScalableVectorT"), 0, "Accelerate Vector<T> with SVE if available.")
+#endif
+
 ///
 /// Loader heap
 ///
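Note (editor's sketch, not part of the diff): RETAIL_CONFIG_DWORD_INFO knobs are normally read through CLRConfig, and EXTERNAL knobs can be set from the environment, here as DOTNET_JitUseScalableVectorT=1. The call site that consumes this knob is not shown in the loaded hunks, so the helper name below is hypothetical.

    bool UseScalableVectorT()
    {
        // Defaults to 0 (off) per the definition above; any non-zero value opts in.
        return CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitUseScalableVectorT) != 0;
    }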
1 change: 1 addition & 0 deletions src/coreclr/inc/corhdr.h
@@ -1761,6 +1761,7 @@ typedef enum CorInfoHFAElemType : unsigned {
     CORINFO_HFA_ELEM_DOUBLE,
     CORINFO_HFA_ELEM_VECTOR64,
     CORINFO_HFA_ELEM_VECTOR128,
+    CORINFO_HFA_ELEM_VECTORT,
 } CorInfoHFAElemType;
 
 //
24 changes: 20 additions & 4 deletions src/coreclr/jit/codegenarm64.cpp
@@ -362,6 +362,15 @@ bool CodeGen::genInstrWithConstant(instruction ins,
             immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
             break;
 
+        case INS_sve_ldr:
+        case INS_sve_str:
+        {
+            assert(size == EA_SCALABLE);
+            ssize_t count = imm / genTypeSize(TYP_SIMDSV);
+            immFitsInIns  = (-256 <= count && count < 256);
+        }
+        break;
+
         default:
            assert(!"Unexpected instruction in genInstrWithConstant");
            break;
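Note: SVE LDR/STR (vector) encodes its offset as a signed multiple of the vector length, with the multiplier in [-256, 255]; that is what the count computation above checks. A standalone sketch with worked values, assuming the offset is already VL-aligned as the code above does:

    #include <cstdint>

    bool SveLdStOffsetFits(int64_t byteOffset, int64_t vectorByteLength)
    {
        int64_t count = byteOffset / vectorByteLength; // offset in whole vectors
        return (-256 <= count) && (count < 256);
    }
    // VL = 16: byteOffset 4080 -> count 255, fits; 4096 -> count 256, needs a scratch register.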
@@ -2075,10 +2084,14 @@ void CodeGen::instGen_Set_Reg_To_Base_Plus_Imm(emitAttr size,
     // If the imm value fits in 12 bits, we can use a single "add rsvd, reg2, #imm".
     // Otherwise, use "mov rsvd, #imm", followed by "add rsvd, reg2, rsvd".
 
-    if (imm < 4096)
+    if (0 <= imm && imm < 4096)
     {
         GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, dstReg, baseReg, imm);
     }
+    else if (-4095 <= imm && imm < 0)
+    {
+        GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, dstReg, baseReg, -imm);
+    }
     else
     {
         instGen_Set_Reg_To_Imm(size, dstReg, imm);
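Note: with the new negative branch the helper now covers three immediate ranges. A minimal sketch of the selection logic, printing pseudo-assembly instead of emitting instructions:

    #include <cstdio>

    void SetRegToBasePlusImm(long imm)
    {
        if (0 <= imm && imm < 4096)
            printf("add dst, base, #%ld\n", imm);  // fits the add immediate field
        else if (-4095 <= imm && imm < 0)
            printf("sub dst, base, #%ld\n", -imm); // negated, fits the sub immediate field
        else
            printf("mov rsvd, #%ld\nadd dst, base, rsvd\n", imm); // materialize first
    }
    // SetRegToBasePlusImm(16)    -> add dst, base, #16
    // SetRegToBasePlusImm(-32)   -> sub dst, base, #32
    // SetRegToBasePlusImm(70000) -> mov rsvd, #70000; add dst, base, rsvd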
@@ -2274,6 +2287,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
 
     switch (tree->TypeGet())
     {
+        case TYP_SIMDSV:
+            attr = EA_16BYTE; // TODO-SVE: Implement scalable vector constant
+            FALLTHROUGH;
         case TYP_SIMD8:
         case TYP_SIMD12:
         case TYP_SIMD16:
@@ -2999,7 +3015,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode)
         }
     }
     emitAttr attr = emitActualTypeSize(targetType);
-    GetEmitter()->emitIns_Mov(INS_mov, attr, retReg, op1->GetRegNum(), /* canSkip */ !movRequired);
+    inst_Mov(targetType, retReg, op1->GetRegNum(), !movRequired, attr);
 }
 
 /***********************************************************************************************
@@ -5306,7 +5322,7 @@ void CodeGen::genSimdUpperSave(GenTreeIntrinsic* node)
 
     GenTreeLclVar* lclNode = op1->AsLclVar();
     LclVarDsc*     varDsc  = compiler->lvaGetDesc(lclNode);
-    assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
+    assert(varDsc->TypeIs(TYP_STRUCT, TYP_SIMD12, TYP_SIMD16, TYP_SIMDSV)); // TODO-SVE: Handle AAPCS for Z registers
 
     regNumber tgtReg = node->GetRegNum();
     assert(tgtReg != REG_NA);
@@ -5362,7 +5378,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
 
     GenTreeLclVar* lclNode = op1->AsLclVar();
     LclVarDsc*     varDsc  = compiler->lvaGetDesc(lclNode);
-    assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16);
+    assert(varDsc->TypeIs(TYP_STRUCT, TYP_SIMD12, TYP_SIMD16, TYP_SIMDSV)); // TODO-SVE: Handle AAPCS for Z registers
 
     regNumber srcReg = node->GetRegNum();
     assert(srcReg != REG_NA);
7 changes: 6 additions & 1 deletion src/coreclr/jit/codegenarmarch.cpp
@@ -809,8 +809,13 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
 #endif // TARGET_ARM64
         {
             emitAttr storeAttr = emitTypeSize(source->TypeGet());
-            emit->emitIns_S_R(INS_str, storeAttr, srcReg, varNumOut, argOffsetOut);
+            emit->emitIns_S_R(ins_Store(source->TypeGet()), storeAttr, srcReg, varNumOut, argOffsetOut);
+#ifdef TARGET_ARM64
+            argOffsetOut +=
+                storeAttr == EA_SCALABLE ? compiler->getVectorTByteLength() : EA_SIZE_IN_BYTES(storeAttr);
+#else
             argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+#endif
         }
         assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
         return;
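Note: EA_SCALABLE deliberately has no fixed EA_SIZE_IN_BYTES, so the outgoing-argument cursor must advance by the Vector<T> length the EE reported. A sketch of the bookkeeping, with vectorTByteLength standing in for the Compiler::getVectorTByteLength() query:

    unsigned AdvanceArgOffset(unsigned argOffsetOut, bool isScalable, unsigned fixedSize, unsigned vectorTByteLength)
    {
        // e.g. VL = 16 -> advance by 16 bytes; VL = 32 -> advance by 32 bytes
        return argOffsetOut + (isScalable ? vectorTByteLength : fixedSize);
    }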
3 changes: 2 additions & 1 deletion src/coreclr/jit/codegencommon.cpp
@@ -3266,6 +3266,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
 #if defined(TARGET_ARM64)
                 // On arm64 SIMD parameters are HFAs and passed in multiple float
                 // registers while we can enregister them as single registers.
+                // TODO-SVE: Ensure this works for Z registers as well.
                 GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(edge->type), node->reg, sourceReg,
                                               edge->destOffset / genTypeSize(edge->type), 0);
 #elif defined(UNIX_AMD64_ABI)
@@ -5906,7 +5907,7 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
     var_types hfaType   = GetHfaType(hClass);
     unsigned  classSize = info.compCompHnd->getClassSize(hClass);
     // Note that the retail build issues a warning about a potential division by zero without the Max function
-    unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+    unsigned elemSize = Max((unsigned)1, genTypeSize(genActualType(hfaType)));
     return classSize / elemSize;
 #endif // TARGET_ARM64
 }
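Note: switching from emitActualTypeSize to genTypeSize keeps the division in the type-size table, which the compiler.cpp hunk below initializes from the EE before compilation, so scalable element types divide correctly. A worked example under that assumption:

    // struct Quad { float x, y, z, w; } as an HFA:
    //   hfaType = TYP_FLOAT, genTypeSize(genActualType(TYP_FLOAT)) = 4
    //   classSize = 16, so GetHfaCount(hClass) = 16 / 4 = 4 registers.
    // With hfaType = TYP_SIMDSV and VL = 32: classSize 64 / 32 = 2 Z registers.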
11 changes: 11 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
@@ -2269,8 +2269,19 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
         genLongToIntCast(tree);
     }
 #endif // !TARGET_64BIT
+#ifdef TARGET_ARM64
+    else if (targetType == TYP_SIMDSV || tree->gtOp1->TypeGet() == TYP_SIMDSV)
+    {
+        // TODO-SVE: Can we avoid generating these casts altogether?
+        assert(genTypeSize(tree->CastToType()) == genTypeSize(tree->CastFromType()));
+        genConsumeOperands(tree);
+        inst_Mov(tree->CastToType(), tree->GetRegNum(), tree->gtOp1->GetRegNum(), true);
+        genProduceReg(tree);
+    }
+#endif
     else
     {
+        assert(varTypeIsIntegral(targetType) && varTypeIsIntegral(tree->gtOp1));
         // Casts int <--> int
         genIntToIntCast(tree->AsCast());
     }
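Note: the new branch treats a cast to or from TYP_SIMDSV as a pure bit-reinterpretation, so it lowers to a register-to-register move and never widens or narrows; for instance a Vector<float> to Vector128<float> cast is only size-preserving when the runtime vector length is 16 bytes. A sketch of the invariant the assert encodes:

    #include <cassert>

    void CheckScalableCast(unsigned toSize, unsigned fromSize)
    {
        // toSize/fromSize stand in for genTypeSize of CastToType/CastFromType.
        assert(toSize == fromSize); // reinterpretation only
    }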
38 changes: 30 additions & 8 deletions src/coreclr/jit/compiler.cpp
@@ -107,11 +107,12 @@ inline bool _our_GetThreadCycles(uint64_t* cycleOut)
 
 #endif // which host OS
 
-const BYTE genTypeSizes[] = {
+BYTE _initGenTypeSizes[] = {
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) sz,
 #include "typelist.h"
 #undef DEF_TP
 };
+const BYTE (&genTypeSizes)[TYP_COUNT] = _initGenTypeSizes;
 
 const BYTE genTypeAlignments[] = {
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) al,
@@ -609,13 +610,18 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
     // Start by determining if we have an HFA/HVA with a single element.
     if (GlobalJitOptions::compFeatureHfa)
     {
-        switch (structSize)
-        {
-            case 4:
-            case 8:
+        if (structSize == 4 ||
+            structSize == 8
 #ifdef TARGET_ARM64
-            case 16:
-#endif // TARGET_ARM64
+            // Can pass in V register if structSize == 16, and Z registers for structs with sizes in
+            // multiples of 16 bytes, depending on hardware availability.
+            || structSize == 16 || ((structSize % 16 == 0) && (structSize == genTypeSize(TYP_SIMDSV)))
+#endif
+        )
+        {
+            var_types hfaType = GetHfaType(clsHnd);
+            // We're only interested in the case where the struct size is equal to the size of the hfaType.
+            if (varTypeIsValidHfaType(hfaType))
             {
                 var_types hfaType = GetHfaType(clsHnd);
                 // We're only interested in the case where the struct size is equal to the size of the hfaType.
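Note: worked values for the new size gate, with VL standing for genTypeSize(TYP_SIMDSV). A sketch mirroring the ARM64 shape of the condition above:

    bool PassesHfaSizeGate(unsigned structSize, unsigned vl)
    {
        return (structSize == 4) || (structSize == 8) || (structSize == 16) ||
               ((structSize % 16 == 0) && (structSize == vl));
    }
    // VL = 16: structSize 16 -> true (V register); 32 -> false (32 != 16)
    // VL = 32: structSize 32 -> true (Z register); 48 -> false (48 != 32)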
@@ -861,7 +867,15 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES
     // so we can skip calling getPrimitiveTypeForStruct when we
     // have a struct that is larger than that.
-    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
+    //
+    // On ARM64 we can pass structures in scalable vector registers
+    // which may allow larger structures on some hardware.
+#ifdef TARGET_ARM64
+    unsigned maxStructSize = max((unsigned)MAX_PASS_SINGLEREG_BYTES, getVectorTByteLength());
+#else
+    unsigned maxStructSize = MAX_PASS_SINGLEREG_BYTES;
+#endif
+    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= maxStructSize))
     {
         // We set the "primitive" useType based upon the structSize
         // and also examine the clsHnd to see if it is an HFA of count one
@@ -7084,6 +7098,14 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     }
 #endif
 
+#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
+    // Initialize the size of Vector<T> from the EE.
+    _initGenTypeSizes[TYP_SIMDSV] = getVectorTByteLength();
+    _initGenTypeSizes[TYP_MASK]   = getMaskByteLength();
+    assert(genTypeSize(TYP_SIMDSV) >= 16);
+    assert(genTypeSize(TYP_MASK) >= 2);
+#endif
+
     compCompile(methodCodePtr, methodCodeSize, compileFlags);
 
 #ifdef DEBUG
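Note: the genTypeSizes change swaps a const array for a writable backing array plus a const reference, so the scalable entries can be written once per compilation (in compCompileHelper above) while every other reader still binds to a const table. The pattern in miniature (a sketch, not JIT code):

    static unsigned char sizesBacking[3] = {4, 8, 0}; // last slot patched at startup
    const unsigned char (&sizes)[3] = sizesBacking;   // readers see a const view

    void InitScalableSize(unsigned char vectorByteLength)
    {
        sizesBacking[2] = vectorByteLength; // e.g. 16 or 32, as reported by the EE
    }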
22 changes: 20 additions & 2 deletions src/coreclr/jit/compiler.h
@@ -157,6 +157,10 @@ inline var_types HfaTypeFromElemKind(CorInfoHFAElemType kind)
             return TYP_SIMD8;
         case CORINFO_HFA_ELEM_VECTOR128:
             return TYP_SIMD16;
+#ifdef TARGET_ARM64
+        case CORINFO_HFA_ELEM_VECTORT:
+            return TYP_SIMDSV;
+#endif
 #endif
         case CORINFO_HFA_ELEM_NONE:
             return TYP_UNDEF;
@@ -178,6 +182,10 @@ inline CorInfoHFAElemType HfaElemKindFromType(var_types type)
             return CORINFO_HFA_ELEM_VECTOR64;
         case TYP_SIMD16:
             return CORINFO_HFA_ELEM_VECTOR128;
+#ifdef TARGET_ARM64
+        case TYP_SIMDSV:
+            return CORINFO_HFA_ELEM_VECTORT;
+#endif
 #endif
         case TYP_UNDEF:
             return CORINFO_HFA_ELEM_NONE;
@@ -8212,7 +8220,7 @@ class Compiler
         assert(type != TYP_STRUCT);
         // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
         // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
-        return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
+        return ((type == TYP_SIMDSV) || (type == TYP_SIMD16) || (type == TYP_SIMD12));
     }
 #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
 #error("Unknown target architecture for FEATURE_PARTIAL_SIMD_CALLEE_SAVE")
@@ -9164,6 +9172,16 @@ class Compiler
 #endif
     }
 
+#ifdef TARGET_ARM64
+    uint32_t getMaskByteLength()
+    {
+        // Predicate registers have 1 bit for each byte in the vector register.
+        // We round up to an int as the CLR prefers to work in integers.
+        assert((getVectorTByteLength() % 8) == 0);
+        return (uint32_t)roundUp((size_t)getVectorTByteLength() / 8, sizeof(int));
+    }
+#endif
+
     // The minimum and maximum possible number of bytes in a SIMD vector.
 
     // getMaxVectorByteLength
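Note: a predicate register carries one bit per vector byte, so its raw size is VL/8 bytes, rounded up to a whole int. Worked values in a standalone sketch:

    #include <cstdint>

    uint32_t MaskBytes(uint32_t vectorBytes)
    {
        uint32_t raw = vectorBytes / 8; // one predicate bit per vector byte
        return (raw + 3u) & ~3u;        // roundUp(raw, sizeof(int))
    }
    // VL = 16 -> 2 raw bytes -> 4; VL = 32 -> 4 -> 4; VL = 64 -> 8 -> 8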
@@ -12407,7 +12425,7 @@ const instruction INS_BREAKPOINT = INS_ebreak;
 
 /*****************************************************************************/
 
-extern const BYTE genTypeSizes[];
+extern const BYTE (&genTypeSizes)[TYP_COUNT];
 extern const BYTE genTypeAlignments[];
 extern const BYTE genTypeStSzs[];
 extern const BYTE genActualTypes[];
8 changes: 6 additions & 2 deletions src/coreclr/jit/compiler.hpp
@@ -1137,13 +1137,12 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask)
  * Return the size in bytes of the given type.
  */
 
-extern const BYTE genTypeSizes[TYP_COUNT];
+extern const BYTE (&genTypeSizes)[TYP_COUNT];
 
 template <class T>
 inline unsigned genTypeSize(T value)
 {
     assert((unsigned)TypeGet(value) < ArrLen(genTypeSizes));
-
     return genTypeSizes[TypeGet(value)];
 }
@@ -1158,6 +1157,11 @@ extern const BYTE genTypeStSzs[TYP_COUNT];
 template <class T>
 inline unsigned genTypeStSz(T value)
 {
+#ifdef TARGET_ARM64
+    // The size of these types cannot be evaluated in static contexts.
+    noway_assert(TypeGet(value) != TYP_SIMDSV);
+    noway_assert(TypeGet(value) != TYP_MASK);
+#endif
     assert((unsigned)TypeGet(value) < ArrLen(genTypeStSzs));
 
     return genTypeStSzs[TypeGet(value)];
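Note: genTypeStSzs is a statically initialized table of stack-slot counts, so it can never hold a correct entry for TYP_SIMDSV or TYP_MASK, whose sizes are only known once the EE reports the vector length; the noway_asserts turn any such lookup into a hard failure. A caller needing slot counts for a scalable type would presumably derive them from the live size, along these lines:

    unsigned StackSlotsForSize(unsigned byteSize, unsigned slotSize)
    {
        // e.g. byteSize = genTypeSize(TYP_SIMDSV) at runtime, slotSize = TARGET_POINTER_SIZE
        return (byteSize + slotSize - 1) / slotSize;
    }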
34 changes: 17 additions & 17 deletions src/coreclr/jit/emitarm64.cpp
@@ -4266,6 +4266,13 @@ void emitter::emitIns_Mov(
         {
             assert(insOptsNone(opt));
 
+            if (attr == EA_SCALABLE)
+            {
+                // A NEON mov is acceptable for scalable vectors when the vector length is 128 bits (16 bytes).
+                // TODO-SVE: This should not be permitted once Vector<T> has been migrated to SVE.
+                assert(codeGen->compiler->getVectorTByteLength() == 16);
+            }
+
             if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
             {
                 // These instructions have no side effect and can be skipped
@@ -4340,6 +4347,7 @@ void emitter::emitIns_Mov(
         case INS_fmov:
         {
             assert(isValidVectorElemsizeFloat(size));
+            assert(attr != EA_SCALABLE);
 
             if (canSkip && (dstReg == srcReg))
             {
@@ -4387,35 +4395,22 @@ void emitter::emitIns_Mov(
 
         case INS_sve_mov:
         {
+            assert(attr == EA_SCALABLE);
             if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
             {
                 assert((opt == INS_OPTS_SCALABLE_B) || insOptsNone(opt));
-                opt  = INS_OPTS_SCALABLE_B;
-                attr = EA_SCALABLE;
-
-                if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-                {
-                    return;
-                }
+                opt = INS_OPTS_SCALABLE_B;
                 fmt = IF_SVE_CZ_4A_L;
             }
             else if (isVectorRegister(dstReg) && isVectorRegister(srcReg))
             {
-                assert(insOptsScalable(opt));
-
-                if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-                {
-                    return;
-                }
+                assert(insOptsScalable(opt) || insOptsNone(opt));
+                opt = INS_OPTS_SCALABLE_D;
                 fmt = IF_SVE_AU_3A;
             }
             else if (isVectorRegister(dstReg) && isGeneralRegisterOrSP(srcReg))
             {
                 assert(insOptsScalable(opt));
-                if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-                {
-                    return;
-                }
                 srcReg = encodingSPtoZR(srcReg);
                 fmt = IF_SVE_CB_2A;
             }
@@ -4424,6 +4419,11 @@ void emitter::emitIns_Mov(
                 unreached();
             }
 
+            if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
+            {
+                return;
+            }
+
             break;
         }
         default: