Skip to content

Commit

Permalink
cpu-o3: align amo operation's latency (#300)
Browse files Browse the repository at this point in the history
In the XS RTL, atomic instructions are implemented with a state machine and take 10+ cycles, whereas gem5 previously assigned them a fixed latency of 2 cycles. As a result, the atomic instruction latencies of the two implementations did not match.

This PR aligns the atomic instruction latency in gem5 with the RTL implementation (currently set to 13 cycles). The latency value was obtained from a microbenchmark that exercises AMO instructions.

Change-Id: I4cfc42397edc6234b5013a634806bd48a479c49c
  • Loading branch information
happy-lx authored Feb 24, 2025
1 parent 9714711 commit dd4101d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 18 deletions.
36 changes: 18 additions & 18 deletions src/arch/riscv/isa/decoder.isa
Original file line number Diff line number Diff line change
Expand Up @@ -1031,63 +1031,63 @@ decode QUADRANT default Unknown::unknown() {
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ *b += a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x1: AtomicMemOp::amoswap_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x4: AtomicMemOp::amoxor_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b ^= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x8: AtomicMemOp::amoor_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b |= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0xc: AtomicMemOp::amoand_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b &= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x10: AtomicMemOp::amomin_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x14: AtomicMemOp::amomax_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x18: AtomicMemOp::amominu_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x1c: AtomicMemOp::amomaxu_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
}
0x3: decode AMOFUNCT {
0x2: LoadReserved::lr_d({{
Expand All @@ -1104,63 +1104,63 @@ decode QUADRANT default Unknown::unknown() {
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ *b += a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x1: AtomicMemOp::amoswap_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x4: AtomicMemOp::amoxor_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b ^= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x8: AtomicMemOp::amoor_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b |= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0xc: AtomicMemOp::amoand_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b &= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x10: AtomicMemOp::amomin_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x14: AtomicMemOp::amomax_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x18: AtomicMemOp::amominu_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
0x1c: AtomicMemOp::amomaxu_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
}
}
0x0c: decode FUNCT3 {
Expand Down
1 change: 1 addition & 0 deletions src/cpu/FuncUnit.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class OpClass(Enum):
'SimdShaSigma3',
'SimdPredAlu',
'MemRead', 'MemWrite', 'FloatMemRead', 'FloatMemWrite',
'MemAtomic',
'IprAccess', 'InstPrefetch', 'FMAMul', 'FMAAcc',

'VectorUnitStrideLoad', 'VectorSegUnitStrideLoad',
Expand Down
1 change: 1 addition & 0 deletions src/cpu/o3/FuncUnitConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ class ReadPort(FUDesc):

class WritePort(FUDesc):
opList = [ OpDesc(opClass='MemWrite', opLat=2),
OpDesc(opClass='MemAtomic', opLat=13),
OpDesc(opClass='FloatMemWrite', opLat=3),
OpDesc(opClass='VectorUnitStrideStore'),
OpDesc(opClass='VectorSegUnitStrideStore'),
Expand Down
1 change: 1 addition & 0 deletions src/cpu/op_class.hh
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ static const OpClass SimdShaSigma3Op = enums::SimdShaSigma3;
static const OpClass SimdPredAluOp = enums::SimdPredAlu;
static const OpClass MemReadOp = enums::MemRead;
static const OpClass MemWriteOp = enums::MemWrite;
static const OpClass MemAtomicOp = enums::MemAtomic;
static const OpClass FloatMemReadOp = enums::FloatMemRead;
static const OpClass FloatMemWriteOp = enums::FloatMemWrite;
static const OpClass IprAccessOp = enums::IprAccess;
Expand Down

0 comments on commit dd4101d

Please sign in to comment.