cpu-o3: align amo operation's latency (#300)

The atomic instructions in the XS RTL are implemented using a state machine (requiring 10+ cycles), whereas the previous gem5 implementation assigned atomic instructions a fixed latency (previously set to 2 cycles), so the latency configurations of the two implementations were mismatched. This PR aligns the atomic instruction latency in gem5 with the RTL implementation (currently set to 13 cycles). This latency was obtained from a microbenchmark test that uses AMO instructions.

Change-Id: I4cfc42397edc6234b5013a634806bd48a479c49c
OpenXiangShan · Feb 24, 2025 · dd4101d · dd4101d
1 parent 9714711
commit dd4101d
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 18 deletions.
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
@@ -1031,63 +1031,63 @@ decode QUADRANT default Unknown::unknown() {
                     TypedAtomicOpFunctor<int32_t> *amo_op =
                           new AtomicGenericOp<int32_t>(Rs2_sw,
                                   [](int32_t* b, int32_t a){ *b += a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x1: AtomicMemOp::amoswap_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<uint32_t> *amo_op =
                           new AtomicGenericOp<uint32_t>(Rs2_uw,
                                   [](uint32_t* b, uint32_t a){ *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x4: AtomicMemOp::amoxor_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<uint32_t> *amo_op =
                           new AtomicGenericOp<uint32_t>(Rs2_uw,
                                   [](uint32_t* b, uint32_t a){ *b ^= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x8: AtomicMemOp::amoor_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<uint32_t> *amo_op =
                           new AtomicGenericOp<uint32_t>(Rs2_uw,
                                   [](uint32_t* b, uint32_t a){ *b |= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0xc: AtomicMemOp::amoand_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<uint32_t> *amo_op =
                           new AtomicGenericOp<uint32_t>(Rs2_uw,
                                   [](uint32_t* b, uint32_t a){ *b &= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x10: AtomicMemOp::amomin_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<int32_t> *amo_op =
                       new AtomicGenericOp<int32_t>(Rs2_sw,
                         [](int32_t* b, int32_t a){ if (a < *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x14: AtomicMemOp::amomax_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<int32_t> *amo_op =
                       new AtomicGenericOp<int32_t>(Rs2_sw,
                         [](int32_t* b, int32_t a){ if (a > *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x18: AtomicMemOp::amominu_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<uint32_t> *amo_op =
                       new AtomicGenericOp<uint32_t>(Rs2_uw,
                         [](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x1c: AtomicMemOp::amomaxu_w({{
                     Rd_sd = Mem_sw;
                 }}, {{
                     TypedAtomicOpFunctor<uint32_t> *amo_op =
                       new AtomicGenericOp<uint32_t>(Rs2_uw,
                         [](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
             }
             0x3: decode AMOFUNCT {
                 0x2: LoadReserved::lr_d({{
@@ -1104,63 +1104,63 @@ decode QUADRANT default Unknown::unknown() {
                     TypedAtomicOpFunctor<int64_t> *amo_op =
                           new AtomicGenericOp<int64_t>(Rs2_sd,
                                   [](int64_t* b, int64_t a){ *b += a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x1: AtomicMemOp::amoswap_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<uint64_t> *amo_op =
                           new AtomicGenericOp<uint64_t>(Rs2_ud,
                                   [](uint64_t* b, uint64_t a){ *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x4: AtomicMemOp::amoxor_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<uint64_t> *amo_op =
                           new AtomicGenericOp<uint64_t>(Rs2_ud,
                                  [](uint64_t* b, uint64_t a){ *b ^= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x8: AtomicMemOp::amoor_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<uint64_t> *amo_op =
                           new AtomicGenericOp<uint64_t>(Rs2_ud,
                                  [](uint64_t* b, uint64_t a){ *b |= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0xc: AtomicMemOp::amoand_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<uint64_t> *amo_op =
                           new AtomicGenericOp<uint64_t>(Rs2_ud,
                                  [](uint64_t* b, uint64_t a){ *b &= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x10: AtomicMemOp::amomin_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<int64_t> *amo_op =
                       new AtomicGenericOp<int64_t>(Rs2_sd,
                         [](int64_t* b, int64_t a){ if (a < *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x14: AtomicMemOp::amomax_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<int64_t> *amo_op =
                       new AtomicGenericOp<int64_t>(Rs2_sd,
                         [](int64_t* b, int64_t a){ if (a > *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x18: AtomicMemOp::amominu_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<uint64_t> *amo_op =
                       new AtomicGenericOp<uint64_t>(Rs2_ud,
                         [](uint64_t* b, uint64_t a){ if (a < *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
                 0x1c: AtomicMemOp::amomaxu_d({{
                     Rd_sd = Mem_sd;
                 }}, {{
                     TypedAtomicOpFunctor<uint64_t> *amo_op =
                       new AtomicGenericOp<uint64_t>(Rs2_ud,
                         [](uint64_t* b, uint64_t a){ if (a > *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                }}, mem_flags=ATOMIC_RETURN_OP, inst_flags=MemAtomicOp);
             }
         }
         0x0c: decode FUNCT3 {

diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py
@@ -55,6 +55,7 @@ class OpClass(Enum):
             'SimdShaSigma3',
             'SimdPredAlu',
             'MemRead', 'MemWrite', 'FloatMemRead', 'FloatMemWrite',
+            'MemAtomic',
             'IprAccess', 'InstPrefetch', 'FMAMul', 'FMAAcc',
 
             'VectorUnitStrideLoad', 'VectorSegUnitStrideLoad',

diff --git a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py
@@ -150,6 +150,7 @@ class ReadPort(FUDesc):
 
 class WritePort(FUDesc):
     opList = [ OpDesc(opClass='MemWrite', opLat=2),
+               OpDesc(opClass='MemAtomic', opLat=13),
                OpDesc(opClass='FloatMemWrite', opLat=3),
                OpDesc(opClass='VectorUnitStrideStore'),
                OpDesc(opClass='VectorSegUnitStrideStore'),

diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh
@@ -103,6 +103,7 @@ static const OpClass SimdShaSigma3Op = enums::SimdShaSigma3;
 static const OpClass SimdPredAluOp = enums::SimdPredAlu;
 static const OpClass MemReadOp = enums::MemRead;
 static const OpClass MemWriteOp = enums::MemWrite;
+static const OpClass MemAtomicOp = enums::MemAtomic;
 static const OpClass FloatMemReadOp = enums::FloatMemRead;
 static const OpClass FloatMemWriteOp = enums::FloatMemWrite;
 static const OpClass IprAccessOp = enums::IprAccess;